xref: /openbmc/linux/arch/s390/kvm/kvm-s390.c (revision 0e1234c02b77ef22d9cf78f86b98347ceb170090)
// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2020
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>
#include <linux/pgtable.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include <asm/ap.h>
#include <asm/uv.h>
#include <asm/fpu/api.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))
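/*
 * Sizing note (illustrative, not from the original source): the IRQ state
 * ioctls may have to report one pending emergency signal per possible
 * sending VCPU, so the worst case is KVM_MAX_VCPUS entries plus a budget
 * of LOCAL_IRQS entries for the other local interrupt types.
 */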

const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
	KVM_GENERIC_VM_STATS(),
	STATS_DESC_COUNTER(VM, inject_io),
	STATS_DESC_COUNTER(VM, inject_float_mchk),
	STATS_DESC_COUNTER(VM, inject_pfault_done),
	STATS_DESC_COUNTER(VM, inject_service_signal),
	STATS_DESC_COUNTER(VM, inject_virtio)
};

const struct kvm_stats_header kvm_vm_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vm_stats_desc),
};

const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
	KVM_GENERIC_VCPU_STATS(),
	STATS_DESC_COUNTER(VCPU, exit_userspace),
	STATS_DESC_COUNTER(VCPU, exit_null),
	STATS_DESC_COUNTER(VCPU, exit_external_request),
	STATS_DESC_COUNTER(VCPU, exit_io_request),
	STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
	STATS_DESC_COUNTER(VCPU, exit_stop_request),
	STATS_DESC_COUNTER(VCPU, exit_validity),
	STATS_DESC_COUNTER(VCPU, exit_instruction),
	STATS_DESC_COUNTER(VCPU, exit_pei),
	STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
	STATS_DESC_COUNTER(VCPU, instruction_lctl),
	STATS_DESC_COUNTER(VCPU, instruction_lctlg),
	STATS_DESC_COUNTER(VCPU, instruction_stctl),
	STATS_DESC_COUNTER(VCPU, instruction_stctg),
	STATS_DESC_COUNTER(VCPU, exit_program_interruption),
	STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
	STATS_DESC_COUNTER(VCPU, exit_operation_exception),
	STATS_DESC_COUNTER(VCPU, deliver_ckc),
	STATS_DESC_COUNTER(VCPU, deliver_cputm),
	STATS_DESC_COUNTER(VCPU, deliver_external_call),
	STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
	STATS_DESC_COUNTER(VCPU, deliver_service_signal),
	STATS_DESC_COUNTER(VCPU, deliver_virtio),
	STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
	STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
	STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
	STATS_DESC_COUNTER(VCPU, deliver_program),
	STATS_DESC_COUNTER(VCPU, deliver_io),
	STATS_DESC_COUNTER(VCPU, deliver_machine_check),
	STATS_DESC_COUNTER(VCPU, exit_wait_state),
	STATS_DESC_COUNTER(VCPU, inject_ckc),
	STATS_DESC_COUNTER(VCPU, inject_cputm),
	STATS_DESC_COUNTER(VCPU, inject_external_call),
	STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
	STATS_DESC_COUNTER(VCPU, inject_mchk),
	STATS_DESC_COUNTER(VCPU, inject_pfault_init),
	STATS_DESC_COUNTER(VCPU, inject_program),
	STATS_DESC_COUNTER(VCPU, inject_restart),
	STATS_DESC_COUNTER(VCPU, inject_set_prefix),
	STATS_DESC_COUNTER(VCPU, inject_stop_signal),
	STATS_DESC_COUNTER(VCPU, instruction_epsw),
	STATS_DESC_COUNTER(VCPU, instruction_gs),
	STATS_DESC_COUNTER(VCPU, instruction_io_other),
	STATS_DESC_COUNTER(VCPU, instruction_lpsw),
	STATS_DESC_COUNTER(VCPU, instruction_lpswe),
	STATS_DESC_COUNTER(VCPU, instruction_pfmf),
	STATS_DESC_COUNTER(VCPU, instruction_ptff),
	STATS_DESC_COUNTER(VCPU, instruction_sck),
	STATS_DESC_COUNTER(VCPU, instruction_sckpf),
	STATS_DESC_COUNTER(VCPU, instruction_stidp),
	STATS_DESC_COUNTER(VCPU, instruction_spx),
	STATS_DESC_COUNTER(VCPU, instruction_stpx),
	STATS_DESC_COUNTER(VCPU, instruction_stap),
	STATS_DESC_COUNTER(VCPU, instruction_iske),
	STATS_DESC_COUNTER(VCPU, instruction_ri),
	STATS_DESC_COUNTER(VCPU, instruction_rrbe),
	STATS_DESC_COUNTER(VCPU, instruction_sske),
	STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
	STATS_DESC_COUNTER(VCPU, instruction_stsi),
	STATS_DESC_COUNTER(VCPU, instruction_stfl),
	STATS_DESC_COUNTER(VCPU, instruction_tb),
	STATS_DESC_COUNTER(VCPU, instruction_tpi),
	STATS_DESC_COUNTER(VCPU, instruction_tprot),
	STATS_DESC_COUNTER(VCPU, instruction_tsch),
	STATS_DESC_COUNTER(VCPU, instruction_sie),
	STATS_DESC_COUNTER(VCPU, instruction_essa),
	STATS_DESC_COUNTER(VCPU, instruction_sthyi),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
	STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
	STATS_DESC_COUNTER(VCPU, diag_9c_forward),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
	STATS_DESC_COUNTER(VCPU, pfault_sync)
};

const struct kvm_stats_header kvm_vcpu_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vcpu_stats_desc),
};

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* allow 1m huge page guest backing, if !nested */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");

/* maximum percentage of steal time for polling.  >100 is treated like 100 */
static u8 halt_poll_max_steal = 10;
module_param(halt_poll_max_steal, byte, 0644);
MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");

/* if set to true, the GISA will be initialized and used if available */
static bool use_gisa  = true;
module_param(use_gisa, bool, 0644);
MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");

/* maximum diag9c forwarding per second */
unsigned int diag9c_forwarding_hz;
module_param(diag9c_forwarding_hz, uint, 0644);
MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");

/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go
 * beyond this, it will require code changes, but the external uapi can
 * stay the same.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };

static unsigned long kvm_s390_fac_size(void)
{
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
		sizeof(stfle_fac_list));

	return SIZE_INTERNAL;
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
debug_info_t *kvm_s390_dbf_uv;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

int kvm_arch_check_processor_compat(void *opaque)
{
	return 0;
}

/* forward declarations */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
static int sca_switch_to_extended(struct kvm *kvm);

static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
	u8 delta_idx = 0;

	/*
	 * The TOD jumps by delta; we have to compensate for this by
	 * adding -delta to the epoch.
	 */
	delta = -delta;

	/* sign-extension - we're adding to signed values below */
	if ((s64)delta < 0)
		delta_idx = -1;

	scb->epoch += delta;
	if (scb->ecd & ECD_MEF) {
		scb->epdx += delta_idx;
		if (scb->epoch < delta)
			scb->epdx += 1;
	}
}
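
/*
 * Worked example (added for illustration, not part of the original file):
 * for a host TOD jump of +1, delta becomes -1 (0xffff...ffff) and delta_idx
 * sign-extends to -1. The unsigned add "epoch += delta" then subtracts one;
 * the "epoch < delta" carry test fires for every old epoch except 0 and
 * cancels delta_idx, so epdx only drops when the subtraction really
 * borrows. (epdx:epoch) thus behaves like a single 72-bit signed value.
 */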

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	unsigned long i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm_for_each_vcpu(i, vcpu, kvm) {
			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
			if (i == 0) {
				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
			}
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				kvm_clock_sync_scb(vcpu->arch.vsie_block,
						   *delta);
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void *opaque)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

static inline int plo_test_bit(unsigned char nr)
{
	unsigned long function = (unsigned long)nr | 0x100;
	int cc;

	asm volatile(
		"	lgr	0,%[function]\n"
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: [function] "d" (function)
		: "cc", "0");
	return cc == 0;
}
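
/*
 * Usage sketch (an assumption based on the PLO "test bit" convention used
 * above, not from the original source): plo_test_bit(nr) returns 1 iff
 * PERFORM LOCKED OPERATION function code nr is installed; the loop in
 * kvm_s390_cpu_feat_init() below probes all 256 function codes this way
 * to build the PLO query bitmap exposed to the cpu model.
 */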

static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
{
	asm volatile(
		"	lghi	0,0\n"
		"	lgr	1,%[query]\n"
		/* Parameter registers are ignored */
		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
		:
		: [query] "d" ((unsigned long)query), [opc] "i" (opcode)
		: "cc", "memory", "0", "1");
}
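
/*
 * Note (added for clarity, an assumption encoded in the asm above): both
 * SORTL and DFLTCC follow the common query convention, i.e. GR0 = 0
 * selects the QUERY function and GR1 points to the buffer that receives
 * the 256-bit installed-functions list.
 */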

#define INSN_SORTL 0xb938
#define INSN_DFLTCC 0xb939

static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);

	if (test_facility(155)) /* MSA9 */
		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kdsa);

	if (test_facility(150)) /* SORTL */
		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);

	if (test_facility(151)) /* DFLTCC */
		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will cause
	 * pages to be detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}

int kvm_arch_init(void *opaque)
{
	int rc = -ENOMEM;

	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf_uv)
		goto out;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
		goto out;

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
	if (rc) {
		pr_err("A FLIC registration call failed with rc=%d\n", rc);
		goto out;
	}

	rc = kvm_s390_gib_init(GAL_ISC);
	if (rc)
		goto out;

	return 0;

out:
	kvm_arch_exit();
	return rc;
}

void kvm_arch_exit(void)
{
	kvm_s390_gib_destroy();
	debug_unregister(kvm_s390_dbf);
	debug_unregister(kvm_s390_dbf_uv);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
	case KVM_CAP_S390_AIS_MIGRATION:
	case KVM_CAP_S390_VCPU_RESETS:
	case KVM_CAP_SET_GUEST_DEBUG:
	case KVM_CAP_S390_DIAG318:
		r = 1;
		break;
	case KVM_CAP_SET_GUEST_DEBUG2:
		r = KVM_GUESTDBG_VALID_MASK;
		break;
	case KVM_CAP_S390_HPAGE_1M:
		r = 0;
		if (hpage && !kvm_is_ucontrol(kvm))
			r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
	case KVM_CAP_MAX_VCPU_ID:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		if (ext == KVM_CAP_NR_VCPUS)
			r = min_t(unsigned int, num_online_cpus(), r);
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	case KVM_CAP_S390_BPB:
		r = test_facility(82);
		break;
	case KVM_CAP_S390_PROTECTED:
		r = is_prot_virt_host();
		break;
	default:
		r = 0;
	}
	return r;
}

void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
	int i;
	gfn_t cur_gfn, last_gfn;
	unsigned long gaddr, vmaddr;
	struct gmap *gmap = kvm->arch.gmap;
	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);

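	/*
	 * Note (added for clarity): _PAGE_ENTRIES is 256 on s390, so each
	 * iteration below covers one 1 MB segment (256 * 4 KB pages),
	 * matching the pmd granularity that gmap_sync_dirty_log_pmd()
	 * operates on.
	 */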
	/* Loop over all guest segments */
	cur_gfn = memslot->base_gfn;
	last_gfn = memslot->base_gfn + memslot->npages;
	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
		gaddr = gfn_to_gpa(cur_gfn);
		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
		if (kvm_is_error_hva(vmaddr))
			continue;

		bitmap_zero(bitmap, _PAGE_ENTRIES);
		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
		for (i = 0; i < _PAGE_ENTRIES; i++) {
			if (test_bit(i, bitmap))
				mark_page_dirty(kvm, cur_gfn + i);
		}

		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memory_slot *memslot;
	int is_dirty;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}

static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned long i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}

int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
			if (test_facility(148)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 148);
				set_kvm_facility(kvm->arch.model.fac_list, 148);
			}
			if (test_facility(152)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 152);
				set_kvm_facility(kvm->arch.model.fac_list, 152);
			}
			if (test_facility(192)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 192);
				set_kvm_facility(kvm->arch.model.fac_list, 192);
			}
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_AIS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else {
			set_kvm_facility(kvm->arch.model.fac_mask, 72);
			set_kvm_facility(kvm->arch.model.fac_list, 72);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_GS:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(133)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 133);
			set_kvm_facility(kvm->arch.model.fac_list, 133);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_HPAGE_1M:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			r = -EBUSY;
		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
			r = -EINVAL;
		else {
			r = 0;
			mmap_write_lock(kvm->mm);
			kvm->mm->context.allow_gmap_hpage_1m = 1;
			mmap_write_unlock(kvm->mm);
			/*
			 * We might have to create fake 4k page
			 * tables. To prevent the hardware from
			 * working on stale PGSTEs, we emulate these
			 * instructions.
			 */
			kvm->arch.use_skf = 0;
			kvm->arch.use_pfmfi = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
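
/*
 * Illustrative userspace usage (a sketch, not part of this file): the
 * capabilities above are switched on with the KVM_ENABLE_CAP vm ioctl,
 * e.g.:
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 */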

static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;
	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			ret = -EBUSY;
		else if (kvm->mm->context.allow_gmap_hpage_1m)
			ret = -EINVAL;
		else {
			kvm->arch.use_cmma = 1;
			/* Not compatible with cmma. */
			kvm->arch.use_pfmfi = 0;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
{
	struct kvm_vcpu *vcpu;
	unsigned long i;

	kvm_s390_vcpu_block_all(kvm);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		/* recreate the shadow crycb by leaving the VSIE handler */
		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
	}

	kvm_s390_vcpu_unblock_all(kvm);
}

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 1;
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 0;
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_s390_vcpu_crypto_reset_all(kvm);
	mutex_unlock(&kvm->lock);
	return 0;
}
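
/*
 * Illustrative userspace usage (a sketch, not part of this file): the
 * crypto controls above are driven through the KVM_SET_DEVICE_ATTR vm
 * ioctl, e.g.:
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CRYPTO,
 *		.attr  = KVM_S390_VM_CRYPTO_ENABLE_AES_KW,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */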

static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
	unsigned long cx;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}

/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_memory_slot *ms;
	struct kvm_memslots *slots;
	unsigned long ram_pages = 0;
	int bkt;

	/* migration mode already enabled */
	if (kvm->arch.migration_mode)
		return 0;
	slots = kvm_memslots(kvm);
	if (!slots || kvm_memslots_empty(slots))
		return -EINVAL;

	if (!kvm->arch.use_cmma) {
		kvm->arch.migration_mode = 1;
		return 0;
	}
	/* mark all the pages in active slots as dirty */
	kvm_for_each_memslot(ms, bkt, slots) {
		if (!ms->dirty_bitmap)
			return -EINVAL;
		/*
		 * The second half of the bitmap is only used on x86,
		 * and would be wasted otherwise, so we put it to good
		 * use here to keep track of the state of the storage
		 * attributes.
		 */
		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
		ram_pages += ms->npages;
	}
	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
	kvm->arch.migration_mode = 1;
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	return 0;
}
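
/*
 * Note (added for clarity, relying on the generic KVM bitmap layout):
 * kvm_second_dirty_bitmap() points at the second half of the doubled
 * dirty_bitmap allocation. Filling it with ones marks every page as
 * having CMMA state that still has to be reported via
 * KVM_S390_GET_CMMA_BITS, which is why ram_pages counts all slot pages.
 */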

/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	/* migration mode already disabled */
	if (!kvm->arch.migration_mode)
		return 0;
	kvm->arch.migration_mode = 0;
	if (kvm->arch.use_cmma)
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
	return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int res = -ENXIO;

	mutex_lock(&kvm->slots_lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		res = kvm_s390_vm_start_migration(kvm);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->slots_lock);

	return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = kvm->arch.migration_mode;

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
		return -EINVAL;
	kvm_s390_set_tod_clock(kvm, &gtod);

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		gtod.epoch_idx, gtod.tod);

	return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
					   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod = { 0 };

	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
			   sizeof(gtod.tod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, &gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
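
/*
 * Illustrative userspace usage (a sketch, not part of this file): setting
 * the full TOD including the epoch index, assuming facility 139
 * (multiple-epoch) is in the guest cpu model:
 *
 *	struct kvm_s390_vm_tod_clock gtod = { .epoch_idx = 0, .tod = tod };
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_TOD,
 *		.attr  = KVM_S390_VM_TOD_EXT,
 *		.addr  = (__u64)&gtod,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */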

static void kvm_s390_get_tod_clock(struct kvm *kvm,
				   struct kvm_s390_vm_tod_clock *gtod)
{
	union tod_clock clk;

	preempt_disable();

	store_tod_clock_ext(&clk);

	gtod->tod = clk.tod + kvm->arch.epoch;
	gtod->epoch_idx = 0;
	if (test_kvm_facility(kvm, 139)) {
		gtod->epoch_idx = clk.ei + kvm->arch.epdx;
		if (gtod->tod < clk.tod)
			gtod->epoch_idx += 1;
	}

	preempt_enable();
}
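
/*
 * Note (added for clarity): the "gtod->tod < clk.tod" test mirrors the
 * carry handling in kvm_clock_sync_scb() above: if adding the (possibly
 * negative) guest epoch wrapped the 64-bit TOD value, the epoch index
 * receives the corresponding carry.
 */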

static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));
	kvm_s390_get_tod_clock(kvm, &gtod);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		gtod.epoch_idx, gtod.tod);
	return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
					 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}
	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	mutex_unlock(&kvm->lock);
	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
			 data.feat[0],
			 data.feat[1],
			 data.feat[2]);
	return 0;
}

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}

	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
		mutex_unlock(&kvm->lock);
		return -EFAULT;
	}
	mutex_unlock(&kvm->lock);

	VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
	VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
	VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
	VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
	VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
	VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
	VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
	VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
	VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
	VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);

	return 0;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_list[0],
		 kvm->arch.model.fac_list[1],
		 kvm->arch.model.fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
	       sizeof(stfle_fac_list));
	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_mask[0],
		 mach->fac_mask[1],
		 mach->fac_mask[2]);
	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_list[0],
		 mach->fac_list[1],
		 mach->fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}

static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
			 data.feat[0],
			 data.feat[1],
			 data.feat[2]);
	return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
			 data.feat[0],
			 data.feat[1],
			 data.feat[2]);
	return 0;
}

static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
	VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
	VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
	VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
	VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
	VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
	VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
	VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
	VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
	VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);

	return 0;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
	VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
	VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
	VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
	VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
	VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
	VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
	VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
	VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
	VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
	VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
	VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
	VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
	VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
	VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
	VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
	VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);

	return 0;
}

static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
1713 		ret = kvm_s390_vm_set_crypto(kvm, attr);
1714 		break;
1715 	case KVM_S390_VM_MIGRATION:
1716 		ret = kvm_s390_vm_set_migration(kvm, attr);
1717 		break;
1718 	default:
1719 		ret = -ENXIO;
1720 		break;
1721 	}
1722 
1723 	return ret;
1724 }
1725 
1726 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1727 {
1728 	int ret;
1729 
1730 	switch (attr->group) {
1731 	case KVM_S390_VM_MEM_CTRL:
1732 		ret = kvm_s390_get_mem_control(kvm, attr);
1733 		break;
1734 	case KVM_S390_VM_TOD:
1735 		ret = kvm_s390_get_tod(kvm, attr);
1736 		break;
1737 	case KVM_S390_VM_CPU_MODEL:
1738 		ret = kvm_s390_get_cpu_model(kvm, attr);
1739 		break;
1740 	case KVM_S390_VM_MIGRATION:
1741 		ret = kvm_s390_vm_get_migration(kvm, attr);
1742 		break;
1743 	default:
1744 		ret = -ENXIO;
1745 		break;
1746 	}
1747 
1748 	return ret;
1749 }
1750 
1751 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1752 {
1753 	int ret;
1754 
1755 	switch (attr->group) {
1756 	case KVM_S390_VM_MEM_CTRL:
1757 		switch (attr->attr) {
1758 		case KVM_S390_VM_MEM_ENABLE_CMMA:
1759 		case KVM_S390_VM_MEM_CLR_CMMA:
1760 			ret = sclp.has_cmma ? 0 : -ENXIO;
1761 			break;
1762 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1763 			ret = 0;
1764 			break;
1765 		default:
1766 			ret = -ENXIO;
1767 			break;
1768 		}
1769 		break;
1770 	case KVM_S390_VM_TOD:
1771 		switch (attr->attr) {
1772 		case KVM_S390_VM_TOD_LOW:
1773 		case KVM_S390_VM_TOD_HIGH:
1774 			ret = 0;
1775 			break;
1776 		default:
1777 			ret = -ENXIO;
1778 			break;
1779 		}
1780 		break;
1781 	case KVM_S390_VM_CPU_MODEL:
1782 		switch (attr->attr) {
1783 		case KVM_S390_VM_CPU_PROCESSOR:
1784 		case KVM_S390_VM_CPU_MACHINE:
1785 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1786 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1787 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1788 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1789 			ret = 0;
1790 			break;
1791 		default:
1792 			ret = -ENXIO;
1793 			break;
1794 		}
1795 		break;
1796 	case KVM_S390_VM_CRYPTO:
1797 		switch (attr->attr) {
1798 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1799 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1800 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1801 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1802 			ret = 0;
1803 			break;
1804 		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1805 		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1806 			ret = ap_instructions_available() ? 0 : -ENXIO;
1807 			break;
1808 		default:
1809 			ret = -ENXIO;
1810 			break;
1811 		}
1812 		break;
1813 	case KVM_S390_VM_MIGRATION:
1814 		ret = 0;
1815 		break;
1816 	default:
1817 		ret = -ENXIO;
1818 		break;
1819 	}
1820 
1821 	return ret;
1822 }
1823 
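/*
 * Copy the storage keys for a range of guest frames to userspace.
 * Returns KVM_S390_GET_SKEYS_NONE if the guest does not use storage
 * keys at all.
 */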
1824 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1825 {
1826 	uint8_t *keys;
1827 	uint64_t hva;
1828 	int srcu_idx, i, r = 0;
1829 
1830 	if (args->flags != 0)
1831 		return -EINVAL;
1832 
1833 	/* Is this guest using storage keys? */
1834 	if (!mm_uses_skeys(current->mm))
1835 		return KVM_S390_GET_SKEYS_NONE;
1836 
1837 	/* Enforce sane limit on memory allocation */
1838 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1839 		return -EINVAL;
1840 
1841 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1842 	if (!keys)
1843 		return -ENOMEM;
1844 
1845 	mmap_read_lock(current->mm);
1846 	srcu_idx = srcu_read_lock(&kvm->srcu);
1847 	for (i = 0; i < args->count; i++) {
1848 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1849 		if (kvm_is_error_hva(hva)) {
1850 			r = -EFAULT;
1851 			break;
1852 		}
1853 
1854 		r = get_guest_storage_key(current->mm, hva, &keys[i]);
1855 		if (r)
1856 			break;
1857 	}
1858 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1859 	mmap_read_unlock(current->mm);
1860 
1861 	if (!r) {
1862 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1863 				 sizeof(uint8_t) * args->count);
1864 		if (r)
1865 			r = -EFAULT;
1866 	}
1867 
1868 	kvfree(keys);
1869 	return r;
1870 }
1871 
1872 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1873 {
1874 	uint8_t *keys;
1875 	uint64_t hva;
1876 	int srcu_idx, i, r = 0;
1877 	bool unlocked;
1878 
1879 	if (args->flags != 0)
1880 		return -EINVAL;
1881 
1882 	/* Enforce sane limit on memory allocation */
1883 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1884 		return -EINVAL;
1885 
1886 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1887 	if (!keys)
1888 		return -ENOMEM;
1889 
1890 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1891 			   sizeof(uint8_t) * args->count);
1892 	if (r) {
1893 		r = -EFAULT;
1894 		goto out;
1895 	}
1896 
1897 	/* Enable storage key handling for the guest */
1898 	r = s390_enable_skey();
1899 	if (r)
1900 		goto out;
1901 
1902 	i = 0;
1903 	mmap_read_lock(current->mm);
1904 	srcu_idx = srcu_read_lock(&kvm->srcu);
1905 	while (i < args->count) {
1906 		unlocked = false;
1907 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1908 		if (kvm_is_error_hva(hva)) {
1909 			r = -EFAULT;
1910 			break;
1911 		}
1912 
1913 		/* Lowest order bit is reserved */
1914 		if (keys[i] & 0x01) {
1915 			r = -EINVAL;
1916 			break;
1917 		}
1918 
1919 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1920 		if (r) {
1921 			r = fixup_user_fault(current->mm, hva,
1922 					     FAULT_FLAG_WRITE, &unlocked);
1923 			if (r)
1924 				break;
1925 		}
1926 		if (!r)
1927 			i++;
1928 	}
1929 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1930 	mmap_read_unlock(current->mm);
1931 out:
1932 	kvfree(keys);
1933 	return r;
1934 }
1935 
1936 /*
1937  * Base address and length must be sent at the start of each block; it is
1938  * therefore cheaper to send some clean data, as long as it is less than the
1939  * size of two longs.
1940  */
1941 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1942 /* for consistency */
1943 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1944 
1945 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1946 			      u8 *res, unsigned long bufsize)
1947 {
1948 	unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1949 
1950 	args->count = 0;
1951 	while (args->count < bufsize) {
1952 		hva = gfn_to_hva(kvm, cur_gfn);
1953 		/*
1954 		 * We return an error if the first value was invalid, but we
1955 		 * return successfully if at least one value was copied.
1956 		 */
1957 		if (kvm_is_error_hva(hva))
1958 			return args->count ? 0 : -EFAULT;
1959 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1960 			pgstev = 0;
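		/*
		 * Keep the usage state and NODAT indication: the 0x43 mask
		 * corresponds to _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT,
		 * cf. the inverse operation in kvm_s390_set_cmma_bits().
		 */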
1961 		res[args->count++] = (pgstev >> 24) & 0x43;
1962 		cur_gfn++;
1963 	}
1964 
1965 	return 0;
1966 }
1967 
1968 static struct kvm_memory_slot *gfn_to_memslot_approx(struct kvm_memslots *slots,
1969 						     gfn_t gfn)
1970 {
1971 	return ____gfn_to_memslot(slots, gfn, true);
1972 }
1973 
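/*
 * Find the guest frame number of the next page with the dirty-CMMA bit
 * set, starting the search at cur_gfn and wrapping around to the first
 * memslot when the end of the slot tree is reached.
 */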
1974 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1975 					      unsigned long cur_gfn)
1976 {
1977 	struct kvm_memory_slot *ms = gfn_to_memslot_approx(slots, cur_gfn);
1978 	unsigned long ofs = cur_gfn - ms->base_gfn;
1979 	struct rb_node *mnode = &ms->gfn_node[slots->node_idx];
1980 
1981 	if (ms->base_gfn + ms->npages <= cur_gfn) {
1982 		mnode = rb_next(mnode);
1983 		/* If we are above the highest slot, wrap around */
1984 		if (!mnode)
1985 			mnode = rb_first(&slots->gfn_tree);
1986 
1987 		ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
1988 		ofs = 0;
1989 	}
1990 	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1991 	while (ofs >= ms->npages && (mnode = rb_next(mnode))) {
1992 		ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
1993 		ofs = find_first_bit(kvm_second_dirty_bitmap(ms), ms->npages);
1994 	}
1995 	return ms->base_gfn + ofs;
1996 }
1997 
1998 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1999 			     u8 *res, unsigned long bufsize)
2000 {
2001 	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2002 	struct kvm_memslots *slots = kvm_memslots(kvm);
2003 	struct kvm_memory_slot *ms;
2004 
2005 	if (unlikely(kvm_memslots_empty(slots)))
2006 		return 0;
2007 
2008 	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2009 	ms = gfn_to_memslot(kvm, cur_gfn);
2010 	args->count = 0;
2011 	args->start_gfn = cur_gfn;
2012 	if (!ms)
2013 		return 0;
2014 	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2015 	mem_end = kvm_s390_get_gfn_end(slots);
2016 
2017 	while (args->count < bufsize) {
2018 		hva = gfn_to_hva(kvm, cur_gfn);
2019 		if (kvm_is_error_hva(hva))
2020 			return 0;
2021 		/* Decrement only if we actually flipped the bit to 0 */
2022 		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2023 			atomic64_dec(&kvm->arch.cmma_dirty_pages);
2024 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2025 			pgstev = 0;
2026 		/* Save the value */
2027 		res[args->count++] = (pgstev >> 24) & 0x43;
2028 		/* If the next bit is too far away, stop. */
2029 		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2030 			return 0;
2031 		/* If we reached the previous "next", find the next one */
2032 		if (cur_gfn == next_gfn)
2033 			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2034 		/* Reached the end of memory or of the buffer, stop */
2035 		if ((next_gfn >= mem_end) ||
2036 		    (next_gfn - args->start_gfn >= bufsize))
2037 			return 0;
2038 		cur_gfn++;
2039 		/* Reached the end of the current memslot, take the next one. */
2040 		if (cur_gfn - ms->base_gfn >= ms->npages) {
2041 			ms = gfn_to_memslot(kvm, cur_gfn);
2042 			if (!ms)
2043 				return 0;
2044 		}
2045 	}
2046 	return 0;
2047 }
2048 
2049 /*
2050  * This function searches for the next page with dirty CMMA attributes, and
2051  * saves the attributes in the buffer up to either the end of the buffer or
2052  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2053  * no trailing clean bytes are saved.
2054  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2055  * output buffer will indicate 0 as length.
2056  */
2057 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2058 				  struct kvm_s390_cmma_log *args)
2059 {
2060 	unsigned long bufsize;
2061 	int srcu_idx, peek, ret;
2062 	u8 *values;
2063 
2064 	if (!kvm->arch.use_cmma)
2065 		return -ENXIO;
2066 	/* Invalid/unsupported flags were specified */
2067 	if (args->flags & ~KVM_S390_CMMA_PEEK)
2068 		return -EINVAL;
2069 	/* Migration mode query, and we are not doing a migration */
2070 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2071 	if (!peek && !kvm->arch.migration_mode)
2072 		return -EINVAL;
2073 	/* CMMA is disabled or was not used, or the buffer has length zero */
2074 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2075 	if (!bufsize || !kvm->mm->context.uses_cmm) {
2076 		memset(args, 0, sizeof(*args));
2077 		return 0;
2078 	}
2079 	/* We are not peeking, and there are no dirty pages */
2080 	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2081 		memset(args, 0, sizeof(*args));
2082 		return 0;
2083 	}
2084 
2085 	values = vmalloc(bufsize);
2086 	if (!values)
2087 		return -ENOMEM;
2088 
2089 	mmap_read_lock(kvm->mm);
2090 	srcu_idx = srcu_read_lock(&kvm->srcu);
2091 	if (peek)
2092 		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2093 	else
2094 		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2095 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2096 	mmap_read_unlock(kvm->mm);
2097 
2098 	if (kvm->arch.migration_mode)
2099 		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2100 	else
2101 		args->remaining = 0;
2102 
2103 	if (copy_to_user((void __user *)args->values, values, args->count))
2104 		ret = -EFAULT;
2105 
2106 	vfree(values);
2107 	return ret;
2108 }
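
/*
 * Minimal userspace usage sketch for KVM_S390_GET_CMMA_BITS (hypothetical
 * snippet, error handling omitted; "vm_fd", "buf" and "consume" are
 * assumptions, not part of this file):
 *
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count = sizeof(buf),
 *		.flags = KVM_S390_CMMA_PEEK,
 *		.values = (__u64)(unsigned long)buf,
 *	};
 *
 *	if (!ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log))
 *		consume(buf, log.count); // log.count attribute bytes returned
 */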
2109 
2110 /*
2111  * This function sets the CMMA attributes for the given pages. If the input
2112  * buffer has zero length, no action is taken, otherwise the attributes are
2113  * set and the mm->context.uses_cmm flag is set.
2114  */
2115 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2116 				  const struct kvm_s390_cmma_log *args)
2117 {
2118 	unsigned long hva, mask, pgstev, i;
2119 	uint8_t *bits;
2120 	int srcu_idx, r = 0;
2121 
2122 	mask = args->mask;
2123 
2124 	if (!kvm->arch.use_cmma)
2125 		return -ENXIO;
2126 	/* invalid/unsupported flags */
2127 	if (args->flags != 0)
2128 		return -EINVAL;
2129 	/* Enforce sane limit on memory allocation */
2130 	if (args->count > KVM_S390_CMMA_SIZE_MAX)
2131 		return -EINVAL;
2132 	/* Nothing to do */
2133 	if (args->count == 0)
2134 		return 0;
2135 
2136 	bits = vmalloc(array_size(sizeof(*bits), args->count));
2137 	if (!bits)
2138 		return -ENOMEM;
2139 
2140 	r = copy_from_user(bits, (void __user *)args->values, args->count);
2141 	if (r) {
2142 		r = -EFAULT;
2143 		goto out;
2144 	}
2145 
2146 	mmap_read_lock(kvm->mm);
2147 	srcu_idx = srcu_read_lock(&kvm->srcu);
2148 	for (i = 0; i < args->count; i++) {
2149 		hva = gfn_to_hva(kvm, args->start_gfn + i);
2150 		if (kvm_is_error_hva(hva)) {
2151 			r = -EFAULT;
2152 			break;
2153 		}
2154 
2155 		pgstev = bits[i];
2156 		pgstev = pgstev << 24;
2157 		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2158 		set_pgste_bits(kvm->mm, hva, mask, pgstev);
2159 	}
2160 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2161 	mmap_read_unlock(kvm->mm);
2162 
2163 	if (!kvm->mm->context.uses_cmm) {
2164 		mmap_write_lock(kvm->mm);
2165 		kvm->mm->context.uses_cmm = 1;
2166 		mmap_write_unlock(kvm->mm);
2167 	}
2168 out:
2169 	vfree(bits);
2170 	return r;
2171 }
2172 
2173 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2174 {
2175 	struct kvm_vcpu *vcpu;
2176 	u16 rc, rrc;
2177 	int ret = 0;
2178 	unsigned long i;
2179 
2180 	/*
2181 	 * We ignore failures and try to destroy as many CPUs as possible.
2182 	 * At the same time we must not free the assigned resources when
2183 	 * this fails, as the ultravisor still has access to that memory.
2184 	 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2185 	 * behind.
2186 	 * We want to return the first failure rc and rrc, though.
2187 	 */
2188 	kvm_for_each_vcpu(i, vcpu, kvm) {
2189 		mutex_lock(&vcpu->mutex);
2190 		if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2191 			*rcp = rc;
2192 			*rrcp = rrc;
2193 			ret = -EIO;
2194 		}
2195 		mutex_unlock(&vcpu->mutex);
2196 	}
2197 	return ret;
2198 }
2199 
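/*
 * Convert all VCPUs of this VM to protected. If creating one of the
 * CPUs fails, undo the conversion for the CPUs created so far.
 */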
2200 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2201 {
2202 	unsigned long i;
2203 	int r = 0;
2204 	u16 dummy;
2205 
2206 	struct kvm_vcpu *vcpu;
2207 
2208 	kvm_for_each_vcpu(i, vcpu, kvm) {
2209 		mutex_lock(&vcpu->mutex);
2210 		r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2211 		mutex_unlock(&vcpu->mutex);
2212 		if (r)
2213 			break;
2214 	}
2215 	if (r)
2216 		kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2217 	return r;
2218 }
2219 
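/* Dispatch the subcommands of the KVM_S390_PV_COMMAND ioctl. */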
2220 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2221 {
2222 	int r = 0;
2223 	u16 dummy;
2224 	void __user *argp = (void __user *)cmd->data;
2225 
2226 	switch (cmd->cmd) {
2227 	case KVM_PV_ENABLE: {
2228 		r = -EINVAL;
2229 		if (kvm_s390_pv_is_protected(kvm))
2230 			break;
2231 
2232 		/*
2233 		 * FMT 4 SIE needs esca. As we never switch back to bsca from
2234 		 * esca, we need no cleanup in the error cases below.
2235 		 */
2236 		r = sca_switch_to_extended(kvm);
2237 		if (r)
2238 			break;
2239 
2240 		mmap_write_lock(current->mm);
2241 		r = gmap_mark_unmergeable();
2242 		mmap_write_unlock(current->mm);
2243 		if (r)
2244 			break;
2245 
2246 		r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2247 		if (r)
2248 			break;
2249 
2250 		r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2251 		if (r)
2252 			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2253 
2254 		/* we need to block service interrupts from now on */
2255 		set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2256 		break;
2257 	}
2258 	case KVM_PV_DISABLE: {
2259 		r = -EINVAL;
2260 		if (!kvm_s390_pv_is_protected(kvm))
2261 			break;
2262 
2263 		r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2264 		/*
2265 		 * If a CPU could not be destroyed, destroy VM will also fail.
2266 		 * There is no point in trying to destroy it. Instead return
2266 		 * There is no point in trying to destroy it. Instead, return
2267 		 * the rc and rrc of the first CPU whose destruction failed.
2269 		if (r)
2270 			break;
2271 		r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2272 
2273 		/* no need to block service interrupts any more */
2274 		clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2275 		break;
2276 	}
2277 	case KVM_PV_SET_SEC_PARMS: {
2278 		struct kvm_s390_pv_sec_parm parms = {};
2279 		void *hdr;
2280 
2281 		r = -EINVAL;
2282 		if (!kvm_s390_pv_is_protected(kvm))
2283 			break;
2284 
2285 		r = -EFAULT;
2286 		if (copy_from_user(&parms, argp, sizeof(parms)))
2287 			break;
2288 
2289 		/* Currently restricted to 8KB */
2290 		r = -EINVAL;
2291 		if (parms.length > PAGE_SIZE * 2)
2292 			break;
2293 
2294 		r = -ENOMEM;
2295 		hdr = vmalloc(parms.length);
2296 		if (!hdr)
2297 			break;
2298 
2299 		r = -EFAULT;
2300 		if (!copy_from_user(hdr, (void __user *)parms.origin,
2301 				    parms.length))
2302 			r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2303 						      &cmd->rc, &cmd->rrc);
2304 
2305 		vfree(hdr);
2306 		break;
2307 	}
2308 	case KVM_PV_UNPACK: {
2309 		struct kvm_s390_pv_unp unp = {};
2310 
2311 		r = -EINVAL;
2312 		if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2313 			break;
2314 
2315 		r = -EFAULT;
2316 		if (copy_from_user(&unp, argp, sizeof(unp)))
2317 			break;
2318 
2319 		r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2320 				       &cmd->rc, &cmd->rrc);
2321 		break;
2322 	}
2323 	case KVM_PV_VERIFY: {
2324 		r = -EINVAL;
2325 		if (!kvm_s390_pv_is_protected(kvm))
2326 			break;
2327 
2328 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2329 				  UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2330 		KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2331 			     cmd->rrc);
2332 		break;
2333 	}
2334 	case KVM_PV_PREP_RESET: {
2335 		r = -EINVAL;
2336 		if (!kvm_s390_pv_is_protected(kvm))
2337 			break;
2338 
2339 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2340 				  UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2341 		KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2342 			     cmd->rc, cmd->rrc);
2343 		break;
2344 	}
2345 	case KVM_PV_UNSHARE_ALL: {
2346 		r = -EINVAL;
2347 		if (!kvm_s390_pv_is_protected(kvm))
2348 			break;
2349 
2350 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2351 				  UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2352 		KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2353 			     cmd->rc, cmd->rrc);
2354 		break;
2355 	}
2356 	default:
2357 		r = -ENOTTY;
2358 	}
2359 	return r;
2360 }
2361 
2362 static bool access_key_invalid(u8 access_key)
2363 {
2364 	return access_key > 0xf;
2365 }
2366 
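/*
 * Handle the KVM_S390_MEM_OP VM ioctl: read or write a range of absolute
 * guest memory on behalf of userspace, optionally only checking that the
 * access would succeed, and honoring storage-key protection if requested.
 */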
2367 static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
2368 {
2369 	void __user *uaddr = (void __user *)mop->buf;
2370 	u64 supported_flags;
2371 	void *tmpbuf = NULL;
2372 	int r, srcu_idx;
2373 
2374 	supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION
2375 			  | KVM_S390_MEMOP_F_CHECK_ONLY;
2376 	if (mop->flags & ~supported_flags)
2377 		return -EINVAL;
2378 	if (mop->size > MEM_OP_MAX_SIZE)
2379 		return -E2BIG;
2380 	if (kvm_s390_pv_is_protected(kvm))
2381 		return -EINVAL;
2382 	if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
2383 		if (access_key_invalid(mop->key))
2384 			return -EINVAL;
2385 	} else {
2386 		mop->key = 0;
2387 	}
2388 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2389 		tmpbuf = vmalloc(mop->size);
2390 		if (!tmpbuf)
2391 			return -ENOMEM;
2392 	}
2393 
2394 	srcu_idx = srcu_read_lock(&kvm->srcu);
2395 
2396 	if (kvm_is_error_gpa(kvm, mop->gaddr)) {
2397 		r = PGM_ADDRESSING;
2398 		goto out_unlock;
2399 	}
2400 
2401 	switch (mop->op) {
2402 	case KVM_S390_MEMOP_ABSOLUTE_READ: {
2403 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2404 			r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_FETCH, mop->key);
2405 		} else {
2406 			r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
2407 						      mop->size, GACC_FETCH, mop->key);
2408 			if (r == 0) {
2409 				if (copy_to_user(uaddr, tmpbuf, mop->size))
2410 					r = -EFAULT;
2411 			}
2412 		}
2413 		break;
2414 	}
2415 	case KVM_S390_MEMOP_ABSOLUTE_WRITE: {
2416 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2417 			r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_STORE, mop->key);
2418 		} else {
2419 			if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2420 				r = -EFAULT;
2421 				break;
2422 			}
2423 			r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
2424 						      mop->size, GACC_STORE, mop->key);
2425 		}
2426 		break;
2427 	}
2428 	default:
2429 		r = -EINVAL;
2430 	}
2431 
2432 out_unlock:
2433 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2434 
2435 	vfree(tmpbuf);
2436 	return r;
2437 }
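
/*
 * Minimal userspace usage sketch for an absolute read via KVM_S390_MEM_OP
 * (hypothetical snippet; "vm_fd", "gpa" and "buf" are assumptions, not
 * part of this file):
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = gpa,
 *		.buf = (__u64)(unsigned long)buf,
 *		.size = sizeof(buf),
 *		.op = KVM_S390_MEMOP_ABSOLUTE_READ,
 *	};
 *
 *	ioctl(vm_fd, KVM_S390_MEM_OP, &op);
 */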
2438 
2439 long kvm_arch_vm_ioctl(struct file *filp,
2440 		       unsigned int ioctl, unsigned long arg)
2441 {
2442 	struct kvm *kvm = filp->private_data;
2443 	void __user *argp = (void __user *)arg;
2444 	struct kvm_device_attr attr;
2445 	int r;
2446 
2447 	switch (ioctl) {
2448 	case KVM_S390_INTERRUPT: {
2449 		struct kvm_s390_interrupt s390int;
2450 
2451 		r = -EFAULT;
2452 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
2453 			break;
2454 		r = kvm_s390_inject_vm(kvm, &s390int);
2455 		break;
2456 	}
2457 	case KVM_CREATE_IRQCHIP: {
2458 		struct kvm_irq_routing_entry routing;
2459 
2460 		r = -EINVAL;
2461 		if (kvm->arch.use_irqchip) {
2462 			/* Set up dummy routing. */
2463 			memset(&routing, 0, sizeof(routing));
2464 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2465 		}
2466 		break;
2467 	}
2468 	case KVM_SET_DEVICE_ATTR: {
2469 		r = -EFAULT;
2470 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2471 			break;
2472 		r = kvm_s390_vm_set_attr(kvm, &attr);
2473 		break;
2474 	}
2475 	case KVM_GET_DEVICE_ATTR: {
2476 		r = -EFAULT;
2477 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2478 			break;
2479 		r = kvm_s390_vm_get_attr(kvm, &attr);
2480 		break;
2481 	}
2482 	case KVM_HAS_DEVICE_ATTR: {
2483 		r = -EFAULT;
2484 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2485 			break;
2486 		r = kvm_s390_vm_has_attr(kvm, &attr);
2487 		break;
2488 	}
2489 	case KVM_S390_GET_SKEYS: {
2490 		struct kvm_s390_skeys args;
2491 
2492 		r = -EFAULT;
2493 		if (copy_from_user(&args, argp,
2494 				   sizeof(struct kvm_s390_skeys)))
2495 			break;
2496 		r = kvm_s390_get_skeys(kvm, &args);
2497 		break;
2498 	}
2499 	case KVM_S390_SET_SKEYS: {
2500 		struct kvm_s390_skeys args;
2501 
2502 		r = -EFAULT;
2503 		if (copy_from_user(&args, argp,
2504 				   sizeof(struct kvm_s390_skeys)))
2505 			break;
2506 		r = kvm_s390_set_skeys(kvm, &args);
2507 		break;
2508 	}
2509 	case KVM_S390_GET_CMMA_BITS: {
2510 		struct kvm_s390_cmma_log args;
2511 
2512 		r = -EFAULT;
2513 		if (copy_from_user(&args, argp, sizeof(args)))
2514 			break;
2515 		mutex_lock(&kvm->slots_lock);
2516 		r = kvm_s390_get_cmma_bits(kvm, &args);
2517 		mutex_unlock(&kvm->slots_lock);
2518 		if (!r) {
2519 			r = copy_to_user(argp, &args, sizeof(args));
2520 			if (r)
2521 				r = -EFAULT;
2522 		}
2523 		break;
2524 	}
2525 	case KVM_S390_SET_CMMA_BITS: {
2526 		struct kvm_s390_cmma_log args;
2527 
2528 		r = -EFAULT;
2529 		if (copy_from_user(&args, argp, sizeof(args)))
2530 			break;
2531 		mutex_lock(&kvm->slots_lock);
2532 		r = kvm_s390_set_cmma_bits(kvm, &args);
2533 		mutex_unlock(&kvm->slots_lock);
2534 		break;
2535 	}
2536 	case KVM_S390_PV_COMMAND: {
2537 		struct kvm_pv_cmd args;
2538 
2539 		/* protvirt means user cpu state */
2540 		kvm_s390_set_user_cpu_state_ctrl(kvm);
2541 		r = 0;
2542 		if (!is_prot_virt_host()) {
2543 			r = -EINVAL;
2544 			break;
2545 		}
2546 		if (copy_from_user(&args, argp, sizeof(args))) {
2547 			r = -EFAULT;
2548 			break;
2549 		}
2550 		if (args.flags) {
2551 			r = -EINVAL;
2552 			break;
2553 		}
2554 		mutex_lock(&kvm->lock);
2555 		r = kvm_s390_handle_pv(kvm, &args);
2556 		mutex_unlock(&kvm->lock);
2557 		if (copy_to_user(argp, &args, sizeof(args))) {
2558 			r = -EFAULT;
2559 			break;
2560 		}
2561 		break;
2562 	}
2563 	case KVM_S390_MEM_OP: {
2564 		struct kvm_s390_mem_op mem_op;
2565 
2566 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
2567 			r = kvm_s390_vm_mem_op(kvm, &mem_op);
2568 		else
2569 			r = -EFAULT;
2570 		break;
2571 	}
2572 	default:
2573 		r = -ENOTTY;
2574 	}
2575 
2576 	return r;
2577 }
2578 
2579 static int kvm_s390_apxa_installed(void)
2580 {
2581 	struct ap_config_info info;
2582 
2583 	if (ap_instructions_available()) {
2584 		if (ap_qci(&info) == 0)
2585 			return info.apxa;
2586 	}
2587 
2588 	return 0;
2589 }
2590 
2591 /*
2592  * The format of the crypto control block (CRYCB) is specified in the 3 low
2593  * order bits of the CRYCB designation (CRYCBD) field as follows:
2594  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2595  *	     AP extended addressing (APXA) facility is installed.
2596  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2597  * Format 2: Both the APXA and MSAX3 facilities are installed.
2598  */
2599 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2600 {
2601 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2602 
2603 	/* Clear the CRYCB format bits - i.e., set format 0 by default */
2604 	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2605 
2606 	/* Check whether MSAX3 is installed */
2607 	if (!test_kvm_facility(kvm, 76))
2608 		return;
2609 
2610 	if (kvm_s390_apxa_installed())
2611 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2612 	else
2613 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2614 }
2615 
2616 /*
2617  * kvm_arch_crypto_set_masks
2618  *
2619  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2620  *	 to be set.
2621  * @apm: the mask identifying the accessible AP adapters
2622  * @aqm: the mask identifying the accessible AP domains
2623  * @adm: the mask identifying the accessible AP control domains
2624  *
2625  * Set the masks that identify the adapters, domains and control domains to
2626  * which the KVM guest is granted access.
2627  *
2628  * Note: The kvm->lock mutex must be locked by the caller before invoking this
2629  *	 function.
2630  */
2631 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2632 			       unsigned long *aqm, unsigned long *adm)
2633 {
2634 	struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2635 
2636 	kvm_s390_vcpu_block_all(kvm);
2637 
2638 	switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2639 	case CRYCB_FORMAT2: /* APCB1 uses 256 bits */
2640 		memcpy(crycb->apcb1.apm, apm, 32);
2641 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2642 			 apm[0], apm[1], apm[2], apm[3]);
2643 		memcpy(crycb->apcb1.aqm, aqm, 32);
2644 		VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2645 			 aqm[0], aqm[1], aqm[2], aqm[3]);
2646 		memcpy(crycb->apcb1.adm, adm, 32);
2647 		VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2648 			 adm[0], adm[1], adm[2], adm[3]);
2649 		break;
2650 	case CRYCB_FORMAT1:
2651 	case CRYCB_FORMAT0: /* Fall through - both use APCB0 */
2652 		memcpy(crycb->apcb0.apm, apm, 8);
2653 		memcpy(crycb->apcb0.aqm, aqm, 2);
2654 		memcpy(crycb->apcb0.adm, adm, 2);
2655 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2656 			 apm[0], *((unsigned short *)aqm),
2657 			 *((unsigned short *)adm));
2658 		break;
2659 	default:	/* Cannot happen */
2660 		break;
2661 	}
2662 
2663 	/* recreate the shadow crycb for each vcpu */
2664 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2665 	kvm_s390_vcpu_unblock_all(kvm);
2666 }
2667 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2668 
2669 /*
2670  * kvm_arch_crypto_clear_masks
2671  *
2672  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2673  *	 to be cleared.
2674  *
2675  * Clear the masks that identify the adapters, domains and control domains to
2676  * which the KVM guest is granted access.
2677  *
2678  * Note: The kvm->lock mutex must be locked by the caller before invoking this
2679  *	 function.
2680  */
2681 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2682 {
2683 	kvm_s390_vcpu_block_all(kvm);
2684 
2685 	memset(&kvm->arch.crypto.crycb->apcb0, 0,
2686 	       sizeof(kvm->arch.crypto.crycb->apcb0));
2687 	memset(&kvm->arch.crypto.crycb->apcb1, 0,
2688 	       sizeof(kvm->arch.crypto.crycb->apcb1));
2689 
2690 	VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2691 	/* recreate the shadow crycb for each vcpu */
2692 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2693 	kvm_s390_vcpu_unblock_all(kvm);
2694 }
2695 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2696 
2697 static u64 kvm_s390_get_initial_cpuid(void)
2698 {
2699 	struct cpuid cpuid;
2700 
2701 	get_cpu_id(&cpuid);
2702 	cpuid.version = 0xff;
2703 	return *((u64 *) &cpuid);
2704 }
2705 
2706 static void kvm_s390_crypto_init(struct kvm *kvm)
2707 {
2708 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2709 	kvm_s390_set_crycb_format(kvm);
2710 	init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);
2711 
2712 	if (!test_kvm_facility(kvm, 76))
2713 		return;
2714 
2715 	/* Enable AES/DEA protected key functions by default */
2716 	kvm->arch.crypto.aes_kw = 1;
2717 	kvm->arch.crypto.dea_kw = 1;
2718 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2719 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2720 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2721 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2722 }
2723 
2724 static void sca_dispose(struct kvm *kvm)
2725 {
2726 	if (kvm->arch.use_esca)
2727 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2728 	else
2729 		free_page((unsigned long)(kvm->arch.sca));
2730 	kvm->arch.sca = NULL;
2731 }
2732 
2733 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2734 {
2735 	gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
2736 	int i, rc;
2737 	char debug_name[16];
2738 	static unsigned long sca_offset;
2739 
2740 	rc = -EINVAL;
2741 #ifdef CONFIG_KVM_S390_UCONTROL
2742 	if (type & ~KVM_VM_S390_UCONTROL)
2743 		goto out_err;
2744 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2745 		goto out_err;
2746 #else
2747 	if (type)
2748 		goto out_err;
2749 #endif
2750 
2751 	rc = s390_enable_sie();
2752 	if (rc)
2753 		goto out_err;
2754 
2755 	rc = -ENOMEM;
2756 
2757 	if (!sclp.has_64bscao)
2758 		alloc_flags |= GFP_DMA;
2759 	rwlock_init(&kvm->arch.sca_lock);
2760 	/* start with basic SCA */
2761 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2762 	if (!kvm->arch.sca)
2763 		goto out_err;
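	/*
	 * Stagger the basic SCAs of different VMs within the page in
	 * 16-byte steps, presumably to spread them over separate cache
	 * lines.
	 */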
2764 	mutex_lock(&kvm_lock);
2765 	sca_offset += 16;
2766 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2767 		sca_offset = 0;
2768 	kvm->arch.sca = (struct bsca_block *)
2769 			((char *) kvm->arch.sca + sca_offset);
2770 	mutex_unlock(&kvm_lock);
2771 
2772 	sprintf(debug_name, "kvm-%u", current->pid);
2773 
2774 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2775 	if (!kvm->arch.dbf)
2776 		goto out_err;
2777 
2778 	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2779 	kvm->arch.sie_page2 =
2780 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
2781 	if (!kvm->arch.sie_page2)
2782 		goto out_err;
2783 
2784 	kvm->arch.sie_page2->kvm = kvm;
2785 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2786 
2787 	for (i = 0; i < kvm_s390_fac_size(); i++) {
2788 		kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
2789 					      (kvm_s390_fac_base[i] |
2790 					       kvm_s390_fac_ext[i]);
2791 		kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
2792 					      kvm_s390_fac_base[i];
2793 	}
2794 	kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2795 
2796 	/* we are always in czam mode - even on pre z14 machines */
2797 	set_kvm_facility(kvm->arch.model.fac_mask, 138);
2798 	set_kvm_facility(kvm->arch.model.fac_list, 138);
2799 	/* we emulate STHYI in kvm */
2800 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
2801 	set_kvm_facility(kvm->arch.model.fac_list, 74);
2802 	if (MACHINE_HAS_TLB_GUEST) {
2803 		set_kvm_facility(kvm->arch.model.fac_mask, 147);
2804 		set_kvm_facility(kvm->arch.model.fac_list, 147);
2805 	}
2806 
2807 	if (css_general_characteristics.aiv && test_facility(65))
2808 		set_kvm_facility(kvm->arch.model.fac_mask, 65);
2809 
2810 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2811 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2812 
2813 	kvm_s390_crypto_init(kvm);
2814 
2815 	mutex_init(&kvm->arch.float_int.ais_lock);
2816 	spin_lock_init(&kvm->arch.float_int.lock);
2817 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
2818 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2819 	init_waitqueue_head(&kvm->arch.ipte_wq);
2820 	mutex_init(&kvm->arch.ipte_mutex);
2821 
2822 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2823 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
2824 
2825 	if (type & KVM_VM_S390_UCONTROL) {
2826 		kvm->arch.gmap = NULL;
2827 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2828 	} else {
2829 		if (sclp.hamax == U64_MAX)
2830 			kvm->arch.mem_limit = TASK_SIZE_MAX;
2831 		else
2832 			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2833 						    sclp.hamax + 1);
2834 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2835 		if (!kvm->arch.gmap)
2836 			goto out_err;
2837 		kvm->arch.gmap->private = kvm;
2838 		kvm->arch.gmap->pfault_enabled = 0;
2839 	}
2840 
2841 	kvm->arch.use_pfmfi = sclp.has_pfmfi;
2842 	kvm->arch.use_skf = sclp.has_skey;
2843 	spin_lock_init(&kvm->arch.start_stop_lock);
2844 	kvm_s390_vsie_init(kvm);
2845 	if (use_gisa)
2846 		kvm_s390_gisa_init(kvm);
2847 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2848 
2849 	return 0;
2850 out_err:
2851 	free_page((unsigned long)kvm->arch.sie_page2);
2852 	debug_unregister(kvm->arch.dbf);
2853 	sca_dispose(kvm);
2854 	KVM_EVENT(3, "creation of vm failed: %d", rc);
2855 	return rc;
2856 }
2857 
2858 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2859 {
2860 	u16 rc, rrc;
2861 
2862 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2863 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2864 	kvm_s390_clear_local_irqs(vcpu);
2865 	kvm_clear_async_pf_completion_queue(vcpu);
2866 	if (!kvm_is_ucontrol(vcpu->kvm))
2867 		sca_del_vcpu(vcpu);
2868 
2869 	if (kvm_is_ucontrol(vcpu->kvm))
2870 		gmap_remove(vcpu->arch.gmap);
2871 
2872 	if (vcpu->kvm->arch.use_cmma)
2873 		kvm_s390_vcpu_unsetup_cmma(vcpu);
2874 	/* We cannot hold the vcpu mutex here; we are already dying */
2875 	if (kvm_s390_pv_cpu_get_handle(vcpu))
2876 		kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2877 	free_page((unsigned long)(vcpu->arch.sie_block));
2878 }
2879 
2880 void kvm_arch_destroy_vm(struct kvm *kvm)
2881 {
2882 	u16 rc, rrc;
2883 
2884 	kvm_destroy_vcpus(kvm);
2885 	sca_dispose(kvm);
2886 	kvm_s390_gisa_destroy(kvm);
2887 	/*
2888 	 * We are already at the end of life and kvm->lock is not taken.
2889 	 * This is ok as the file descriptor is closed by now and nobody
2890 	 * can mess with the pv state. To avoid lockdep_assert_held from
2891 	 * complaining we do not use kvm_s390_pv_is_protected.
2892 	 * complaining, we do not use kvm_s390_pv_is_protected.
2893 	if (kvm_s390_pv_get_handle(kvm))
2894 		kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2895 	debug_unregister(kvm->arch.dbf);
2896 	free_page((unsigned long)kvm->arch.sie_page2);
2897 	if (!kvm_is_ucontrol(kvm))
2898 		gmap_remove(kvm->arch.gmap);
2899 	kvm_s390_destroy_adapters(kvm);
2900 	kvm_s390_clear_float_irqs(kvm);
2901 	kvm_s390_vsie_destroy(kvm);
2902 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2903 }
2904 
2905 /* Section: vcpu related */
2906 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2907 {
2908 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2909 	if (!vcpu->arch.gmap)
2910 		return -ENOMEM;
2911 	vcpu->arch.gmap->private = vcpu->kvm;
2912 
2913 	return 0;
2914 }
2915 
2916 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2917 {
2918 	if (!kvm_s390_use_sca_entries())
2919 		return;
2920 	read_lock(&vcpu->kvm->arch.sca_lock);
2921 	if (vcpu->kvm->arch.use_esca) {
2922 		struct esca_block *sca = vcpu->kvm->arch.sca;
2923 
2924 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2925 		sca->cpu[vcpu->vcpu_id].sda = 0;
2926 	} else {
2927 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2928 
2929 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2930 		sca->cpu[vcpu->vcpu_id].sda = 0;
2931 	}
2932 	read_unlock(&vcpu->kvm->arch.sca_lock);
2933 }
2934 
2935 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2936 {
2937 	if (!kvm_s390_use_sca_entries()) {
2938 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2939 
2940 		/* we still need the basic sca for the ipte control */
2941 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2942 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2943 		return;
2944 	}
2945 	read_lock(&vcpu->kvm->arch.sca_lock);
2946 	if (vcpu->kvm->arch.use_esca) {
2947 		struct esca_block *sca = vcpu->kvm->arch.sca;
2948 
2949 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2950 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2951 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2952 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2953 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2954 	} else {
2955 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2956 
2957 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2958 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2959 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2960 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2961 	}
2962 	read_unlock(&vcpu->kvm->arch.sca_lock);
2963 }
2964 
2965 /* Basic SCA to Extended SCA data copy routines */
2966 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2967 {
2968 	d->sda = s->sda;
2969 	d->sigp_ctrl.c = s->sigp_ctrl.c;
2970 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2971 }
2972 
2973 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2974 {
2975 	int i;
2976 
2977 	d->ipte_control = s->ipte_control;
2978 	d->mcn[0] = s->mcn;
2979 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2980 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2981 }
2982 
2983 static int sca_switch_to_extended(struct kvm *kvm)
2984 {
2985 	struct bsca_block *old_sca = kvm->arch.sca;
2986 	struct esca_block *new_sca;
2987 	struct kvm_vcpu *vcpu;
2988 	unsigned long vcpu_idx;
2989 	u32 scaol, scaoh;
2990 
2991 	if (kvm->arch.use_esca)
2992 		return 0;
2993 
2994 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
2995 	if (!new_sca)
2996 		return -ENOMEM;
2997 
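	/*
	 * Split the 64-bit ESCA origin into the SCAOH/SCAOL fields; the
	 * low 6 bits are masked off since the block is at least 64-byte
	 * aligned.
	 */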
2998 	scaoh = (u32)((u64)(new_sca) >> 32);
2999 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
3000 
3001 	kvm_s390_vcpu_block_all(kvm);
3002 	write_lock(&kvm->arch.sca_lock);
3003 
3004 	sca_copy_b_to_e(new_sca, old_sca);
3005 
3006 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
3007 		vcpu->arch.sie_block->scaoh = scaoh;
3008 		vcpu->arch.sie_block->scaol = scaol;
3009 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
3010 	}
3011 	kvm->arch.sca = new_sca;
3012 	kvm->arch.use_esca = 1;
3013 
3014 	write_unlock(&kvm->arch.sca_lock);
3015 	kvm_s390_vcpu_unblock_all(kvm);
3016 
3017 	free_page((unsigned long)old_sca);
3018 
3019 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
3020 		 old_sca, kvm->arch.sca);
3021 	return 0;
3022 }
3023 
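/*
 * Check whether a vcpu with the given id still fits into the SCA; switch
 * from the basic to the extended SCA if necessary and possible.
 */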
3024 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
3025 {
3026 	int rc;
3027 
3028 	if (!kvm_s390_use_sca_entries()) {
3029 		if (id < KVM_MAX_VCPUS)
3030 			return true;
3031 		return false;
3032 	}
3033 	if (id < KVM_S390_BSCA_CPU_SLOTS)
3034 		return true;
3035 	if (!sclp.has_esca || !sclp.has_64bscao)
3036 		return false;
3037 
3038 	mutex_lock(&kvm->lock);
3039 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
3040 	mutex_unlock(&kvm->lock);
3041 
3042 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
3043 }
3044 
3045 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3046 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3047 {
3048 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
3049 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3050 	vcpu->arch.cputm_start = get_tod_clock_fast();
3051 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3052 }
3053 
3054 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3055 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3056 {
3057 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
3058 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3059 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3060 	vcpu->arch.cputm_start = 0;
3061 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3062 }
3063 
3064 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3065 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3066 {
3067 	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
3068 	vcpu->arch.cputm_enabled = true;
3069 	__start_cpu_timer_accounting(vcpu);
3070 }
3071 
3072 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3073 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3074 {
3075 	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
3076 	__stop_cpu_timer_accounting(vcpu);
3077 	vcpu->arch.cputm_enabled = false;
3078 }
3079 
3080 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3081 {
3082 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3083 	__enable_cpu_timer_accounting(vcpu);
3084 	preempt_enable();
3085 }
3086 
3087 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3088 {
3089 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3090 	__disable_cpu_timer_accounting(vcpu);
3091 	preempt_enable();
3092 }
3093 
3094 /* set the cpu timer - may only be called from the VCPU thread itself */
3095 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3096 {
3097 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3098 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3099 	if (vcpu->arch.cputm_enabled)
3100 		vcpu->arch.cputm_start = get_tod_clock_fast();
3101 	vcpu->arch.sie_block->cputm = cputm;
3102 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3103 	preempt_enable();
3104 }
3105 
3106 /* update and get the cpu timer - can also be called from other VCPU threads */
3107 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3108 {
3109 	unsigned int seq;
3110 	__u64 value;
3111 
3112 	if (unlikely(!vcpu->arch.cputm_enabled))
3113 		return vcpu->arch.sie_block->cputm;
3114 
3115 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3116 	do {
3117 		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3118 		/*
3119 		 * If the writer would ever execute a read in the critical
3120 		 * section, e.g. in irq context, we have a deadlock.
3121 		 */
3122 		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3123 		value = vcpu->arch.sie_block->cputm;
3124 		/* if cputm_start is 0, accounting is being started/stopped */
3125 		if (likely(vcpu->arch.cputm_start))
3126 			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3127 	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3128 	preempt_enable();
3129 	return value;
3130 }
3131 
3132 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3133 {
3134 
3135 	gmap_enable(vcpu->arch.enabled_gmap);
3136 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3137 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3138 		__start_cpu_timer_accounting(vcpu);
3139 	vcpu->cpu = cpu;
3140 }
3141 
3142 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3143 {
3144 	vcpu->cpu = -1;
3145 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3146 		__stop_cpu_timer_accounting(vcpu);
3147 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3148 	vcpu->arch.enabled_gmap = gmap_get_enabled();
3149 	gmap_disable(vcpu->arch.enabled_gmap);
3150 
3151 }
3152 
3153 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3154 {
3155 	mutex_lock(&vcpu->kvm->lock);
3156 	preempt_disable();
3157 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3158 	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3159 	preempt_enable();
3160 	mutex_unlock(&vcpu->kvm->lock);
3161 	if (!kvm_is_ucontrol(vcpu->kvm)) {
3162 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3163 		sca_add_vcpu(vcpu);
3164 	}
3165 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3166 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3167 	/* make vcpu_load load the right gmap on the first trigger */
3168 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3169 }
3170 
3171 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3172 {
3173 	if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3174 	    test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3175 		return true;
3176 	return false;
3177 }
3178 
3179 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3180 {
3181 	/* At least one ECC subfunction must be present */
3182 	return kvm_has_pckmo_subfunc(kvm, 32) ||
3183 	       kvm_has_pckmo_subfunc(kvm, 33) ||
3184 	       kvm_has_pckmo_subfunc(kvm, 34) ||
3185 	       kvm_has_pckmo_subfunc(kvm, 40) ||
3186 	       kvm_has_pckmo_subfunc(kvm, 41);
3187 
3188 }
3189 
3190 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3191 {
3192 	/*
3193 	 * If the AP instructions are not being interpreted and the MSAX3
3194 	 * facility is not configured for the guest, there is nothing to set up.
3195 	 */
3196 	if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3197 		return;
3198 
3199 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3200 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3201 	vcpu->arch.sie_block->eca &= ~ECA_APIE;
3202 	vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3203 
3204 	if (vcpu->kvm->arch.crypto.apie)
3205 		vcpu->arch.sie_block->eca |= ECA_APIE;
3206 
3207 	/* Set up protected key support */
3208 	if (vcpu->kvm->arch.crypto.aes_kw) {
3209 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3210 		/* ecc is also wrapped with AES key */
3211 		if (kvm_has_pckmo_ecc(vcpu->kvm))
3212 			vcpu->arch.sie_block->ecd |= ECD_ECC;
3213 	}
3214 
3215 	if (vcpu->kvm->arch.crypto.dea_kw)
3216 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3217 }
3218 
3219 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3220 {
3221 	free_page(vcpu->arch.sie_block->cbrlo);
3222 	vcpu->arch.sie_block->cbrlo = 0;
3223 }
3224 
3225 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3226 {
3227 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
3228 	if (!vcpu->arch.sie_block->cbrlo)
3229 		return -ENOMEM;
3230 	return 0;
3231 }
3232 
3233 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3234 {
3235 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3236 
3237 	vcpu->arch.sie_block->ibc = model->ibc;
3238 	if (test_kvm_facility(vcpu->kvm, 7))
3239 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3240 }
3241 
3242 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3243 {
3244 	int rc = 0;
3245 	u16 uvrc, uvrrc;
3246 
3247 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3248 						    CPUSTAT_SM |
3249 						    CPUSTAT_STOPPED);
3250 
3251 	if (test_kvm_facility(vcpu->kvm, 78))
3252 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3253 	else if (test_kvm_facility(vcpu->kvm, 8))
3254 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3255 
3256 	kvm_s390_vcpu_setup_model(vcpu);
3257 
3258 	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3259 	if (MACHINE_HAS_ESOP)
3260 		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3261 	if (test_kvm_facility(vcpu->kvm, 9))
3262 		vcpu->arch.sie_block->ecb |= ECB_SRSI;
3263 	if (test_kvm_facility(vcpu->kvm, 73))
3264 		vcpu->arch.sie_block->ecb |= ECB_TE;
3265 	if (!kvm_is_ucontrol(vcpu->kvm))
3266 		vcpu->arch.sie_block->ecb |= ECB_SPECI;
3267 
3268 	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3269 		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3270 	if (test_kvm_facility(vcpu->kvm, 130))
3271 		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3272 	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3273 	if (sclp.has_cei)
3274 		vcpu->arch.sie_block->eca |= ECA_CEI;
3275 	if (sclp.has_ib)
3276 		vcpu->arch.sie_block->eca |= ECA_IB;
3277 	if (sclp.has_siif)
3278 		vcpu->arch.sie_block->eca |= ECA_SII;
3279 	if (sclp.has_sigpif)
3280 		vcpu->arch.sie_block->eca |= ECA_SIGPI;
3281 	if (test_kvm_facility(vcpu->kvm, 129)) {
3282 		vcpu->arch.sie_block->eca |= ECA_VX;
3283 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3284 	}
3285 	if (test_kvm_facility(vcpu->kvm, 139))
3286 		vcpu->arch.sie_block->ecd |= ECD_MEF;
3287 	if (test_kvm_facility(vcpu->kvm, 156))
3288 		vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3289 	if (vcpu->arch.sie_block->gd) {
3290 		vcpu->arch.sie_block->eca |= ECA_AIV;
3291 		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3292 			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3293 	}
3294 	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3295 					| SDNXC;
3296 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3297 
3298 	if (sclp.has_kss)
3299 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3300 	else
3301 		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3302 
3303 	if (vcpu->kvm->arch.use_cmma) {
3304 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
3305 		if (rc)
3306 			return rc;
3307 	}
3308 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3309 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3310 
3311 	vcpu->arch.sie_block->hpid = HPID_KVM;
3312 
3313 	kvm_s390_vcpu_crypto_setup(vcpu);
3314 
3315 	mutex_lock(&vcpu->kvm->lock);
3316 	if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3317 		rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3318 		if (rc)
3319 			kvm_s390_vcpu_unsetup_cmma(vcpu);
3320 	}
3321 	mutex_unlock(&vcpu->kvm->lock);
3322 
3323 	return rc;
3324 }
3325 
3326 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3327 {
3328 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3329 		return -EINVAL;
3330 	return 0;
3331 }
3332 
3333 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3334 {
3335 	struct sie_page *sie_page;
3336 	int rc;
3337 
3338 	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3339 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3340 	if (!sie_page)
3341 		return -ENOMEM;
3342 
3343 	vcpu->arch.sie_block = &sie_page->sie_block;
3344 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3345 
3346 	/* the real guest size will always be smaller than msl */
3347 	vcpu->arch.sie_block->mso = 0;
3348 	vcpu->arch.sie_block->msl = sclp.hamax;
3349 
3350 	vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3351 	spin_lock_init(&vcpu->arch.local_int.lock);
3352 	vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3353 	if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3354 		vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3355 	seqcount_init(&vcpu->arch.cputm_seqcount);
3356 
3357 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3358 	kvm_clear_async_pf_completion_queue(vcpu);
3359 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3360 				    KVM_SYNC_GPRS |
3361 				    KVM_SYNC_ACRS |
3362 				    KVM_SYNC_CRS |
3363 				    KVM_SYNC_ARCH0 |
3364 				    KVM_SYNC_PFAULT |
3365 				    KVM_SYNC_DIAG318;
3366 	kvm_s390_set_prefix(vcpu, 0);
3367 	if (test_kvm_facility(vcpu->kvm, 64))
3368 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3369 	if (test_kvm_facility(vcpu->kvm, 82))
3370 		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3371 	if (test_kvm_facility(vcpu->kvm, 133))
3372 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3373 	if (test_kvm_facility(vcpu->kvm, 156))
3374 		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3375 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
3376 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3377 	 */
3378 	if (MACHINE_HAS_VX)
3379 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3380 	else
3381 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3382 
3383 	if (kvm_is_ucontrol(vcpu->kvm)) {
3384 		rc = __kvm_ucontrol_vcpu_init(vcpu);
3385 		if (rc)
3386 			goto out_free_sie_block;
3387 	}
3388 
3389 	VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3390 		 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3391 	trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3392 
3393 	rc = kvm_s390_vcpu_setup(vcpu);
3394 	if (rc)
3395 		goto out_ucontrol_uninit;
3396 	return 0;
3397 
3398 out_ucontrol_uninit:
3399 	if (kvm_is_ucontrol(vcpu->kvm))
3400 		gmap_remove(vcpu->arch.gmap);
3401 out_free_sie_block:
3402 	free_page((unsigned long)(vcpu->arch.sie_block));
3403 	return rc;
3404 }
3405 
3406 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3407 {
3408 	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
3409 	return kvm_s390_vcpu_has_irq(vcpu, 0);
3410 }
3411 
3412 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3413 {
3414 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3415 }
3416 
3417 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3418 {
3419 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3420 	exit_sie(vcpu);
3421 }
3422 
3423 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3424 {
3425 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3426 }
3427 
3428 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3429 {
3430 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3431 	exit_sie(vcpu);
3432 }
3433 
3434 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3435 {
3436 	return atomic_read(&vcpu->arch.sie_block->prog20) &
3437 	       (PROG_BLOCK_SIE | PROG_REQUEST);
3438 }
3439 
3440 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3441 {
3442 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3443 }
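
/*
 * A minimal usage sketch: callers that must update SIE block state
 * while a VCPU might be running in SIE bracket the update with the
 * helpers above, as kvm_s390_set_tod_clock() below does for all VCPUs:
 *
 *	kvm_s390_vcpu_block(vcpu);	set PROG_BLOCK_SIE and kick SIE
 *	...update vcpu->arch.sie_block fields...
 *	kvm_s390_vcpu_unblock(vcpu);	allow SIE entry again
 */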
3444 
3445 /*
3446  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3447  * If the CPU is not running (e.g. waiting as idle) it returns immediately.
3448  */
3449 void exit_sie(struct kvm_vcpu *vcpu)
3450 {
3451 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3452 	kvm_s390_vsie_kick(vcpu);
3453 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3454 		cpu_relax();
3455 }
3456 
3457 /* Kick a guest cpu out of SIE to process a request synchronously */
3458 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3459 {
3460 	kvm_make_request(req, vcpu);
3461 	kvm_s390_vcpu_request(vcpu);
3462 }
3463 
3464 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3465 			      unsigned long end)
3466 {
3467 	struct kvm *kvm = gmap->private;
3468 	struct kvm_vcpu *vcpu;
3469 	unsigned long prefix;
3470 	unsigned long i;
3471 
3472 	if (gmap_is_shadow(gmap))
3473 		return;
3474 	if (start >= 1UL << 31)
3475 		/* We are only interested in prefix pages */
3476 		return;
3477 	kvm_for_each_vcpu(i, vcpu, kvm) {
3478 		/* match against both prefix pages */
3479 		prefix = kvm_s390_get_prefix(vcpu);
3480 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3481 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3482 				   start, end);
3483 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3484 		}
3485 	}
3486 }
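
/*
 * Note on the range check above: the prefix area covers two consecutive
 * pages, [prefix, prefix + 2 * PAGE_SIZE - 1]. Two ranges [a, b] and
 * [c, d] overlap iff c <= b && a <= d; with a = start, b = end,
 * c = prefix and d = prefix + 2 * PAGE_SIZE - 1 this is exactly the
 * condition tested in the loop. Prefix addresses always lie below 2 GB,
 * hence the early return for start >= 1UL << 31.
 */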
3487 
3488 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3489 {
3490 	/* do not poll with more than halt_poll_max_steal percent of steal time */
3491 	if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3492 	    READ_ONCE(halt_poll_max_steal)) {
3493 		vcpu->stat.halt_no_poll_steal++;
3494 		return true;
3495 	}
3496 	return false;
3497 }
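
/*
 * Unit sketch for the check above, assuming the usual s390 resolution
 * of 4096 CPU-timer units per microsecond:
 *
 *	avg_steal_timer		stolen CPU-timer units per tick
 *	TICK_USEC << 12		total CPU-timer units per tick
 *
 * so avg_steal_timer * 100 / (TICK_USEC << 12) is the steal time as a
 * percentage, compared against the halt_poll_max_steal limit.
 */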
3498 
3499 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3500 {
3501 	/* kvm common code refers to this, but never calls it */
3502 	BUG();
3503 	return 0;
3504 }
3505 
3506 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3507 					   struct kvm_one_reg *reg)
3508 {
3509 	int r = -EINVAL;
3510 
3511 	switch (reg->id) {
3512 	case KVM_REG_S390_TODPR:
3513 		r = put_user(vcpu->arch.sie_block->todpr,
3514 			     (u32 __user *)reg->addr);
3515 		break;
3516 	case KVM_REG_S390_EPOCHDIFF:
3517 		r = put_user(vcpu->arch.sie_block->epoch,
3518 			     (u64 __user *)reg->addr);
3519 		break;
3520 	case KVM_REG_S390_CPU_TIMER:
3521 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
3522 			     (u64 __user *)reg->addr);
3523 		break;
3524 	case KVM_REG_S390_CLOCK_COMP:
3525 		r = put_user(vcpu->arch.sie_block->ckc,
3526 			     (u64 __user *)reg->addr);
3527 		break;
3528 	case KVM_REG_S390_PFTOKEN:
3529 		r = put_user(vcpu->arch.pfault_token,
3530 			     (u64 __user *)reg->addr);
3531 		break;
3532 	case KVM_REG_S390_PFCOMPARE:
3533 		r = put_user(vcpu->arch.pfault_compare,
3534 			     (u64 __user *)reg->addr);
3535 		break;
3536 	case KVM_REG_S390_PFSELECT:
3537 		r = put_user(vcpu->arch.pfault_select,
3538 			     (u64 __user *)reg->addr);
3539 		break;
3540 	case KVM_REG_S390_PP:
3541 		r = put_user(vcpu->arch.sie_block->pp,
3542 			     (u64 __user *)reg->addr);
3543 		break;
3544 	case KVM_REG_S390_GBEA:
3545 		r = put_user(vcpu->arch.sie_block->gbea,
3546 			     (u64 __user *)reg->addr);
3547 		break;
3548 	default:
3549 		break;
3550 	}
3551 
3552 	return r;
3553 }
3554 
3555 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3556 					   struct kvm_one_reg *reg)
3557 {
3558 	int r = -EINVAL;
3559 	__u64 val;
3560 
3561 	switch (reg->id) {
3562 	case KVM_REG_S390_TODPR:
3563 		r = get_user(vcpu->arch.sie_block->todpr,
3564 			     (u32 __user *)reg->addr);
3565 		break;
3566 	case KVM_REG_S390_EPOCHDIFF:
3567 		r = get_user(vcpu->arch.sie_block->epoch,
3568 			     (u64 __user *)reg->addr);
3569 		break;
3570 	case KVM_REG_S390_CPU_TIMER:
3571 		r = get_user(val, (u64 __user *)reg->addr);
3572 		if (!r)
3573 			kvm_s390_set_cpu_timer(vcpu, val);
3574 		break;
3575 	case KVM_REG_S390_CLOCK_COMP:
3576 		r = get_user(vcpu->arch.sie_block->ckc,
3577 			     (u64 __user *)reg->addr);
3578 		break;
3579 	case KVM_REG_S390_PFTOKEN:
3580 		r = get_user(vcpu->arch.pfault_token,
3581 			     (u64 __user *)reg->addr);
3582 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3583 			kvm_clear_async_pf_completion_queue(vcpu);
3584 		break;
3585 	case KVM_REG_S390_PFCOMPARE:
3586 		r = get_user(vcpu->arch.pfault_compare,
3587 			     (u64 __user *)reg->addr);
3588 		break;
3589 	case KVM_REG_S390_PFSELECT:
3590 		r = get_user(vcpu->arch.pfault_select,
3591 			     (u64 __user *)reg->addr);
3592 		break;
3593 	case KVM_REG_S390_PP:
3594 		r = get_user(vcpu->arch.sie_block->pp,
3595 			     (u64 __user *)reg->addr);
3596 		break;
3597 	case KVM_REG_S390_GBEA:
3598 		r = get_user(vcpu->arch.sie_block->gbea,
3599 			     (u64 __user *)reg->addr);
3600 		break;
3601 	default:
3602 		break;
3603 	}
3604 
3605 	return r;
3606 }
3607 
3608 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3609 {
3610 	vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3611 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3612 	memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3613 
3614 	kvm_clear_async_pf_completion_queue(vcpu);
3615 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3616 		kvm_s390_vcpu_stop(vcpu);
3617 	kvm_s390_clear_local_irqs(vcpu);
3618 }
3619 
3620 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3621 {
3622 	/* Initial reset is a superset of the normal reset */
3623 	kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3624 
3625 	/*
3626 	 * This equals the initial CPU reset in the PoP, but we don't switch to ESA.
3627 	 * We reset not only the internal data, but also ...
3628 	 */
3629 	vcpu->arch.sie_block->gpsw.mask = 0;
3630 	vcpu->arch.sie_block->gpsw.addr = 0;
3631 	kvm_s390_set_prefix(vcpu, 0);
3632 	kvm_s390_set_cpu_timer(vcpu, 0);
3633 	vcpu->arch.sie_block->ckc = 0;
3634 	memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3635 	vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3636 	vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3637 
3638 	/* ... the data in sync regs */
3639 	memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3640 	vcpu->run->s.regs.ckc = 0;
3641 	vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3642 	vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3643 	vcpu->run->psw_addr = 0;
3644 	vcpu->run->psw_mask = 0;
3645 	vcpu->run->s.regs.todpr = 0;
3646 	vcpu->run->s.regs.cputm = 0;
3647 	vcpu->run->s.regs.ckc = 0;
3648 	vcpu->run->s.regs.pp = 0;
3649 	vcpu->run->s.regs.gbea = 1;
3650 	vcpu->run->s.regs.fpc = 0;
3651 	/*
3652 	 * Do not reset these registers in the protected case, as some of
3653 	 * them are overlaid and they are not accessible in this case
3654 	 * anyway.
3655 	 */
3656 	if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3657 		vcpu->arch.sie_block->gbea = 1;
3658 		vcpu->arch.sie_block->pp = 0;
3659 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3660 		vcpu->arch.sie_block->todpr = 0;
3661 	}
3662 }
3663 
3664 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3665 {
3666 	struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3667 
3668 	/* Clear reset is a superset of the initial reset */
3669 	kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3670 
3671 	memset(&regs->gprs, 0, sizeof(regs->gprs));
3672 	memset(&regs->vrs, 0, sizeof(regs->vrs));
3673 	memset(&regs->acrs, 0, sizeof(regs->acrs));
3674 	memset(&regs->gscb, 0, sizeof(regs->gscb));
3675 
3676 	regs->etoken = 0;
3677 	regs->etoken_extension = 0;
3678 }
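
/*
 * The three resets above form a strict hierarchy, each one a superset
 * of the previous: normal reset < initial reset < clear reset. They are
 * reachable via the KVM_S390_NORMAL_RESET, KVM_S390_INITIAL_RESET and
 * KVM_S390_CLEAR_RESET ioctls handled in kvm_arch_vcpu_ioctl() below.
 */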
3679 
3680 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3681 {
3682 	vcpu_load(vcpu);
3683 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3684 	vcpu_put(vcpu);
3685 	return 0;
3686 }
3687 
3688 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3689 {
3690 	vcpu_load(vcpu);
3691 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3692 	vcpu_put(vcpu);
3693 	return 0;
3694 }
3695 
3696 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3697 				  struct kvm_sregs *sregs)
3698 {
3699 	vcpu_load(vcpu);
3700 
3701 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3702 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3703 
3704 	vcpu_put(vcpu);
3705 	return 0;
3706 }
3707 
3708 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3709 				  struct kvm_sregs *sregs)
3710 {
3711 	vcpu_load(vcpu);
3712 
3713 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3714 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3715 
3716 	vcpu_put(vcpu);
3717 	return 0;
3718 }
3719 
3720 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3721 {
3722 	int ret = 0;
3723 
3724 	vcpu_load(vcpu);
3725 
3726 	if (test_fp_ctl(fpu->fpc)) {
3727 		ret = -EINVAL;
3728 		goto out;
3729 	}
3730 	vcpu->run->s.regs.fpc = fpu->fpc;
3731 	if (MACHINE_HAS_VX)
3732 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3733 				 (freg_t *) fpu->fprs);
3734 	else
3735 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3736 
3737 out:
3738 	vcpu_put(vcpu);
3739 	return ret;
3740 }
3741 
3742 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3743 {
3744 	vcpu_load(vcpu);
3745 
3746 	/* make sure we have the latest values */
3747 	save_fpu_regs();
3748 	if (MACHINE_HAS_VX)
3749 		convert_vx_to_fp((freg_t *) fpu->fprs,
3750 				 (__vector128 *) vcpu->run->s.regs.vrs);
3751 	else
3752 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3753 	fpu->fpc = vcpu->run->s.regs.fpc;
3754 
3755 	vcpu_put(vcpu);
3756 	return 0;
3757 }
3758 
3759 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3760 {
3761 	int rc = 0;
3762 
3763 	if (!is_vcpu_stopped(vcpu))
3764 		rc = -EBUSY;
3765 	else {
3766 		vcpu->run->psw_mask = psw.mask;
3767 		vcpu->run->psw_addr = psw.addr;
3768 	}
3769 	return rc;
3770 }
3771 
3772 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3773 				  struct kvm_translation *tr)
3774 {
3775 	return -EINVAL; /* not implemented yet */
3776 }
3777 
3778 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3779 			      KVM_GUESTDBG_USE_HW_BP | \
3780 			      KVM_GUESTDBG_ENABLE)
3781 
3782 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3783 					struct kvm_guest_debug *dbg)
3784 {
3785 	int rc = 0;
3786 
3787 	vcpu_load(vcpu);
3788 
3789 	vcpu->guest_debug = 0;
3790 	kvm_s390_clear_bp_data(vcpu);
3791 
3792 	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3793 		rc = -EINVAL;
3794 		goto out;
3795 	}
3796 	if (!sclp.has_gpere) {
3797 		rc = -EINVAL;
3798 		goto out;
3799 	}
3800 
3801 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
3802 		vcpu->guest_debug = dbg->control;
3803 		/* enforce guest PER */
3804 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3805 
3806 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3807 			rc = kvm_s390_import_bp_data(vcpu, dbg);
3808 	} else {
3809 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3810 		vcpu->arch.guestdbg.last_bp = 0;
3811 	}
3812 
3813 	if (rc) {
3814 		vcpu->guest_debug = 0;
3815 		kvm_s390_clear_bp_data(vcpu);
3816 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3817 	}
3818 
3819 out:
3820 	vcpu_put(vcpu);
3821 	return rc;
3822 }
3823 
3824 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3825 				    struct kvm_mp_state *mp_state)
3826 {
3827 	int ret;
3828 
3829 	vcpu_load(vcpu);
3830 
3831 	/* CHECK_STOP and LOAD are not supported yet */
3832 	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3833 				      KVM_MP_STATE_OPERATING;
3834 
3835 	vcpu_put(vcpu);
3836 	return ret;
3837 }
3838 
3839 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3840 				    struct kvm_mp_state *mp_state)
3841 {
3842 	int rc = 0;
3843 
3844 	vcpu_load(vcpu);
3845 
3846 	/* user space knows about this interface - let it control the state */
3847 	kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm);
3848 
3849 	switch (mp_state->mp_state) {
3850 	case KVM_MP_STATE_STOPPED:
3851 		rc = kvm_s390_vcpu_stop(vcpu);
3852 		break;
3853 	case KVM_MP_STATE_OPERATING:
3854 		rc = kvm_s390_vcpu_start(vcpu);
3855 		break;
3856 	case KVM_MP_STATE_LOAD:
3857 		if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3858 			rc = -ENXIO;
3859 			break;
3860 		}
3861 		rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3862 		break;
3863 	case KVM_MP_STATE_CHECK_STOP:
3864 		fallthrough;	/* CHECK_STOP and LOAD are not supported yet */
3865 	default:
3866 		rc = -ENXIO;
3867 	}
3868 
3869 	vcpu_put(vcpu);
3870 	return rc;
3871 }
3872 
3873 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3874 {
3875 	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3876 }
3877 
3878 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3879 {
3880 retry:
3881 	kvm_s390_vcpu_request_handled(vcpu);
3882 	if (!kvm_request_pending(vcpu))
3883 		return 0;
3884 	/*
3885 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3886 	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3887 	 * This ensures that the ipte instruction for this request has
3888 	 * already finished. We might race against a second unmapper that
3889 	 * wants to set the blocking bit. Let's just retry the request loop.
3890 	 */
3891 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3892 		int rc;
3893 		rc = gmap_mprotect_notify(vcpu->arch.gmap,
3894 					  kvm_s390_get_prefix(vcpu),
3895 					  PAGE_SIZE * 2, PROT_WRITE);
3896 		if (rc) {
3897 			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3898 			return rc;
3899 		}
3900 		goto retry;
3901 	}
3902 
3903 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3904 		vcpu->arch.sie_block->ihcpu = 0xffff;
3905 		goto retry;
3906 	}
3907 
3908 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3909 		if (!ibs_enabled(vcpu)) {
3910 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3911 			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3912 		}
3913 		goto retry;
3914 	}
3915 
3916 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3917 		if (ibs_enabled(vcpu)) {
3918 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3919 			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3920 		}
3921 		goto retry;
3922 	}
3923 
3924 	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3925 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3926 		goto retry;
3927 	}
3928 
3929 	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3930 		/*
3931 		 * Disable CMM virtualization; we will emulate the ESSA
3932 		 * instruction manually, in order to provide additional
3933 		 * functionality needed for live migration.
3934 		 */
3935 		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3936 		goto retry;
3937 	}
3938 
3939 	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3940 		/*
3941 		 * Re-enable CMM virtualization if CMMA is available and
3942 		 * CMM has been used.
3943 		 */
3944 		if ((vcpu->kvm->arch.use_cmma) &&
3945 		    (vcpu->kvm->mm->context.uses_cmm))
3946 			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3947 		goto retry;
3948 	}
3949 
3950 	/* nothing to do, just clear the request */
3951 	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3952 	/* we left the vsie handler, nothing to do, just clear the request */
3953 	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3954 
3955 	return 0;
3956 }
3957 
3958 void kvm_s390_set_tod_clock(struct kvm *kvm,
3959 			    const struct kvm_s390_vm_tod_clock *gtod)
3960 {
3961 	struct kvm_vcpu *vcpu;
3962 	union tod_clock clk;
3963 	unsigned long i;
3964 
3965 	mutex_lock(&kvm->lock);
3966 	preempt_disable();
3967 
3968 	store_tod_clock_ext(&clk);
3969 
3970 	kvm->arch.epoch = gtod->tod - clk.tod;
3971 	kvm->arch.epdx = 0;
3972 	if (test_kvm_facility(kvm, 139)) {
3973 		kvm->arch.epdx = gtod->epoch_idx - clk.ei;
3974 		if (kvm->arch.epoch > gtod->tod)
3975 			kvm->arch.epdx -= 1;
3976 	}
3977 
3978 	kvm_s390_vcpu_block_all(kvm);
3979 	kvm_for_each_vcpu(i, vcpu, kvm) {
3980 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3981 		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3982 	}
3983 
3984 	kvm_s390_vcpu_unblock_all(kvm);
3985 	preempt_enable();
3986 	mutex_unlock(&kvm->lock);
3987 }
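
/*
 * Arithmetic note for the epoch computation above: the guest TOD is the
 * host TOD plus the 128-bit value (epdx:epoch). The unsigned subtraction
 * gtod->tod - clk.tod may wrap; when it does (the resulting epoch is
 * larger than gtod->tod), a borrow has to be propagated into the high
 * word, which is what the epdx -= 1 implements.
 */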
3988 
3989 /**
3990  * kvm_arch_fault_in_page - fault-in guest page if necessary
3991  * @vcpu: The corresponding virtual cpu
3992  * @gpa: Guest physical address
3993  * @writable: Whether the page should be writable or not
3994  *
3995  * Make sure that a guest page has been faulted-in on the host.
3996  *
3997  * Return: Zero on success, negative error code otherwise.
3998  */
3999 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
4000 {
4001 	return gmap_fault(vcpu->arch.gmap, gpa,
4002 			  writable ? FAULT_FLAG_WRITE : 0);
4003 }
4004 
4005 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
4006 				      unsigned long token)
4007 {
4008 	struct kvm_s390_interrupt inti;
4009 	struct kvm_s390_irq irq;
4010 
4011 	if (start_token) {
4012 		irq.u.ext.ext_params2 = token;
4013 		irq.type = KVM_S390_INT_PFAULT_INIT;
4014 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
4015 	} else {
4016 		inti.type = KVM_S390_INT_PFAULT_DONE;
4017 		inti.parm64 = token;
4018 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
4019 	}
4020 }
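
/*
 * Note the asymmetry above: an "init" token is injected as a local
 * interrupt on the faulting VCPU, while a "done" token is injected as a
 * floating interrupt on the VM.
 */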
4021 
4022 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
4023 				     struct kvm_async_pf *work)
4024 {
4025 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
4026 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
4027 
4028 	return true;
4029 }
4030 
4031 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
4032 				 struct kvm_async_pf *work)
4033 {
4034 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
4035 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
4036 }
4037 
4038 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
4039 			       struct kvm_async_pf *work)
4040 {
4041 	/* s390 will always inject the page directly */
4042 }
4043 
4044 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
4045 {
4046 	/*
4047 	 * s390 will always inject the page directly,
4048 	 * but we still want check_async_completion to clean up
4049 	 */
4050 	return true;
4051 }
4052 
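/*
 * Arm an async pfault only if every guest-visible precondition holds:
 * the guest registered a valid token, the current PSW matches the
 * select/compare masks it set up, external interrupts and the
 * service-signal subclass are enabled, no other interrupt is pending
 * and pfault handling is enabled on the gmap. Otherwise the caller
 * falls back to synchronous fault-in.
 */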
4053 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
4054 {
4055 	hva_t hva;
4056 	struct kvm_arch_async_pf arch;
4057 
4058 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4059 		return false;
4060 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
4061 	    vcpu->arch.pfault_compare)
4062 		return false;
4063 	if (psw_extint_disabled(vcpu))
4064 		return false;
4065 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
4066 		return false;
4067 	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
4068 		return false;
4069 	if (!vcpu->arch.gmap->pfault_enabled)
4070 		return false;
4071 
4072 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
4073 	hva += current->thread.gmap_addr & ~PAGE_MASK;
4074 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
4075 		return false;
4076 
4077 	return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
4078 }
4079 
4080 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
4081 {
4082 	int rc, cpuflags;
4083 
4084 	/*
4085 	 * On s390 notifications for arriving pages will be delivered directly
4086 	 * to the guest, but the housekeeping for completed pfaults is
4087 	 * handled outside the worker.
4088 	 */
4089 	kvm_check_async_pf_completion(vcpu);
4090 
4091 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4092 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4093 
4094 	if (need_resched())
4095 		schedule();
4096 
4097 	if (!kvm_is_ucontrol(vcpu->kvm)) {
4098 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
4099 		if (rc)
4100 			return rc;
4101 	}
4102 
4103 	rc = kvm_s390_handle_requests(vcpu);
4104 	if (rc)
4105 		return rc;
4106 
4107 	if (guestdbg_enabled(vcpu)) {
4108 		kvm_s390_backup_guest_per_regs(vcpu);
4109 		kvm_s390_patch_guest_per_regs(vcpu);
4110 	}
4111 
4112 	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
4113 
4114 	vcpu->arch.sie_block->icptcode = 0;
4115 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4116 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4117 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
4118 
4119 	return 0;
4120 }
4121 
4122 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4123 {
4124 	struct kvm_s390_pgm_info pgm_info = {
4125 		.code = PGM_ADDRESSING,
4126 	};
4127 	u8 opcode, ilen;
4128 	int rc;
4129 
4130 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4131 	trace_kvm_s390_sie_fault(vcpu);
4132 
4133 	/*
4134 	 * We want to inject an addressing exception, which is defined as a
4135 	 * suppressing or terminating exception. However, since we came here
4136 	 * by a DAT access exception, the PSW still points to the faulting
4137 	 * instruction since DAT exceptions are nullifying. So we've got
4138 	 * to look up the current opcode to get the length of the instruction
4139 	 * to be able to forward the PSW.
4140 	 */
4141 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4142 	ilen = insn_length(opcode);
4143 	if (rc < 0) {
4144 		return rc;
4145 	} else if (rc) {
4146 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
4147 		 * Forward by an arbitrary ilc; injection will take care of
4148 		 * nullification if necessary.
4149 		 */
4150 		pgm_info = vcpu->arch.pgm;
4151 		ilen = 4;
4152 	}
4153 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4154 	kvm_s390_forward_psw(vcpu, ilen);
4155 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4156 }
4157 
4158 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4159 {
4160 	struct mcck_volatile_info *mcck_info;
4161 	struct sie_page *sie_page;
4162 
4163 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4164 		   vcpu->arch.sie_block->icptcode);
4165 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4166 
4167 	if (guestdbg_enabled(vcpu))
4168 		kvm_s390_restore_guest_per_regs(vcpu);
4169 
4170 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4171 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4172 
4173 	if (exit_reason == -EINTR) {
4174 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
4175 		sie_page = container_of(vcpu->arch.sie_block,
4176 					struct sie_page, sie_block);
4177 		mcck_info = &sie_page->mcck_info;
4178 		kvm_s390_reinject_machine_check(vcpu, mcck_info);
4179 		return 0;
4180 	}
4181 
4182 	if (vcpu->arch.sie_block->icptcode > 0) {
4183 		int rc = kvm_handle_sie_intercept(vcpu);
4184 
4185 		if (rc != -EOPNOTSUPP)
4186 			return rc;
4187 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4188 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4189 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4190 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4191 		return -EREMOTE;
4192 	} else if (exit_reason != -EFAULT) {
4193 		vcpu->stat.exit_null++;
4194 		return 0;
4195 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
4196 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4197 		vcpu->run->s390_ucontrol.trans_exc_code =
4198 						current->thread.gmap_addr;
4199 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
4200 		return -EREMOTE;
4201 	} else if (current->thread.gmap_pfault) {
4202 		trace_kvm_s390_major_guest_pfault(vcpu);
4203 		current->thread.gmap_pfault = 0;
4204 		if (kvm_arch_setup_async_pf(vcpu))
4205 			return 0;
4206 		vcpu->stat.pfault_sync++;
4207 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4208 	}
4209 	return vcpu_post_run_fault_in_sie(vcpu);
4210 }
4211 
4212 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4213 static int __vcpu_run(struct kvm_vcpu *vcpu)
4214 {
4215 	int rc, exit_reason;
4216 	struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4217 
4218 	/*
4219 	 * We try to hold kvm->srcu during most of vcpu_run (except when
4220 	 * running the guest), so that memslots (and other stuff) are protected.
4221 	 */
4222 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4223 
4224 	do {
4225 		rc = vcpu_pre_run(vcpu);
4226 		if (rc)
4227 			break;
4228 
4229 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4230 		/*
4231 		 * As PF_VCPU will be used in fault handler, between
4232 		 * guest_enter and guest_exit should be no uaccess.
4233 		 */
4234 		local_irq_disable();
4235 		guest_enter_irqoff();
4236 		__disable_cpu_timer_accounting(vcpu);
4237 		local_irq_enable();
4238 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4239 			memcpy(sie_page->pv_grregs,
4240 			       vcpu->run->s.regs.gprs,
4241 			       sizeof(sie_page->pv_grregs));
4242 		}
4243 		if (test_cpu_flag(CIF_FPU))
4244 			load_fpu_regs();
4245 		exit_reason = sie64a(vcpu->arch.sie_block,
4246 				     vcpu->run->s.regs.gprs);
4247 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4248 			memcpy(vcpu->run->s.regs.gprs,
4249 			       sie_page->pv_grregs,
4250 			       sizeof(sie_page->pv_grregs));
4251 			/*
4252 			 * We're not allowed to inject interrupts on intercepts
4253 			 * that leave the guest state in an "in-between" state
4254 			 * where the next SIE entry will do a continuation.
4255 			 * Fence interrupts in our "internal" PSW.
4256 			 */
4257 			if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4258 			    vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4259 				vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4260 			}
4261 		}
4262 		local_irq_disable();
4263 		__enable_cpu_timer_accounting(vcpu);
4264 		guest_exit_irqoff();
4265 		local_irq_enable();
4266 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4267 
4268 		rc = vcpu_post_run(vcpu, exit_reason);
4269 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4270 
4271 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4272 	return rc;
4273 }
4274 
4275 static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4276 {
4277 	struct kvm_run *kvm_run = vcpu->run;
4278 	struct runtime_instr_cb *riccb;
4279 	struct gs_cb *gscb;
4280 
4281 	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4282 	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4283 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4284 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4285 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4286 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4287 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4288 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4289 	}
4290 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4291 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4292 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4293 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4294 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4295 			kvm_clear_async_pf_completion_queue(vcpu);
4296 	}
4297 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4298 		vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4299 		vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4300 		VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc);
4301 	}
4302 	/*
4303 	 * If userspace sets the riccb (e.g. after migration) to a valid state,
4304 	 * we should enable RI here instead of doing the lazy enablement.
4305 	 */
4306 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4307 	    test_kvm_facility(vcpu->kvm, 64) &&
4308 	    riccb->v &&
4309 	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4310 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4311 		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4312 	}
4313 	/*
4314 	 * If userspace sets the gscb (e.g. after migration) to non-zero,
4315 	 * we should enable GS here instead of doing the lazy enablement.
4316 	 */
4317 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4318 	    test_kvm_facility(vcpu->kvm, 133) &&
4319 	    gscb->gssm &&
4320 	    !vcpu->arch.gs_enabled) {
4321 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4322 		vcpu->arch.sie_block->ecb |= ECB_GS;
4323 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4324 		vcpu->arch.gs_enabled = 1;
4325 	}
4326 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4327 	    test_kvm_facility(vcpu->kvm, 82)) {
4328 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4329 		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4330 	}
4331 	if (MACHINE_HAS_GS) {
4332 		preempt_disable();
4333 		__ctl_set_bit(2, 4);
4334 		if (current->thread.gs_cb) {
4335 			vcpu->arch.host_gscb = current->thread.gs_cb;
4336 			save_gs_cb(vcpu->arch.host_gscb);
4337 		}
4338 		if (vcpu->arch.gs_enabled) {
4339 			current->thread.gs_cb = (struct gs_cb *)
4340 						&vcpu->run->s.regs.gscb;
4341 			restore_gs_cb(current->thread.gs_cb);
4342 		}
4343 		preempt_enable();
4344 	}
4345 	/* SIE will load etoken directly from SDNX and therefore kvm_run */
4346 }
4347 
4348 static void sync_regs(struct kvm_vcpu *vcpu)
4349 {
4350 	struct kvm_run *kvm_run = vcpu->run;
4351 
4352 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4353 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4354 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4355 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4356 		/* some control register changes require a tlb flush */
4357 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4358 	}
4359 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4360 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4361 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4362 	}
4363 	save_access_regs(vcpu->arch.host_acrs);
4364 	restore_access_regs(vcpu->run->s.regs.acrs);
4365 	/* save host (userspace) fprs/vrs */
4366 	save_fpu_regs();
4367 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4368 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4369 	if (MACHINE_HAS_VX)
4370 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4371 	else
4372 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4373 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4374 	if (test_fp_ctl(current->thread.fpu.fpc))
4375 		/* User space provided an invalid FPC, let's clear it */
4376 		current->thread.fpu.fpc = 0;
4377 
4378 	/* Sync fmt2 only data */
4379 	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4380 		sync_regs_fmt2(vcpu);
4381 	} else {
4382 		/*
4383 		 * In several places we have to modify our internal view to
4384 		 * not do things that are disallowed by the ultravisor. For
4385 		 * example we must not inject interrupts after specific exits
4386 		 * (e.g. 112 prefix page not secure). We do this by turning
4387 		 * off the machine check, external and I/O interrupt bits
4388 		 * of our PSW copy. To avoid getting validity intercepts, we
4389 		 * do only accept the condition code from userspace.
4390 		 */
4391 		vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4392 		vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4393 						   PSW_MASK_CC;
4394 	}
4395 
4396 	kvm_run->kvm_dirty_regs = 0;
4397 }
4398 
4399 static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4400 {
4401 	struct kvm_run *kvm_run = vcpu->run;
4402 
4403 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4404 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4405 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4406 	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4407 	kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4408 	if (MACHINE_HAS_GS) {
4409 		preempt_disable();
4410 		__ctl_set_bit(2, 4);
4411 		if (vcpu->arch.gs_enabled)
4412 			save_gs_cb(current->thread.gs_cb);
4413 		current->thread.gs_cb = vcpu->arch.host_gscb;
4414 		restore_gs_cb(vcpu->arch.host_gscb);
4415 		if (!vcpu->arch.host_gscb)
4416 			__ctl_clear_bit(2, 4);
4417 		vcpu->arch.host_gscb = NULL;
4418 		preempt_enable();
4419 	}
4420 	/* SIE will save etoken directly into SDNX and therefore kvm_run */
4421 }
4422 
4423 static void store_regs(struct kvm_vcpu *vcpu)
4424 {
4425 	struct kvm_run *kvm_run = vcpu->run;
4426 
4427 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4428 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4429 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4430 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4431 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4432 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4433 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4434 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4435 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4436 	save_access_regs(vcpu->run->s.regs.acrs);
4437 	restore_access_regs(vcpu->arch.host_acrs);
4438 	/* Save guest register state */
4439 	save_fpu_regs();
4440 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4441 	/* Restore will be done lazily at return */
4442 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4443 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4444 	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4445 		store_regs_fmt2(vcpu);
4446 }
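
/*
 * sync_regs() and store_regs() form a pair around __vcpu_run():
 * sync_regs() moves state that userspace may have modified into the SIE
 * block and host registers before entering the guest, and store_regs()
 * mirrors it back into kvm_run afterwards.
 */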
4447 
4448 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4449 {
4450 	struct kvm_run *kvm_run = vcpu->run;
4451 	int rc;
4452 
4453 	if (kvm_run->immediate_exit)
4454 		return -EINTR;
4455 
4456 	if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4457 	    kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4458 		return -EINVAL;
4459 
4460 	vcpu_load(vcpu);
4461 
4462 	if (guestdbg_exit_pending(vcpu)) {
4463 		kvm_s390_prepare_debug_exit(vcpu);
4464 		rc = 0;
4465 		goto out;
4466 	}
4467 
4468 	kvm_sigset_activate(vcpu);
4469 
4470 	/*
4471 	 * no need to check the return value of vcpu_start: it can only fail
4472 	 * for protvirt, and protvirt implies user-controlled cpu state
4473 	 */
4474 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4475 		kvm_s390_vcpu_start(vcpu);
4476 	} else if (is_vcpu_stopped(vcpu)) {
4477 		pr_err_ratelimited("can't run stopped vcpu %d\n",
4478 				   vcpu->vcpu_id);
4479 		rc = -EINVAL;
4480 		goto out;
4481 	}
4482 
4483 	sync_regs(vcpu);
4484 	enable_cpu_timer_accounting(vcpu);
4485 
4486 	might_fault();
4487 	rc = __vcpu_run(vcpu);
4488 
4489 	if (signal_pending(current) && !rc) {
4490 		kvm_run->exit_reason = KVM_EXIT_INTR;
4491 		rc = -EINTR;
4492 	}
4493 
4494 	if (guestdbg_exit_pending(vcpu) && !rc)  {
4495 		kvm_s390_prepare_debug_exit(vcpu);
4496 		rc = 0;
4497 	}
4498 
4499 	if (rc == -EREMOTE) {
4500 		/* userspace support is needed, kvm_run has been prepared */
4501 		rc = 0;
4502 	}
4503 
4504 	disable_cpu_timer_accounting(vcpu);
4505 	store_regs(vcpu);
4506 
4507 	kvm_sigset_deactivate(vcpu);
4508 
4509 	vcpu->stat.exit_userspace++;
4510 out:
4511 	vcpu_put(vcpu);
4512 	return rc;
4513 }
4514 
4515 /*
4516  * store status at address
4517  * we have two special cases:
4518  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4519  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4520  */
4521 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4522 {
4523 	unsigned char archmode = 1;
4524 	freg_t fprs[NUM_FPRS];
4525 	unsigned int px;
4526 	u64 clkcomp, cputm;
4527 	int rc;
4528 
4529 	px = kvm_s390_get_prefix(vcpu);
4530 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4531 		if (write_guest_abs(vcpu, 163, &archmode, 1))
4532 			return -EFAULT;
4533 		gpa = 0;
4534 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4535 		if (write_guest_real(vcpu, 163, &archmode, 1))
4536 			return -EFAULT;
4537 		gpa = px;
4538 	} else
4539 		gpa -= __LC_FPREGS_SAVE_AREA;
4540 
4541 	/* manually convert vector registers if necessary */
4542 	if (MACHINE_HAS_VX) {
4543 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4544 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4545 				     fprs, 128);
4546 	} else {
4547 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4548 				     vcpu->run->s.regs.fprs, 128);
4549 	}
4550 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4551 			      vcpu->run->s.regs.gprs, 128);
4552 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4553 			      &vcpu->arch.sie_block->gpsw, 16);
4554 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4555 			      &px, 4);
4556 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4557 			      &vcpu->run->s.regs.fpc, 4);
4558 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4559 			      &vcpu->arch.sie_block->todpr, 4);
4560 	cputm = kvm_s390_get_cpu_timer(vcpu);
4561 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4562 			      &cputm, 8);
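	/* the save area holds bits 0-55 of the clock comparator, hence the shift */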
4563 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
4564 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4565 			      &clkcomp, 8);
4566 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4567 			      &vcpu->run->s.regs.acrs, 64);
4568 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4569 			      &vcpu->arch.sie_block->gcr, 128);
4570 	return rc ? -EFAULT : 0;
4571 }
4572 
4573 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4574 {
4575 	/*
4576 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4577 	 * switch in the run ioctl. Let's update our copies before we save
4578 	 * them into the save area.
4579 	 */
4580 	save_fpu_regs();
4581 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4582 	save_access_regs(vcpu->run->s.regs.acrs);
4583 
4584 	return kvm_s390_store_status_unloaded(vcpu, addr);
4585 }
4586 
4587 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4588 {
4589 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4590 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4591 }
4592 
4593 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4594 {
4595 	unsigned long i;
4596 	struct kvm_vcpu *vcpu;
4597 
4598 	kvm_for_each_vcpu(i, vcpu, kvm) {
4599 		__disable_ibs_on_vcpu(vcpu);
4600 	}
4601 }
4602 
4603 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4604 {
4605 	if (!sclp.has_ibs)
4606 		return;
4607 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4608 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4609 }
4610 
4611 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4612 {
4613 	int i, online_vcpus, r = 0, started_vcpus = 0;
4614 
4615 	if (!is_vcpu_stopped(vcpu))
4616 		return 0;
4617 
4618 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4619 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4620 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4621 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4622 
4623 	/* Let's tell the UV that we want to change into the operating state */
4624 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4625 		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4626 		if (r) {
4627 			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4628 			return r;
4629 		}
4630 	}
4631 
4632 	for (i = 0; i < online_vcpus; i++) {
4633 		if (!is_vcpu_stopped(kvm_get_vcpu(vcpu->kvm, i)))
4634 			started_vcpus++;
4635 	}
4636 
4637 	if (started_vcpus == 0) {
4638 		/* we're the only active VCPU -> speed it up */
4639 		__enable_ibs_on_vcpu(vcpu);
4640 	} else if (started_vcpus == 1) {
4641 		/*
4642 		 * As we are starting a second VCPU, we have to disable
4643 		 * the IBS facility on all VCPUs to remove potentially
4644 		 * outstanding ENABLE requests.
4645 		 */
4646 		__disable_ibs_on_all_vcpus(vcpu->kvm);
4647 	}
4648 
4649 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4650 	/*
4651 	 * The real PSW might have changed due to a RESTART interpreted by the
4652 	 * ultravisor. We block all interrupts and let the next sie exit
4653 	 * refresh our view.
4654 	 */
4655 	if (kvm_s390_pv_cpu_is_protected(vcpu))
4656 		vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4657 	/*
4658 	 * Another VCPU might have used IBS while we were offline.
4659 	 * Let's play safe and flush the VCPU at startup.
4660 	 */
4661 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4662 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4663 	return 0;
4664 }
4665 
4666 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4667 {
4668 	int i, online_vcpus, r = 0, started_vcpus = 0;
4669 	struct kvm_vcpu *started_vcpu = NULL;
4670 
4671 	if (is_vcpu_stopped(vcpu))
4672 		return 0;
4673 
4674 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4675 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4676 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4677 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4678 
4679 	/* Let's tell the UV that we want to change into the stopped state */
4680 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4681 		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4682 		if (r) {
4683 			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4684 			return r;
4685 		}
4686 	}
4687 
4688 	/*
4689 	 * Set the VCPU to STOPPED and THEN clear the interrupt flag,
4690 	 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
4691 	 * have been fully processed. This will ensure that the VCPU
4692 	 * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
4693 	 */
4694 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4695 	kvm_s390_clear_stop_irq(vcpu);
4696 
4697 	__disable_ibs_on_vcpu(vcpu);
4698 
4699 	for (i = 0; i < online_vcpus; i++) {
4700 		struct kvm_vcpu *tmp = kvm_get_vcpu(vcpu->kvm, i);
4701 
4702 		if (!is_vcpu_stopped(tmp)) {
4703 			started_vcpus++;
4704 			started_vcpu = tmp;
4705 		}
4706 	}
4707 
4708 	if (started_vcpus == 1) {
4709 		/*
4710 		 * As we only have one VCPU left, we want to enable the
4711 		 * IBS facility for that VCPU to speed it up.
4712 		 */
4713 		__enable_ibs_on_vcpu(started_vcpu);
4714 	}
4715 
4716 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4717 	return 0;
4718 }
4719 
4720 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4721 				     struct kvm_enable_cap *cap)
4722 {
4723 	int r;
4724 
4725 	if (cap->flags)
4726 		return -EINVAL;
4727 
4728 	switch (cap->cap) {
4729 	case KVM_CAP_S390_CSS_SUPPORT:
4730 		if (!vcpu->kvm->arch.css_support) {
4731 			vcpu->kvm->arch.css_support = 1;
4732 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4733 			trace_kvm_s390_enable_css(vcpu->kvm);
4734 		}
4735 		r = 0;
4736 		break;
4737 	default:
4738 		r = -EINVAL;
4739 		break;
4740 	}
4741 	return r;
4742 }
4743 
4744 static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu,
4745 				  struct kvm_s390_mem_op *mop)
4746 {
4747 	void __user *uaddr = (void __user *)mop->buf;
4748 	int r = 0;
4749 
4750 	if (mop->flags || !mop->size)
4751 		return -EINVAL;
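	/* an unsigned sum that wrapped around is smaller than either addend */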
4752 	if (mop->size + mop->sida_offset < mop->size)
4753 		return -EINVAL;
4754 	if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4755 		return -E2BIG;
4756 	if (!kvm_s390_pv_cpu_is_protected(vcpu))
4757 		return -EINVAL;
4758 
4759 	switch (mop->op) {
4760 	case KVM_S390_MEMOP_SIDA_READ:
4761 		if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4762 				 mop->sida_offset), mop->size))
4763 			r = -EFAULT;
4764 
4765 		break;
4766 	case KVM_S390_MEMOP_SIDA_WRITE:
4767 		if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4768 				   mop->sida_offset), uaddr, mop->size))
4769 			r = -EFAULT;
4770 		break;
4771 	}
4772 	return r;
4773 }
4774 
4775 static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu,
4776 				 struct kvm_s390_mem_op *mop)
4777 {
4778 	void __user *uaddr = (void __user *)mop->buf;
4779 	void *tmpbuf = NULL;
4780 	int r = 0;
4781 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4782 				    | KVM_S390_MEMOP_F_CHECK_ONLY
4783 				    | KVM_S390_MEMOP_F_SKEY_PROTECTION;
4784 
4785 	if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4786 		return -EINVAL;
4787 	if (mop->size > MEM_OP_MAX_SIZE)
4788 		return -E2BIG;
4789 	if (kvm_s390_pv_cpu_is_protected(vcpu))
4790 		return -EINVAL;
4791 	if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
4792 		if (access_key_invalid(mop->key))
4793 			return -EINVAL;
4794 	} else {
4795 		mop->key = 0;
4796 	}
4797 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4798 		tmpbuf = vmalloc(mop->size);
4799 		if (!tmpbuf)
4800 			return -ENOMEM;
4801 	}
4802 
4803 	switch (mop->op) {
4804 	case KVM_S390_MEMOP_LOGICAL_READ:
4805 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4806 			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
4807 					    GACC_FETCH, mop->key);
4808 			break;
4809 		}
4810 		r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
4811 					mop->size, mop->key);
4812 		if (r == 0) {
4813 			if (copy_to_user(uaddr, tmpbuf, mop->size))
4814 				r = -EFAULT;
4815 		}
4816 		break;
4817 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4818 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4819 			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
4820 					    GACC_STORE, mop->key);
4821 			break;
4822 		}
4823 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4824 			r = -EFAULT;
4825 			break;
4826 		}
4827 		r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
4828 					 mop->size, mop->key);
4829 		break;
4830 	}
4831 
4832 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4833 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4834 
4835 	vfree(tmpbuf);
4836 	return r;
4837 }
4838 
4839 static long kvm_s390_vcpu_memsida_op(struct kvm_vcpu *vcpu,
4840 				     struct kvm_s390_mem_op *mop)
4841 {
4842 	int r, srcu_idx;
4843 
4844 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4845 
4846 	switch (mop->op) {
4847 	case KVM_S390_MEMOP_LOGICAL_READ:
4848 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4849 		r = kvm_s390_vcpu_mem_op(vcpu, mop);
4850 		break;
4851 	case KVM_S390_MEMOP_SIDA_READ:
4852 	case KVM_S390_MEMOP_SIDA_WRITE:
4853 		/* we are locked against sida going away by the vcpu->mutex */
4854 		r = kvm_s390_vcpu_sida_op(vcpu, mop);
4855 		break;
4856 	default:
4857 		r = -EINVAL;
4858 	}
4859 
4860 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4861 	return r;
4862 }
4863 
4864 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4865 			       unsigned int ioctl, unsigned long arg)
4866 {
4867 	struct kvm_vcpu *vcpu = filp->private_data;
4868 	void __user *argp = (void __user *)arg;
4869 
4870 	switch (ioctl) {
4871 	case KVM_S390_IRQ: {
4872 		struct kvm_s390_irq s390irq;
4873 
4874 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4875 			return -EFAULT;
4876 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4877 	}
4878 	case KVM_S390_INTERRUPT: {
4879 		struct kvm_s390_interrupt s390int;
4880 		struct kvm_s390_irq s390irq = {};
4881 
4882 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
4883 			return -EFAULT;
4884 		if (s390int_to_s390irq(&s390int, &s390irq))
4885 			return -EINVAL;
4886 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4887 	}
4888 	}
4889 	return -ENOIOCTLCMD;
4890 }
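
/*
 * Note: the async ioctl path above runs without taking the vcpu mutex
 * (there is no vcpu_load()), so userspace can inject an interrupt into
 * a VCPU that is currently running in SIE.
 */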
4891 
4892 long kvm_arch_vcpu_ioctl(struct file *filp,
4893 			 unsigned int ioctl, unsigned long arg)
4894 {
4895 	struct kvm_vcpu *vcpu = filp->private_data;
4896 	void __user *argp = (void __user *)arg;
4897 	int idx;
4898 	long r;
4899 	u16 rc, rrc;
4900 
4901 	vcpu_load(vcpu);
4902 
4903 	switch (ioctl) {
4904 	case KVM_S390_STORE_STATUS:
4905 		idx = srcu_read_lock(&vcpu->kvm->srcu);
4906 		r = kvm_s390_store_status_unloaded(vcpu, arg);
4907 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
4908 		break;
4909 	case KVM_S390_SET_INITIAL_PSW: {
4910 		psw_t psw;
4911 
4912 		r = -EFAULT;
4913 		if (copy_from_user(&psw, argp, sizeof(psw)))
4914 			break;
4915 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4916 		break;
4917 	}
4918 	case KVM_S390_CLEAR_RESET:
4919 		r = 0;
4920 		kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4921 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4922 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4923 					  UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4924 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4925 				   rc, rrc);
4926 		}
4927 		break;
4928 	case KVM_S390_INITIAL_RESET:
4929 		r = 0;
4930 		kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4931 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4932 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4933 					  UVC_CMD_CPU_RESET_INITIAL,
4934 					  &rc, &rrc);
4935 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4936 				   rc, rrc);
4937 		}
4938 		break;
4939 	case KVM_S390_NORMAL_RESET:
4940 		r = 0;
4941 		kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4942 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4943 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4944 					  UVC_CMD_CPU_RESET, &rc, &rrc);
4945 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4946 				   rc, rrc);
4947 		}
4948 		break;
4949 	case KVM_SET_ONE_REG:
4950 	case KVM_GET_ONE_REG: {
4951 		struct kvm_one_reg reg;
4952 		r = -EINVAL;
4953 		if (kvm_s390_pv_cpu_is_protected(vcpu))
4954 			break;
4955 		r = -EFAULT;
4956 		if (copy_from_user(&reg, argp, sizeof(reg)))
4957 			break;
4958 		if (ioctl == KVM_SET_ONE_REG)
4959 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4960 		else
4961 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4962 		break;
4963 	}
4964 #ifdef CONFIG_KVM_S390_UCONTROL
4965 	case KVM_S390_UCAS_MAP: {
4966 		struct kvm_s390_ucas_mapping ucasmap;
4967 
4968 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4969 			r = -EFAULT;
4970 			break;
4971 		}
4972 
4973 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4974 			r = -EINVAL;
4975 			break;
4976 		}
4977 
4978 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4979 				     ucasmap.vcpu_addr, ucasmap.length);
4980 		break;
4981 	}
4982 	case KVM_S390_UCAS_UNMAP: {
4983 		struct kvm_s390_ucas_mapping ucasmap;
4984 
4985 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4986 			r = -EFAULT;
4987 			break;
4988 		}
4989 
4990 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4991 			r = -EINVAL;
4992 			break;
4993 		}
4994 
4995 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4996 			ucasmap.length);
4997 		break;
4998 	}
4999 #endif
5000 	case KVM_S390_VCPU_FAULT: {
5001 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
5002 		break;
5003 	}
5004 	case KVM_ENABLE_CAP:
5005 	{
5006 		struct kvm_enable_cap cap;
5007 		r = -EFAULT;
5008 		if (copy_from_user(&cap, argp, sizeof(cap)))
5009 			break;
5010 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
5011 		break;
5012 	}
5013 	case KVM_S390_MEM_OP: {
5014 		struct kvm_s390_mem_op mem_op;
5015 
5016 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
5017 			r = kvm_s390_vcpu_memsida_op(vcpu, &mem_op);
5018 		else
5019 			r = -EFAULT;
5020 		break;
5021 	}
5022 	case KVM_S390_SET_IRQ_STATE: {
5023 		struct kvm_s390_irq_state irq_state;
5024 
5025 		r = -EFAULT;
5026 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
5027 			break;
5028 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
5029 		    irq_state.len == 0 ||
5030 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
5031 			r = -EINVAL;
5032 			break;
5033 		}
5034 		/* do not use irq_state.flags, it will break old QEMUs */
5035 		r = kvm_s390_set_irq_state(vcpu,
5036 					   (void __user *) irq_state.buf,
5037 					   irq_state.len);
5038 		break;
5039 	}
5040 	case KVM_S390_GET_IRQ_STATE: {
5041 		struct kvm_s390_irq_state irq_state;
5042 
5043 		r = -EFAULT;
5044 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
5045 			break;
5046 		if (irq_state.len == 0) {
5047 			r = -EINVAL;
5048 			break;
5049 		}
5050 		/* do not use irq_state.flags, it will break old QEMUs */
5051 		r = kvm_s390_get_irq_state(vcpu,
5052 					   (__u8 __user *)  irq_state.buf,
5053 					   irq_state.len);
5054 		break;
5055 	}
5056 	default:
5057 		r = -ENOTTY;
5058 	}
5059 
5060 	vcpu_put(vcpu);
5061 	return r;
5062 }
5063 
5064 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
5065 {
5066 #ifdef CONFIG_KVM_S390_UCONTROL
5067 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
5068 		 && (kvm_is_ucontrol(vcpu->kvm))) {
5069 		vmf->page = virt_to_page(vcpu->arch.sie_block);
5070 		get_page(vmf->page);
5071 		return 0;
5072 	}
5073 #endif
5074 	return VM_FAULT_SIGBUS;
5075 }
5076 
5077 /* Section: memory related */
5078 int kvm_arch_prepare_memory_region(struct kvm *kvm,
5079 				   const struct kvm_memory_slot *old,
5080 				   struct kvm_memory_slot *new,
5081 				   enum kvm_mr_change change)
5082 {
5083 	gpa_t size;
5084 
5085 	/* When we are protected, we should not change the memory slots */
5086 	if (kvm_s390_pv_get_handle(kvm))
5087 		return -EINVAL;
5088 
5089 	if (change == KVM_MR_DELETE || change == KVM_MR_FLAGS_ONLY)
5090 		return 0;
5091 
5092 	/* A few sanity checks: memory slots must start and end on a segment
5093 	 * boundary (1 MB). The memory in userland may be fragmented across
5094 	 * several vmas; it is fine to mmap() and munmap() within this slot
5095 	 * at any time after this call. */
5096 
5097 	if (new->userspace_addr & 0xffffful)
5098 		return -EINVAL;
5099 
5100 	size = new->npages * PAGE_SIZE;
5101 	if (size & 0xffffful)
5102 		return -EINVAL;
5103 
5104 	if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit)
5105 		return -EINVAL;
5106 
5107 	return 0;
5108 }
5109 
5110 void kvm_arch_commit_memory_region(struct kvm *kvm,
5111 				struct kvm_memory_slot *old,
5112 				const struct kvm_memory_slot *new,
5113 				enum kvm_mr_change change)
5114 {
5115 	int rc = 0;
5116 
5117 	switch (change) {
5118 	case KVM_MR_DELETE:
5119 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5120 					old->npages * PAGE_SIZE);
5121 		break;
5122 	case KVM_MR_MOVE:
5123 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5124 					old->npages * PAGE_SIZE);
5125 		if (rc)
5126 			break;
5127 		fallthrough;
5128 	case KVM_MR_CREATE:
5129 		rc = gmap_map_segment(kvm->arch.gmap, new->userspace_addr,
5130 				      new->base_gfn * PAGE_SIZE,
5131 				      new->npages * PAGE_SIZE);
5132 		break;
5133 	case KVM_MR_FLAGS_ONLY:
5134 		break;
5135 	default:
5136 		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5137 	}
5138 	if (rc)
5139 		pr_warn("failed to commit memory region\n");
5140 	return;
5141 }
5142 
5143 static inline unsigned long nonhyp_mask(int i)
5144 {
5145 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5146 
5147 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5148 }
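
/*
 * Worked example for nonhyp_mask(): sclp.hmfai packs sixteen 2-bit
 * fields; (hmfai << i * 2) >> 30 extracts field i as a value 0..3, and
 * the returned mask keeps the low 48, 32, 16 or 0 bits of the 64-bit
 * facility word:
 *
 *	field 0 -> 0x0000ffffffffffff
 *	field 1 -> 0x00000000ffffffff
 *	field 2 -> 0x000000000000ffff
 *	field 3 -> 0x0000000000000000
 */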
5149 
5150 static int __init kvm_s390_init(void)
5151 {
5152 	int i;
5153 
5154 	if (!sclp.has_sief2) {
5155 		pr_info("SIE is not available\n");
5156 		return -ENODEV;
5157 	}
5158 
5159 	if (nested && hpage) {
5160 		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5161 		return -EINVAL;
5162 	}
5163 
5164 	for (i = 0; i < 16; i++)
5165 		kvm_s390_fac_base[i] |=
5166 			stfle_fac_list[i] & nonhyp_mask(i);
5167 
5168 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5169 }
5170 
5171 static void __exit kvm_s390_exit(void)
5172 {
5173 	kvm_exit();
5174 }
5175 
5176 module_init(kvm_s390_init);
5177 module_exit(kvm_s390_exit);
5178 
5179 /*
5180  * Enable autoloading of the kvm module.
5181  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5182  * since x86 takes a different approach.
5183  */
5184 #include <linux/miscdevice.h>
5185 MODULE_ALIAS_MISCDEV(KVM_MINOR);
5186 MODULE_ALIAS("devname:kvm");
5187