xref: /openbmc/linux/arch/s390/kvm/kvm-s390.c (revision 6a656832aa75784d02dccd8d37fc5c0896064c2e)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2020
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13 
14 #define KMSG_COMPONENT "kvm-s390"
15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
16 
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34 #include <linux/pgtable.h>
35 
36 #include <asm/asm-offsets.h>
37 #include <asm/lowcore.h>
38 #include <asm/stp.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include <asm/ap.h>
47 #include <asm/uv.h>
48 #include <asm/fpu/api.h>
49 #include "kvm-s390.h"
50 #include "gaccess.h"
51 
52 #define CREATE_TRACE_POINTS
53 #include "trace.h"
54 #include "trace-s390.h"
55 
56 #define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
57 #define LOCAL_IRQS 32
58 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
59 			   (KVM_MAX_VCPUS + LOCAL_IRQS))
60 
61 const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
62 	KVM_GENERIC_VM_STATS(),
63 	STATS_DESC_COUNTER(VM, inject_io),
64 	STATS_DESC_COUNTER(VM, inject_float_mchk),
65 	STATS_DESC_COUNTER(VM, inject_pfault_done),
66 	STATS_DESC_COUNTER(VM, inject_service_signal),
67 	STATS_DESC_COUNTER(VM, inject_virtio)
68 };
69 
70 const struct kvm_stats_header kvm_vm_stats_header = {
71 	.name_size = KVM_STATS_NAME_SIZE,
72 	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
73 	.id_offset = sizeof(struct kvm_stats_header),
74 	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
75 	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
76 		       sizeof(kvm_vm_stats_desc),
77 };
78 
79 const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
80 	KVM_GENERIC_VCPU_STATS(),
81 	STATS_DESC_COUNTER(VCPU, exit_userspace),
82 	STATS_DESC_COUNTER(VCPU, exit_null),
83 	STATS_DESC_COUNTER(VCPU, exit_external_request),
84 	STATS_DESC_COUNTER(VCPU, exit_io_request),
85 	STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
86 	STATS_DESC_COUNTER(VCPU, exit_stop_request),
87 	STATS_DESC_COUNTER(VCPU, exit_validity),
88 	STATS_DESC_COUNTER(VCPU, exit_instruction),
89 	STATS_DESC_COUNTER(VCPU, exit_pei),
90 	STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
91 	STATS_DESC_COUNTER(VCPU, instruction_lctl),
92 	STATS_DESC_COUNTER(VCPU, instruction_lctlg),
93 	STATS_DESC_COUNTER(VCPU, instruction_stctl),
94 	STATS_DESC_COUNTER(VCPU, instruction_stctg),
95 	STATS_DESC_COUNTER(VCPU, exit_program_interruption),
96 	STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
97 	STATS_DESC_COUNTER(VCPU, exit_operation_exception),
98 	STATS_DESC_COUNTER(VCPU, deliver_ckc),
99 	STATS_DESC_COUNTER(VCPU, deliver_cputm),
100 	STATS_DESC_COUNTER(VCPU, deliver_external_call),
101 	STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
102 	STATS_DESC_COUNTER(VCPU, deliver_service_signal),
103 	STATS_DESC_COUNTER(VCPU, deliver_virtio),
104 	STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
105 	STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
106 	STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
107 	STATS_DESC_COUNTER(VCPU, deliver_program),
108 	STATS_DESC_COUNTER(VCPU, deliver_io),
109 	STATS_DESC_COUNTER(VCPU, deliver_machine_check),
110 	STATS_DESC_COUNTER(VCPU, exit_wait_state),
111 	STATS_DESC_COUNTER(VCPU, inject_ckc),
112 	STATS_DESC_COUNTER(VCPU, inject_cputm),
113 	STATS_DESC_COUNTER(VCPU, inject_external_call),
114 	STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
115 	STATS_DESC_COUNTER(VCPU, inject_mchk),
116 	STATS_DESC_COUNTER(VCPU, inject_pfault_init),
117 	STATS_DESC_COUNTER(VCPU, inject_program),
118 	STATS_DESC_COUNTER(VCPU, inject_restart),
119 	STATS_DESC_COUNTER(VCPU, inject_set_prefix),
120 	STATS_DESC_COUNTER(VCPU, inject_stop_signal),
121 	STATS_DESC_COUNTER(VCPU, instruction_epsw),
122 	STATS_DESC_COUNTER(VCPU, instruction_gs),
123 	STATS_DESC_COUNTER(VCPU, instruction_io_other),
124 	STATS_DESC_COUNTER(VCPU, instruction_lpsw),
125 	STATS_DESC_COUNTER(VCPU, instruction_lpswe),
126 	STATS_DESC_COUNTER(VCPU, instruction_pfmf),
127 	STATS_DESC_COUNTER(VCPU, instruction_ptff),
128 	STATS_DESC_COUNTER(VCPU, instruction_sck),
129 	STATS_DESC_COUNTER(VCPU, instruction_sckpf),
130 	STATS_DESC_COUNTER(VCPU, instruction_stidp),
131 	STATS_DESC_COUNTER(VCPU, instruction_spx),
132 	STATS_DESC_COUNTER(VCPU, instruction_stpx),
133 	STATS_DESC_COUNTER(VCPU, instruction_stap),
134 	STATS_DESC_COUNTER(VCPU, instruction_iske),
135 	STATS_DESC_COUNTER(VCPU, instruction_ri),
136 	STATS_DESC_COUNTER(VCPU, instruction_rrbe),
137 	STATS_DESC_COUNTER(VCPU, instruction_sske),
138 	STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
139 	STATS_DESC_COUNTER(VCPU, instruction_stsi),
140 	STATS_DESC_COUNTER(VCPU, instruction_stfl),
141 	STATS_DESC_COUNTER(VCPU, instruction_tb),
142 	STATS_DESC_COUNTER(VCPU, instruction_tpi),
143 	STATS_DESC_COUNTER(VCPU, instruction_tprot),
144 	STATS_DESC_COUNTER(VCPU, instruction_tsch),
145 	STATS_DESC_COUNTER(VCPU, instruction_sie),
146 	STATS_DESC_COUNTER(VCPU, instruction_essa),
147 	STATS_DESC_COUNTER(VCPU, instruction_sthyi),
148 	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
149 	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
150 	STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
151 	STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
152 	STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
153 	STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
154 	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
155 	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
156 	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
157 	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
158 	STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
159 	STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
160 	STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
161 	STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
162 	STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
163 	STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
164 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
165 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
166 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
167 	STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
168 	STATS_DESC_COUNTER(VCPU, diag_9c_forward),
169 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
170 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
171 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
172 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
173 	STATS_DESC_COUNTER(VCPU, pfault_sync)
174 };
175 
176 const struct kvm_stats_header kvm_vcpu_stats_header = {
177 	.name_size = KVM_STATS_NAME_SIZE,
178 	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
179 	.id_offset = sizeof(struct kvm_stats_header),
180 	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
181 	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
182 		       sizeof(kvm_vcpu_stats_desc),
183 };
184 
185 /* allow nested virtualization in KVM (if enabled by user space) */
186 static int nested;
187 module_param(nested, int, S_IRUGO);
188 MODULE_PARM_DESC(nested, "Nested virtualization support");
189 
190 /* allow 1m huge page guest backing, if !nested */
191 static int hpage;
192 module_param(hpage, int, 0444);
193 MODULE_PARM_DESC(hpage, "1m huge page backing support");
194 
195 /* maximum percentage of steal time for polling.  >100 is treated like 100 */
196 static u8 halt_poll_max_steal = 10;
197 module_param(halt_poll_max_steal, byte, 0644);
198 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
199 
200 /* if set to true, the GISA will be initialized and used if available */
201 static bool use_gisa  = true;
202 module_param(use_gisa, bool, 0644);
203 MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
204 
205 /* maximum diag9c forwarding per second */
206 unsigned int diag9c_forwarding_hz;
207 module_param(diag9c_forwarding_hz, uint, 0644);
208 MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
209 
210 /*
211  * For now we handle at most 16 double words as this is what the s390 base
212  * kernel handles and stores in the prefix page. If we ever need to go beyond
213  * this, this requires changes to code, but the external uapi can stay.
214  */
215 #define SIZE_INTERNAL 16
216 
217 /*
218  * Base feature mask that defines default mask for facilities. Consists of the
219  * defines in FACILITIES_KVM and the non-hypervisor managed bits.
220  */
221 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
222 /*
223  * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
224  * and defines the facilities that can be enabled via a cpu model.
225  */
226 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
227 
228 static unsigned long kvm_s390_fac_size(void)
229 {
230 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
231 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
232 	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
233 		sizeof(stfle_fac_list));
234 
235 	return SIZE_INTERNAL;
236 }
237 
238 /* available cpu features supported by kvm */
239 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
240 /* available subfunctions indicated via query / "test bit" */
241 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
242 
243 static struct gmap_notifier gmap_notifier;
244 static struct gmap_notifier vsie_gmap_notifier;
245 debug_info_t *kvm_s390_dbf;
246 debug_info_t *kvm_s390_dbf_uv;
247 
248 /* Section: not file related */
249 int kvm_arch_hardware_enable(void)
250 {
251 	/* every s390 is virtualization enabled ;-) */
252 	return 0;
253 }
254 
255 int kvm_arch_check_processor_compat(void *opaque)
256 {
257 	return 0;
258 }
259 
260 /* forward declarations */
261 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
262 			      unsigned long end);
263 static int sca_switch_to_extended(struct kvm *kvm);
264 
265 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
266 {
267 	u8 delta_idx = 0;
268 
269 	/*
270 	 * The TOD jumps by delta, we have to compensate this by adding
271 	 * -delta to the epoch.
272 	 */
273 	delta = -delta;
274 
275 	/* sign-extension - we're adding to signed values below */
276 	if ((s64)delta < 0)
277 		delta_idx = -1;
278 
279 	scb->epoch += delta;
280 	if (scb->ecd & ECD_MEF) {
281 		scb->epdx += delta_idx;
282 		if (scb->epoch < delta)
283 			scb->epdx += 1;
284 	}
285 }
286 
287 /*
288  * This callback is executed during stop_machine(). All CPUs are therefore
289  * temporarily stopped. In order not to change guest behavior, we have to
290  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
291  * so a CPU won't be stopped while calculating with the epoch.
292  */
293 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
294 			  void *v)
295 {
296 	struct kvm *kvm;
297 	struct kvm_vcpu *vcpu;
298 	unsigned long i;
299 	unsigned long long *delta = v;
300 
301 	list_for_each_entry(kvm, &vm_list, vm_list) {
302 		kvm_for_each_vcpu(i, vcpu, kvm) {
303 			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
304 			if (i == 0) {
305 				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
306 				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
307 			}
308 			if (vcpu->arch.cputm_enabled)
309 				vcpu->arch.cputm_start += *delta;
310 			if (vcpu->arch.vsie_block)
311 				kvm_clock_sync_scb(vcpu->arch.vsie_block,
312 						   *delta);
313 		}
314 	}
315 	return NOTIFY_OK;
316 }
317 
318 static struct notifier_block kvm_clock_notifier = {
319 	.notifier_call = kvm_clock_sync,
320 };
321 
322 int kvm_arch_hardware_setup(void *opaque)
323 {
324 	gmap_notifier.notifier_call = kvm_gmap_notifier;
325 	gmap_register_pte_notifier(&gmap_notifier);
326 	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
327 	gmap_register_pte_notifier(&vsie_gmap_notifier);
328 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
329 				       &kvm_clock_notifier);
330 	return 0;
331 }
332 
333 void kvm_arch_hardware_unsetup(void)
334 {
335 	gmap_unregister_pte_notifier(&gmap_notifier);
336 	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
337 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
338 					 &kvm_clock_notifier);
339 }
340 
341 static void allow_cpu_feat(unsigned long nr)
342 {
343 	set_bit_inv(nr, kvm_s390_available_cpu_feat);
344 }
345 
346 static inline int plo_test_bit(unsigned char nr)
347 {
348 	unsigned long function = (unsigned long)nr | 0x100;
349 	int cc;
350 
351 	asm volatile(
352 		"	lgr	0,%[function]\n"
353 		/* Parameter registers are ignored for "test bit" */
354 		"	plo	0,0,0,0(0)\n"
355 		"	ipm	%0\n"
356 		"	srl	%0,28\n"
357 		: "=d" (cc)
358 		: [function] "d" (function)
359 		: "cc", "0");
360 	return cc == 0;
361 }
362 
363 static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
364 {
365 	asm volatile(
366 		"	lghi	0,0\n"
367 		"	lgr	1,%[query]\n"
368 		/* Parameter registers are ignored */
369 		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
370 		:
371 		: [query] "d" ((unsigned long)query), [opc] "i" (opcode)
372 		: "cc", "memory", "0", "1");
373 }
374 
375 #define INSN_SORTL 0xb938
376 #define INSN_DFLTCC 0xb939
377 
378 static void kvm_s390_cpu_feat_init(void)
379 {
380 	int i;
381 
382 	for (i = 0; i < 256; ++i) {
383 		if (plo_test_bit(i))
384 			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
385 	}
386 
387 	if (test_facility(28)) /* TOD-clock steering */
388 		ptff(kvm_s390_available_subfunc.ptff,
389 		     sizeof(kvm_s390_available_subfunc.ptff),
390 		     PTFF_QAF);
391 
392 	if (test_facility(17)) { /* MSA */
393 		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
394 			      kvm_s390_available_subfunc.kmac);
395 		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
396 			      kvm_s390_available_subfunc.kmc);
397 		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
398 			      kvm_s390_available_subfunc.km);
399 		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
400 			      kvm_s390_available_subfunc.kimd);
401 		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
402 			      kvm_s390_available_subfunc.klmd);
403 	}
404 	if (test_facility(76)) /* MSA3 */
405 		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
406 			      kvm_s390_available_subfunc.pckmo);
407 	if (test_facility(77)) { /* MSA4 */
408 		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
409 			      kvm_s390_available_subfunc.kmctr);
410 		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
411 			      kvm_s390_available_subfunc.kmf);
412 		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
413 			      kvm_s390_available_subfunc.kmo);
414 		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
415 			      kvm_s390_available_subfunc.pcc);
416 	}
417 	if (test_facility(57)) /* MSA5 */
418 		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
419 			      kvm_s390_available_subfunc.ppno);
420 
421 	if (test_facility(146)) /* MSA8 */
422 		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
423 			      kvm_s390_available_subfunc.kma);
424 
425 	if (test_facility(155)) /* MSA9 */
426 		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
427 			      kvm_s390_available_subfunc.kdsa);
428 
429 	if (test_facility(150)) /* SORTL */
430 		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
431 
432 	if (test_facility(151)) /* DFLTCC */
433 		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
434 
435 	if (MACHINE_HAS_ESOP)
436 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
437 	/*
438 	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
439 	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
440 	 */
441 	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
442 	    !test_facility(3) || !nested)
443 		return;
444 	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
445 	if (sclp.has_64bscao)
446 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
447 	if (sclp.has_siif)
448 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
449 	if (sclp.has_gpere)
450 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
451 	if (sclp.has_gsls)
452 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
453 	if (sclp.has_ib)
454 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
455 	if (sclp.has_cei)
456 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
457 	if (sclp.has_ibs)
458 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
459 	if (sclp.has_kss)
460 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
461 	/*
462 	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
463 	 * all skey handling functions read/set the skey from the PGSTE
464 	 * instead of the real storage key.
465 	 *
466 	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
467 	 * pages being detected as preserved although they are resident.
468 	 *
469 	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
470 	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
471 	 *
472 	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
473 	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
474 	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
475 	 *
476 	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
477 	 * cannot easily shadow the SCA because of the ipte lock.
478 	 */
479 }
480 
481 int kvm_arch_init(void *opaque)
482 {
483 	int rc = -ENOMEM;
484 
485 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
486 	if (!kvm_s390_dbf)
487 		return -ENOMEM;
488 
489 	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
490 	if (!kvm_s390_dbf_uv)
491 		goto out;
492 
493 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
494 	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
495 		goto out;
496 
497 	kvm_s390_cpu_feat_init();
498 
499 	/* Register floating interrupt controller interface. */
500 	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
501 	if (rc) {
502 		pr_err("A FLIC registration call failed with rc=%d\n", rc);
503 		goto out;
504 	}
505 
506 	rc = kvm_s390_gib_init(GAL_ISC);
507 	if (rc)
508 		goto out;
509 
510 	return 0;
511 
512 out:
513 	kvm_arch_exit();
514 	return rc;
515 }
516 
517 void kvm_arch_exit(void)
518 {
519 	kvm_s390_gib_destroy();
520 	debug_unregister(kvm_s390_dbf);
521 	debug_unregister(kvm_s390_dbf_uv);
522 }
523 
524 /* Section: device related */
525 long kvm_arch_dev_ioctl(struct file *filp,
526 			unsigned int ioctl, unsigned long arg)
527 {
528 	if (ioctl == KVM_S390_ENABLE_SIE)
529 		return s390_enable_sie();
530 	return -EINVAL;
531 }
532 
533 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
534 {
535 	int r;
536 
537 	switch (ext) {
538 	case KVM_CAP_S390_PSW:
539 	case KVM_CAP_S390_GMAP:
540 	case KVM_CAP_SYNC_MMU:
541 #ifdef CONFIG_KVM_S390_UCONTROL
542 	case KVM_CAP_S390_UCONTROL:
543 #endif
544 	case KVM_CAP_ASYNC_PF:
545 	case KVM_CAP_SYNC_REGS:
546 	case KVM_CAP_ONE_REG:
547 	case KVM_CAP_ENABLE_CAP:
548 	case KVM_CAP_S390_CSS_SUPPORT:
549 	case KVM_CAP_IOEVENTFD:
550 	case KVM_CAP_DEVICE_CTRL:
551 	case KVM_CAP_S390_IRQCHIP:
552 	case KVM_CAP_VM_ATTRIBUTES:
553 	case KVM_CAP_MP_STATE:
554 	case KVM_CAP_IMMEDIATE_EXIT:
555 	case KVM_CAP_S390_INJECT_IRQ:
556 	case KVM_CAP_S390_USER_SIGP:
557 	case KVM_CAP_S390_USER_STSI:
558 	case KVM_CAP_S390_SKEYS:
559 	case KVM_CAP_S390_IRQ_STATE:
560 	case KVM_CAP_S390_USER_INSTR0:
561 	case KVM_CAP_S390_CMMA_MIGRATION:
562 	case KVM_CAP_S390_AIS:
563 	case KVM_CAP_S390_AIS_MIGRATION:
564 	case KVM_CAP_S390_VCPU_RESETS:
565 	case KVM_CAP_SET_GUEST_DEBUG:
566 	case KVM_CAP_S390_DIAG318:
567 		r = 1;
568 		break;
569 	case KVM_CAP_SET_GUEST_DEBUG2:
570 		r = KVM_GUESTDBG_VALID_MASK;
571 		break;
572 	case KVM_CAP_S390_HPAGE_1M:
573 		r = 0;
574 		if (hpage && !kvm_is_ucontrol(kvm))
575 			r = 1;
576 		break;
577 	case KVM_CAP_S390_MEM_OP:
578 		r = MEM_OP_MAX_SIZE;
579 		break;
580 	case KVM_CAP_NR_VCPUS:
581 	case KVM_CAP_MAX_VCPUS:
582 	case KVM_CAP_MAX_VCPU_ID:
583 		r = KVM_S390_BSCA_CPU_SLOTS;
584 		if (!kvm_s390_use_sca_entries())
585 			r = KVM_MAX_VCPUS;
586 		else if (sclp.has_esca && sclp.has_64bscao)
587 			r = KVM_S390_ESCA_CPU_SLOTS;
588 		if (ext == KVM_CAP_NR_VCPUS)
589 			r = min_t(unsigned int, num_online_cpus(), r);
590 		break;
591 	case KVM_CAP_S390_COW:
592 		r = MACHINE_HAS_ESOP;
593 		break;
594 	case KVM_CAP_S390_VECTOR_REGISTERS:
595 		r = MACHINE_HAS_VX;
596 		break;
597 	case KVM_CAP_S390_RI:
598 		r = test_facility(64);
599 		break;
600 	case KVM_CAP_S390_GS:
601 		r = test_facility(133);
602 		break;
603 	case KVM_CAP_S390_BPB:
604 		r = test_facility(82);
605 		break;
606 	case KVM_CAP_S390_PROTECTED:
607 		r = is_prot_virt_host();
608 		break;
609 	default:
610 		r = 0;
611 	}
612 	return r;
613 }
614 
615 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
616 {
617 	int i;
618 	gfn_t cur_gfn, last_gfn;
619 	unsigned long gaddr, vmaddr;
620 	struct gmap *gmap = kvm->arch.gmap;
621 	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
622 
623 	/* Loop over all guest segments */
624 	cur_gfn = memslot->base_gfn;
625 	last_gfn = memslot->base_gfn + memslot->npages;
626 	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
627 		gaddr = gfn_to_gpa(cur_gfn);
628 		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
629 		if (kvm_is_error_hva(vmaddr))
630 			continue;
631 
632 		bitmap_zero(bitmap, _PAGE_ENTRIES);
633 		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
634 		for (i = 0; i < _PAGE_ENTRIES; i++) {
635 			if (test_bit(i, bitmap))
636 				mark_page_dirty(kvm, cur_gfn + i);
637 		}
638 
639 		if (fatal_signal_pending(current))
640 			return;
641 		cond_resched();
642 	}
643 }
644 
645 /* Section: vm related */
646 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
647 
648 /*
649  * Get (and clear) the dirty memory log for a memory slot.
650  */
651 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
652 			       struct kvm_dirty_log *log)
653 {
654 	int r;
655 	unsigned long n;
656 	struct kvm_memory_slot *memslot;
657 	int is_dirty;
658 
659 	if (kvm_is_ucontrol(kvm))
660 		return -EINVAL;
661 
662 	mutex_lock(&kvm->slots_lock);
663 
664 	r = -EINVAL;
665 	if (log->slot >= KVM_USER_MEM_SLOTS)
666 		goto out;
667 
668 	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
669 	if (r)
670 		goto out;
671 
672 	/* Clear the dirty log */
673 	if (is_dirty) {
674 		n = kvm_dirty_bitmap_bytes(memslot);
675 		memset(memslot->dirty_bitmap, 0, n);
676 	}
677 	r = 0;
678 out:
679 	mutex_unlock(&kvm->slots_lock);
680 	return r;
681 }
682 
683 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
684 {
685 	unsigned long i;
686 	struct kvm_vcpu *vcpu;
687 
688 	kvm_for_each_vcpu(i, vcpu, kvm) {
689 		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
690 	}
691 }
692 
693 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
694 {
695 	int r;
696 
697 	if (cap->flags)
698 		return -EINVAL;
699 
700 	switch (cap->cap) {
701 	case KVM_CAP_S390_IRQCHIP:
702 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
703 		kvm->arch.use_irqchip = 1;
704 		r = 0;
705 		break;
706 	case KVM_CAP_S390_USER_SIGP:
707 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
708 		kvm->arch.user_sigp = 1;
709 		r = 0;
710 		break;
711 	case KVM_CAP_S390_VECTOR_REGISTERS:
712 		mutex_lock(&kvm->lock);
713 		if (kvm->created_vcpus) {
714 			r = -EBUSY;
715 		} else if (MACHINE_HAS_VX) {
716 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
717 			set_kvm_facility(kvm->arch.model.fac_list, 129);
718 			if (test_facility(134)) {
719 				set_kvm_facility(kvm->arch.model.fac_mask, 134);
720 				set_kvm_facility(kvm->arch.model.fac_list, 134);
721 			}
722 			if (test_facility(135)) {
723 				set_kvm_facility(kvm->arch.model.fac_mask, 135);
724 				set_kvm_facility(kvm->arch.model.fac_list, 135);
725 			}
726 			if (test_facility(148)) {
727 				set_kvm_facility(kvm->arch.model.fac_mask, 148);
728 				set_kvm_facility(kvm->arch.model.fac_list, 148);
729 			}
730 			if (test_facility(152)) {
731 				set_kvm_facility(kvm->arch.model.fac_mask, 152);
732 				set_kvm_facility(kvm->arch.model.fac_list, 152);
733 			}
734 			if (test_facility(192)) {
735 				set_kvm_facility(kvm->arch.model.fac_mask, 192);
736 				set_kvm_facility(kvm->arch.model.fac_list, 192);
737 			}
738 			r = 0;
739 		} else
740 			r = -EINVAL;
741 		mutex_unlock(&kvm->lock);
742 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
743 			 r ? "(not available)" : "(success)");
744 		break;
745 	case KVM_CAP_S390_RI:
746 		r = -EINVAL;
747 		mutex_lock(&kvm->lock);
748 		if (kvm->created_vcpus) {
749 			r = -EBUSY;
750 		} else if (test_facility(64)) {
751 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
752 			set_kvm_facility(kvm->arch.model.fac_list, 64);
753 			r = 0;
754 		}
755 		mutex_unlock(&kvm->lock);
756 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
757 			 r ? "(not available)" : "(success)");
758 		break;
759 	case KVM_CAP_S390_AIS:
760 		mutex_lock(&kvm->lock);
761 		if (kvm->created_vcpus) {
762 			r = -EBUSY;
763 		} else {
764 			set_kvm_facility(kvm->arch.model.fac_mask, 72);
765 			set_kvm_facility(kvm->arch.model.fac_list, 72);
766 			r = 0;
767 		}
768 		mutex_unlock(&kvm->lock);
769 		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
770 			 r ? "(not available)" : "(success)");
771 		break;
772 	case KVM_CAP_S390_GS:
773 		r = -EINVAL;
774 		mutex_lock(&kvm->lock);
775 		if (kvm->created_vcpus) {
776 			r = -EBUSY;
777 		} else if (test_facility(133)) {
778 			set_kvm_facility(kvm->arch.model.fac_mask, 133);
779 			set_kvm_facility(kvm->arch.model.fac_list, 133);
780 			r = 0;
781 		}
782 		mutex_unlock(&kvm->lock);
783 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
784 			 r ? "(not available)" : "(success)");
785 		break;
786 	case KVM_CAP_S390_HPAGE_1M:
787 		mutex_lock(&kvm->lock);
788 		if (kvm->created_vcpus)
789 			r = -EBUSY;
790 		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
791 			r = -EINVAL;
792 		else {
793 			r = 0;
794 			mmap_write_lock(kvm->mm);
795 			kvm->mm->context.allow_gmap_hpage_1m = 1;
796 			mmap_write_unlock(kvm->mm);
797 			/*
798 			 * We might have to create fake 4k page
799 			 * tables. To avoid that the hardware works on
800 			 * stale PGSTEs, we emulate these instructions.
801 			 */
802 			kvm->arch.use_skf = 0;
803 			kvm->arch.use_pfmfi = 0;
804 		}
805 		mutex_unlock(&kvm->lock);
806 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
807 			 r ? "(not available)" : "(success)");
808 		break;
809 	case KVM_CAP_S390_USER_STSI:
810 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
811 		kvm->arch.user_stsi = 1;
812 		r = 0;
813 		break;
814 	case KVM_CAP_S390_USER_INSTR0:
815 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
816 		kvm->arch.user_instr0 = 1;
817 		icpt_operexc_on_all_vcpus(kvm);
818 		r = 0;
819 		break;
820 	default:
821 		r = -EINVAL;
822 		break;
823 	}
824 	return r;
825 }
826 
827 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
828 {
829 	int ret;
830 
831 	switch (attr->attr) {
832 	case KVM_S390_VM_MEM_LIMIT_SIZE:
833 		ret = 0;
834 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
835 			 kvm->arch.mem_limit);
836 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
837 			ret = -EFAULT;
838 		break;
839 	default:
840 		ret = -ENXIO;
841 		break;
842 	}
843 	return ret;
844 }
845 
846 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
847 {
848 	int ret;
849 	unsigned int idx;
850 	switch (attr->attr) {
851 	case KVM_S390_VM_MEM_ENABLE_CMMA:
852 		ret = -ENXIO;
853 		if (!sclp.has_cmma)
854 			break;
855 
856 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
857 		mutex_lock(&kvm->lock);
858 		if (kvm->created_vcpus)
859 			ret = -EBUSY;
860 		else if (kvm->mm->context.allow_gmap_hpage_1m)
861 			ret = -EINVAL;
862 		else {
863 			kvm->arch.use_cmma = 1;
864 			/* Not compatible with cmma. */
865 			kvm->arch.use_pfmfi = 0;
866 			ret = 0;
867 		}
868 		mutex_unlock(&kvm->lock);
869 		break;
870 	case KVM_S390_VM_MEM_CLR_CMMA:
871 		ret = -ENXIO;
872 		if (!sclp.has_cmma)
873 			break;
874 		ret = -EINVAL;
875 		if (!kvm->arch.use_cmma)
876 			break;
877 
878 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
879 		mutex_lock(&kvm->lock);
880 		idx = srcu_read_lock(&kvm->srcu);
881 		s390_reset_cmma(kvm->arch.gmap->mm);
882 		srcu_read_unlock(&kvm->srcu, idx);
883 		mutex_unlock(&kvm->lock);
884 		ret = 0;
885 		break;
886 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
887 		unsigned long new_limit;
888 
889 		if (kvm_is_ucontrol(kvm))
890 			return -EINVAL;
891 
892 		if (get_user(new_limit, (u64 __user *)attr->addr))
893 			return -EFAULT;
894 
895 		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
896 		    new_limit > kvm->arch.mem_limit)
897 			return -E2BIG;
898 
899 		if (!new_limit)
900 			return -EINVAL;
901 
902 		/* gmap_create takes last usable address */
903 		if (new_limit != KVM_S390_NO_MEM_LIMIT)
904 			new_limit -= 1;
905 
906 		ret = -EBUSY;
907 		mutex_lock(&kvm->lock);
908 		if (!kvm->created_vcpus) {
909 			/* gmap_create will round the limit up */
910 			struct gmap *new = gmap_create(current->mm, new_limit);
911 
912 			if (!new) {
913 				ret = -ENOMEM;
914 			} else {
915 				gmap_remove(kvm->arch.gmap);
916 				new->private = kvm;
917 				kvm->arch.gmap = new;
918 				ret = 0;
919 			}
920 		}
921 		mutex_unlock(&kvm->lock);
922 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
923 		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
924 			 (void *) kvm->arch.gmap->asce);
925 		break;
926 	}
927 	default:
928 		ret = -ENXIO;
929 		break;
930 	}
931 	return ret;
932 }
933 
934 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
935 
936 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
937 {
938 	struct kvm_vcpu *vcpu;
939 	unsigned long i;
940 
941 	kvm_s390_vcpu_block_all(kvm);
942 
943 	kvm_for_each_vcpu(i, vcpu, kvm) {
944 		kvm_s390_vcpu_crypto_setup(vcpu);
945 		/* recreate the shadow crycb by leaving the VSIE handler */
946 		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
947 	}
948 
949 	kvm_s390_vcpu_unblock_all(kvm);
950 }
951 
952 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
953 {
954 	mutex_lock(&kvm->lock);
955 	switch (attr->attr) {
956 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
957 		if (!test_kvm_facility(kvm, 76)) {
958 			mutex_unlock(&kvm->lock);
959 			return -EINVAL;
960 		}
961 		get_random_bytes(
962 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
963 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
964 		kvm->arch.crypto.aes_kw = 1;
965 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
966 		break;
967 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
968 		if (!test_kvm_facility(kvm, 76)) {
969 			mutex_unlock(&kvm->lock);
970 			return -EINVAL;
971 		}
972 		get_random_bytes(
973 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
974 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
975 		kvm->arch.crypto.dea_kw = 1;
976 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
977 		break;
978 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
979 		if (!test_kvm_facility(kvm, 76)) {
980 			mutex_unlock(&kvm->lock);
981 			return -EINVAL;
982 		}
983 		kvm->arch.crypto.aes_kw = 0;
984 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
985 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
986 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
987 		break;
988 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
989 		if (!test_kvm_facility(kvm, 76)) {
990 			mutex_unlock(&kvm->lock);
991 			return -EINVAL;
992 		}
993 		kvm->arch.crypto.dea_kw = 0;
994 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
995 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
996 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
997 		break;
998 	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
999 		if (!ap_instructions_available()) {
1000 			mutex_unlock(&kvm->lock);
1001 			return -EOPNOTSUPP;
1002 		}
1003 		kvm->arch.crypto.apie = 1;
1004 		break;
1005 	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1006 		if (!ap_instructions_available()) {
1007 			mutex_unlock(&kvm->lock);
1008 			return -EOPNOTSUPP;
1009 		}
1010 		kvm->arch.crypto.apie = 0;
1011 		break;
1012 	default:
1013 		mutex_unlock(&kvm->lock);
1014 		return -ENXIO;
1015 	}
1016 
1017 	kvm_s390_vcpu_crypto_reset_all(kvm);
1018 	mutex_unlock(&kvm->lock);
1019 	return 0;
1020 }
1021 
1022 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
1023 {
1024 	unsigned long cx;
1025 	struct kvm_vcpu *vcpu;
1026 
1027 	kvm_for_each_vcpu(cx, vcpu, kvm)
1028 		kvm_s390_sync_request(req, vcpu);
1029 }
1030 
1031 /*
1032  * Must be called with kvm->srcu held to avoid races on memslots, and with
1033  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1034  */
1035 static int kvm_s390_vm_start_migration(struct kvm *kvm)
1036 {
1037 	struct kvm_memory_slot *ms;
1038 	struct kvm_memslots *slots;
1039 	unsigned long ram_pages = 0;
1040 	int slotnr;
1041 
1042 	/* migration mode already enabled */
1043 	if (kvm->arch.migration_mode)
1044 		return 0;
1045 	slots = kvm_memslots(kvm);
1046 	if (!slots || !slots->used_slots)
1047 		return -EINVAL;
1048 
1049 	if (!kvm->arch.use_cmma) {
1050 		kvm->arch.migration_mode = 1;
1051 		return 0;
1052 	}
1053 	/* mark all the pages in active slots as dirty */
1054 	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1055 		ms = slots->memslots + slotnr;
1056 		if (!ms->dirty_bitmap)
1057 			return -EINVAL;
1058 		/*
1059 		 * The second half of the bitmap is only used on x86,
1060 		 * and would be wasted otherwise, so we put it to good
1061 		 * use here to keep track of the state of the storage
1062 		 * attributes.
1063 		 */
1064 		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1065 		ram_pages += ms->npages;
1066 	}
1067 	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1068 	kvm->arch.migration_mode = 1;
1069 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1070 	return 0;
1071 }
1072 
1073 /*
1074  * Must be called with kvm->slots_lock to avoid races with ourselves and
1075  * kvm_s390_vm_start_migration.
1076  */
1077 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1078 {
1079 	/* migration mode already disabled */
1080 	if (!kvm->arch.migration_mode)
1081 		return 0;
1082 	kvm->arch.migration_mode = 0;
1083 	if (kvm->arch.use_cmma)
1084 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1085 	return 0;
1086 }
1087 
1088 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1089 				     struct kvm_device_attr *attr)
1090 {
1091 	int res = -ENXIO;
1092 
1093 	mutex_lock(&kvm->slots_lock);
1094 	switch (attr->attr) {
1095 	case KVM_S390_VM_MIGRATION_START:
1096 		res = kvm_s390_vm_start_migration(kvm);
1097 		break;
1098 	case KVM_S390_VM_MIGRATION_STOP:
1099 		res = kvm_s390_vm_stop_migration(kvm);
1100 		break;
1101 	default:
1102 		break;
1103 	}
1104 	mutex_unlock(&kvm->slots_lock);
1105 
1106 	return res;
1107 }
1108 
1109 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1110 				     struct kvm_device_attr *attr)
1111 {
1112 	u64 mig = kvm->arch.migration_mode;
1113 
1114 	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1115 		return -ENXIO;
1116 
1117 	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1118 		return -EFAULT;
1119 	return 0;
1120 }
1121 
1122 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1123 {
1124 	struct kvm_s390_vm_tod_clock gtod;
1125 
1126 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1127 		return -EFAULT;
1128 
1129 	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1130 		return -EINVAL;
1131 	kvm_s390_set_tod_clock(kvm, &gtod);
1132 
1133 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1134 		gtod.epoch_idx, gtod.tod);
1135 
1136 	return 0;
1137 }
1138 
1139 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1140 {
1141 	u8 gtod_high;
1142 
1143 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1144 					   sizeof(gtod_high)))
1145 		return -EFAULT;
1146 
1147 	if (gtod_high != 0)
1148 		return -EINVAL;
1149 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1150 
1151 	return 0;
1152 }
1153 
1154 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1155 {
1156 	struct kvm_s390_vm_tod_clock gtod = { 0 };
1157 
1158 	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1159 			   sizeof(gtod.tod)))
1160 		return -EFAULT;
1161 
1162 	kvm_s390_set_tod_clock(kvm, &gtod);
1163 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1164 	return 0;
1165 }
1166 
1167 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1168 {
1169 	int ret;
1170 
1171 	if (attr->flags)
1172 		return -EINVAL;
1173 
1174 	switch (attr->attr) {
1175 	case KVM_S390_VM_TOD_EXT:
1176 		ret = kvm_s390_set_tod_ext(kvm, attr);
1177 		break;
1178 	case KVM_S390_VM_TOD_HIGH:
1179 		ret = kvm_s390_set_tod_high(kvm, attr);
1180 		break;
1181 	case KVM_S390_VM_TOD_LOW:
1182 		ret = kvm_s390_set_tod_low(kvm, attr);
1183 		break;
1184 	default:
1185 		ret = -ENXIO;
1186 		break;
1187 	}
1188 	return ret;
1189 }
1190 
1191 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1192 				   struct kvm_s390_vm_tod_clock *gtod)
1193 {
1194 	union tod_clock clk;
1195 
1196 	preempt_disable();
1197 
1198 	store_tod_clock_ext(&clk);
1199 
1200 	gtod->tod = clk.tod + kvm->arch.epoch;
1201 	gtod->epoch_idx = 0;
1202 	if (test_kvm_facility(kvm, 139)) {
1203 		gtod->epoch_idx = clk.ei + kvm->arch.epdx;
1204 		if (gtod->tod < clk.tod)
1205 			gtod->epoch_idx += 1;
1206 	}
1207 
1208 	preempt_enable();
1209 }
1210 
1211 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1212 {
1213 	struct kvm_s390_vm_tod_clock gtod;
1214 
1215 	memset(&gtod, 0, sizeof(gtod));
1216 	kvm_s390_get_tod_clock(kvm, &gtod);
1217 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1218 		return -EFAULT;
1219 
1220 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1221 		gtod.epoch_idx, gtod.tod);
1222 	return 0;
1223 }
1224 
1225 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1226 {
1227 	u8 gtod_high = 0;
1228 
1229 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
1230 					 sizeof(gtod_high)))
1231 		return -EFAULT;
1232 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1233 
1234 	return 0;
1235 }
1236 
1237 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1238 {
1239 	u64 gtod;
1240 
1241 	gtod = kvm_s390_get_tod_clock_fast(kvm);
1242 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1243 		return -EFAULT;
1244 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1245 
1246 	return 0;
1247 }
1248 
1249 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1250 {
1251 	int ret;
1252 
1253 	if (attr->flags)
1254 		return -EINVAL;
1255 
1256 	switch (attr->attr) {
1257 	case KVM_S390_VM_TOD_EXT:
1258 		ret = kvm_s390_get_tod_ext(kvm, attr);
1259 		break;
1260 	case KVM_S390_VM_TOD_HIGH:
1261 		ret = kvm_s390_get_tod_high(kvm, attr);
1262 		break;
1263 	case KVM_S390_VM_TOD_LOW:
1264 		ret = kvm_s390_get_tod_low(kvm, attr);
1265 		break;
1266 	default:
1267 		ret = -ENXIO;
1268 		break;
1269 	}
1270 	return ret;
1271 }
1272 
1273 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1274 {
1275 	struct kvm_s390_vm_cpu_processor *proc;
1276 	u16 lowest_ibc, unblocked_ibc;
1277 	int ret = 0;
1278 
1279 	mutex_lock(&kvm->lock);
1280 	if (kvm->created_vcpus) {
1281 		ret = -EBUSY;
1282 		goto out;
1283 	}
1284 	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1285 	if (!proc) {
1286 		ret = -ENOMEM;
1287 		goto out;
1288 	}
1289 	if (!copy_from_user(proc, (void __user *)attr->addr,
1290 			    sizeof(*proc))) {
1291 		kvm->arch.model.cpuid = proc->cpuid;
1292 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
1293 		unblocked_ibc = sclp.ibc & 0xfff;
1294 		if (lowest_ibc && proc->ibc) {
1295 			if (proc->ibc > unblocked_ibc)
1296 				kvm->arch.model.ibc = unblocked_ibc;
1297 			else if (proc->ibc < lowest_ibc)
1298 				kvm->arch.model.ibc = lowest_ibc;
1299 			else
1300 				kvm->arch.model.ibc = proc->ibc;
1301 		}
1302 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
1303 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
1304 		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1305 			 kvm->arch.model.ibc,
1306 			 kvm->arch.model.cpuid);
1307 		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1308 			 kvm->arch.model.fac_list[0],
1309 			 kvm->arch.model.fac_list[1],
1310 			 kvm->arch.model.fac_list[2]);
1311 	} else
1312 		ret = -EFAULT;
1313 	kfree(proc);
1314 out:
1315 	mutex_unlock(&kvm->lock);
1316 	return ret;
1317 }
1318 
1319 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1320 				       struct kvm_device_attr *attr)
1321 {
1322 	struct kvm_s390_vm_cpu_feat data;
1323 
1324 	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1325 		return -EFAULT;
1326 	if (!bitmap_subset((unsigned long *) data.feat,
1327 			   kvm_s390_available_cpu_feat,
1328 			   KVM_S390_VM_CPU_FEAT_NR_BITS))
1329 		return -EINVAL;
1330 
1331 	mutex_lock(&kvm->lock);
1332 	if (kvm->created_vcpus) {
1333 		mutex_unlock(&kvm->lock);
1334 		return -EBUSY;
1335 	}
1336 	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1337 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1338 	mutex_unlock(&kvm->lock);
1339 	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1340 			 data.feat[0],
1341 			 data.feat[1],
1342 			 data.feat[2]);
1343 	return 0;
1344 }
1345 
1346 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1347 					  struct kvm_device_attr *attr)
1348 {
1349 	mutex_lock(&kvm->lock);
1350 	if (kvm->created_vcpus) {
1351 		mutex_unlock(&kvm->lock);
1352 		return -EBUSY;
1353 	}
1354 
1355 	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1356 			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1357 		mutex_unlock(&kvm->lock);
1358 		return -EFAULT;
1359 	}
1360 	mutex_unlock(&kvm->lock);
1361 
1362 	VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1363 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1364 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1365 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1366 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1367 	VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1368 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1369 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1370 	VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1371 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1372 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1373 	VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1374 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1375 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1376 	VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1377 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1378 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1379 	VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1380 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1381 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1382 	VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1383 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1384 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1385 	VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1386 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1387 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1388 	VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1389 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1390 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1391 	VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1392 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1393 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1394 	VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1395 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1396 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1397 	VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1398 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1399 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1400 	VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1401 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1402 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1403 	VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1404 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1405 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1406 	VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1407 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1408 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1409 	VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1410 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1411 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1412 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1413 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1414 	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1415 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1416 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1417 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1418 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1419 
1420 	return 0;
1421 }
1422 
1423 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1424 {
1425 	int ret = -ENXIO;
1426 
1427 	switch (attr->attr) {
1428 	case KVM_S390_VM_CPU_PROCESSOR:
1429 		ret = kvm_s390_set_processor(kvm, attr);
1430 		break;
1431 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1432 		ret = kvm_s390_set_processor_feat(kvm, attr);
1433 		break;
1434 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1435 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
1436 		break;
1437 	}
1438 	return ret;
1439 }
1440 
1441 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1442 {
1443 	struct kvm_s390_vm_cpu_processor *proc;
1444 	int ret = 0;
1445 
1446 	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1447 	if (!proc) {
1448 		ret = -ENOMEM;
1449 		goto out;
1450 	}
1451 	proc->cpuid = kvm->arch.model.cpuid;
1452 	proc->ibc = kvm->arch.model.ibc;
1453 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1454 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1455 	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1456 		 kvm->arch.model.ibc,
1457 		 kvm->arch.model.cpuid);
1458 	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1459 		 kvm->arch.model.fac_list[0],
1460 		 kvm->arch.model.fac_list[1],
1461 		 kvm->arch.model.fac_list[2]);
1462 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1463 		ret = -EFAULT;
1464 	kfree(proc);
1465 out:
1466 	return ret;
1467 }
1468 
1469 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1470 {
1471 	struct kvm_s390_vm_cpu_machine *mach;
1472 	int ret = 0;
1473 
1474 	mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1475 	if (!mach) {
1476 		ret = -ENOMEM;
1477 		goto out;
1478 	}
1479 	get_cpu_id((struct cpuid *) &mach->cpuid);
1480 	mach->ibc = sclp.ibc;
1481 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1482 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1483 	memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
1484 	       sizeof(stfle_fac_list));
1485 	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1486 		 kvm->arch.model.ibc,
1487 		 kvm->arch.model.cpuid);
1488 	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1489 		 mach->fac_mask[0],
1490 		 mach->fac_mask[1],
1491 		 mach->fac_mask[2]);
1492 	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1493 		 mach->fac_list[0],
1494 		 mach->fac_list[1],
1495 		 mach->fac_list[2]);
1496 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1497 		ret = -EFAULT;
1498 	kfree(mach);
1499 out:
1500 	return ret;
1501 }
1502 
1503 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1504 				       struct kvm_device_attr *attr)
1505 {
1506 	struct kvm_s390_vm_cpu_feat data;
1507 
1508 	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1509 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1510 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1511 		return -EFAULT;
1512 	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1513 			 data.feat[0],
1514 			 data.feat[1],
1515 			 data.feat[2]);
1516 	return 0;
1517 }
1518 
1519 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1520 				     struct kvm_device_attr *attr)
1521 {
1522 	struct kvm_s390_vm_cpu_feat data;
1523 
1524 	bitmap_copy((unsigned long *) data.feat,
1525 		    kvm_s390_available_cpu_feat,
1526 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1527 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1528 		return -EFAULT;
1529 	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1530 			 data.feat[0],
1531 			 data.feat[1],
1532 			 data.feat[2]);
1533 	return 0;
1534 }
1535 
1536 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1537 					  struct kvm_device_attr *attr)
1538 {
1539 	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1540 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1541 		return -EFAULT;
1542 
1543 	VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1544 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1545 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1546 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1547 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1548 	VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1549 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1550 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1551 	VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1552 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1553 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1554 	VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1555 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1556 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1557 	VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1558 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1559 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1560 	VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1561 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1562 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1563 	VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1564 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1565 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1566 	VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1567 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1568 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1569 	VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1570 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1571 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1572 	VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1573 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1574 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1575 	VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1576 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1577 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1578 	VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1579 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1580 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1581 	VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1582 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1583 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1584 	VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1585 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1586 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1587 	VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1588 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1589 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1590 	VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1591 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1592 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1593 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1594 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1595 	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1596 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1597 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1598 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1599 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1600 
1601 	return 0;
1602 }
1603 
1604 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1605 					struct kvm_device_attr *attr)
1606 {
1607 	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1608 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1609 		return -EFAULT;
1610 
1611 	VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1612 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1613 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1614 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1615 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1616 	VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1617 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1618 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1619 	VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1620 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1621 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1622 	VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1623 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1624 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1625 	VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1626 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1627 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1628 	VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1629 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1630 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1631 	VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1632 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1633 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1634 	VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1635 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1636 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1637 	VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1638 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1639 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1640 	VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1641 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1642 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1643 	VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1644 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1645 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1646 	VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1647 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1648 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1649 	VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1650 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1651 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1652 	VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1653 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1654 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1655 	VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1656 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1657 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1658 	VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1659 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1660 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1661 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1662 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1663 	VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1664 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1665 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1666 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1667 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1668 
1669 	return 0;
1670 }
1671 
1672 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1673 {
1674 	int ret = -ENXIO;
1675 
1676 	switch (attr->attr) {
1677 	case KVM_S390_VM_CPU_PROCESSOR:
1678 		ret = kvm_s390_get_processor(kvm, attr);
1679 		break;
1680 	case KVM_S390_VM_CPU_MACHINE:
1681 		ret = kvm_s390_get_machine(kvm, attr);
1682 		break;
1683 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1684 		ret = kvm_s390_get_processor_feat(kvm, attr);
1685 		break;
1686 	case KVM_S390_VM_CPU_MACHINE_FEAT:
1687 		ret = kvm_s390_get_machine_feat(kvm, attr);
1688 		break;
1689 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1690 		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1691 		break;
1692 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1693 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1694 		break;
1695 	}
1696 	return ret;
1697 }
1698 
1699 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1700 {
1701 	int ret;
1702 
1703 	switch (attr->group) {
1704 	case KVM_S390_VM_MEM_CTRL:
1705 		ret = kvm_s390_set_mem_control(kvm, attr);
1706 		break;
1707 	case KVM_S390_VM_TOD:
1708 		ret = kvm_s390_set_tod(kvm, attr);
1709 		break;
1710 	case KVM_S390_VM_CPU_MODEL:
1711 		ret = kvm_s390_set_cpu_model(kvm, attr);
1712 		break;
1713 	case KVM_S390_VM_CRYPTO:
1714 		ret = kvm_s390_vm_set_crypto(kvm, attr);
1715 		break;
1716 	case KVM_S390_VM_MIGRATION:
1717 		ret = kvm_s390_vm_set_migration(kvm, attr);
1718 		break;
1719 	default:
1720 		ret = -ENXIO;
1721 		break;
1722 	}
1723 
1724 	return ret;
1725 }
1726 
1727 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1728 {
1729 	int ret;
1730 
1731 	switch (attr->group) {
1732 	case KVM_S390_VM_MEM_CTRL:
1733 		ret = kvm_s390_get_mem_control(kvm, attr);
1734 		break;
1735 	case KVM_S390_VM_TOD:
1736 		ret = kvm_s390_get_tod(kvm, attr);
1737 		break;
1738 	case KVM_S390_VM_CPU_MODEL:
1739 		ret = kvm_s390_get_cpu_model(kvm, attr);
1740 		break;
1741 	case KVM_S390_VM_MIGRATION:
1742 		ret = kvm_s390_vm_get_migration(kvm, attr);
1743 		break;
1744 	default:
1745 		ret = -ENXIO;
1746 		break;
1747 	}
1748 
1749 	return ret;
1750 }
1751 
1752 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1753 {
1754 	int ret;
1755 
1756 	switch (attr->group) {
1757 	case KVM_S390_VM_MEM_CTRL:
1758 		switch (attr->attr) {
1759 		case KVM_S390_VM_MEM_ENABLE_CMMA:
1760 		case KVM_S390_VM_MEM_CLR_CMMA:
1761 			ret = sclp.has_cmma ? 0 : -ENXIO;
1762 			break;
1763 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1764 			ret = 0;
1765 			break;
1766 		default:
1767 			ret = -ENXIO;
1768 			break;
1769 		}
1770 		break;
1771 	case KVM_S390_VM_TOD:
1772 		switch (attr->attr) {
1773 		case KVM_S390_VM_TOD_LOW:
1774 		case KVM_S390_VM_TOD_HIGH:
1775 			ret = 0;
1776 			break;
1777 		default:
1778 			ret = -ENXIO;
1779 			break;
1780 		}
1781 		break;
1782 	case KVM_S390_VM_CPU_MODEL:
1783 		switch (attr->attr) {
1784 		case KVM_S390_VM_CPU_PROCESSOR:
1785 		case KVM_S390_VM_CPU_MACHINE:
1786 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1787 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1788 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1789 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1790 			ret = 0;
1791 			break;
1792 		default:
1793 			ret = -ENXIO;
1794 			break;
1795 		}
1796 		break;
1797 	case KVM_S390_VM_CRYPTO:
1798 		switch (attr->attr) {
1799 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1800 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1801 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1802 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1803 			ret = 0;
1804 			break;
1805 		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1806 		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1807 			ret = ap_instructions_available() ? 0 : -ENXIO;
1808 			break;
1809 		default:
1810 			ret = -ENXIO;
1811 			break;
1812 		}
1813 		break;
1814 	case KVM_S390_VM_MIGRATION:
1815 		ret = 0;
1816 		break;
1817 	default:
1818 		ret = -ENXIO;
1819 		break;
1820 	}
1821 
1822 	return ret;
1823 }
1824 
1825 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1826 {
1827 	uint8_t *keys;
1828 	uint64_t hva;
1829 	int srcu_idx, i, r = 0;
1830 
1831 	if (args->flags != 0)
1832 		return -EINVAL;
1833 
1834 	/* Is this guest using storage keys? */
1835 	if (!mm_uses_skeys(current->mm))
1836 		return KVM_S390_GET_SKEYS_NONE;
1837 
1838 	/* Enforce sane limit on memory allocation */
1839 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1840 		return -EINVAL;
1841 
1842 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1843 	if (!keys)
1844 		return -ENOMEM;
1845 
1846 	mmap_read_lock(current->mm);
1847 	srcu_idx = srcu_read_lock(&kvm->srcu);
1848 	for (i = 0; i < args->count; i++) {
1849 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1850 		if (kvm_is_error_hva(hva)) {
1851 			r = -EFAULT;
1852 			break;
1853 		}
1854 
1855 		r = get_guest_storage_key(current->mm, hva, &keys[i]);
1856 		if (r)
1857 			break;
1858 	}
1859 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1860 	mmap_read_unlock(current->mm);
1861 
1862 	if (!r) {
1863 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1864 				 sizeof(uint8_t) * args->count);
1865 		if (r)
1866 			r = -EFAULT;
1867 	}
1868 
1869 	kvfree(keys);
1870 	return r;
1871 }
1872 
1873 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1874 {
1875 	uint8_t *keys;
1876 	uint64_t hva;
1877 	int srcu_idx, i, r = 0;
1878 	bool unlocked;
1879 
1880 	if (args->flags != 0)
1881 		return -EINVAL;
1882 
1883 	/* Enforce sane limit on memory allocation */
1884 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1885 		return -EINVAL;
1886 
1887 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1888 	if (!keys)
1889 		return -ENOMEM;
1890 
1891 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1892 			   sizeof(uint8_t) * args->count);
1893 	if (r) {
1894 		r = -EFAULT;
1895 		goto out;
1896 	}
1897 
1898 	/* Enable storage key handling for the guest */
1899 	r = s390_enable_skey();
1900 	if (r)
1901 		goto out;
1902 
1903 	i = 0;
1904 	mmap_read_lock(current->mm);
1905 	srcu_idx = srcu_read_lock(&kvm->srcu);
1906         while (i < args->count) {
1907 		unlocked = false;
1908 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1909 		if (kvm_is_error_hva(hva)) {
1910 			r = -EFAULT;
1911 			break;
1912 		}
1913 
1914 		/* Lowest order bit is reserved */
1915 		if (keys[i] & 0x01) {
1916 			r = -EINVAL;
1917 			break;
1918 		}
1919 
1920 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1921 		if (r) {
1922 			r = fixup_user_fault(current->mm, hva,
1923 					     FAULT_FLAG_WRITE, &unlocked);
1924 			if (r)
1925 				break;
1926 		}
1927 		if (!r)
1928 			i++;
1929 	}
1930 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1931 	mmap_read_unlock(current->mm);
1932 out:
1933 	kvfree(keys);
1934 	return r;
1935 }
1936 
1937 /*
1938  * Base address and length must be sent at the start of each block, therefore
1939  * it's cheaper to send some clean data, as long as it's less than the size of
1940  * two longs.
1941  */
1942 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1943 /* for consistency */
1944 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1945 
1946 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1947 			      u8 *res, unsigned long bufsize)
1948 {
1949 	unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1950 
1951 	args->count = 0;
1952 	while (args->count < bufsize) {
1953 		hva = gfn_to_hva(kvm, cur_gfn);
1954 		/*
1955 		 * We return an error if the first value was invalid, but we
1956 		 * return successfully if at least one value was copied.
1957 		 */
1958 		if (kvm_is_error_hva(hva))
1959 			return args->count ? 0 : -EFAULT;
1960 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1961 			pgstev = 0;
1962 		res[args->count++] = (pgstev >> 24) & 0x43;
1963 		cur_gfn++;
1964 	}
1965 
1966 	return 0;
1967 }
1968 
1969 static struct kvm_memory_slot *gfn_to_memslot_approx(struct kvm_memslots *slots,
1970 						     gfn_t gfn)
1971 {
1972 	return ____gfn_to_memslot(slots, gfn, true);
1973 }
1974 
1975 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1976 					      unsigned long cur_gfn)
1977 {
1978 	struct kvm_memory_slot *ms = gfn_to_memslot_approx(slots, cur_gfn);
1979 	int slotidx = ms - slots->memslots;
1980 	unsigned long ofs = cur_gfn - ms->base_gfn;
1981 
1982 	if (ms->base_gfn + ms->npages <= cur_gfn) {
1983 		slotidx--;
1984 		/* If we are above the highest slot, wrap around */
1985 		if (slotidx < 0)
1986 			slotidx = slots->used_slots - 1;
1987 
1988 		ms = slots->memslots + slotidx;
1989 		ofs = 0;
1990 	}
1991 	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1992 	while ((slotidx > 0) && (ofs >= ms->npages)) {
1993 		slotidx--;
1994 		ms = slots->memslots + slotidx;
1995 		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
1996 	}
1997 	return ms->base_gfn + ofs;
1998 }
1999 
2000 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2001 			     u8 *res, unsigned long bufsize)
2002 {
2003 	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2004 	struct kvm_memslots *slots = kvm_memslots(kvm);
2005 	struct kvm_memory_slot *ms;
2006 
2007 	if (unlikely(!slots->used_slots))
2008 		return 0;
2009 
2010 	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2011 	ms = gfn_to_memslot(kvm, cur_gfn);
2012 	args->count = 0;
2013 	args->start_gfn = cur_gfn;
2014 	if (!ms)
2015 		return 0;
2016 	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2017 	mem_end = kvm_s390_get_gfn_end(slots);
2018 
2019 	while (args->count < bufsize) {
2020 		hva = gfn_to_hva(kvm, cur_gfn);
2021 		if (kvm_is_error_hva(hva))
2022 			return 0;
2023 		/* Decrement only if we actually flipped the bit to 0 */
2024 		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2025 			atomic64_dec(&kvm->arch.cmma_dirty_pages);
2026 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2027 			pgstev = 0;
2028 		/* Save the value */
2029 		res[args->count++] = (pgstev >> 24) & 0x43;
2030 		/* If the next bit is too far away, stop. */
2031 		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2032 			return 0;
2033 		/* If we reached the previous "next", find the next one */
2034 		if (cur_gfn == next_gfn)
2035 			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2036 		/* Reached the end of memory or of the buffer, stop */
2037 		if ((next_gfn >= mem_end) ||
2038 		    (next_gfn - args->start_gfn >= bufsize))
2039 			return 0;
2040 		cur_gfn++;
2041 		/* Reached the end of the current memslot, take the next one. */
2042 		if (cur_gfn - ms->base_gfn >= ms->npages) {
2043 			ms = gfn_to_memslot(kvm, cur_gfn);
2044 			if (!ms)
2045 				return 0;
2046 		}
2047 	}
2048 	return 0;
2049 }
2050 
2051 /*
2052  * This function searches for the next page with dirty CMMA attributes, and
2053  * saves the attributes in the buffer up to either the end of the buffer or
2054  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2055  * no trailing clean bytes are saved.
2056  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2057  * output buffer will indicate 0 as length.
2058  */
2059 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2060 				  struct kvm_s390_cmma_log *args)
2061 {
2062 	unsigned long bufsize;
2063 	int srcu_idx, peek, ret;
2064 	u8 *values;
2065 
2066 	if (!kvm->arch.use_cmma)
2067 		return -ENXIO;
2068 	/* Invalid/unsupported flags were specified */
2069 	if (args->flags & ~KVM_S390_CMMA_PEEK)
2070 		return -EINVAL;
2071 	/* Migration mode query, and we are not doing a migration */
2072 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2073 	if (!peek && !kvm->arch.migration_mode)
2074 		return -EINVAL;
2075 	/* CMMA is disabled or was not used, or the buffer has length zero */
2076 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2077 	if (!bufsize || !kvm->mm->context.uses_cmm) {
2078 		memset(args, 0, sizeof(*args));
2079 		return 0;
2080 	}
2081 	/* We are not peeking, and there are no dirty pages */
2082 	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2083 		memset(args, 0, sizeof(*args));
2084 		return 0;
2085 	}
2086 
2087 	values = vmalloc(bufsize);
2088 	if (!values)
2089 		return -ENOMEM;
2090 
2091 	mmap_read_lock(kvm->mm);
2092 	srcu_idx = srcu_read_lock(&kvm->srcu);
2093 	if (peek)
2094 		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2095 	else
2096 		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2097 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2098 	mmap_read_unlock(kvm->mm);
2099 
2100 	if (kvm->arch.migration_mode)
2101 		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2102 	else
2103 		args->remaining = 0;
2104 
2105 	if (copy_to_user((void __user *)args->values, values, args->count))
2106 		ret = -EFAULT;
2107 
2108 	vfree(values);
2109 	return ret;
2110 }
2111 
2112 /*
2113  * This function sets the CMMA attributes for the given pages. If the input
2114  * buffer has zero length, no action is taken, otherwise the attributes are
2115  * set and the mm->context.uses_cmm flag is set.
2116  */
2117 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2118 				  const struct kvm_s390_cmma_log *args)
2119 {
2120 	unsigned long hva, mask, pgstev, i;
2121 	uint8_t *bits;
2122 	int srcu_idx, r = 0;
2123 
2124 	mask = args->mask;
2125 
2126 	if (!kvm->arch.use_cmma)
2127 		return -ENXIO;
2128 	/* invalid/unsupported flags */
2129 	if (args->flags != 0)
2130 		return -EINVAL;
2131 	/* Enforce sane limit on memory allocation */
2132 	if (args->count > KVM_S390_CMMA_SIZE_MAX)
2133 		return -EINVAL;
2134 	/* Nothing to do */
2135 	if (args->count == 0)
2136 		return 0;
2137 
2138 	bits = vmalloc(array_size(sizeof(*bits), args->count));
2139 	if (!bits)
2140 		return -ENOMEM;
2141 
2142 	r = copy_from_user(bits, (void __user *)args->values, args->count);
2143 	if (r) {
2144 		r = -EFAULT;
2145 		goto out;
2146 	}
2147 
2148 	mmap_read_lock(kvm->mm);
2149 	srcu_idx = srcu_read_lock(&kvm->srcu);
2150 	for (i = 0; i < args->count; i++) {
2151 		hva = gfn_to_hva(kvm, args->start_gfn + i);
2152 		if (kvm_is_error_hva(hva)) {
2153 			r = -EFAULT;
2154 			break;
2155 		}
2156 
2157 		pgstev = bits[i];
2158 		pgstev = pgstev << 24;
2159 		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2160 		set_pgste_bits(kvm->mm, hva, mask, pgstev);
2161 	}
2162 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2163 	mmap_read_unlock(kvm->mm);
2164 
2165 	if (!kvm->mm->context.uses_cmm) {
2166 		mmap_write_lock(kvm->mm);
2167 		kvm->mm->context.uses_cmm = 1;
2168 		mmap_write_unlock(kvm->mm);
2169 	}
2170 out:
2171 	vfree(bits);
2172 	return r;
2173 }
2174 
2175 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2176 {
2177 	struct kvm_vcpu *vcpu;
2178 	u16 rc, rrc;
2179 	int ret = 0;
2180 	unsigned long i;
2181 
2182 	/*
2183 	 * We ignore failures and try to destroy as many CPUs as possible.
2184 	 * At the same time we must not free the assigned resources when
2185 	 * this fails, as the ultravisor has still access to that memory.
2186 	 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2187 	 * behind.
2188 	 * We want to return the first failure rc and rrc, though.
2189 	 */
2190 	kvm_for_each_vcpu(i, vcpu, kvm) {
2191 		mutex_lock(&vcpu->mutex);
2192 		if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2193 			*rcp = rc;
2194 			*rrcp = rrc;
2195 			ret = -EIO;
2196 		}
2197 		mutex_unlock(&vcpu->mutex);
2198 	}
2199 	return ret;
2200 }
2201 
2202 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2203 {
2204 	unsigned long i;
2205 	int r = 0;
2206 	u16 dummy;
2207 
2208 	struct kvm_vcpu *vcpu;
2209 
2210 	kvm_for_each_vcpu(i, vcpu, kvm) {
2211 		mutex_lock(&vcpu->mutex);
2212 		r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2213 		mutex_unlock(&vcpu->mutex);
2214 		if (r)
2215 			break;
2216 	}
2217 	if (r)
2218 		kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2219 	return r;
2220 }
2221 
2222 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2223 {
2224 	int r = 0;
2225 	u16 dummy;
2226 	void __user *argp = (void __user *)cmd->data;
2227 
2228 	switch (cmd->cmd) {
2229 	case KVM_PV_ENABLE: {
2230 		r = -EINVAL;
2231 		if (kvm_s390_pv_is_protected(kvm))
2232 			break;
2233 
2234 		/*
2235 		 *  FMT 4 SIE needs esca. As we never switch back to bsca from
2236 		 *  esca, we need no cleanup in the error cases below
2237 		 */
2238 		r = sca_switch_to_extended(kvm);
2239 		if (r)
2240 			break;
2241 
2242 		mmap_write_lock(current->mm);
2243 		r = gmap_mark_unmergeable();
2244 		mmap_write_unlock(current->mm);
2245 		if (r)
2246 			break;
2247 
2248 		r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2249 		if (r)
2250 			break;
2251 
2252 		r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2253 		if (r)
2254 			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2255 
2256 		/* we need to block service interrupts from now on */
2257 		set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2258 		break;
2259 	}
2260 	case KVM_PV_DISABLE: {
2261 		r = -EINVAL;
2262 		if (!kvm_s390_pv_is_protected(kvm))
2263 			break;
2264 
2265 		r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2266 		/*
2267 		 * If a CPU could not be destroyed, destroy VM will also fail.
2268 		 * There is no point in trying to destroy it. Instead return
2269 		 * the rc and rrc from the first CPU that failed destroying.
2270 		 */
2271 		if (r)
2272 			break;
2273 		r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2274 
2275 		/* no need to block service interrupts any more */
2276 		clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2277 		break;
2278 	}
2279 	case KVM_PV_SET_SEC_PARMS: {
2280 		struct kvm_s390_pv_sec_parm parms = {};
2281 		void *hdr;
2282 
2283 		r = -EINVAL;
2284 		if (!kvm_s390_pv_is_protected(kvm))
2285 			break;
2286 
2287 		r = -EFAULT;
2288 		if (copy_from_user(&parms, argp, sizeof(parms)))
2289 			break;
2290 
2291 		/* Currently restricted to 8KB */
2292 		r = -EINVAL;
2293 		if (parms.length > PAGE_SIZE * 2)
2294 			break;
2295 
2296 		r = -ENOMEM;
2297 		hdr = vmalloc(parms.length);
2298 		if (!hdr)
2299 			break;
2300 
2301 		r = -EFAULT;
2302 		if (!copy_from_user(hdr, (void __user *)parms.origin,
2303 				    parms.length))
2304 			r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2305 						      &cmd->rc, &cmd->rrc);
2306 
2307 		vfree(hdr);
2308 		break;
2309 	}
2310 	case KVM_PV_UNPACK: {
2311 		struct kvm_s390_pv_unp unp = {};
2312 
2313 		r = -EINVAL;
2314 		if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2315 			break;
2316 
2317 		r = -EFAULT;
2318 		if (copy_from_user(&unp, argp, sizeof(unp)))
2319 			break;
2320 
2321 		r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2322 				       &cmd->rc, &cmd->rrc);
2323 		break;
2324 	}
2325 	case KVM_PV_VERIFY: {
2326 		r = -EINVAL;
2327 		if (!kvm_s390_pv_is_protected(kvm))
2328 			break;
2329 
2330 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2331 				  UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2332 		KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2333 			     cmd->rrc);
2334 		break;
2335 	}
2336 	case KVM_PV_PREP_RESET: {
2337 		r = -EINVAL;
2338 		if (!kvm_s390_pv_is_protected(kvm))
2339 			break;
2340 
2341 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2342 				  UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2343 		KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2344 			     cmd->rc, cmd->rrc);
2345 		break;
2346 	}
2347 	case KVM_PV_UNSHARE_ALL: {
2348 		r = -EINVAL;
2349 		if (!kvm_s390_pv_is_protected(kvm))
2350 			break;
2351 
2352 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2353 				  UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2354 		KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2355 			     cmd->rc, cmd->rrc);
2356 		break;
2357 	}
2358 	default:
2359 		r = -ENOTTY;
2360 	}
2361 	return r;
2362 }
2363 
2364 long kvm_arch_vm_ioctl(struct file *filp,
2365 		       unsigned int ioctl, unsigned long arg)
2366 {
2367 	struct kvm *kvm = filp->private_data;
2368 	void __user *argp = (void __user *)arg;
2369 	struct kvm_device_attr attr;
2370 	int r;
2371 
2372 	switch (ioctl) {
2373 	case KVM_S390_INTERRUPT: {
2374 		struct kvm_s390_interrupt s390int;
2375 
2376 		r = -EFAULT;
2377 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
2378 			break;
2379 		r = kvm_s390_inject_vm(kvm, &s390int);
2380 		break;
2381 	}
2382 	case KVM_CREATE_IRQCHIP: {
2383 		struct kvm_irq_routing_entry routing;
2384 
2385 		r = -EINVAL;
2386 		if (kvm->arch.use_irqchip) {
2387 			/* Set up dummy routing. */
2388 			memset(&routing, 0, sizeof(routing));
2389 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2390 		}
2391 		break;
2392 	}
2393 	case KVM_SET_DEVICE_ATTR: {
2394 		r = -EFAULT;
2395 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2396 			break;
2397 		r = kvm_s390_vm_set_attr(kvm, &attr);
2398 		break;
2399 	}
2400 	case KVM_GET_DEVICE_ATTR: {
2401 		r = -EFAULT;
2402 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2403 			break;
2404 		r = kvm_s390_vm_get_attr(kvm, &attr);
2405 		break;
2406 	}
2407 	case KVM_HAS_DEVICE_ATTR: {
2408 		r = -EFAULT;
2409 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2410 			break;
2411 		r = kvm_s390_vm_has_attr(kvm, &attr);
2412 		break;
2413 	}
2414 	case KVM_S390_GET_SKEYS: {
2415 		struct kvm_s390_skeys args;
2416 
2417 		r = -EFAULT;
2418 		if (copy_from_user(&args, argp,
2419 				   sizeof(struct kvm_s390_skeys)))
2420 			break;
2421 		r = kvm_s390_get_skeys(kvm, &args);
2422 		break;
2423 	}
2424 	case KVM_S390_SET_SKEYS: {
2425 		struct kvm_s390_skeys args;
2426 
2427 		r = -EFAULT;
2428 		if (copy_from_user(&args, argp,
2429 				   sizeof(struct kvm_s390_skeys)))
2430 			break;
2431 		r = kvm_s390_set_skeys(kvm, &args);
2432 		break;
2433 	}
2434 	case KVM_S390_GET_CMMA_BITS: {
2435 		struct kvm_s390_cmma_log args;
2436 
2437 		r = -EFAULT;
2438 		if (copy_from_user(&args, argp, sizeof(args)))
2439 			break;
2440 		mutex_lock(&kvm->slots_lock);
2441 		r = kvm_s390_get_cmma_bits(kvm, &args);
2442 		mutex_unlock(&kvm->slots_lock);
2443 		if (!r) {
2444 			r = copy_to_user(argp, &args, sizeof(args));
2445 			if (r)
2446 				r = -EFAULT;
2447 		}
2448 		break;
2449 	}
2450 	case KVM_S390_SET_CMMA_BITS: {
2451 		struct kvm_s390_cmma_log args;
2452 
2453 		r = -EFAULT;
2454 		if (copy_from_user(&args, argp, sizeof(args)))
2455 			break;
2456 		mutex_lock(&kvm->slots_lock);
2457 		r = kvm_s390_set_cmma_bits(kvm, &args);
2458 		mutex_unlock(&kvm->slots_lock);
2459 		break;
2460 	}
2461 	case KVM_S390_PV_COMMAND: {
2462 		struct kvm_pv_cmd args;
2463 
2464 		/* protvirt means user cpu state */
2465 		kvm_s390_set_user_cpu_state_ctrl(kvm);
2466 		r = 0;
2467 		if (!is_prot_virt_host()) {
2468 			r = -EINVAL;
2469 			break;
2470 		}
2471 		if (copy_from_user(&args, argp, sizeof(args))) {
2472 			r = -EFAULT;
2473 			break;
2474 		}
2475 		if (args.flags) {
2476 			r = -EINVAL;
2477 			break;
2478 		}
2479 		mutex_lock(&kvm->lock);
2480 		r = kvm_s390_handle_pv(kvm, &args);
2481 		mutex_unlock(&kvm->lock);
2482 		if (copy_to_user(argp, &args, sizeof(args))) {
2483 			r = -EFAULT;
2484 			break;
2485 		}
2486 		break;
2487 	}
2488 	default:
2489 		r = -ENOTTY;
2490 	}
2491 
2492 	return r;
2493 }
2494 
2495 static int kvm_s390_apxa_installed(void)
2496 {
2497 	struct ap_config_info info;
2498 
2499 	if (ap_instructions_available()) {
2500 		if (ap_qci(&info) == 0)
2501 			return info.apxa;
2502 	}
2503 
2504 	return 0;
2505 }
2506 
2507 /*
2508  * The format of the crypto control block (CRYCB) is specified in the 3 low
2509  * order bits of the CRYCB designation (CRYCBD) field as follows:
2510  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2511  *	     AP extended addressing (APXA) facility are installed.
2512  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2513  * Format 2: Both the APXA and MSAX3 facilities are installed
2514  */
2515 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2516 {
2517 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2518 
2519 	/* Clear the CRYCB format bits - i.e., set format 0 by default */
2520 	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2521 
2522 	/* Check whether MSAX3 is installed */
2523 	if (!test_kvm_facility(kvm, 76))
2524 		return;
2525 
2526 	if (kvm_s390_apxa_installed())
2527 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2528 	else
2529 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2530 }
2531 
2532 /*
2533  * kvm_arch_crypto_set_masks
2534  *
2535  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2536  *	 to be set.
2537  * @apm: the mask identifying the accessible AP adapters
2538  * @aqm: the mask identifying the accessible AP domains
2539  * @adm: the mask identifying the accessible AP control domains
2540  *
2541  * Set the masks that identify the adapters, domains and control domains to
2542  * which the KVM guest is granted access.
2543  *
2544  * Note: The kvm->lock mutex must be locked by the caller before invoking this
2545  *	 function.
2546  */
2547 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2548 			       unsigned long *aqm, unsigned long *adm)
2549 {
2550 	struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2551 
2552 	kvm_s390_vcpu_block_all(kvm);
2553 
2554 	switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2555 	case CRYCB_FORMAT2: /* APCB1 use 256 bits */
2556 		memcpy(crycb->apcb1.apm, apm, 32);
2557 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2558 			 apm[0], apm[1], apm[2], apm[3]);
2559 		memcpy(crycb->apcb1.aqm, aqm, 32);
2560 		VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2561 			 aqm[0], aqm[1], aqm[2], aqm[3]);
2562 		memcpy(crycb->apcb1.adm, adm, 32);
2563 		VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2564 			 adm[0], adm[1], adm[2], adm[3]);
2565 		break;
2566 	case CRYCB_FORMAT1:
2567 	case CRYCB_FORMAT0: /* Fall through both use APCB0 */
2568 		memcpy(crycb->apcb0.apm, apm, 8);
2569 		memcpy(crycb->apcb0.aqm, aqm, 2);
2570 		memcpy(crycb->apcb0.adm, adm, 2);
2571 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2572 			 apm[0], *((unsigned short *)aqm),
2573 			 *((unsigned short *)adm));
2574 		break;
2575 	default:	/* Can not happen */
2576 		break;
2577 	}
2578 
2579 	/* recreate the shadow crycb for each vcpu */
2580 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2581 	kvm_s390_vcpu_unblock_all(kvm);
2582 }
2583 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2584 
2585 /*
2586  * kvm_arch_crypto_clear_masks
2587  *
2588  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2589  *	 to be cleared.
2590  *
2591  * Clear the masks that identify the adapters, domains and control domains to
2592  * which the KVM guest is granted access.
2593  *
2594  * Note: The kvm->lock mutex must be locked by the caller before invoking this
2595  *	 function.
2596  */
2597 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2598 {
2599 	kvm_s390_vcpu_block_all(kvm);
2600 
2601 	memset(&kvm->arch.crypto.crycb->apcb0, 0,
2602 	       sizeof(kvm->arch.crypto.crycb->apcb0));
2603 	memset(&kvm->arch.crypto.crycb->apcb1, 0,
2604 	       sizeof(kvm->arch.crypto.crycb->apcb1));
2605 
2606 	VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2607 	/* recreate the shadow crycb for each vcpu */
2608 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2609 	kvm_s390_vcpu_unblock_all(kvm);
2610 }
2611 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2612 
2613 static u64 kvm_s390_get_initial_cpuid(void)
2614 {
2615 	struct cpuid cpuid;
2616 
2617 	get_cpu_id(&cpuid);
2618 	cpuid.version = 0xff;
2619 	return *((u64 *) &cpuid);
2620 }
2621 
2622 static void kvm_s390_crypto_init(struct kvm *kvm)
2623 {
2624 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2625 	kvm_s390_set_crycb_format(kvm);
2626 	init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);
2627 
2628 	if (!test_kvm_facility(kvm, 76))
2629 		return;
2630 
2631 	/* Enable AES/DEA protected key functions by default */
2632 	kvm->arch.crypto.aes_kw = 1;
2633 	kvm->arch.crypto.dea_kw = 1;
2634 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2635 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2636 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2637 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2638 }
2639 
2640 static void sca_dispose(struct kvm *kvm)
2641 {
2642 	if (kvm->arch.use_esca)
2643 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2644 	else
2645 		free_page((unsigned long)(kvm->arch.sca));
2646 	kvm->arch.sca = NULL;
2647 }
2648 
2649 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2650 {
2651 	gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
2652 	int i, rc;
2653 	char debug_name[16];
2654 	static unsigned long sca_offset;
2655 
2656 	rc = -EINVAL;
2657 #ifdef CONFIG_KVM_S390_UCONTROL
2658 	if (type & ~KVM_VM_S390_UCONTROL)
2659 		goto out_err;
2660 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2661 		goto out_err;
2662 #else
2663 	if (type)
2664 		goto out_err;
2665 #endif
2666 
2667 	rc = s390_enable_sie();
2668 	if (rc)
2669 		goto out_err;
2670 
2671 	rc = -ENOMEM;
2672 
2673 	if (!sclp.has_64bscao)
2674 		alloc_flags |= GFP_DMA;
2675 	rwlock_init(&kvm->arch.sca_lock);
2676 	/* start with basic SCA */
2677 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2678 	if (!kvm->arch.sca)
2679 		goto out_err;
2680 	mutex_lock(&kvm_lock);
2681 	sca_offset += 16;
2682 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2683 		sca_offset = 0;
2684 	kvm->arch.sca = (struct bsca_block *)
2685 			((char *) kvm->arch.sca + sca_offset);
2686 	mutex_unlock(&kvm_lock);
2687 
2688 	sprintf(debug_name, "kvm-%u", current->pid);
2689 
2690 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2691 	if (!kvm->arch.dbf)
2692 		goto out_err;
2693 
2694 	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2695 	kvm->arch.sie_page2 =
2696 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
2697 	if (!kvm->arch.sie_page2)
2698 		goto out_err;
2699 
2700 	kvm->arch.sie_page2->kvm = kvm;
2701 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2702 
2703 	for (i = 0; i < kvm_s390_fac_size(); i++) {
2704 		kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
2705 					      (kvm_s390_fac_base[i] |
2706 					       kvm_s390_fac_ext[i]);
2707 		kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
2708 					      kvm_s390_fac_base[i];
2709 	}
2710 	kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2711 
2712 	/* we are always in czam mode - even on pre z14 machines */
2713 	set_kvm_facility(kvm->arch.model.fac_mask, 138);
2714 	set_kvm_facility(kvm->arch.model.fac_list, 138);
2715 	/* we emulate STHYI in kvm */
2716 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
2717 	set_kvm_facility(kvm->arch.model.fac_list, 74);
2718 	if (MACHINE_HAS_TLB_GUEST) {
2719 		set_kvm_facility(kvm->arch.model.fac_mask, 147);
2720 		set_kvm_facility(kvm->arch.model.fac_list, 147);
2721 	}
2722 
2723 	if (css_general_characteristics.aiv && test_facility(65))
2724 		set_kvm_facility(kvm->arch.model.fac_mask, 65);
2725 
2726 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2727 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2728 
2729 	kvm_s390_crypto_init(kvm);
2730 
2731 	mutex_init(&kvm->arch.float_int.ais_lock);
2732 	spin_lock_init(&kvm->arch.float_int.lock);
2733 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
2734 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2735 	init_waitqueue_head(&kvm->arch.ipte_wq);
2736 	mutex_init(&kvm->arch.ipte_mutex);
2737 
2738 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2739 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
2740 
2741 	if (type & KVM_VM_S390_UCONTROL) {
2742 		kvm->arch.gmap = NULL;
2743 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2744 	} else {
2745 		if (sclp.hamax == U64_MAX)
2746 			kvm->arch.mem_limit = TASK_SIZE_MAX;
2747 		else
2748 			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2749 						    sclp.hamax + 1);
2750 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2751 		if (!kvm->arch.gmap)
2752 			goto out_err;
2753 		kvm->arch.gmap->private = kvm;
2754 		kvm->arch.gmap->pfault_enabled = 0;
2755 	}
2756 
2757 	kvm->arch.use_pfmfi = sclp.has_pfmfi;
2758 	kvm->arch.use_skf = sclp.has_skey;
2759 	spin_lock_init(&kvm->arch.start_stop_lock);
2760 	kvm_s390_vsie_init(kvm);
2761 	if (use_gisa)
2762 		kvm_s390_gisa_init(kvm);
2763 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2764 
2765 	return 0;
2766 out_err:
2767 	free_page((unsigned long)kvm->arch.sie_page2);
2768 	debug_unregister(kvm->arch.dbf);
2769 	sca_dispose(kvm);
2770 	KVM_EVENT(3, "creation of vm failed: %d", rc);
2771 	return rc;
2772 }
2773 
2774 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2775 {
2776 	u16 rc, rrc;
2777 
2778 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2779 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2780 	kvm_s390_clear_local_irqs(vcpu);
2781 	kvm_clear_async_pf_completion_queue(vcpu);
2782 	if (!kvm_is_ucontrol(vcpu->kvm))
2783 		sca_del_vcpu(vcpu);
2784 
2785 	if (kvm_is_ucontrol(vcpu->kvm))
2786 		gmap_remove(vcpu->arch.gmap);
2787 
2788 	if (vcpu->kvm->arch.use_cmma)
2789 		kvm_s390_vcpu_unsetup_cmma(vcpu);
2790 	/* We can not hold the vcpu mutex here, we are already dying */
2791 	if (kvm_s390_pv_cpu_get_handle(vcpu))
2792 		kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2793 	free_page((unsigned long)(vcpu->arch.sie_block));
2794 }
2795 
2796 void kvm_arch_destroy_vm(struct kvm *kvm)
2797 {
2798 	u16 rc, rrc;
2799 
2800 	kvm_destroy_vcpus(kvm);
2801 	sca_dispose(kvm);
2802 	kvm_s390_gisa_destroy(kvm);
2803 	/*
2804 	 * We are already at the end of life and kvm->lock is not taken.
2805 	 * This is ok as the file descriptor is closed by now and nobody
2806 	 * can mess with the pv state. To avoid lockdep_assert_held from
2807 	 * complaining we do not use kvm_s390_pv_is_protected.
2808 	 */
2809 	if (kvm_s390_pv_get_handle(kvm))
2810 		kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2811 	debug_unregister(kvm->arch.dbf);
2812 	free_page((unsigned long)kvm->arch.sie_page2);
2813 	if (!kvm_is_ucontrol(kvm))
2814 		gmap_remove(kvm->arch.gmap);
2815 	kvm_s390_destroy_adapters(kvm);
2816 	kvm_s390_clear_float_irqs(kvm);
2817 	kvm_s390_vsie_destroy(kvm);
2818 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2819 }
2820 
2821 /* Section: vcpu related */
2822 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2823 {
2824 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2825 	if (!vcpu->arch.gmap)
2826 		return -ENOMEM;
2827 	vcpu->arch.gmap->private = vcpu->kvm;
2828 
2829 	return 0;
2830 }
2831 
2832 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2833 {
2834 	if (!kvm_s390_use_sca_entries())
2835 		return;
2836 	read_lock(&vcpu->kvm->arch.sca_lock);
2837 	if (vcpu->kvm->arch.use_esca) {
2838 		struct esca_block *sca = vcpu->kvm->arch.sca;
2839 
2840 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2841 		sca->cpu[vcpu->vcpu_id].sda = 0;
2842 	} else {
2843 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2844 
2845 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2846 		sca->cpu[vcpu->vcpu_id].sda = 0;
2847 	}
2848 	read_unlock(&vcpu->kvm->arch.sca_lock);
2849 }
2850 
2851 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2852 {
2853 	if (!kvm_s390_use_sca_entries()) {
2854 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2855 
2856 		/* we still need the basic sca for the ipte control */
2857 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2858 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2859 		return;
2860 	}
2861 	read_lock(&vcpu->kvm->arch.sca_lock);
2862 	if (vcpu->kvm->arch.use_esca) {
2863 		struct esca_block *sca = vcpu->kvm->arch.sca;
2864 
2865 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2866 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2867 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2868 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2869 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2870 	} else {
2871 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2872 
2873 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2874 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2875 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2876 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2877 	}
2878 	read_unlock(&vcpu->kvm->arch.sca_lock);
2879 }
2880 
2881 /* Basic SCA to Extended SCA data copy routines */
2882 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2883 {
2884 	d->sda = s->sda;
2885 	d->sigp_ctrl.c = s->sigp_ctrl.c;
2886 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2887 }
2888 
2889 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2890 {
2891 	int i;
2892 
2893 	d->ipte_control = s->ipte_control;
2894 	d->mcn[0] = s->mcn;
2895 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2896 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2897 }
2898 
2899 static int sca_switch_to_extended(struct kvm *kvm)
2900 {
2901 	struct bsca_block *old_sca = kvm->arch.sca;
2902 	struct esca_block *new_sca;
2903 	struct kvm_vcpu *vcpu;
2904 	unsigned long vcpu_idx;
2905 	u32 scaol, scaoh;
2906 
2907 	if (kvm->arch.use_esca)
2908 		return 0;
2909 
2910 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
2911 	if (!new_sca)
2912 		return -ENOMEM;
2913 
2914 	scaoh = (u32)((u64)(new_sca) >> 32);
2915 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
2916 
2917 	kvm_s390_vcpu_block_all(kvm);
2918 	write_lock(&kvm->arch.sca_lock);
2919 
2920 	sca_copy_b_to_e(new_sca, old_sca);
2921 
2922 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2923 		vcpu->arch.sie_block->scaoh = scaoh;
2924 		vcpu->arch.sie_block->scaol = scaol;
2925 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2926 	}
2927 	kvm->arch.sca = new_sca;
2928 	kvm->arch.use_esca = 1;
2929 
2930 	write_unlock(&kvm->arch.sca_lock);
2931 	kvm_s390_vcpu_unblock_all(kvm);
2932 
2933 	free_page((unsigned long)old_sca);
2934 
2935 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2936 		 old_sca, kvm->arch.sca);
2937 	return 0;
2938 }
2939 
2940 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2941 {
2942 	int rc;
2943 
2944 	if (!kvm_s390_use_sca_entries()) {
2945 		if (id < KVM_MAX_VCPUS)
2946 			return true;
2947 		return false;
2948 	}
2949 	if (id < KVM_S390_BSCA_CPU_SLOTS)
2950 		return true;
2951 	if (!sclp.has_esca || !sclp.has_64bscao)
2952 		return false;
2953 
2954 	mutex_lock(&kvm->lock);
2955 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2956 	mutex_unlock(&kvm->lock);
2957 
2958 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2959 }
2960 
2961 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2962 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2963 {
2964 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2965 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2966 	vcpu->arch.cputm_start = get_tod_clock_fast();
2967 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2968 }
2969 
2970 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2971 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2972 {
2973 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2974 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2975 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2976 	vcpu->arch.cputm_start = 0;
2977 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2978 }
2979 
2980 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2981 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2982 {
2983 	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2984 	vcpu->arch.cputm_enabled = true;
2985 	__start_cpu_timer_accounting(vcpu);
2986 }
2987 
2988 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2989 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2990 {
2991 	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2992 	__stop_cpu_timer_accounting(vcpu);
2993 	vcpu->arch.cputm_enabled = false;
2994 }
2995 
2996 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2997 {
2998 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2999 	__enable_cpu_timer_accounting(vcpu);
3000 	preempt_enable();
3001 }
3002 
3003 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3004 {
3005 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3006 	__disable_cpu_timer_accounting(vcpu);
3007 	preempt_enable();
3008 }
3009 
3010 /* set the cpu timer - may only be called from the VCPU thread itself */
3011 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3012 {
3013 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3014 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3015 	if (vcpu->arch.cputm_enabled)
3016 		vcpu->arch.cputm_start = get_tod_clock_fast();
3017 	vcpu->arch.sie_block->cputm = cputm;
3018 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3019 	preempt_enable();
3020 }
3021 
3022 /* update and get the cpu timer - can also be called from other VCPU threads */
3023 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3024 {
3025 	unsigned int seq;
3026 	__u64 value;
3027 
3028 	if (unlikely(!vcpu->arch.cputm_enabled))
3029 		return vcpu->arch.sie_block->cputm;
3030 
3031 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3032 	do {
3033 		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3034 		/*
3035 		 * If the writer would ever execute a read in the critical
3036 		 * section, e.g. in irq context, we have a deadlock.
3037 		 */
3038 		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3039 		value = vcpu->arch.sie_block->cputm;
3040 		/* if cputm_start is 0, accounting is being started/stopped */
3041 		if (likely(vcpu->arch.cputm_start))
3042 			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3043 	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3044 	preempt_enable();
3045 	return value;
3046 }
3047 
3048 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3049 {
3050 
3051 	gmap_enable(vcpu->arch.enabled_gmap);
3052 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3053 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3054 		__start_cpu_timer_accounting(vcpu);
3055 	vcpu->cpu = cpu;
3056 }
3057 
3058 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3059 {
3060 	vcpu->cpu = -1;
3061 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3062 		__stop_cpu_timer_accounting(vcpu);
3063 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3064 	vcpu->arch.enabled_gmap = gmap_get_enabled();
3065 	gmap_disable(vcpu->arch.enabled_gmap);
3066 
3067 }
3068 
3069 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3070 {
3071 	mutex_lock(&vcpu->kvm->lock);
3072 	preempt_disable();
3073 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3074 	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3075 	preempt_enable();
3076 	mutex_unlock(&vcpu->kvm->lock);
3077 	if (!kvm_is_ucontrol(vcpu->kvm)) {
3078 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3079 		sca_add_vcpu(vcpu);
3080 	}
3081 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3082 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3083 	/* make vcpu_load load the right gmap on the first trigger */
3084 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3085 }
3086 
3087 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3088 {
3089 	if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3090 	    test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3091 		return true;
3092 	return false;
3093 }
3094 
3095 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3096 {
3097 	/* At least one ECC subfunction must be present */
3098 	return kvm_has_pckmo_subfunc(kvm, 32) ||
3099 	       kvm_has_pckmo_subfunc(kvm, 33) ||
3100 	       kvm_has_pckmo_subfunc(kvm, 34) ||
3101 	       kvm_has_pckmo_subfunc(kvm, 40) ||
3102 	       kvm_has_pckmo_subfunc(kvm, 41);
3103 
3104 }
3105 
3106 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3107 {
3108 	/*
3109 	 * If the AP instructions are not being interpreted and the MSAX3
3110 	 * facility is not configured for the guest, there is nothing to set up.
3111 	 */
3112 	if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3113 		return;
3114 
3115 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3116 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3117 	vcpu->arch.sie_block->eca &= ~ECA_APIE;
3118 	vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3119 
3120 	if (vcpu->kvm->arch.crypto.apie)
3121 		vcpu->arch.sie_block->eca |= ECA_APIE;
3122 
3123 	/* Set up protected key support */
3124 	if (vcpu->kvm->arch.crypto.aes_kw) {
3125 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3126 		/* ecc is also wrapped with AES key */
3127 		if (kvm_has_pckmo_ecc(vcpu->kvm))
3128 			vcpu->arch.sie_block->ecd |= ECD_ECC;
3129 	}
3130 
3131 	if (vcpu->kvm->arch.crypto.dea_kw)
3132 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3133 }
3134 
3135 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3136 {
3137 	free_page(vcpu->arch.sie_block->cbrlo);
3138 	vcpu->arch.sie_block->cbrlo = 0;
3139 }
3140 
3141 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3142 {
3143 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
3144 	if (!vcpu->arch.sie_block->cbrlo)
3145 		return -ENOMEM;
3146 	return 0;
3147 }
3148 
3149 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3150 {
3151 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3152 
3153 	vcpu->arch.sie_block->ibc = model->ibc;
3154 	if (test_kvm_facility(vcpu->kvm, 7))
3155 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3156 }
3157 
3158 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3159 {
3160 	int rc = 0;
3161 	u16 uvrc, uvrrc;
3162 
3163 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3164 						    CPUSTAT_SM |
3165 						    CPUSTAT_STOPPED);
3166 
3167 	if (test_kvm_facility(vcpu->kvm, 78))
3168 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3169 	else if (test_kvm_facility(vcpu->kvm, 8))
3170 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3171 
3172 	kvm_s390_vcpu_setup_model(vcpu);
3173 
3174 	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3175 	if (MACHINE_HAS_ESOP)
3176 		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3177 	if (test_kvm_facility(vcpu->kvm, 9))
3178 		vcpu->arch.sie_block->ecb |= ECB_SRSI;
3179 	if (test_kvm_facility(vcpu->kvm, 73))
3180 		vcpu->arch.sie_block->ecb |= ECB_TE;
3181 	if (!kvm_is_ucontrol(vcpu->kvm))
3182 		vcpu->arch.sie_block->ecb |= ECB_SPECI;
3183 
3184 	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3185 		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3186 	if (test_kvm_facility(vcpu->kvm, 130))
3187 		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3188 	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3189 	if (sclp.has_cei)
3190 		vcpu->arch.sie_block->eca |= ECA_CEI;
3191 	if (sclp.has_ib)
3192 		vcpu->arch.sie_block->eca |= ECA_IB;
3193 	if (sclp.has_siif)
3194 		vcpu->arch.sie_block->eca |= ECA_SII;
3195 	if (sclp.has_sigpif)
3196 		vcpu->arch.sie_block->eca |= ECA_SIGPI;
3197 	if (test_kvm_facility(vcpu->kvm, 129)) {
3198 		vcpu->arch.sie_block->eca |= ECA_VX;
3199 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3200 	}
3201 	if (test_kvm_facility(vcpu->kvm, 139))
3202 		vcpu->arch.sie_block->ecd |= ECD_MEF;
3203 	if (test_kvm_facility(vcpu->kvm, 156))
3204 		vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3205 	if (vcpu->arch.sie_block->gd) {
3206 		vcpu->arch.sie_block->eca |= ECA_AIV;
3207 		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3208 			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3209 	}
3210 	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3211 					| SDNXC;
3212 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3213 
3214 	if (sclp.has_kss)
3215 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3216 	else
3217 		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3218 
3219 	if (vcpu->kvm->arch.use_cmma) {
3220 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
3221 		if (rc)
3222 			return rc;
3223 	}
3224 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3225 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3226 
3227 	vcpu->arch.sie_block->hpid = HPID_KVM;
3228 
3229 	kvm_s390_vcpu_crypto_setup(vcpu);
3230 
3231 	mutex_lock(&vcpu->kvm->lock);
3232 	if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3233 		rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3234 		if (rc)
3235 			kvm_s390_vcpu_unsetup_cmma(vcpu);
3236 	}
3237 	mutex_unlock(&vcpu->kvm->lock);
3238 
3239 	return rc;
3240 }
3241 
3242 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3243 {
3244 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3245 		return -EINVAL;
3246 	return 0;
3247 }
3248 
3249 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3250 {
3251 	struct sie_page *sie_page;
3252 	int rc;
3253 
3254 	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3255 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3256 	if (!sie_page)
3257 		return -ENOMEM;
3258 
3259 	vcpu->arch.sie_block = &sie_page->sie_block;
3260 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3261 
3262 	/* the real guest size will always be smaller than msl */
3263 	vcpu->arch.sie_block->mso = 0;
3264 	vcpu->arch.sie_block->msl = sclp.hamax;
3265 
3266 	vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3267 	spin_lock_init(&vcpu->arch.local_int.lock);
3268 	vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3269 	if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3270 		vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3271 	seqcount_init(&vcpu->arch.cputm_seqcount);
3272 
3273 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3274 	kvm_clear_async_pf_completion_queue(vcpu);
3275 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3276 				    KVM_SYNC_GPRS |
3277 				    KVM_SYNC_ACRS |
3278 				    KVM_SYNC_CRS |
3279 				    KVM_SYNC_ARCH0 |
3280 				    KVM_SYNC_PFAULT |
3281 				    KVM_SYNC_DIAG318;
3282 	kvm_s390_set_prefix(vcpu, 0);
3283 	if (test_kvm_facility(vcpu->kvm, 64))
3284 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3285 	if (test_kvm_facility(vcpu->kvm, 82))
3286 		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3287 	if (test_kvm_facility(vcpu->kvm, 133))
3288 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3289 	if (test_kvm_facility(vcpu->kvm, 156))
3290 		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3291 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
3292 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3293 	 */
3294 	if (MACHINE_HAS_VX)
3295 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3296 	else
3297 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3298 
3299 	if (kvm_is_ucontrol(vcpu->kvm)) {
3300 		rc = __kvm_ucontrol_vcpu_init(vcpu);
3301 		if (rc)
3302 			goto out_free_sie_block;
3303 	}
3304 
3305 	VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3306 		 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3307 	trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3308 
3309 	rc = kvm_s390_vcpu_setup(vcpu);
3310 	if (rc)
3311 		goto out_ucontrol_uninit;
3312 	return 0;
3313 
3314 out_ucontrol_uninit:
3315 	if (kvm_is_ucontrol(vcpu->kvm))
3316 		gmap_remove(vcpu->arch.gmap);
3317 out_free_sie_block:
3318 	free_page((unsigned long)(vcpu->arch.sie_block));
3319 	return rc;
3320 }
3321 
3322 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3323 {
3324 	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
3325 	return kvm_s390_vcpu_has_irq(vcpu, 0);
3326 }
3327 
3328 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3329 {
3330 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3331 }
3332 
3333 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3334 {
3335 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3336 	exit_sie(vcpu);
3337 }
3338 
3339 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3340 {
3341 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3342 }
3343 
3344 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3345 {
3346 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3347 	exit_sie(vcpu);
3348 }
3349 
3350 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3351 {
3352 	return atomic_read(&vcpu->arch.sie_block->prog20) &
3353 	       (PROG_BLOCK_SIE | PROG_REQUEST);
3354 }
3355 
3356 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3357 {
3358 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3359 }
3360 
3361 /*
3362  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3363  * If the CPU is not running (e.g. waiting as idle) the function will
3364  * return immediately. */
3365 void exit_sie(struct kvm_vcpu *vcpu)
3366 {
3367 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3368 	kvm_s390_vsie_kick(vcpu);
3369 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3370 		cpu_relax();
3371 }
3372 
3373 /* Kick a guest cpu out of SIE to process a request synchronously */
3374 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3375 {
3376 	kvm_make_request(req, vcpu);
3377 	kvm_s390_vcpu_request(vcpu);
3378 }
3379 
3380 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3381 			      unsigned long end)
3382 {
3383 	struct kvm *kvm = gmap->private;
3384 	struct kvm_vcpu *vcpu;
3385 	unsigned long prefix;
3386 	unsigned long i;
3387 
3388 	if (gmap_is_shadow(gmap))
3389 		return;
3390 	if (start >= 1UL << 31)
3391 		/* We are only interested in prefix pages */
3392 		return;
3393 	kvm_for_each_vcpu(i, vcpu, kvm) {
3394 		/* match against both prefix pages */
3395 		prefix = kvm_s390_get_prefix(vcpu);
3396 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3397 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3398 				   start, end);
3399 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3400 		}
3401 	}
3402 }
3403 
3404 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3405 {
3406 	/* do not poll with more than halt_poll_max_steal percent of steal time */
3407 	if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3408 	    halt_poll_max_steal) {
3409 		vcpu->stat.halt_no_poll_steal++;
3410 		return true;
3411 	}
3412 	return false;
3413 }
3414 
3415 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3416 {
3417 	/* kvm common code refers to this, but never calls it */
3418 	BUG();
3419 	return 0;
3420 }
3421 
3422 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3423 					   struct kvm_one_reg *reg)
3424 {
3425 	int r = -EINVAL;
3426 
3427 	switch (reg->id) {
3428 	case KVM_REG_S390_TODPR:
3429 		r = put_user(vcpu->arch.sie_block->todpr,
3430 			     (u32 __user *)reg->addr);
3431 		break;
3432 	case KVM_REG_S390_EPOCHDIFF:
3433 		r = put_user(vcpu->arch.sie_block->epoch,
3434 			     (u64 __user *)reg->addr);
3435 		break;
3436 	case KVM_REG_S390_CPU_TIMER:
3437 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
3438 			     (u64 __user *)reg->addr);
3439 		break;
3440 	case KVM_REG_S390_CLOCK_COMP:
3441 		r = put_user(vcpu->arch.sie_block->ckc,
3442 			     (u64 __user *)reg->addr);
3443 		break;
3444 	case KVM_REG_S390_PFTOKEN:
3445 		r = put_user(vcpu->arch.pfault_token,
3446 			     (u64 __user *)reg->addr);
3447 		break;
3448 	case KVM_REG_S390_PFCOMPARE:
3449 		r = put_user(vcpu->arch.pfault_compare,
3450 			     (u64 __user *)reg->addr);
3451 		break;
3452 	case KVM_REG_S390_PFSELECT:
3453 		r = put_user(vcpu->arch.pfault_select,
3454 			     (u64 __user *)reg->addr);
3455 		break;
3456 	case KVM_REG_S390_PP:
3457 		r = put_user(vcpu->arch.sie_block->pp,
3458 			     (u64 __user *)reg->addr);
3459 		break;
3460 	case KVM_REG_S390_GBEA:
3461 		r = put_user(vcpu->arch.sie_block->gbea,
3462 			     (u64 __user *)reg->addr);
3463 		break;
3464 	default:
3465 		break;
3466 	}
3467 
3468 	return r;
3469 }
3470 
3471 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3472 					   struct kvm_one_reg *reg)
3473 {
3474 	int r = -EINVAL;
3475 	__u64 val;
3476 
3477 	switch (reg->id) {
3478 	case KVM_REG_S390_TODPR:
3479 		r = get_user(vcpu->arch.sie_block->todpr,
3480 			     (u32 __user *)reg->addr);
3481 		break;
3482 	case KVM_REG_S390_EPOCHDIFF:
3483 		r = get_user(vcpu->arch.sie_block->epoch,
3484 			     (u64 __user *)reg->addr);
3485 		break;
3486 	case KVM_REG_S390_CPU_TIMER:
3487 		r = get_user(val, (u64 __user *)reg->addr);
3488 		if (!r)
3489 			kvm_s390_set_cpu_timer(vcpu, val);
3490 		break;
3491 	case KVM_REG_S390_CLOCK_COMP:
3492 		r = get_user(vcpu->arch.sie_block->ckc,
3493 			     (u64 __user *)reg->addr);
3494 		break;
3495 	case KVM_REG_S390_PFTOKEN:
3496 		r = get_user(vcpu->arch.pfault_token,
3497 			     (u64 __user *)reg->addr);
3498 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3499 			kvm_clear_async_pf_completion_queue(vcpu);
3500 		break;
3501 	case KVM_REG_S390_PFCOMPARE:
3502 		r = get_user(vcpu->arch.pfault_compare,
3503 			     (u64 __user *)reg->addr);
3504 		break;
3505 	case KVM_REG_S390_PFSELECT:
3506 		r = get_user(vcpu->arch.pfault_select,
3507 			     (u64 __user *)reg->addr);
3508 		break;
3509 	case KVM_REG_S390_PP:
3510 		r = get_user(vcpu->arch.sie_block->pp,
3511 			     (u64 __user *)reg->addr);
3512 		break;
3513 	case KVM_REG_S390_GBEA:
3514 		r = get_user(vcpu->arch.sie_block->gbea,
3515 			     (u64 __user *)reg->addr);
3516 		break;
3517 	default:
3518 		break;
3519 	}
3520 
3521 	return r;
3522 }
3523 
3524 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3525 {
3526 	vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3527 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3528 	memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3529 
3530 	kvm_clear_async_pf_completion_queue(vcpu);
3531 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3532 		kvm_s390_vcpu_stop(vcpu);
3533 	kvm_s390_clear_local_irqs(vcpu);
3534 }
3535 
3536 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3537 {
3538 	/* Initial reset is a superset of the normal reset */
3539 	kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3540 
3541 	/*
3542 	 * This equals initial cpu reset in pop, but we don't switch to ESA.
3543 	 * We do not only reset the internal data, but also ...
3544 	 */
3545 	vcpu->arch.sie_block->gpsw.mask = 0;
3546 	vcpu->arch.sie_block->gpsw.addr = 0;
3547 	kvm_s390_set_prefix(vcpu, 0);
3548 	kvm_s390_set_cpu_timer(vcpu, 0);
3549 	vcpu->arch.sie_block->ckc = 0;
3550 	memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3551 	vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3552 	vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3553 
3554 	/* ... the data in sync regs */
3555 	memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3556 	vcpu->run->s.regs.ckc = 0;
3557 	vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3558 	vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3559 	vcpu->run->psw_addr = 0;
3560 	vcpu->run->psw_mask = 0;
3561 	vcpu->run->s.regs.todpr = 0;
3562 	vcpu->run->s.regs.cputm = 0;
3563 	vcpu->run->s.regs.ckc = 0;
3564 	vcpu->run->s.regs.pp = 0;
3565 	vcpu->run->s.regs.gbea = 1;
3566 	vcpu->run->s.regs.fpc = 0;
3567 	/*
3568 	 * Do not reset these registers in the protected case, as some of
3569 	 * them are overlayed and they are not accessible in this case
3570 	 * anyway.
3571 	 */
3572 	if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3573 		vcpu->arch.sie_block->gbea = 1;
3574 		vcpu->arch.sie_block->pp = 0;
3575 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3576 		vcpu->arch.sie_block->todpr = 0;
3577 	}
3578 }
3579 
3580 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3581 {
3582 	struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3583 
3584 	/* Clear reset is a superset of the initial reset */
3585 	kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3586 
3587 	memset(&regs->gprs, 0, sizeof(regs->gprs));
3588 	memset(&regs->vrs, 0, sizeof(regs->vrs));
3589 	memset(&regs->acrs, 0, sizeof(regs->acrs));
3590 	memset(&regs->gscb, 0, sizeof(regs->gscb));
3591 
3592 	regs->etoken = 0;
3593 	regs->etoken_extension = 0;
3594 }
3595 
3596 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3597 {
3598 	vcpu_load(vcpu);
3599 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3600 	vcpu_put(vcpu);
3601 	return 0;
3602 }
3603 
3604 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3605 {
3606 	vcpu_load(vcpu);
3607 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3608 	vcpu_put(vcpu);
3609 	return 0;
3610 }
3611 
3612 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3613 				  struct kvm_sregs *sregs)
3614 {
3615 	vcpu_load(vcpu);
3616 
3617 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3618 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3619 
3620 	vcpu_put(vcpu);
3621 	return 0;
3622 }
3623 
3624 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3625 				  struct kvm_sregs *sregs)
3626 {
3627 	vcpu_load(vcpu);
3628 
3629 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3630 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3631 
3632 	vcpu_put(vcpu);
3633 	return 0;
3634 }
3635 
3636 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3637 {
3638 	int ret = 0;
3639 
3640 	vcpu_load(vcpu);
3641 
3642 	if (test_fp_ctl(fpu->fpc)) {
3643 		ret = -EINVAL;
3644 		goto out;
3645 	}
3646 	vcpu->run->s.regs.fpc = fpu->fpc;
3647 	if (MACHINE_HAS_VX)
3648 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3649 				 (freg_t *) fpu->fprs);
3650 	else
3651 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3652 
3653 out:
3654 	vcpu_put(vcpu);
3655 	return ret;
3656 }
3657 
3658 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3659 {
3660 	vcpu_load(vcpu);
3661 
3662 	/* make sure we have the latest values */
3663 	save_fpu_regs();
3664 	if (MACHINE_HAS_VX)
3665 		convert_vx_to_fp((freg_t *) fpu->fprs,
3666 				 (__vector128 *) vcpu->run->s.regs.vrs);
3667 	else
3668 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3669 	fpu->fpc = vcpu->run->s.regs.fpc;
3670 
3671 	vcpu_put(vcpu);
3672 	return 0;
3673 }
3674 
3675 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3676 {
3677 	int rc = 0;
3678 
3679 	if (!is_vcpu_stopped(vcpu))
3680 		rc = -EBUSY;
3681 	else {
3682 		vcpu->run->psw_mask = psw.mask;
3683 		vcpu->run->psw_addr = psw.addr;
3684 	}
3685 	return rc;
3686 }
3687 
3688 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3689 				  struct kvm_translation *tr)
3690 {
3691 	return -EINVAL; /* not implemented yet */
3692 }
3693 
3694 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3695 			      KVM_GUESTDBG_USE_HW_BP | \
3696 			      KVM_GUESTDBG_ENABLE)
3697 
3698 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3699 					struct kvm_guest_debug *dbg)
3700 {
3701 	int rc = 0;
3702 
3703 	vcpu_load(vcpu);
3704 
3705 	vcpu->guest_debug = 0;
3706 	kvm_s390_clear_bp_data(vcpu);
3707 
3708 	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3709 		rc = -EINVAL;
3710 		goto out;
3711 	}
3712 	if (!sclp.has_gpere) {
3713 		rc = -EINVAL;
3714 		goto out;
3715 	}
3716 
3717 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
3718 		vcpu->guest_debug = dbg->control;
3719 		/* enforce guest PER */
3720 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3721 
3722 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3723 			rc = kvm_s390_import_bp_data(vcpu, dbg);
3724 	} else {
3725 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3726 		vcpu->arch.guestdbg.last_bp = 0;
3727 	}
3728 
3729 	if (rc) {
3730 		vcpu->guest_debug = 0;
3731 		kvm_s390_clear_bp_data(vcpu);
3732 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3733 	}
3734 
3735 out:
3736 	vcpu_put(vcpu);
3737 	return rc;
3738 }
3739 
3740 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3741 				    struct kvm_mp_state *mp_state)
3742 {
3743 	int ret;
3744 
3745 	vcpu_load(vcpu);
3746 
3747 	/* CHECK_STOP and LOAD are not supported yet */
3748 	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3749 				      KVM_MP_STATE_OPERATING;
3750 
3751 	vcpu_put(vcpu);
3752 	return ret;
3753 }
3754 
3755 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3756 				    struct kvm_mp_state *mp_state)
3757 {
3758 	int rc = 0;
3759 
3760 	vcpu_load(vcpu);
3761 
3762 	/* user space knows about this interface - let it control the state */
3763 	kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm);
3764 
3765 	switch (mp_state->mp_state) {
3766 	case KVM_MP_STATE_STOPPED:
3767 		rc = kvm_s390_vcpu_stop(vcpu);
3768 		break;
3769 	case KVM_MP_STATE_OPERATING:
3770 		rc = kvm_s390_vcpu_start(vcpu);
3771 		break;
3772 	case KVM_MP_STATE_LOAD:
3773 		if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3774 			rc = -ENXIO;
3775 			break;
3776 		}
3777 		rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3778 		break;
3779 	case KVM_MP_STATE_CHECK_STOP:
3780 		fallthrough;	/* CHECK_STOP and LOAD are not supported yet */
3781 	default:
3782 		rc = -ENXIO;
3783 	}
3784 
3785 	vcpu_put(vcpu);
3786 	return rc;
3787 }
3788 
3789 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3790 {
3791 	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3792 }
3793 
3794 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3795 {
3796 retry:
3797 	kvm_s390_vcpu_request_handled(vcpu);
3798 	if (!kvm_request_pending(vcpu))
3799 		return 0;
3800 	/*
3801 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3802 	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3803 	 * This ensures that the ipte instruction for this request has
3804 	 * already finished. We might race against a second unmapper that
3805 	 * wants to set the blocking bit. Lets just retry the request loop.
3806 	 */
3807 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3808 		int rc;
3809 		rc = gmap_mprotect_notify(vcpu->arch.gmap,
3810 					  kvm_s390_get_prefix(vcpu),
3811 					  PAGE_SIZE * 2, PROT_WRITE);
3812 		if (rc) {
3813 			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3814 			return rc;
3815 		}
3816 		goto retry;
3817 	}
3818 
3819 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3820 		vcpu->arch.sie_block->ihcpu = 0xffff;
3821 		goto retry;
3822 	}
3823 
3824 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3825 		if (!ibs_enabled(vcpu)) {
3826 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3827 			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3828 		}
3829 		goto retry;
3830 	}
3831 
3832 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3833 		if (ibs_enabled(vcpu)) {
3834 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3835 			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3836 		}
3837 		goto retry;
3838 	}
3839 
3840 	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3841 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3842 		goto retry;
3843 	}
3844 
3845 	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3846 		/*
3847 		 * Disable CMM virtualization; we will emulate the ESSA
3848 		 * instruction manually, in order to provide additional
3849 		 * functionalities needed for live migration.
3850 		 */
3851 		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3852 		goto retry;
3853 	}
3854 
3855 	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3856 		/*
3857 		 * Re-enable CMM virtualization if CMMA is available and
3858 		 * CMM has been used.
3859 		 */
3860 		if ((vcpu->kvm->arch.use_cmma) &&
3861 		    (vcpu->kvm->mm->context.uses_cmm))
3862 			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3863 		goto retry;
3864 	}
3865 
3866 	/* nothing to do, just clear the request */
3867 	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3868 	/* we left the vsie handler, nothing to do, just clear the request */
3869 	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3870 
3871 	return 0;
3872 }
3873 
3874 void kvm_s390_set_tod_clock(struct kvm *kvm,
3875 			    const struct kvm_s390_vm_tod_clock *gtod)
3876 {
3877 	struct kvm_vcpu *vcpu;
3878 	union tod_clock clk;
3879 	unsigned long i;
3880 
3881 	mutex_lock(&kvm->lock);
3882 	preempt_disable();
3883 
3884 	store_tod_clock_ext(&clk);
3885 
3886 	kvm->arch.epoch = gtod->tod - clk.tod;
3887 	kvm->arch.epdx = 0;
3888 	if (test_kvm_facility(kvm, 139)) {
3889 		kvm->arch.epdx = gtod->epoch_idx - clk.ei;
3890 		if (kvm->arch.epoch > gtod->tod)
3891 			kvm->arch.epdx -= 1;
3892 	}
3893 
3894 	kvm_s390_vcpu_block_all(kvm);
3895 	kvm_for_each_vcpu(i, vcpu, kvm) {
3896 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3897 		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3898 	}
3899 
3900 	kvm_s390_vcpu_unblock_all(kvm);
3901 	preempt_enable();
3902 	mutex_unlock(&kvm->lock);
3903 }
3904 
3905 /**
3906  * kvm_arch_fault_in_page - fault-in guest page if necessary
3907  * @vcpu: The corresponding virtual cpu
3908  * @gpa: Guest physical address
3909  * @writable: Whether the page should be writable or not
3910  *
3911  * Make sure that a guest page has been faulted-in on the host.
3912  *
3913  * Return: Zero on success, negative error code otherwise.
3914  */
3915 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3916 {
3917 	return gmap_fault(vcpu->arch.gmap, gpa,
3918 			  writable ? FAULT_FLAG_WRITE : 0);
3919 }
3920 
3921 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3922 				      unsigned long token)
3923 {
3924 	struct kvm_s390_interrupt inti;
3925 	struct kvm_s390_irq irq;
3926 
3927 	if (start_token) {
3928 		irq.u.ext.ext_params2 = token;
3929 		irq.type = KVM_S390_INT_PFAULT_INIT;
3930 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3931 	} else {
3932 		inti.type = KVM_S390_INT_PFAULT_DONE;
3933 		inti.parm64 = token;
3934 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3935 	}
3936 }
3937 
3938 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3939 				     struct kvm_async_pf *work)
3940 {
3941 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3942 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3943 
3944 	return true;
3945 }
3946 
3947 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3948 				 struct kvm_async_pf *work)
3949 {
3950 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3951 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3952 }
3953 
3954 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3955 			       struct kvm_async_pf *work)
3956 {
3957 	/* s390 will always inject the page directly */
3958 }
3959 
3960 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
3961 {
3962 	/*
3963 	 * s390 will always inject the page directly,
3964 	 * but we still want check_async_completion to cleanup
3965 	 */
3966 	return true;
3967 }
3968 
3969 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3970 {
3971 	hva_t hva;
3972 	struct kvm_arch_async_pf arch;
3973 
3974 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3975 		return false;
3976 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3977 	    vcpu->arch.pfault_compare)
3978 		return false;
3979 	if (psw_extint_disabled(vcpu))
3980 		return false;
3981 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
3982 		return false;
3983 	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3984 		return false;
3985 	if (!vcpu->arch.gmap->pfault_enabled)
3986 		return false;
3987 
3988 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3989 	hva += current->thread.gmap_addr & ~PAGE_MASK;
3990 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3991 		return false;
3992 
3993 	return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3994 }
3995 
3996 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3997 {
3998 	int rc, cpuflags;
3999 
4000 	/*
4001 	 * On s390 notifications for arriving pages will be delivered directly
4002 	 * to the guest but the house keeping for completed pfaults is
4003 	 * handled outside the worker.
4004 	 */
4005 	kvm_check_async_pf_completion(vcpu);
4006 
4007 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4008 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4009 
4010 	if (need_resched())
4011 		schedule();
4012 
4013 	if (!kvm_is_ucontrol(vcpu->kvm)) {
4014 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
4015 		if (rc)
4016 			return rc;
4017 	}
4018 
4019 	rc = kvm_s390_handle_requests(vcpu);
4020 	if (rc)
4021 		return rc;
4022 
4023 	if (guestdbg_enabled(vcpu)) {
4024 		kvm_s390_backup_guest_per_regs(vcpu);
4025 		kvm_s390_patch_guest_per_regs(vcpu);
4026 	}
4027 
4028 	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
4029 
4030 	vcpu->arch.sie_block->icptcode = 0;
4031 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4032 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4033 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
4034 
4035 	return 0;
4036 }
4037 
4038 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4039 {
4040 	struct kvm_s390_pgm_info pgm_info = {
4041 		.code = PGM_ADDRESSING,
4042 	};
4043 	u8 opcode, ilen;
4044 	int rc;
4045 
4046 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4047 	trace_kvm_s390_sie_fault(vcpu);
4048 
4049 	/*
4050 	 * We want to inject an addressing exception, which is defined as a
4051 	 * suppressing or terminating exception. However, since we came here
4052 	 * by a DAT access exception, the PSW still points to the faulting
4053 	 * instruction since DAT exceptions are nullifying. So we've got
4054 	 * to look up the current opcode to get the length of the instruction
4055 	 * to be able to forward the PSW.
4056 	 */
4057 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4058 	ilen = insn_length(opcode);
4059 	if (rc < 0) {
4060 		return rc;
4061 	} else if (rc) {
4062 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
4063 		 * Forward by arbitrary ilc, injection will take care of
4064 		 * nullification if necessary.
4065 		 */
4066 		pgm_info = vcpu->arch.pgm;
4067 		ilen = 4;
4068 	}
4069 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4070 	kvm_s390_forward_psw(vcpu, ilen);
4071 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4072 }
4073 
4074 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4075 {
4076 	struct mcck_volatile_info *mcck_info;
4077 	struct sie_page *sie_page;
4078 
4079 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4080 		   vcpu->arch.sie_block->icptcode);
4081 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4082 
4083 	if (guestdbg_enabled(vcpu))
4084 		kvm_s390_restore_guest_per_regs(vcpu);
4085 
4086 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4087 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4088 
4089 	if (exit_reason == -EINTR) {
4090 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
4091 		sie_page = container_of(vcpu->arch.sie_block,
4092 					struct sie_page, sie_block);
4093 		mcck_info = &sie_page->mcck_info;
4094 		kvm_s390_reinject_machine_check(vcpu, mcck_info);
4095 		return 0;
4096 	}
4097 
4098 	if (vcpu->arch.sie_block->icptcode > 0) {
4099 		int rc = kvm_handle_sie_intercept(vcpu);
4100 
4101 		if (rc != -EOPNOTSUPP)
4102 			return rc;
4103 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4104 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4105 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4106 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4107 		return -EREMOTE;
4108 	} else if (exit_reason != -EFAULT) {
4109 		vcpu->stat.exit_null++;
4110 		return 0;
4111 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
4112 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4113 		vcpu->run->s390_ucontrol.trans_exc_code =
4114 						current->thread.gmap_addr;
4115 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
4116 		return -EREMOTE;
4117 	} else if (current->thread.gmap_pfault) {
4118 		trace_kvm_s390_major_guest_pfault(vcpu);
4119 		current->thread.gmap_pfault = 0;
4120 		if (kvm_arch_setup_async_pf(vcpu))
4121 			return 0;
4122 		vcpu->stat.pfault_sync++;
4123 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4124 	}
4125 	return vcpu_post_run_fault_in_sie(vcpu);
4126 }
4127 
4128 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4129 static int __vcpu_run(struct kvm_vcpu *vcpu)
4130 {
4131 	int rc, exit_reason;
4132 	struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4133 
4134 	/*
4135 	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
4136 	 * ning the guest), so that memslots (and other stuff) are protected
4137 	 */
4138 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4139 
4140 	do {
4141 		rc = vcpu_pre_run(vcpu);
4142 		if (rc)
4143 			break;
4144 
4145 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4146 		/*
4147 		 * As PF_VCPU will be used in fault handler, between
4148 		 * guest_enter and guest_exit should be no uaccess.
4149 		 */
4150 		local_irq_disable();
4151 		guest_enter_irqoff();
4152 		__disable_cpu_timer_accounting(vcpu);
4153 		local_irq_enable();
4154 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4155 			memcpy(sie_page->pv_grregs,
4156 			       vcpu->run->s.regs.gprs,
4157 			       sizeof(sie_page->pv_grregs));
4158 		}
4159 		if (test_cpu_flag(CIF_FPU))
4160 			load_fpu_regs();
4161 		exit_reason = sie64a(vcpu->arch.sie_block,
4162 				     vcpu->run->s.regs.gprs);
4163 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4164 			memcpy(vcpu->run->s.regs.gprs,
4165 			       sie_page->pv_grregs,
4166 			       sizeof(sie_page->pv_grregs));
4167 			/*
4168 			 * We're not allowed to inject interrupts on intercepts
4169 			 * that leave the guest state in an "in-between" state
4170 			 * where the next SIE entry will do a continuation.
4171 			 * Fence interrupts in our "internal" PSW.
4172 			 */
4173 			if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4174 			    vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4175 				vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4176 			}
4177 		}
4178 		local_irq_disable();
4179 		__enable_cpu_timer_accounting(vcpu);
4180 		guest_exit_irqoff();
4181 		local_irq_enable();
4182 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4183 
4184 		rc = vcpu_post_run(vcpu, exit_reason);
4185 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4186 
4187 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4188 	return rc;
4189 }
4190 
4191 static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4192 {
4193 	struct kvm_run *kvm_run = vcpu->run;
4194 	struct runtime_instr_cb *riccb;
4195 	struct gs_cb *gscb;
4196 
4197 	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4198 	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4199 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4200 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4201 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4202 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4203 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4204 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4205 	}
4206 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4207 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4208 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4209 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4210 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4211 			kvm_clear_async_pf_completion_queue(vcpu);
4212 	}
4213 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4214 		vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4215 		vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4216 		VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc);
4217 	}
4218 	/*
4219 	 * If userspace sets the riccb (e.g. after migration) to a valid state,
4220 	 * we should enable RI here instead of doing the lazy enablement.
4221 	 */
4222 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4223 	    test_kvm_facility(vcpu->kvm, 64) &&
4224 	    riccb->v &&
4225 	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4226 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4227 		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4228 	}
4229 	/*
4230 	 * If userspace sets the gscb (e.g. after migration) to non-zero,
4231 	 * we should enable GS here instead of doing the lazy enablement.
4232 	 */
4233 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4234 	    test_kvm_facility(vcpu->kvm, 133) &&
4235 	    gscb->gssm &&
4236 	    !vcpu->arch.gs_enabled) {
4237 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4238 		vcpu->arch.sie_block->ecb |= ECB_GS;
4239 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4240 		vcpu->arch.gs_enabled = 1;
4241 	}
4242 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4243 	    test_kvm_facility(vcpu->kvm, 82)) {
4244 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4245 		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4246 	}
4247 	if (MACHINE_HAS_GS) {
4248 		preempt_disable();
4249 		__ctl_set_bit(2, 4);
4250 		if (current->thread.gs_cb) {
4251 			vcpu->arch.host_gscb = current->thread.gs_cb;
4252 			save_gs_cb(vcpu->arch.host_gscb);
4253 		}
4254 		if (vcpu->arch.gs_enabled) {
4255 			current->thread.gs_cb = (struct gs_cb *)
4256 						&vcpu->run->s.regs.gscb;
4257 			restore_gs_cb(current->thread.gs_cb);
4258 		}
4259 		preempt_enable();
4260 	}
4261 	/* SIE will load etoken directly from SDNX and therefore kvm_run */
4262 }
4263 
4264 static void sync_regs(struct kvm_vcpu *vcpu)
4265 {
4266 	struct kvm_run *kvm_run = vcpu->run;
4267 
4268 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4269 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4270 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4271 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4272 		/* some control register changes require a tlb flush */
4273 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4274 	}
4275 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4276 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4277 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4278 	}
4279 	save_access_regs(vcpu->arch.host_acrs);
4280 	restore_access_regs(vcpu->run->s.regs.acrs);
4281 	/* save host (userspace) fprs/vrs */
4282 	save_fpu_regs();
4283 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4284 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4285 	if (MACHINE_HAS_VX)
4286 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4287 	else
4288 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4289 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4290 	if (test_fp_ctl(current->thread.fpu.fpc))
4291 		/* User space provided an invalid FPC, let's clear it */
4292 		current->thread.fpu.fpc = 0;
4293 
4294 	/* Sync fmt2 only data */
4295 	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4296 		sync_regs_fmt2(vcpu);
4297 	} else {
4298 		/*
4299 		 * In several places we have to modify our internal view to
4300 		 * not do things that are disallowed by the ultravisor. For
4301 		 * example we must not inject interrupts after specific exits
4302 		 * (e.g. 112 prefix page not secure). We do this by turning
4303 		 * off the machine check, external and I/O interrupt bits
4304 		 * of our PSW copy. To avoid getting validity intercepts, we
4305 		 * do only accept the condition code from userspace.
4306 		 */
4307 		vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4308 		vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4309 						   PSW_MASK_CC;
4310 	}
4311 
4312 	kvm_run->kvm_dirty_regs = 0;
4313 }
4314 
4315 static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4316 {
4317 	struct kvm_run *kvm_run = vcpu->run;
4318 
4319 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4320 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4321 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4322 	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4323 	kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4324 	if (MACHINE_HAS_GS) {
4325 		preempt_disable();
4326 		__ctl_set_bit(2, 4);
4327 		if (vcpu->arch.gs_enabled)
4328 			save_gs_cb(current->thread.gs_cb);
4329 		current->thread.gs_cb = vcpu->arch.host_gscb;
4330 		restore_gs_cb(vcpu->arch.host_gscb);
4331 		if (!vcpu->arch.host_gscb)
4332 			__ctl_clear_bit(2, 4);
4333 		vcpu->arch.host_gscb = NULL;
4334 		preempt_enable();
4335 	}
4336 	/* SIE will save etoken directly into SDNX and therefore kvm_run */
4337 }
4338 
4339 static void store_regs(struct kvm_vcpu *vcpu)
4340 {
4341 	struct kvm_run *kvm_run = vcpu->run;
4342 
4343 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4344 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4345 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4346 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4347 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4348 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4349 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4350 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4351 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4352 	save_access_regs(vcpu->run->s.regs.acrs);
4353 	restore_access_regs(vcpu->arch.host_acrs);
4354 	/* Save guest register state */
4355 	save_fpu_regs();
4356 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4357 	/* Restore will be done lazily at return */
4358 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4359 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4360 	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4361 		store_regs_fmt2(vcpu);
4362 }
4363 
4364 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4365 {
4366 	struct kvm_run *kvm_run = vcpu->run;
4367 	int rc;
4368 
4369 	if (kvm_run->immediate_exit)
4370 		return -EINTR;
4371 
4372 	if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4373 	    kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4374 		return -EINVAL;
4375 
4376 	vcpu_load(vcpu);
4377 
4378 	if (guestdbg_exit_pending(vcpu)) {
4379 		kvm_s390_prepare_debug_exit(vcpu);
4380 		rc = 0;
4381 		goto out;
4382 	}
4383 
4384 	kvm_sigset_activate(vcpu);
4385 
4386 	/*
4387 	 * no need to check the return value of vcpu_start as it can only have
4388 	 * an error for protvirt, but protvirt means user cpu state
4389 	 */
4390 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4391 		kvm_s390_vcpu_start(vcpu);
4392 	} else if (is_vcpu_stopped(vcpu)) {
4393 		pr_err_ratelimited("can't run stopped vcpu %d\n",
4394 				   vcpu->vcpu_id);
4395 		rc = -EINVAL;
4396 		goto out;
4397 	}
4398 
4399 	sync_regs(vcpu);
4400 	enable_cpu_timer_accounting(vcpu);
4401 
4402 	might_fault();
4403 	rc = __vcpu_run(vcpu);
4404 
4405 	if (signal_pending(current) && !rc) {
4406 		kvm_run->exit_reason = KVM_EXIT_INTR;
4407 		rc = -EINTR;
4408 	}
4409 
4410 	if (guestdbg_exit_pending(vcpu) && !rc)  {
4411 		kvm_s390_prepare_debug_exit(vcpu);
4412 		rc = 0;
4413 	}
4414 
4415 	if (rc == -EREMOTE) {
4416 		/* userspace support is needed, kvm_run has been prepared */
4417 		rc = 0;
4418 	}
4419 
4420 	disable_cpu_timer_accounting(vcpu);
4421 	store_regs(vcpu);
4422 
4423 	kvm_sigset_deactivate(vcpu);
4424 
4425 	vcpu->stat.exit_userspace++;
4426 out:
4427 	vcpu_put(vcpu);
4428 	return rc;
4429 }
4430 
4431 /*
4432  * store status at address
4433  * we use have two special cases:
4434  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4435  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4436  */
4437 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4438 {
4439 	unsigned char archmode = 1;
4440 	freg_t fprs[NUM_FPRS];
4441 	unsigned int px;
4442 	u64 clkcomp, cputm;
4443 	int rc;
4444 
4445 	px = kvm_s390_get_prefix(vcpu);
4446 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4447 		if (write_guest_abs(vcpu, 163, &archmode, 1))
4448 			return -EFAULT;
4449 		gpa = 0;
4450 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4451 		if (write_guest_real(vcpu, 163, &archmode, 1))
4452 			return -EFAULT;
4453 		gpa = px;
4454 	} else
4455 		gpa -= __LC_FPREGS_SAVE_AREA;
4456 
4457 	/* manually convert vector registers if necessary */
4458 	if (MACHINE_HAS_VX) {
4459 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4460 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4461 				     fprs, 128);
4462 	} else {
4463 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4464 				     vcpu->run->s.regs.fprs, 128);
4465 	}
4466 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4467 			      vcpu->run->s.regs.gprs, 128);
4468 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4469 			      &vcpu->arch.sie_block->gpsw, 16);
4470 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4471 			      &px, 4);
4472 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4473 			      &vcpu->run->s.regs.fpc, 4);
4474 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4475 			      &vcpu->arch.sie_block->todpr, 4);
4476 	cputm = kvm_s390_get_cpu_timer(vcpu);
4477 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4478 			      &cputm, 8);
4479 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
4480 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4481 			      &clkcomp, 8);
4482 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4483 			      &vcpu->run->s.regs.acrs, 64);
4484 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4485 			      &vcpu->arch.sie_block->gcr, 128);
4486 	return rc ? -EFAULT : 0;
4487 }
4488 
4489 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4490 {
4491 	/*
4492 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4493 	 * switch in the run ioctl. Let's update our copies before we save
4494 	 * it into the save area
4495 	 */
4496 	save_fpu_regs();
4497 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4498 	save_access_regs(vcpu->run->s.regs.acrs);
4499 
4500 	return kvm_s390_store_status_unloaded(vcpu, addr);
4501 }
4502 
4503 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4504 {
4505 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4506 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4507 }
4508 
4509 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4510 {
4511 	unsigned long i;
4512 	struct kvm_vcpu *vcpu;
4513 
4514 	kvm_for_each_vcpu(i, vcpu, kvm) {
4515 		__disable_ibs_on_vcpu(vcpu);
4516 	}
4517 }
4518 
4519 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4520 {
4521 	if (!sclp.has_ibs)
4522 		return;
4523 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4524 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4525 }
4526 
4527 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4528 {
4529 	int i, online_vcpus, r = 0, started_vcpus = 0;
4530 
4531 	if (!is_vcpu_stopped(vcpu))
4532 		return 0;
4533 
4534 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4535 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4536 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4537 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4538 
4539 	/* Let's tell the UV that we want to change into the operating state */
4540 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4541 		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4542 		if (r) {
4543 			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4544 			return r;
4545 		}
4546 	}
4547 
4548 	for (i = 0; i < online_vcpus; i++) {
4549 		if (!is_vcpu_stopped(kvm_get_vcpu(vcpu->kvm, i)))
4550 			started_vcpus++;
4551 	}
4552 
4553 	if (started_vcpus == 0) {
4554 		/* we're the only active VCPU -> speed it up */
4555 		__enable_ibs_on_vcpu(vcpu);
4556 	} else if (started_vcpus == 1) {
4557 		/*
4558 		 * As we are starting a second VCPU, we have to disable
4559 		 * the IBS facility on all VCPUs to remove potentially
4560 		 * outstanding ENABLE requests.
4561 		 */
4562 		__disable_ibs_on_all_vcpus(vcpu->kvm);
4563 	}
4564 
4565 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4566 	/*
4567 	 * The real PSW might have changed due to a RESTART interpreted by the
4568 	 * ultravisor. We block all interrupts and let the next sie exit
4569 	 * refresh our view.
4570 	 */
4571 	if (kvm_s390_pv_cpu_is_protected(vcpu))
4572 		vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4573 	/*
4574 	 * Another VCPU might have used IBS while we were offline.
4575 	 * Let's play safe and flush the VCPU at startup.
4576 	 */
4577 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4578 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4579 	return 0;
4580 }
4581 
4582 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4583 {
4584 	int i, online_vcpus, r = 0, started_vcpus = 0;
4585 	struct kvm_vcpu *started_vcpu = NULL;
4586 
4587 	if (is_vcpu_stopped(vcpu))
4588 		return 0;
4589 
4590 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4591 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4592 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4593 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4594 
4595 	/* Let's tell the UV that we want to change into the stopped state */
4596 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4597 		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4598 		if (r) {
4599 			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4600 			return r;
4601 		}
4602 	}
4603 
4604 	/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
4605 	kvm_s390_clear_stop_irq(vcpu);
4606 
4607 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4608 	__disable_ibs_on_vcpu(vcpu);
4609 
4610 	for (i = 0; i < online_vcpus; i++) {
4611 		struct kvm_vcpu *tmp = kvm_get_vcpu(vcpu->kvm, i);
4612 
4613 		if (!is_vcpu_stopped(tmp)) {
4614 			started_vcpus++;
4615 			started_vcpu = tmp;
4616 		}
4617 	}
4618 
4619 	if (started_vcpus == 1) {
4620 		/*
4621 		 * As we only have one VCPU left, we want to enable the
4622 		 * IBS facility for that VCPU to speed it up.
4623 		 */
4624 		__enable_ibs_on_vcpu(started_vcpu);
4625 	}
4626 
4627 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4628 	return 0;
4629 }
4630 
4631 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4632 				     struct kvm_enable_cap *cap)
4633 {
4634 	int r;
4635 
4636 	if (cap->flags)
4637 		return -EINVAL;
4638 
4639 	switch (cap->cap) {
4640 	case KVM_CAP_S390_CSS_SUPPORT:
4641 		if (!vcpu->kvm->arch.css_support) {
4642 			vcpu->kvm->arch.css_support = 1;
4643 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4644 			trace_kvm_s390_enable_css(vcpu->kvm);
4645 		}
4646 		r = 0;
4647 		break;
4648 	default:
4649 		r = -EINVAL;
4650 		break;
4651 	}
4652 	return r;
4653 }
4654 
4655 static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
4656 				   struct kvm_s390_mem_op *mop)
4657 {
4658 	void __user *uaddr = (void __user *)mop->buf;
4659 	int r = 0;
4660 
4661 	if (mop->flags || !mop->size)
4662 		return -EINVAL;
4663 	if (mop->size + mop->sida_offset < mop->size)
4664 		return -EINVAL;
4665 	if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4666 		return -E2BIG;
4667 
4668 	switch (mop->op) {
4669 	case KVM_S390_MEMOP_SIDA_READ:
4670 		if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4671 				 mop->sida_offset), mop->size))
4672 			r = -EFAULT;
4673 
4674 		break;
4675 	case KVM_S390_MEMOP_SIDA_WRITE:
4676 		if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4677 				   mop->sida_offset), uaddr, mop->size))
4678 			r = -EFAULT;
4679 		break;
4680 	}
4681 	return r;
4682 }
4683 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4684 				  struct kvm_s390_mem_op *mop)
4685 {
4686 	void __user *uaddr = (void __user *)mop->buf;
4687 	void *tmpbuf = NULL;
4688 	int r = 0;
4689 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4690 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
4691 
4692 	if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4693 		return -EINVAL;
4694 
4695 	if (mop->size > MEM_OP_MAX_SIZE)
4696 		return -E2BIG;
4697 
4698 	if (kvm_s390_pv_cpu_is_protected(vcpu))
4699 		return -EINVAL;
4700 
4701 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4702 		tmpbuf = vmalloc(mop->size);
4703 		if (!tmpbuf)
4704 			return -ENOMEM;
4705 	}
4706 
4707 	switch (mop->op) {
4708 	case KVM_S390_MEMOP_LOGICAL_READ:
4709 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4710 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4711 					    mop->size, GACC_FETCH);
4712 			break;
4713 		}
4714 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4715 		if (r == 0) {
4716 			if (copy_to_user(uaddr, tmpbuf, mop->size))
4717 				r = -EFAULT;
4718 		}
4719 		break;
4720 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4721 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4722 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4723 					    mop->size, GACC_STORE);
4724 			break;
4725 		}
4726 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4727 			r = -EFAULT;
4728 			break;
4729 		}
4730 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4731 		break;
4732 	}
4733 
4734 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4735 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4736 
4737 	vfree(tmpbuf);
4738 	return r;
4739 }
4740 
4741 static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
4742 				      struct kvm_s390_mem_op *mop)
4743 {
4744 	int r, srcu_idx;
4745 
4746 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4747 
4748 	switch (mop->op) {
4749 	case KVM_S390_MEMOP_LOGICAL_READ:
4750 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4751 		r = kvm_s390_guest_mem_op(vcpu, mop);
4752 		break;
4753 	case KVM_S390_MEMOP_SIDA_READ:
4754 	case KVM_S390_MEMOP_SIDA_WRITE:
4755 		/* we are locked against sida going away by the vcpu->mutex */
4756 		r = kvm_s390_guest_sida_op(vcpu, mop);
4757 		break;
4758 	default:
4759 		r = -EINVAL;
4760 	}
4761 
4762 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4763 	return r;
4764 }
4765 
4766 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4767 			       unsigned int ioctl, unsigned long arg)
4768 {
4769 	struct kvm_vcpu *vcpu = filp->private_data;
4770 	void __user *argp = (void __user *)arg;
4771 
4772 	switch (ioctl) {
4773 	case KVM_S390_IRQ: {
4774 		struct kvm_s390_irq s390irq;
4775 
4776 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4777 			return -EFAULT;
4778 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4779 	}
4780 	case KVM_S390_INTERRUPT: {
4781 		struct kvm_s390_interrupt s390int;
4782 		struct kvm_s390_irq s390irq = {};
4783 
4784 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
4785 			return -EFAULT;
4786 		if (s390int_to_s390irq(&s390int, &s390irq))
4787 			return -EINVAL;
4788 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4789 	}
4790 	}
4791 	return -ENOIOCTLCMD;
4792 }
4793 
4794 long kvm_arch_vcpu_ioctl(struct file *filp,
4795 			 unsigned int ioctl, unsigned long arg)
4796 {
4797 	struct kvm_vcpu *vcpu = filp->private_data;
4798 	void __user *argp = (void __user *)arg;
4799 	int idx;
4800 	long r;
4801 	u16 rc, rrc;
4802 
4803 	vcpu_load(vcpu);
4804 
4805 	switch (ioctl) {
4806 	case KVM_S390_STORE_STATUS:
4807 		idx = srcu_read_lock(&vcpu->kvm->srcu);
4808 		r = kvm_s390_store_status_unloaded(vcpu, arg);
4809 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
4810 		break;
4811 	case KVM_S390_SET_INITIAL_PSW: {
4812 		psw_t psw;
4813 
4814 		r = -EFAULT;
4815 		if (copy_from_user(&psw, argp, sizeof(psw)))
4816 			break;
4817 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4818 		break;
4819 	}
4820 	case KVM_S390_CLEAR_RESET:
4821 		r = 0;
4822 		kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4823 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4824 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4825 					  UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4826 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4827 				   rc, rrc);
4828 		}
4829 		break;
4830 	case KVM_S390_INITIAL_RESET:
4831 		r = 0;
4832 		kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4833 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4834 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4835 					  UVC_CMD_CPU_RESET_INITIAL,
4836 					  &rc, &rrc);
4837 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4838 				   rc, rrc);
4839 		}
4840 		break;
4841 	case KVM_S390_NORMAL_RESET:
4842 		r = 0;
4843 		kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4844 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4845 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4846 					  UVC_CMD_CPU_RESET, &rc, &rrc);
4847 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4848 				   rc, rrc);
4849 		}
4850 		break;
4851 	case KVM_SET_ONE_REG:
4852 	case KVM_GET_ONE_REG: {
4853 		struct kvm_one_reg reg;
4854 		r = -EINVAL;
4855 		if (kvm_s390_pv_cpu_is_protected(vcpu))
4856 			break;
4857 		r = -EFAULT;
4858 		if (copy_from_user(&reg, argp, sizeof(reg)))
4859 			break;
4860 		if (ioctl == KVM_SET_ONE_REG)
4861 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4862 		else
4863 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4864 		break;
4865 	}
4866 #ifdef CONFIG_KVM_S390_UCONTROL
4867 	case KVM_S390_UCAS_MAP: {
4868 		struct kvm_s390_ucas_mapping ucasmap;
4869 
4870 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4871 			r = -EFAULT;
4872 			break;
4873 		}
4874 
4875 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4876 			r = -EINVAL;
4877 			break;
4878 		}
4879 
4880 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4881 				     ucasmap.vcpu_addr, ucasmap.length);
4882 		break;
4883 	}
4884 	case KVM_S390_UCAS_UNMAP: {
4885 		struct kvm_s390_ucas_mapping ucasmap;
4886 
4887 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4888 			r = -EFAULT;
4889 			break;
4890 		}
4891 
4892 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4893 			r = -EINVAL;
4894 			break;
4895 		}
4896 
4897 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4898 			ucasmap.length);
4899 		break;
4900 	}
4901 #endif
4902 	case KVM_S390_VCPU_FAULT: {
4903 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
4904 		break;
4905 	}
4906 	case KVM_ENABLE_CAP:
4907 	{
4908 		struct kvm_enable_cap cap;
4909 		r = -EFAULT;
4910 		if (copy_from_user(&cap, argp, sizeof(cap)))
4911 			break;
4912 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4913 		break;
4914 	}
4915 	case KVM_S390_MEM_OP: {
4916 		struct kvm_s390_mem_op mem_op;
4917 
4918 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4919 			r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
4920 		else
4921 			r = -EFAULT;
4922 		break;
4923 	}
4924 	case KVM_S390_SET_IRQ_STATE: {
4925 		struct kvm_s390_irq_state irq_state;
4926 
4927 		r = -EFAULT;
4928 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4929 			break;
4930 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4931 		    irq_state.len == 0 ||
4932 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4933 			r = -EINVAL;
4934 			break;
4935 		}
4936 		/* do not use irq_state.flags, it will break old QEMUs */
4937 		r = kvm_s390_set_irq_state(vcpu,
4938 					   (void __user *) irq_state.buf,
4939 					   irq_state.len);
4940 		break;
4941 	}
4942 	case KVM_S390_GET_IRQ_STATE: {
4943 		struct kvm_s390_irq_state irq_state;
4944 
4945 		r = -EFAULT;
4946 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4947 			break;
4948 		if (irq_state.len == 0) {
4949 			r = -EINVAL;
4950 			break;
4951 		}
4952 		/* do not use irq_state.flags, it will break old QEMUs */
4953 		r = kvm_s390_get_irq_state(vcpu,
4954 					   (__u8 __user *)  irq_state.buf,
4955 					   irq_state.len);
4956 		break;
4957 	}
4958 	default:
4959 		r = -ENOTTY;
4960 	}
4961 
4962 	vcpu_put(vcpu);
4963 	return r;
4964 }
4965 
4966 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4967 {
4968 #ifdef CONFIG_KVM_S390_UCONTROL
4969 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4970 		 && (kvm_is_ucontrol(vcpu->kvm))) {
4971 		vmf->page = virt_to_page(vcpu->arch.sie_block);
4972 		get_page(vmf->page);
4973 		return 0;
4974 	}
4975 #endif
4976 	return VM_FAULT_SIGBUS;
4977 }
4978 
4979 /* Section: memory related */
4980 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4981 				   const struct kvm_memory_slot *old,
4982 				   struct kvm_memory_slot *new,
4983 				   enum kvm_mr_change change)
4984 {
4985 	gpa_t size;
4986 
4987 	/* When we are protected, we should not change the memory slots */
4988 	if (kvm_s390_pv_get_handle(kvm))
4989 		return -EINVAL;
4990 
4991 	if (change == KVM_MR_DELETE || change == KVM_MR_FLAGS_ONLY)
4992 		return 0;
4993 
4994 	/* A few sanity checks. We can have memory slots which have to be
4995 	   located/ended at a segment boundary (1MB). The memory in userland is
4996 	   ok to be fragmented into various different vmas. It is okay to mmap()
4997 	   and munmap() stuff in this slot after doing this call at any time */
4998 
4999 	if (new->userspace_addr & 0xffffful)
5000 		return -EINVAL;
5001 
5002 	size = new->npages * PAGE_SIZE;
5003 	if (size & 0xffffful)
5004 		return -EINVAL;
5005 
5006 	if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit)
5007 		return -EINVAL;
5008 
5009 	return 0;
5010 }
5011 
5012 void kvm_arch_commit_memory_region(struct kvm *kvm,
5013 				struct kvm_memory_slot *old,
5014 				const struct kvm_memory_slot *new,
5015 				enum kvm_mr_change change)
5016 {
5017 	int rc = 0;
5018 
5019 	switch (change) {
5020 	case KVM_MR_DELETE:
5021 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5022 					old->npages * PAGE_SIZE);
5023 		break;
5024 	case KVM_MR_MOVE:
5025 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5026 					old->npages * PAGE_SIZE);
5027 		if (rc)
5028 			break;
5029 		fallthrough;
5030 	case KVM_MR_CREATE:
5031 		rc = gmap_map_segment(kvm->arch.gmap, new->userspace_addr,
5032 				      new->base_gfn * PAGE_SIZE,
5033 				      new->npages * PAGE_SIZE);
5034 		break;
5035 	case KVM_MR_FLAGS_ONLY:
5036 		break;
5037 	default:
5038 		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5039 	}
5040 	if (rc)
5041 		pr_warn("failed to commit memory region\n");
5042 	return;
5043 }
5044 
5045 static inline unsigned long nonhyp_mask(int i)
5046 {
5047 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5048 
5049 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5050 }
5051 
5052 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
5053 {
5054 	vcpu->valid_wakeup = false;
5055 }
5056 
5057 static int __init kvm_s390_init(void)
5058 {
5059 	int i;
5060 
5061 	if (!sclp.has_sief2) {
5062 		pr_info("SIE is not available\n");
5063 		return -ENODEV;
5064 	}
5065 
5066 	if (nested && hpage) {
5067 		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5068 		return -EINVAL;
5069 	}
5070 
5071 	for (i = 0; i < 16; i++)
5072 		kvm_s390_fac_base[i] |=
5073 			stfle_fac_list[i] & nonhyp_mask(i);
5074 
5075 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5076 }
5077 
5078 static void __exit kvm_s390_exit(void)
5079 {
5080 	kvm_exit();
5081 }
5082 
5083 module_init(kvm_s390_init);
5084 module_exit(kvm_s390_exit);
5085 
5086 /*
5087  * Enable autoloading of the kvm module.
5088  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5089  * since x86 takes a different approach.
5090  */
5091 #include <linux/miscdevice.h>
5092 MODULE_ALIAS_MISCDEV(KVM_MINOR);
5093 MODULE_ALIAS("devname:kvm");
5094