xref: /openbmc/linux/arch/s390/kvm/kvm-s390.c (revision 1e70d57e)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2020
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
10  *               Jason J. Herne <jjherne@us.ibm.com>
11  */
12 
13 #define KMSG_COMPONENT "kvm-s390"
14 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
15 
16 #include <linux/compiler.h>
17 #include <linux/err.h>
18 #include <linux/fs.h>
19 #include <linux/hrtimer.h>
20 #include <linux/init.h>
21 #include <linux/kvm.h>
22 #include <linux/kvm_host.h>
23 #include <linux/mman.h>
24 #include <linux/module.h>
25 #include <linux/moduleparam.h>
26 #include <linux/random.h>
27 #include <linux/slab.h>
28 #include <linux/timer.h>
29 #include <linux/vmalloc.h>
30 #include <linux/bitmap.h>
31 #include <linux/sched/signal.h>
32 #include <linux/string.h>
33 #include <linux/pgtable.h>
34 
35 #include <asm/asm-offsets.h>
36 #include <asm/lowcore.h>
37 #include <asm/stp.h>
38 #include <asm/gmap.h>
39 #include <asm/nmi.h>
40 #include <asm/switch_to.h>
41 #include <asm/isc.h>
42 #include <asm/sclp.h>
43 #include <asm/cpacf.h>
44 #include <asm/timex.h>
45 #include <asm/ap.h>
46 #include <asm/uv.h>
47 #include <asm/fpu/api.h>
48 #include "kvm-s390.h"
49 #include "gaccess.h"
50 
51 #define CREATE_TRACE_POINTS
52 #include "trace.h"
53 #include "trace-s390.h"
54 
55 #define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
56 #define LOCAL_IRQS 32
57 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
58 			   (KVM_MAX_VCPUS + LOCAL_IRQS))
59 
/*
 * Descriptors of the per-VM statistics: the generic KVM VM stats followed
 * by the s390 specific inject_* counters.
 */
const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
	KVM_GENERIC_VM_STATS(),
	STATS_DESC_COUNTER(VM, inject_io),
	STATS_DESC_COUNTER(VM, inject_float_mchk),
	STATS_DESC_COUNTER(VM, inject_pfault_done),
	STATS_DESC_COUNTER(VM, inject_service_signal),
	STATS_DESC_COUNTER(VM, inject_virtio)
};
68 
/*
 * Layout of the per-VM statistics: the id string directly follows the
 * header, then come the descriptors and finally the data.
 */
const struct kvm_stats_header kvm_vm_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vm_stats_desc),
};
77 
/*
 * Descriptors of the per-VCPU statistics: the generic KVM VCPU stats
 * followed by the s390 specific exit, deliver, inject, instruction and
 * diagnose counters.
 */
const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
	KVM_GENERIC_VCPU_STATS(),
	STATS_DESC_COUNTER(VCPU, exit_userspace),
	STATS_DESC_COUNTER(VCPU, exit_null),
	STATS_DESC_COUNTER(VCPU, exit_external_request),
	STATS_DESC_COUNTER(VCPU, exit_io_request),
	STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
	STATS_DESC_COUNTER(VCPU, exit_stop_request),
	STATS_DESC_COUNTER(VCPU, exit_validity),
	STATS_DESC_COUNTER(VCPU, exit_instruction),
	STATS_DESC_COUNTER(VCPU, exit_pei),
	STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
	STATS_DESC_COUNTER(VCPU, instruction_lctl),
	STATS_DESC_COUNTER(VCPU, instruction_lctlg),
	STATS_DESC_COUNTER(VCPU, instruction_stctl),
	STATS_DESC_COUNTER(VCPU, instruction_stctg),
	STATS_DESC_COUNTER(VCPU, exit_program_interruption),
	STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
	STATS_DESC_COUNTER(VCPU, exit_operation_exception),
	STATS_DESC_COUNTER(VCPU, deliver_ckc),
	STATS_DESC_COUNTER(VCPU, deliver_cputm),
	STATS_DESC_COUNTER(VCPU, deliver_external_call),
	STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
	STATS_DESC_COUNTER(VCPU, deliver_service_signal),
	STATS_DESC_COUNTER(VCPU, deliver_virtio),
	STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
	STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
	STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
	STATS_DESC_COUNTER(VCPU, deliver_program),
	STATS_DESC_COUNTER(VCPU, deliver_io),
	STATS_DESC_COUNTER(VCPU, deliver_machine_check),
	STATS_DESC_COUNTER(VCPU, exit_wait_state),
	STATS_DESC_COUNTER(VCPU, inject_ckc),
	STATS_DESC_COUNTER(VCPU, inject_cputm),
	STATS_DESC_COUNTER(VCPU, inject_external_call),
	STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
	STATS_DESC_COUNTER(VCPU, inject_mchk),
	STATS_DESC_COUNTER(VCPU, inject_pfault_init),
	STATS_DESC_COUNTER(VCPU, inject_program),
	STATS_DESC_COUNTER(VCPU, inject_restart),
	STATS_DESC_COUNTER(VCPU, inject_set_prefix),
	STATS_DESC_COUNTER(VCPU, inject_stop_signal),
	STATS_DESC_COUNTER(VCPU, instruction_epsw),
	STATS_DESC_COUNTER(VCPU, instruction_gs),
	STATS_DESC_COUNTER(VCPU, instruction_io_other),
	STATS_DESC_COUNTER(VCPU, instruction_lpsw),
	STATS_DESC_COUNTER(VCPU, instruction_lpswe),
	STATS_DESC_COUNTER(VCPU, instruction_pfmf),
	STATS_DESC_COUNTER(VCPU, instruction_ptff),
	STATS_DESC_COUNTER(VCPU, instruction_sck),
	STATS_DESC_COUNTER(VCPU, instruction_sckpf),
	STATS_DESC_COUNTER(VCPU, instruction_stidp),
	STATS_DESC_COUNTER(VCPU, instruction_spx),
	STATS_DESC_COUNTER(VCPU, instruction_stpx),
	STATS_DESC_COUNTER(VCPU, instruction_stap),
	STATS_DESC_COUNTER(VCPU, instruction_iske),
	STATS_DESC_COUNTER(VCPU, instruction_ri),
	STATS_DESC_COUNTER(VCPU, instruction_rrbe),
	STATS_DESC_COUNTER(VCPU, instruction_sske),
	STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
	STATS_DESC_COUNTER(VCPU, instruction_stsi),
	STATS_DESC_COUNTER(VCPU, instruction_stfl),
	STATS_DESC_COUNTER(VCPU, instruction_tb),
	STATS_DESC_COUNTER(VCPU, instruction_tpi),
	STATS_DESC_COUNTER(VCPU, instruction_tprot),
	STATS_DESC_COUNTER(VCPU, instruction_tsch),
	STATS_DESC_COUNTER(VCPU, instruction_sie),
	STATS_DESC_COUNTER(VCPU, instruction_essa),
	STATS_DESC_COUNTER(VCPU, instruction_sthyi),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
	STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
	STATS_DESC_COUNTER(VCPU, diag_9c_forward),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
	STATS_DESC_COUNTER(VCPU, pfault_sync)
};
174 
/*
 * Layout of the per-VCPU statistics: the id string directly follows the
 * header, then come the descriptors and finally the data.
 */
const struct kvm_stats_header kvm_vcpu_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vcpu_stats_desc),
};
183 
184 /* allow nested virtualization in KVM (if enabled by user space) */
185 static int nested;
186 module_param(nested, int, S_IRUGO);
187 MODULE_PARM_DESC(nested, "Nested virtualization support");
188 
189 /* allow 1m huge page guest backing, if !nested */
190 static int hpage;
191 module_param(hpage, int, 0444);
192 MODULE_PARM_DESC(hpage, "1m huge page backing support");
193 
194 /* maximum percentage of steal time for polling.  >100 is treated like 100 */
195 static u8 halt_poll_max_steal = 10;
196 module_param(halt_poll_max_steal, byte, 0644);
197 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
198 
199 /* if set to true, the GISA will be initialized and used if available */
200 static bool use_gisa  = true;
201 module_param(use_gisa, bool, 0644);
202 MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
203 
204 /* maximum diag9c forwarding per second */
205 unsigned int diag9c_forwarding_hz;
206 module_param(diag9c_forwarding_hz, uint, 0644);
207 MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
208 
209 /*
210  * For now we handle at most 16 double words as this is what the s390 base
211  * kernel handles and stores in the prefix page. If we ever need to go beyond
212  * this, this requires changes to code, but the external uapi can stay.
213  */
214 #define SIZE_INTERNAL 16
215 
216 /*
217  * Base feature mask that defines default mask for facilities. Consists of the
218  * defines in FACILITIES_KVM and the non-hypervisor managed bits.
219  */
220 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
221 /*
222  * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
223  * and defines the facilities that can be enabled via a cpu model.
224  */
225 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
226 
227 static unsigned long kvm_s390_fac_size(void)
228 {
229 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
230 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
231 	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
232 		sizeof(stfle_fac_list));
233 
234 	return SIZE_INTERNAL;
235 }
236 
/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

/* gmap pte notifiers, registered in kvm_arch_hardware_setup() */
static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
/* debug areas "kvm-trace" and "kvm-uv", registered in kvm_arch_init() */
debug_info_t *kvm_s390_dbf;
debug_info_t *kvm_s390_dbf_uv;
246 
247 /* Section: not file related */
/*
 * Nothing to enable here: every s390 is virtualization enabled ;-)
 *
 * Always returns 0.
 */
int kvm_arch_hardware_enable(void)
{
	return 0;
}
253 
/* No compatibility checks are performed; @opaque is unused. Returns 0. */
int kvm_arch_check_processor_compat(void *opaque)
{
	return 0;
}
258 
259 /* forward declarations */
260 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
261 			      unsigned long end);
262 static int sca_switch_to_extended(struct kvm *kvm);
263 
264 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
265 {
266 	u8 delta_idx = 0;
267 
268 	/*
269 	 * The TOD jumps by delta, we have to compensate this by adding
270 	 * -delta to the epoch.
271 	 */
272 	delta = -delta;
273 
274 	/* sign-extension - we're adding to signed values below */
275 	if ((s64)delta < 0)
276 		delta_idx = -1;
277 
278 	scb->epoch += delta;
279 	if (scb->ecd & ECD_MEF) {
280 		scb->epdx += delta_idx;
281 		if (scb->epoch < delta)
282 			scb->epdx += 1;
283 	}
284 }
285 
286 /*
287  * This callback is executed during stop_machine(). All CPUs are therefore
288  * temporarily stopped. In order not to change guest behavior, we have to
289  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
290  * so a CPU won't be stopped while calculating with the epoch.
291  */
292 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
293 			  void *v)
294 {
295 	struct kvm *kvm;
296 	struct kvm_vcpu *vcpu;
297 	unsigned long i;
298 	unsigned long long *delta = v;
299 
300 	list_for_each_entry(kvm, &vm_list, vm_list) {
301 		kvm_for_each_vcpu(i, vcpu, kvm) {
302 			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
303 			if (i == 0) {
304 				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
305 				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
306 			}
307 			if (vcpu->arch.cputm_enabled)
308 				vcpu->arch.cputm_start += *delta;
309 			if (vcpu->arch.vsie_block)
310 				kvm_clock_sync_scb(vcpu->arch.vsie_block,
311 						   *delta);
312 		}
313 	}
314 	return NOTIFY_OK;
315 }
316 
317 static struct notifier_block kvm_clock_notifier = {
318 	.notifier_call = kvm_clock_sync,
319 };
320 
321 int kvm_arch_hardware_setup(void *opaque)
322 {
323 	gmap_notifier.notifier_call = kvm_gmap_notifier;
324 	gmap_register_pte_notifier(&gmap_notifier);
325 	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
326 	gmap_register_pte_notifier(&vsie_gmap_notifier);
327 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
328 				       &kvm_clock_notifier);
329 	return 0;
330 }
331 
332 void kvm_arch_hardware_unsetup(void)
333 {
334 	gmap_unregister_pte_notifier(&gmap_notifier);
335 	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
336 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
337 					 &kvm_clock_notifier);
338 }
339 
340 static void allow_cpu_feat(unsigned long nr)
341 {
342 	set_bit_inv(nr, kvm_s390_available_cpu_feat);
343 }
344 
345 static inline int plo_test_bit(unsigned char nr)
346 {
347 	unsigned long function = (unsigned long)nr | 0x100;
348 	int cc;
349 
350 	asm volatile(
351 		"	lgr	0,%[function]\n"
352 		/* Parameter registers are ignored for "test bit" */
353 		"	plo	0,0,0,0(0)\n"
354 		"	ipm	%0\n"
355 		"	srl	%0,28\n"
356 		: "=d" (cc)
357 		: [function] "d" (function)
358 		: "cc", "0");
359 	return cc == 0;
360 }
361 
362 static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
363 {
364 	asm volatile(
365 		"	lghi	0,0\n"
366 		"	lgr	1,%[query]\n"
367 		/* Parameter registers are ignored */
368 		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
369 		:
370 		: [query] "d" ((unsigned long)query), [opc] "i" (opcode)
371 		: "cc", "memory", "0", "1");
372 }
373 
374 #define INSN_SORTL 0xb938
375 #define INSN_DFLTCC 0xb939
376 
377 static void kvm_s390_cpu_feat_init(void)
378 {
379 	int i;
380 
381 	for (i = 0; i < 256; ++i) {
382 		if (plo_test_bit(i))
383 			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
384 	}
385 
386 	if (test_facility(28)) /* TOD-clock steering */
387 		ptff(kvm_s390_available_subfunc.ptff,
388 		     sizeof(kvm_s390_available_subfunc.ptff),
389 		     PTFF_QAF);
390 
391 	if (test_facility(17)) { /* MSA */
392 		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
393 			      kvm_s390_available_subfunc.kmac);
394 		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
395 			      kvm_s390_available_subfunc.kmc);
396 		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
397 			      kvm_s390_available_subfunc.km);
398 		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
399 			      kvm_s390_available_subfunc.kimd);
400 		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
401 			      kvm_s390_available_subfunc.klmd);
402 	}
403 	if (test_facility(76)) /* MSA3 */
404 		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
405 			      kvm_s390_available_subfunc.pckmo);
406 	if (test_facility(77)) { /* MSA4 */
407 		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
408 			      kvm_s390_available_subfunc.kmctr);
409 		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
410 			      kvm_s390_available_subfunc.kmf);
411 		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
412 			      kvm_s390_available_subfunc.kmo);
413 		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
414 			      kvm_s390_available_subfunc.pcc);
415 	}
416 	if (test_facility(57)) /* MSA5 */
417 		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
418 			      kvm_s390_available_subfunc.ppno);
419 
420 	if (test_facility(146)) /* MSA8 */
421 		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
422 			      kvm_s390_available_subfunc.kma);
423 
424 	if (test_facility(155)) /* MSA9 */
425 		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
426 			      kvm_s390_available_subfunc.kdsa);
427 
428 	if (test_facility(150)) /* SORTL */
429 		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
430 
431 	if (test_facility(151)) /* DFLTCC */
432 		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
433 
434 	if (MACHINE_HAS_ESOP)
435 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
436 	/*
437 	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
438 	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
439 	 */
440 	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
441 	    !test_facility(3) || !nested)
442 		return;
443 	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
444 	if (sclp.has_64bscao)
445 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
446 	if (sclp.has_siif)
447 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
448 	if (sclp.has_gpere)
449 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
450 	if (sclp.has_gsls)
451 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
452 	if (sclp.has_ib)
453 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
454 	if (sclp.has_cei)
455 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
456 	if (sclp.has_ibs)
457 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
458 	if (sclp.has_kss)
459 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
460 	/*
461 	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
462 	 * all skey handling functions read/set the skey from the PGSTE
463 	 * instead of the real storage key.
464 	 *
465 	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
466 	 * pages being detected as preserved although they are resident.
467 	 *
468 	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
469 	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
470 	 *
471 	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
472 	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
473 	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
474 	 *
475 	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
476 	 * cannot easily shadow the SCA because of the ipte lock.
477 	 */
478 }
479 
480 int kvm_arch_init(void *opaque)
481 {
482 	int rc = -ENOMEM;
483 
484 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
485 	if (!kvm_s390_dbf)
486 		return -ENOMEM;
487 
488 	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
489 	if (!kvm_s390_dbf_uv)
490 		goto out;
491 
492 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
493 	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
494 		goto out;
495 
496 	kvm_s390_cpu_feat_init();
497 
498 	/* Register floating interrupt controller interface. */
499 	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
500 	if (rc) {
501 		pr_err("A FLIC registration call failed with rc=%d\n", rc);
502 		goto out;
503 	}
504 
505 	rc = kvm_s390_gib_init(GAL_ISC);
506 	if (rc)
507 		goto out;
508 
509 	return 0;
510 
511 out:
512 	kvm_arch_exit();
513 	return rc;
514 }
515 
516 void kvm_arch_exit(void)
517 {
518 	kvm_s390_gib_destroy();
519 	debug_unregister(kvm_s390_dbf);
520 	debug_unregister(kvm_s390_dbf_uv);
521 }
522 
523 /* Section: device related */
524 long kvm_arch_dev_ioctl(struct file *filp,
525 			unsigned int ioctl, unsigned long arg)
526 {
527 	if (ioctl == KVM_S390_ENABLE_SIE)
528 		return s390_enable_sie();
529 	return -EINVAL;
530 }
531 
532 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
533 {
534 	int r;
535 
536 	switch (ext) {
537 	case KVM_CAP_S390_PSW:
538 	case KVM_CAP_S390_GMAP:
539 	case KVM_CAP_SYNC_MMU:
540 #ifdef CONFIG_KVM_S390_UCONTROL
541 	case KVM_CAP_S390_UCONTROL:
542 #endif
543 	case KVM_CAP_ASYNC_PF:
544 	case KVM_CAP_SYNC_REGS:
545 	case KVM_CAP_ONE_REG:
546 	case KVM_CAP_ENABLE_CAP:
547 	case KVM_CAP_S390_CSS_SUPPORT:
548 	case KVM_CAP_IOEVENTFD:
549 	case KVM_CAP_DEVICE_CTRL:
550 	case KVM_CAP_S390_IRQCHIP:
551 	case KVM_CAP_VM_ATTRIBUTES:
552 	case KVM_CAP_MP_STATE:
553 	case KVM_CAP_IMMEDIATE_EXIT:
554 	case KVM_CAP_S390_INJECT_IRQ:
555 	case KVM_CAP_S390_USER_SIGP:
556 	case KVM_CAP_S390_USER_STSI:
557 	case KVM_CAP_S390_SKEYS:
558 	case KVM_CAP_S390_IRQ_STATE:
559 	case KVM_CAP_S390_USER_INSTR0:
560 	case KVM_CAP_S390_CMMA_MIGRATION:
561 	case KVM_CAP_S390_AIS:
562 	case KVM_CAP_S390_AIS_MIGRATION:
563 	case KVM_CAP_S390_VCPU_RESETS:
564 	case KVM_CAP_SET_GUEST_DEBUG:
565 	case KVM_CAP_S390_DIAG318:
566 	case KVM_CAP_S390_MEM_OP_EXTENSION:
567 		r = 1;
568 		break;
569 	case KVM_CAP_SET_GUEST_DEBUG2:
570 		r = KVM_GUESTDBG_VALID_MASK;
571 		break;
572 	case KVM_CAP_S390_HPAGE_1M:
573 		r = 0;
574 		if (hpage && !kvm_is_ucontrol(kvm))
575 			r = 1;
576 		break;
577 	case KVM_CAP_S390_MEM_OP:
578 		r = MEM_OP_MAX_SIZE;
579 		break;
580 	case KVM_CAP_NR_VCPUS:
581 	case KVM_CAP_MAX_VCPUS:
582 	case KVM_CAP_MAX_VCPU_ID:
583 		r = KVM_S390_BSCA_CPU_SLOTS;
584 		if (!kvm_s390_use_sca_entries())
585 			r = KVM_MAX_VCPUS;
586 		else if (sclp.has_esca && sclp.has_64bscao)
587 			r = KVM_S390_ESCA_CPU_SLOTS;
588 		if (ext == KVM_CAP_NR_VCPUS)
589 			r = min_t(unsigned int, num_online_cpus(), r);
590 		break;
591 	case KVM_CAP_S390_COW:
592 		r = MACHINE_HAS_ESOP;
593 		break;
594 	case KVM_CAP_S390_VECTOR_REGISTERS:
595 		r = MACHINE_HAS_VX;
596 		break;
597 	case KVM_CAP_S390_RI:
598 		r = test_facility(64);
599 		break;
600 	case KVM_CAP_S390_GS:
601 		r = test_facility(133);
602 		break;
603 	case KVM_CAP_S390_BPB:
604 		r = test_facility(82);
605 		break;
606 	case KVM_CAP_S390_PROTECTED:
607 		r = is_prot_virt_host();
608 		break;
609 	default:
610 		r = 0;
611 	}
612 	return r;
613 }
614 
615 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
616 {
617 	int i;
618 	gfn_t cur_gfn, last_gfn;
619 	unsigned long gaddr, vmaddr;
620 	struct gmap *gmap = kvm->arch.gmap;
621 	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
622 
623 	/* Loop over all guest segments */
624 	cur_gfn = memslot->base_gfn;
625 	last_gfn = memslot->base_gfn + memslot->npages;
626 	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
627 		gaddr = gfn_to_gpa(cur_gfn);
628 		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
629 		if (kvm_is_error_hva(vmaddr))
630 			continue;
631 
632 		bitmap_zero(bitmap, _PAGE_ENTRIES);
633 		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
634 		for (i = 0; i < _PAGE_ENTRIES; i++) {
635 			if (test_bit(i, bitmap))
636 				mark_page_dirty(kvm, cur_gfn + i);
637 		}
638 
639 		if (fatal_signal_pending(current))
640 			return;
641 		cond_resched();
642 	}
643 }
644 
645 /* Section: vm related */
646 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
647 
648 /*
649  * Get (and clear) the dirty memory log for a memory slot.
650  */
651 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
652 			       struct kvm_dirty_log *log)
653 {
654 	int r;
655 	unsigned long n;
656 	struct kvm_memory_slot *memslot;
657 	int is_dirty;
658 
659 	if (kvm_is_ucontrol(kvm))
660 		return -EINVAL;
661 
662 	mutex_lock(&kvm->slots_lock);
663 
664 	r = -EINVAL;
665 	if (log->slot >= KVM_USER_MEM_SLOTS)
666 		goto out;
667 
668 	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
669 	if (r)
670 		goto out;
671 
672 	/* Clear the dirty log */
673 	if (is_dirty) {
674 		n = kvm_dirty_bitmap_bytes(memslot);
675 		memset(memslot->dirty_bitmap, 0, n);
676 	}
677 	r = 0;
678 out:
679 	mutex_unlock(&kvm->slots_lock);
680 	return r;
681 }
682 
683 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
684 {
685 	unsigned long i;
686 	struct kvm_vcpu *vcpu;
687 
688 	kvm_for_each_vcpu(i, vcpu, kvm) {
689 		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
690 	}
691 }
692 
693 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
694 {
695 	int r;
696 
697 	if (cap->flags)
698 		return -EINVAL;
699 
700 	switch (cap->cap) {
701 	case KVM_CAP_S390_IRQCHIP:
702 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
703 		kvm->arch.use_irqchip = 1;
704 		r = 0;
705 		break;
706 	case KVM_CAP_S390_USER_SIGP:
707 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
708 		kvm->arch.user_sigp = 1;
709 		r = 0;
710 		break;
711 	case KVM_CAP_S390_VECTOR_REGISTERS:
712 		mutex_lock(&kvm->lock);
713 		if (kvm->created_vcpus) {
714 			r = -EBUSY;
715 		} else if (MACHINE_HAS_VX) {
716 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
717 			set_kvm_facility(kvm->arch.model.fac_list, 129);
718 			if (test_facility(134)) {
719 				set_kvm_facility(kvm->arch.model.fac_mask, 134);
720 				set_kvm_facility(kvm->arch.model.fac_list, 134);
721 			}
722 			if (test_facility(135)) {
723 				set_kvm_facility(kvm->arch.model.fac_mask, 135);
724 				set_kvm_facility(kvm->arch.model.fac_list, 135);
725 			}
726 			if (test_facility(148)) {
727 				set_kvm_facility(kvm->arch.model.fac_mask, 148);
728 				set_kvm_facility(kvm->arch.model.fac_list, 148);
729 			}
730 			if (test_facility(152)) {
731 				set_kvm_facility(kvm->arch.model.fac_mask, 152);
732 				set_kvm_facility(kvm->arch.model.fac_list, 152);
733 			}
734 			if (test_facility(192)) {
735 				set_kvm_facility(kvm->arch.model.fac_mask, 192);
736 				set_kvm_facility(kvm->arch.model.fac_list, 192);
737 			}
738 			r = 0;
739 		} else
740 			r = -EINVAL;
741 		mutex_unlock(&kvm->lock);
742 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
743 			 r ? "(not available)" : "(success)");
744 		break;
745 	case KVM_CAP_S390_RI:
746 		r = -EINVAL;
747 		mutex_lock(&kvm->lock);
748 		if (kvm->created_vcpus) {
749 			r = -EBUSY;
750 		} else if (test_facility(64)) {
751 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
752 			set_kvm_facility(kvm->arch.model.fac_list, 64);
753 			r = 0;
754 		}
755 		mutex_unlock(&kvm->lock);
756 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
757 			 r ? "(not available)" : "(success)");
758 		break;
759 	case KVM_CAP_S390_AIS:
760 		mutex_lock(&kvm->lock);
761 		if (kvm->created_vcpus) {
762 			r = -EBUSY;
763 		} else {
764 			set_kvm_facility(kvm->arch.model.fac_mask, 72);
765 			set_kvm_facility(kvm->arch.model.fac_list, 72);
766 			r = 0;
767 		}
768 		mutex_unlock(&kvm->lock);
769 		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
770 			 r ? "(not available)" : "(success)");
771 		break;
772 	case KVM_CAP_S390_GS:
773 		r = -EINVAL;
774 		mutex_lock(&kvm->lock);
775 		if (kvm->created_vcpus) {
776 			r = -EBUSY;
777 		} else if (test_facility(133)) {
778 			set_kvm_facility(kvm->arch.model.fac_mask, 133);
779 			set_kvm_facility(kvm->arch.model.fac_list, 133);
780 			r = 0;
781 		}
782 		mutex_unlock(&kvm->lock);
783 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
784 			 r ? "(not available)" : "(success)");
785 		break;
786 	case KVM_CAP_S390_HPAGE_1M:
787 		mutex_lock(&kvm->lock);
788 		if (kvm->created_vcpus)
789 			r = -EBUSY;
790 		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
791 			r = -EINVAL;
792 		else {
793 			r = 0;
794 			mmap_write_lock(kvm->mm);
795 			kvm->mm->context.allow_gmap_hpage_1m = 1;
796 			mmap_write_unlock(kvm->mm);
797 			/*
798 			 * We might have to create fake 4k page
799 			 * tables. To avoid that the hardware works on
800 			 * stale PGSTEs, we emulate these instructions.
801 			 */
802 			kvm->arch.use_skf = 0;
803 			kvm->arch.use_pfmfi = 0;
804 		}
805 		mutex_unlock(&kvm->lock);
806 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
807 			 r ? "(not available)" : "(success)");
808 		break;
809 	case KVM_CAP_S390_USER_STSI:
810 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
811 		kvm->arch.user_stsi = 1;
812 		r = 0;
813 		break;
814 	case KVM_CAP_S390_USER_INSTR0:
815 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
816 		kvm->arch.user_instr0 = 1;
817 		icpt_operexc_on_all_vcpus(kvm);
818 		r = 0;
819 		break;
820 	default:
821 		r = -EINVAL;
822 		break;
823 	}
824 	return r;
825 }
826 
827 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
828 {
829 	int ret;
830 
831 	switch (attr->attr) {
832 	case KVM_S390_VM_MEM_LIMIT_SIZE:
833 		ret = 0;
834 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
835 			 kvm->arch.mem_limit);
836 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
837 			ret = -EFAULT;
838 		break;
839 	default:
840 		ret = -ENXIO;
841 		break;
842 	}
843 	return ret;
844 }
845 
846 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
847 {
848 	int ret;
849 	unsigned int idx;
850 	switch (attr->attr) {
851 	case KVM_S390_VM_MEM_ENABLE_CMMA:
852 		ret = -ENXIO;
853 		if (!sclp.has_cmma)
854 			break;
855 
856 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
857 		mutex_lock(&kvm->lock);
858 		if (kvm->created_vcpus)
859 			ret = -EBUSY;
860 		else if (kvm->mm->context.allow_gmap_hpage_1m)
861 			ret = -EINVAL;
862 		else {
863 			kvm->arch.use_cmma = 1;
864 			/* Not compatible with cmma. */
865 			kvm->arch.use_pfmfi = 0;
866 			ret = 0;
867 		}
868 		mutex_unlock(&kvm->lock);
869 		break;
870 	case KVM_S390_VM_MEM_CLR_CMMA:
871 		ret = -ENXIO;
872 		if (!sclp.has_cmma)
873 			break;
874 		ret = -EINVAL;
875 		if (!kvm->arch.use_cmma)
876 			break;
877 
878 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
879 		mutex_lock(&kvm->lock);
880 		idx = srcu_read_lock(&kvm->srcu);
881 		s390_reset_cmma(kvm->arch.gmap->mm);
882 		srcu_read_unlock(&kvm->srcu, idx);
883 		mutex_unlock(&kvm->lock);
884 		ret = 0;
885 		break;
886 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
887 		unsigned long new_limit;
888 
889 		if (kvm_is_ucontrol(kvm))
890 			return -EINVAL;
891 
892 		if (get_user(new_limit, (u64 __user *)attr->addr))
893 			return -EFAULT;
894 
895 		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
896 		    new_limit > kvm->arch.mem_limit)
897 			return -E2BIG;
898 
899 		if (!new_limit)
900 			return -EINVAL;
901 
902 		/* gmap_create takes last usable address */
903 		if (new_limit != KVM_S390_NO_MEM_LIMIT)
904 			new_limit -= 1;
905 
906 		ret = -EBUSY;
907 		mutex_lock(&kvm->lock);
908 		if (!kvm->created_vcpus) {
909 			/* gmap_create will round the limit up */
910 			struct gmap *new = gmap_create(current->mm, new_limit);
911 
912 			if (!new) {
913 				ret = -ENOMEM;
914 			} else {
915 				gmap_remove(kvm->arch.gmap);
916 				new->private = kvm;
917 				kvm->arch.gmap = new;
918 				ret = 0;
919 			}
920 		}
921 		mutex_unlock(&kvm->lock);
922 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
923 		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
924 			 (void *) kvm->arch.gmap->asce);
925 		break;
926 	}
927 	default:
928 		ret = -ENXIO;
929 		break;
930 	}
931 	return ret;
932 }
933 
934 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
935 
936 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
937 {
938 	struct kvm_vcpu *vcpu;
939 	unsigned long i;
940 
941 	kvm_s390_vcpu_block_all(kvm);
942 
943 	kvm_for_each_vcpu(i, vcpu, kvm) {
944 		kvm_s390_vcpu_crypto_setup(vcpu);
945 		/* recreate the shadow crycb by leaving the VSIE handler */
946 		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
947 	}
948 
949 	kvm_s390_vcpu_unblock_all(kvm);
950 }
951 
952 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
953 {
954 	mutex_lock(&kvm->lock);
955 	switch (attr->attr) {
956 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
957 		if (!test_kvm_facility(kvm, 76)) {
958 			mutex_unlock(&kvm->lock);
959 			return -EINVAL;
960 		}
961 		get_random_bytes(
962 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
963 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
964 		kvm->arch.crypto.aes_kw = 1;
965 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
966 		break;
967 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
968 		if (!test_kvm_facility(kvm, 76)) {
969 			mutex_unlock(&kvm->lock);
970 			return -EINVAL;
971 		}
972 		get_random_bytes(
973 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
974 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
975 		kvm->arch.crypto.dea_kw = 1;
976 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
977 		break;
978 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
979 		if (!test_kvm_facility(kvm, 76)) {
980 			mutex_unlock(&kvm->lock);
981 			return -EINVAL;
982 		}
983 		kvm->arch.crypto.aes_kw = 0;
984 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
985 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
986 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
987 		break;
988 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
989 		if (!test_kvm_facility(kvm, 76)) {
990 			mutex_unlock(&kvm->lock);
991 			return -EINVAL;
992 		}
993 		kvm->arch.crypto.dea_kw = 0;
994 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
995 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
996 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
997 		break;
998 	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
999 		if (!ap_instructions_available()) {
1000 			mutex_unlock(&kvm->lock);
1001 			return -EOPNOTSUPP;
1002 		}
1003 		kvm->arch.crypto.apie = 1;
1004 		break;
1005 	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1006 		if (!ap_instructions_available()) {
1007 			mutex_unlock(&kvm->lock);
1008 			return -EOPNOTSUPP;
1009 		}
1010 		kvm->arch.crypto.apie = 0;
1011 		break;
1012 	default:
1013 		mutex_unlock(&kvm->lock);
1014 		return -ENXIO;
1015 	}
1016 
1017 	kvm_s390_vcpu_crypto_reset_all(kvm);
1018 	mutex_unlock(&kvm->lock);
1019 	return 0;
1020 }
1021 
1022 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
1023 {
1024 	unsigned long cx;
1025 	struct kvm_vcpu *vcpu;
1026 
1027 	kvm_for_each_vcpu(cx, vcpu, kvm)
1028 		kvm_s390_sync_request(req, vcpu);
1029 }
1030 
1031 /*
1032  * Must be called with kvm->srcu held to avoid races on memslots, and with
1033  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1034  */
1035 static int kvm_s390_vm_start_migration(struct kvm *kvm)
1036 {
1037 	struct kvm_memory_slot *ms;
1038 	struct kvm_memslots *slots;
1039 	unsigned long ram_pages = 0;
1040 	int bkt;
1041 
1042 	/* migration mode already enabled */
1043 	if (kvm->arch.migration_mode)
1044 		return 0;
1045 	slots = kvm_memslots(kvm);
1046 	if (!slots || kvm_memslots_empty(slots))
1047 		return -EINVAL;
1048 
1049 	if (!kvm->arch.use_cmma) {
1050 		kvm->arch.migration_mode = 1;
1051 		return 0;
1052 	}
1053 	/* mark all the pages in active slots as dirty */
1054 	kvm_for_each_memslot(ms, bkt, slots) {
1055 		if (!ms->dirty_bitmap)
1056 			return -EINVAL;
1057 		/*
1058 		 * The second half of the bitmap is only used on x86,
1059 		 * and would be wasted otherwise, so we put it to good
1060 		 * use here to keep track of the state of the storage
1061 		 * attributes.
1062 		 */
1063 		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1064 		ram_pages += ms->npages;
1065 	}
1066 	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1067 	kvm->arch.migration_mode = 1;
1068 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1069 	return 0;
1070 }
1071 
1072 /*
1073  * Must be called with kvm->slots_lock to avoid races with ourselves and
1074  * kvm_s390_vm_start_migration.
1075  */
1076 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1077 {
1078 	/* migration mode already disabled */
1079 	if (!kvm->arch.migration_mode)
1080 		return 0;
1081 	kvm->arch.migration_mode = 0;
1082 	if (kvm->arch.use_cmma)
1083 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1084 	return 0;
1085 }
1086 
1087 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1088 				     struct kvm_device_attr *attr)
1089 {
1090 	int res = -ENXIO;
1091 
1092 	mutex_lock(&kvm->slots_lock);
1093 	switch (attr->attr) {
1094 	case KVM_S390_VM_MIGRATION_START:
1095 		res = kvm_s390_vm_start_migration(kvm);
1096 		break;
1097 	case KVM_S390_VM_MIGRATION_STOP:
1098 		res = kvm_s390_vm_stop_migration(kvm);
1099 		break;
1100 	default:
1101 		break;
1102 	}
1103 	mutex_unlock(&kvm->slots_lock);
1104 
1105 	return res;
1106 }
1107 
1108 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1109 				     struct kvm_device_attr *attr)
1110 {
1111 	u64 mig = kvm->arch.migration_mode;
1112 
1113 	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1114 		return -ENXIO;
1115 
1116 	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1117 		return -EFAULT;
1118 	return 0;
1119 }
1120 
1121 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1122 {
1123 	struct kvm_s390_vm_tod_clock gtod;
1124 
1125 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1126 		return -EFAULT;
1127 
1128 	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1129 		return -EINVAL;
1130 	kvm_s390_set_tod_clock(kvm, &gtod);
1131 
1132 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1133 		gtod.epoch_idx, gtod.tod);
1134 
1135 	return 0;
1136 }
1137 
1138 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1139 {
1140 	u8 gtod_high;
1141 
1142 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1143 					   sizeof(gtod_high)))
1144 		return -EFAULT;
1145 
1146 	if (gtod_high != 0)
1147 		return -EINVAL;
1148 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1149 
1150 	return 0;
1151 }
1152 
1153 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1154 {
1155 	struct kvm_s390_vm_tod_clock gtod = { 0 };
1156 
1157 	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1158 			   sizeof(gtod.tod)))
1159 		return -EFAULT;
1160 
1161 	kvm_s390_set_tod_clock(kvm, &gtod);
1162 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1163 	return 0;
1164 }
1165 
1166 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1167 {
1168 	int ret;
1169 
1170 	if (attr->flags)
1171 		return -EINVAL;
1172 
1173 	switch (attr->attr) {
1174 	case KVM_S390_VM_TOD_EXT:
1175 		ret = kvm_s390_set_tod_ext(kvm, attr);
1176 		break;
1177 	case KVM_S390_VM_TOD_HIGH:
1178 		ret = kvm_s390_set_tod_high(kvm, attr);
1179 		break;
1180 	case KVM_S390_VM_TOD_LOW:
1181 		ret = kvm_s390_set_tod_low(kvm, attr);
1182 		break;
1183 	default:
1184 		ret = -ENXIO;
1185 		break;
1186 	}
1187 	return ret;
1188 }
1189 
1190 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1191 				   struct kvm_s390_vm_tod_clock *gtod)
1192 {
1193 	union tod_clock clk;
1194 
1195 	preempt_disable();
1196 
1197 	store_tod_clock_ext(&clk);
1198 
1199 	gtod->tod = clk.tod + kvm->arch.epoch;
1200 	gtod->epoch_idx = 0;
1201 	if (test_kvm_facility(kvm, 139)) {
1202 		gtod->epoch_idx = clk.ei + kvm->arch.epdx;
1203 		if (gtod->tod < clk.tod)
1204 			gtod->epoch_idx += 1;
1205 	}
1206 
1207 	preempt_enable();
1208 }
1209 
1210 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1211 {
1212 	struct kvm_s390_vm_tod_clock gtod;
1213 
1214 	memset(&gtod, 0, sizeof(gtod));
1215 	kvm_s390_get_tod_clock(kvm, &gtod);
1216 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1217 		return -EFAULT;
1218 
1219 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1220 		gtod.epoch_idx, gtod.tod);
1221 	return 0;
1222 }
1223 
1224 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1225 {
1226 	u8 gtod_high = 0;
1227 
1228 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
1229 					 sizeof(gtod_high)))
1230 		return -EFAULT;
1231 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1232 
1233 	return 0;
1234 }
1235 
1236 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1237 {
1238 	u64 gtod;
1239 
1240 	gtod = kvm_s390_get_tod_clock_fast(kvm);
1241 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1242 		return -EFAULT;
1243 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1244 
1245 	return 0;
1246 }
1247 
1248 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1249 {
1250 	int ret;
1251 
1252 	if (attr->flags)
1253 		return -EINVAL;
1254 
1255 	switch (attr->attr) {
1256 	case KVM_S390_VM_TOD_EXT:
1257 		ret = kvm_s390_get_tod_ext(kvm, attr);
1258 		break;
1259 	case KVM_S390_VM_TOD_HIGH:
1260 		ret = kvm_s390_get_tod_high(kvm, attr);
1261 		break;
1262 	case KVM_S390_VM_TOD_LOW:
1263 		ret = kvm_s390_get_tod_low(kvm, attr);
1264 		break;
1265 	default:
1266 		ret = -ENXIO;
1267 		break;
1268 	}
1269 	return ret;
1270 }
1271 
1272 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1273 {
1274 	struct kvm_s390_vm_cpu_processor *proc;
1275 	u16 lowest_ibc, unblocked_ibc;
1276 	int ret = 0;
1277 
1278 	mutex_lock(&kvm->lock);
1279 	if (kvm->created_vcpus) {
1280 		ret = -EBUSY;
1281 		goto out;
1282 	}
1283 	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1284 	if (!proc) {
1285 		ret = -ENOMEM;
1286 		goto out;
1287 	}
1288 	if (!copy_from_user(proc, (void __user *)attr->addr,
1289 			    sizeof(*proc))) {
1290 		kvm->arch.model.cpuid = proc->cpuid;
1291 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
1292 		unblocked_ibc = sclp.ibc & 0xfff;
1293 		if (lowest_ibc && proc->ibc) {
1294 			if (proc->ibc > unblocked_ibc)
1295 				kvm->arch.model.ibc = unblocked_ibc;
1296 			else if (proc->ibc < lowest_ibc)
1297 				kvm->arch.model.ibc = lowest_ibc;
1298 			else
1299 				kvm->arch.model.ibc = proc->ibc;
1300 		}
1301 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
1302 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
1303 		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1304 			 kvm->arch.model.ibc,
1305 			 kvm->arch.model.cpuid);
1306 		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1307 			 kvm->arch.model.fac_list[0],
1308 			 kvm->arch.model.fac_list[1],
1309 			 kvm->arch.model.fac_list[2]);
1310 	} else
1311 		ret = -EFAULT;
1312 	kfree(proc);
1313 out:
1314 	mutex_unlock(&kvm->lock);
1315 	return ret;
1316 }
1317 
1318 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1319 				       struct kvm_device_attr *attr)
1320 {
1321 	struct kvm_s390_vm_cpu_feat data;
1322 
1323 	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1324 		return -EFAULT;
1325 	if (!bitmap_subset((unsigned long *) data.feat,
1326 			   kvm_s390_available_cpu_feat,
1327 			   KVM_S390_VM_CPU_FEAT_NR_BITS))
1328 		return -EINVAL;
1329 
1330 	mutex_lock(&kvm->lock);
1331 	if (kvm->created_vcpus) {
1332 		mutex_unlock(&kvm->lock);
1333 		return -EBUSY;
1334 	}
1335 	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1336 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1337 	mutex_unlock(&kvm->lock);
1338 	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1339 			 data.feat[0],
1340 			 data.feat[1],
1341 			 data.feat[2]);
1342 	return 0;
1343 }
1344 
1345 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1346 					  struct kvm_device_attr *attr)
1347 {
1348 	mutex_lock(&kvm->lock);
1349 	if (kvm->created_vcpus) {
1350 		mutex_unlock(&kvm->lock);
1351 		return -EBUSY;
1352 	}
1353 
1354 	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1355 			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1356 		mutex_unlock(&kvm->lock);
1357 		return -EFAULT;
1358 	}
1359 	mutex_unlock(&kvm->lock);
1360 
1361 	VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1362 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1363 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1364 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1365 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1366 	VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1367 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1368 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1369 	VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1370 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1371 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1372 	VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1373 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1374 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1375 	VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1376 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1377 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1378 	VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1379 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1380 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1381 	VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1382 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1383 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1384 	VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1385 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1386 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1387 	VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1388 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1389 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1390 	VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1391 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1392 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1393 	VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1394 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1395 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1396 	VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1397 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1398 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1399 	VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1400 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1401 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1402 	VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1403 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1404 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1405 	VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1406 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1407 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1408 	VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1409 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1410 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1411 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1412 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1413 	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1414 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1415 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1416 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1417 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1418 
1419 	return 0;
1420 }
1421 
1422 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1423 {
1424 	int ret = -ENXIO;
1425 
1426 	switch (attr->attr) {
1427 	case KVM_S390_VM_CPU_PROCESSOR:
1428 		ret = kvm_s390_set_processor(kvm, attr);
1429 		break;
1430 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1431 		ret = kvm_s390_set_processor_feat(kvm, attr);
1432 		break;
1433 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1434 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
1435 		break;
1436 	}
1437 	return ret;
1438 }
1439 
1440 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1441 {
1442 	struct kvm_s390_vm_cpu_processor *proc;
1443 	int ret = 0;
1444 
1445 	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1446 	if (!proc) {
1447 		ret = -ENOMEM;
1448 		goto out;
1449 	}
1450 	proc->cpuid = kvm->arch.model.cpuid;
1451 	proc->ibc = kvm->arch.model.ibc;
1452 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1453 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1454 	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1455 		 kvm->arch.model.ibc,
1456 		 kvm->arch.model.cpuid);
1457 	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1458 		 kvm->arch.model.fac_list[0],
1459 		 kvm->arch.model.fac_list[1],
1460 		 kvm->arch.model.fac_list[2]);
1461 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1462 		ret = -EFAULT;
1463 	kfree(proc);
1464 out:
1465 	return ret;
1466 }
1467 
1468 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1469 {
1470 	struct kvm_s390_vm_cpu_machine *mach;
1471 	int ret = 0;
1472 
1473 	mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1474 	if (!mach) {
1475 		ret = -ENOMEM;
1476 		goto out;
1477 	}
1478 	get_cpu_id((struct cpuid *) &mach->cpuid);
1479 	mach->ibc = sclp.ibc;
1480 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1481 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1482 	memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
1483 	       sizeof(stfle_fac_list));
1484 	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1485 		 kvm->arch.model.ibc,
1486 		 kvm->arch.model.cpuid);
1487 	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1488 		 mach->fac_mask[0],
1489 		 mach->fac_mask[1],
1490 		 mach->fac_mask[2]);
1491 	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1492 		 mach->fac_list[0],
1493 		 mach->fac_list[1],
1494 		 mach->fac_list[2]);
1495 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1496 		ret = -EFAULT;
1497 	kfree(mach);
1498 out:
1499 	return ret;
1500 }
1501 
1502 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1503 				       struct kvm_device_attr *attr)
1504 {
1505 	struct kvm_s390_vm_cpu_feat data;
1506 
1507 	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1508 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1509 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1510 		return -EFAULT;
1511 	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1512 			 data.feat[0],
1513 			 data.feat[1],
1514 			 data.feat[2]);
1515 	return 0;
1516 }
1517 
1518 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1519 				     struct kvm_device_attr *attr)
1520 {
1521 	struct kvm_s390_vm_cpu_feat data;
1522 
1523 	bitmap_copy((unsigned long *) data.feat,
1524 		    kvm_s390_available_cpu_feat,
1525 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1526 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1527 		return -EFAULT;
1528 	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1529 			 data.feat[0],
1530 			 data.feat[1],
1531 			 data.feat[2]);
1532 	return 0;
1533 }
1534 
1535 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1536 					  struct kvm_device_attr *attr)
1537 {
1538 	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1539 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1540 		return -EFAULT;
1541 
1542 	VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1543 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1544 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1545 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1546 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1547 	VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1548 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1549 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1550 	VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1551 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1552 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1553 	VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1554 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1555 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1556 	VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1557 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1558 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1559 	VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1560 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1561 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1562 	VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1563 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1564 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1565 	VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1566 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1567 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1568 	VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1569 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1570 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1571 	VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1572 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1573 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1574 	VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1575 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1576 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1577 	VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1578 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1579 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1580 	VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1581 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1582 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1583 	VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1584 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1585 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1586 	VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1587 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1588 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1589 	VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1590 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1591 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1592 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1593 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1594 	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1595 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1596 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1597 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1598 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1599 
1600 	return 0;
1601 }
1602 
1603 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1604 					struct kvm_device_attr *attr)
1605 {
1606 	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1607 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1608 		return -EFAULT;
1609 
1610 	VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1611 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1612 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1613 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1614 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1615 	VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1616 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1617 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1618 	VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1619 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1620 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1621 	VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1622 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1623 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1624 	VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1625 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1626 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1627 	VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1628 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1629 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1630 	VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1631 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1632 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1633 	VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1634 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1635 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1636 	VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1637 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1638 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1639 	VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1640 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1641 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1642 	VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1643 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1644 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1645 	VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1646 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1647 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1648 	VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1649 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1650 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1651 	VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1652 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1653 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1654 	VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1655 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1656 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1657 	VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1658 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1659 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1660 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1661 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1662 	VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1663 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1664 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1665 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1666 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1667 
1668 	return 0;
1669 }
1670 
1671 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1672 {
1673 	int ret = -ENXIO;
1674 
1675 	switch (attr->attr) {
1676 	case KVM_S390_VM_CPU_PROCESSOR:
1677 		ret = kvm_s390_get_processor(kvm, attr);
1678 		break;
1679 	case KVM_S390_VM_CPU_MACHINE:
1680 		ret = kvm_s390_get_machine(kvm, attr);
1681 		break;
1682 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1683 		ret = kvm_s390_get_processor_feat(kvm, attr);
1684 		break;
1685 	case KVM_S390_VM_CPU_MACHINE_FEAT:
1686 		ret = kvm_s390_get_machine_feat(kvm, attr);
1687 		break;
1688 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1689 		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1690 		break;
1691 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1692 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1693 		break;
1694 	}
1695 	return ret;
1696 }
1697 
1698 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1699 {
1700 	int ret;
1701 
1702 	switch (attr->group) {
1703 	case KVM_S390_VM_MEM_CTRL:
1704 		ret = kvm_s390_set_mem_control(kvm, attr);
1705 		break;
1706 	case KVM_S390_VM_TOD:
1707 		ret = kvm_s390_set_tod(kvm, attr);
1708 		break;
1709 	case KVM_S390_VM_CPU_MODEL:
1710 		ret = kvm_s390_set_cpu_model(kvm, attr);
1711 		break;
1712 	case KVM_S390_VM_CRYPTO:
1713 		ret = kvm_s390_vm_set_crypto(kvm, attr);
1714 		break;
1715 	case KVM_S390_VM_MIGRATION:
1716 		ret = kvm_s390_vm_set_migration(kvm, attr);
1717 		break;
1718 	default:
1719 		ret = -ENXIO;
1720 		break;
1721 	}
1722 
1723 	return ret;
1724 }
1725 
1726 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1727 {
1728 	int ret;
1729 
1730 	switch (attr->group) {
1731 	case KVM_S390_VM_MEM_CTRL:
1732 		ret = kvm_s390_get_mem_control(kvm, attr);
1733 		break;
1734 	case KVM_S390_VM_TOD:
1735 		ret = kvm_s390_get_tod(kvm, attr);
1736 		break;
1737 	case KVM_S390_VM_CPU_MODEL:
1738 		ret = kvm_s390_get_cpu_model(kvm, attr);
1739 		break;
1740 	case KVM_S390_VM_MIGRATION:
1741 		ret = kvm_s390_vm_get_migration(kvm, attr);
1742 		break;
1743 	default:
1744 		ret = -ENXIO;
1745 		break;
1746 	}
1747 
1748 	return ret;
1749 }
1750 
1751 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1752 {
1753 	int ret;
1754 
1755 	switch (attr->group) {
1756 	case KVM_S390_VM_MEM_CTRL:
1757 		switch (attr->attr) {
1758 		case KVM_S390_VM_MEM_ENABLE_CMMA:
1759 		case KVM_S390_VM_MEM_CLR_CMMA:
1760 			ret = sclp.has_cmma ? 0 : -ENXIO;
1761 			break;
1762 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1763 			ret = 0;
1764 			break;
1765 		default:
1766 			ret = -ENXIO;
1767 			break;
1768 		}
1769 		break;
1770 	case KVM_S390_VM_TOD:
1771 		switch (attr->attr) {
1772 		case KVM_S390_VM_TOD_LOW:
1773 		case KVM_S390_VM_TOD_HIGH:
1774 			ret = 0;
1775 			break;
1776 		default:
1777 			ret = -ENXIO;
1778 			break;
1779 		}
1780 		break;
1781 	case KVM_S390_VM_CPU_MODEL:
1782 		switch (attr->attr) {
1783 		case KVM_S390_VM_CPU_PROCESSOR:
1784 		case KVM_S390_VM_CPU_MACHINE:
1785 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1786 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1787 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1788 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1789 			ret = 0;
1790 			break;
1791 		default:
1792 			ret = -ENXIO;
1793 			break;
1794 		}
1795 		break;
1796 	case KVM_S390_VM_CRYPTO:
1797 		switch (attr->attr) {
1798 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1799 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1800 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1801 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1802 			ret = 0;
1803 			break;
1804 		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1805 		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1806 			ret = ap_instructions_available() ? 0 : -ENXIO;
1807 			break;
1808 		default:
1809 			ret = -ENXIO;
1810 			break;
1811 		}
1812 		break;
1813 	case KVM_S390_VM_MIGRATION:
1814 		ret = 0;
1815 		break;
1816 	default:
1817 		ret = -ENXIO;
1818 		break;
1819 	}
1820 
1821 	return ret;
1822 }
1823 
1824 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1825 {
1826 	uint8_t *keys;
1827 	uint64_t hva;
1828 	int srcu_idx, i, r = 0;
1829 
1830 	if (args->flags != 0)
1831 		return -EINVAL;
1832 
1833 	/* Is this guest using storage keys? */
1834 	if (!mm_uses_skeys(current->mm))
1835 		return KVM_S390_GET_SKEYS_NONE;
1836 
1837 	/* Enforce sane limit on memory allocation */
1838 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1839 		return -EINVAL;
1840 
1841 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1842 	if (!keys)
1843 		return -ENOMEM;
1844 
1845 	mmap_read_lock(current->mm);
1846 	srcu_idx = srcu_read_lock(&kvm->srcu);
1847 	for (i = 0; i < args->count; i++) {
1848 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1849 		if (kvm_is_error_hva(hva)) {
1850 			r = -EFAULT;
1851 			break;
1852 		}
1853 
1854 		r = get_guest_storage_key(current->mm, hva, &keys[i]);
1855 		if (r)
1856 			break;
1857 	}
1858 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1859 	mmap_read_unlock(current->mm);
1860 
1861 	if (!r) {
1862 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1863 				 sizeof(uint8_t) * args->count);
1864 		if (r)
1865 			r = -EFAULT;
1866 	}
1867 
1868 	kvfree(keys);
1869 	return r;
1870 }
1871 
1872 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1873 {
1874 	uint8_t *keys;
1875 	uint64_t hva;
1876 	int srcu_idx, i, r = 0;
1877 	bool unlocked;
1878 
1879 	if (args->flags != 0)
1880 		return -EINVAL;
1881 
1882 	/* Enforce sane limit on memory allocation */
1883 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1884 		return -EINVAL;
1885 
1886 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1887 	if (!keys)
1888 		return -ENOMEM;
1889 
1890 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1891 			   sizeof(uint8_t) * args->count);
1892 	if (r) {
1893 		r = -EFAULT;
1894 		goto out;
1895 	}
1896 
1897 	/* Enable storage key handling for the guest */
1898 	r = s390_enable_skey();
1899 	if (r)
1900 		goto out;
1901 
1902 	i = 0;
1903 	mmap_read_lock(current->mm);
1904 	srcu_idx = srcu_read_lock(&kvm->srcu);
1905         while (i < args->count) {
1906 		unlocked = false;
1907 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1908 		if (kvm_is_error_hva(hva)) {
1909 			r = -EFAULT;
1910 			break;
1911 		}
1912 
1913 		/* Lowest order bit is reserved */
1914 		if (keys[i] & 0x01) {
1915 			r = -EINVAL;
1916 			break;
1917 		}
1918 
1919 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1920 		if (r) {
1921 			r = fixup_user_fault(current->mm, hva,
1922 					     FAULT_FLAG_WRITE, &unlocked);
1923 			if (r)
1924 				break;
1925 		}
1926 		if (!r)
1927 			i++;
1928 	}
1929 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1930 	mmap_read_unlock(current->mm);
1931 out:
1932 	kvfree(keys);
1933 	return r;
1934 }
1935 
1936 /*
1937  * Base address and length must be sent at the start of each block, therefore
1938  * it's cheaper to send some clean data, as long as it's less than the size of
1939  * two longs.
1940  */
1941 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1942 /* Use the same upper bound as for storage key requests, for consistency */
1943 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1944 
1945 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1946 			      u8 *res, unsigned long bufsize)
1947 {
1948 	unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1949 
1950 	args->count = 0;
1951 	while (args->count < bufsize) {
1952 		hva = gfn_to_hva(kvm, cur_gfn);
1953 		/*
1954 		 * We return an error if the first value was invalid, but we
1955 		 * return successfully if at least one value was copied.
1956 		 */
1957 		if (kvm_is_error_hva(hva))
1958 			return args->count ? 0 : -EFAULT;
1959 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1960 			pgstev = 0;
1961 		res[args->count++] = (pgstev >> 24) & 0x43;
1962 		cur_gfn++;
1963 	}
1964 
1965 	return 0;
1966 }
1967 
1968 static struct kvm_memory_slot *gfn_to_memslot_approx(struct kvm_memslots *slots,
1969 						     gfn_t gfn)
1970 {
1971 	return ____gfn_to_memslot(slots, gfn, true);
1972 }
1973 
1974 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1975 					      unsigned long cur_gfn)
1976 {
1977 	struct kvm_memory_slot *ms = gfn_to_memslot_approx(slots, cur_gfn);
1978 	unsigned long ofs = cur_gfn - ms->base_gfn;
1979 	struct rb_node *mnode = &ms->gfn_node[slots->node_idx];
1980 
1981 	if (ms->base_gfn + ms->npages <= cur_gfn) {
1982 		mnode = rb_next(mnode);
1983 		/* If we are above the highest slot, wrap around */
1984 		if (!mnode)
1985 			mnode = rb_first(&slots->gfn_tree);
1986 
1987 		ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
1988 		ofs = 0;
1989 	}
1990 	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1991 	while (ofs >= ms->npages && (mnode = rb_next(mnode))) {
1992 		ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
1993 		ofs = find_first_bit(kvm_second_dirty_bitmap(ms), ms->npages);
1994 	}
1995 	return ms->base_gfn + ofs;
1996 }
1997 
1998 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1999 			     u8 *res, unsigned long bufsize)
2000 {
2001 	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2002 	struct kvm_memslots *slots = kvm_memslots(kvm);
2003 	struct kvm_memory_slot *ms;
2004 
2005 	if (unlikely(kvm_memslots_empty(slots)))
2006 		return 0;
2007 
2008 	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2009 	ms = gfn_to_memslot(kvm, cur_gfn);
2010 	args->count = 0;
2011 	args->start_gfn = cur_gfn;
2012 	if (!ms)
2013 		return 0;
2014 	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2015 	mem_end = kvm_s390_get_gfn_end(slots);
2016 
2017 	while (args->count < bufsize) {
2018 		hva = gfn_to_hva(kvm, cur_gfn);
2019 		if (kvm_is_error_hva(hva))
2020 			return 0;
2021 		/* Decrement only if we actually flipped the bit to 0 */
2022 		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2023 			atomic64_dec(&kvm->arch.cmma_dirty_pages);
2024 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2025 			pgstev = 0;
2026 		/* Save the value */
2027 		res[args->count++] = (pgstev >> 24) & 0x43;
2028 		/* If the next bit is too far away, stop. */
2029 		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2030 			return 0;
2031 		/* If we reached the previous "next", find the next one */
2032 		if (cur_gfn == next_gfn)
2033 			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2034 		/* Reached the end of memory or of the buffer, stop */
2035 		if ((next_gfn >= mem_end) ||
2036 		    (next_gfn - args->start_gfn >= bufsize))
2037 			return 0;
2038 		cur_gfn++;
2039 		/* Reached the end of the current memslot, take the next one. */
2040 		if (cur_gfn - ms->base_gfn >= ms->npages) {
2041 			ms = gfn_to_memslot(kvm, cur_gfn);
2042 			if (!ms)
2043 				return 0;
2044 		}
2045 	}
2046 	return 0;
2047 }
2048 
2049 /*
2050  * This function searches for the next page with dirty CMMA attributes, and
2051  * saves the attributes in the buffer up to either the end of the buffer or
2052  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2053  * no trailing clean bytes are saved.
2054  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2055  * output buffer will indicate 0 as length.
2056  */
2057 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2058 				  struct kvm_s390_cmma_log *args)
2059 {
2060 	unsigned long bufsize;
2061 	int srcu_idx, peek, ret;
2062 	u8 *values;
2063 
2064 	if (!kvm->arch.use_cmma)
2065 		return -ENXIO;
2066 	/* Invalid/unsupported flags were specified */
2067 	if (args->flags & ~KVM_S390_CMMA_PEEK)
2068 		return -EINVAL;
2069 	/* Migration mode query, and we are not doing a migration */
2070 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2071 	if (!peek && !kvm->arch.migration_mode)
2072 		return -EINVAL;
2073 	/* CMMA is disabled or was not used, or the buffer has length zero */
2074 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2075 	if (!bufsize || !kvm->mm->context.uses_cmm) {
2076 		memset(args, 0, sizeof(*args));
2077 		return 0;
2078 	}
2079 	/* We are not peeking, and there are no dirty pages */
2080 	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2081 		memset(args, 0, sizeof(*args));
2082 		return 0;
2083 	}
2084 
2085 	values = vmalloc(bufsize);
2086 	if (!values)
2087 		return -ENOMEM;
2088 
2089 	mmap_read_lock(kvm->mm);
2090 	srcu_idx = srcu_read_lock(&kvm->srcu);
2091 	if (peek)
2092 		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2093 	else
2094 		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2095 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2096 	mmap_read_unlock(kvm->mm);
2097 
2098 	if (kvm->arch.migration_mode)
2099 		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2100 	else
2101 		args->remaining = 0;
2102 
2103 	if (copy_to_user((void __user *)args->values, values, args->count))
2104 		ret = -EFAULT;
2105 
2106 	vfree(values);
2107 	return ret;
2108 }
2109 
2110 /*
2111  * This function sets the CMMA attributes for the given pages. If the input
2112  * buffer has zero length, no action is taken, otherwise the attributes are
2113  * set and the mm->context.uses_cmm flag is set.
2114  */
2115 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2116 				  const struct kvm_s390_cmma_log *args)
2117 {
2118 	unsigned long hva, mask, pgstev, i;
2119 	uint8_t *bits;
2120 	int srcu_idx, r = 0;
2121 
2122 	mask = args->mask;
2123 
2124 	if (!kvm->arch.use_cmma)
2125 		return -ENXIO;
2126 	/* invalid/unsupported flags */
2127 	if (args->flags != 0)
2128 		return -EINVAL;
2129 	/* Enforce sane limit on memory allocation */
2130 	if (args->count > KVM_S390_CMMA_SIZE_MAX)
2131 		return -EINVAL;
2132 	/* Nothing to do */
2133 	if (args->count == 0)
2134 		return 0;
2135 
2136 	bits = vmalloc(array_size(sizeof(*bits), args->count));
2137 	if (!bits)
2138 		return -ENOMEM;
2139 
2140 	r = copy_from_user(bits, (void __user *)args->values, args->count);
2141 	if (r) {
2142 		r = -EFAULT;
2143 		goto out;
2144 	}
2145 
2146 	mmap_read_lock(kvm->mm);
2147 	srcu_idx = srcu_read_lock(&kvm->srcu);
2148 	for (i = 0; i < args->count; i++) {
2149 		hva = gfn_to_hva(kvm, args->start_gfn + i);
2150 		if (kvm_is_error_hva(hva)) {
2151 			r = -EFAULT;
2152 			break;
2153 		}
2154 
2155 		pgstev = bits[i];
2156 		pgstev = pgstev << 24;
2157 		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2158 		set_pgste_bits(kvm->mm, hva, mask, pgstev);
2159 	}
2160 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2161 	mmap_read_unlock(kvm->mm);
2162 
2163 	if (!kvm->mm->context.uses_cmm) {
2164 		mmap_write_lock(kvm->mm);
2165 		kvm->mm->context.uses_cmm = 1;
2166 		mmap_write_unlock(kvm->mm);
2167 	}
2168 out:
2169 	vfree(bits);
2170 	return r;
2171 }
2172 
2173 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2174 {
2175 	struct kvm_vcpu *vcpu;
2176 	u16 rc, rrc;
2177 	int ret = 0;
2178 	unsigned long i;
2179 
2180 	/*
2181 	 * We ignore failures and try to destroy as many CPUs as possible.
2182 	 * At the same time we must not free the assigned resources when
2183 	 * this fails, as the ultravisor has still access to that memory.
2184 	 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2185 	 * behind.
2186 	 * We want to return the first failure rc and rrc, though.
2187 	 */
2188 	kvm_for_each_vcpu(i, vcpu, kvm) {
2189 		mutex_lock(&vcpu->mutex);
2190 		if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2191 			*rcp = rc;
2192 			*rrcp = rrc;
2193 			ret = -EIO;
2194 		}
2195 		mutex_unlock(&vcpu->mutex);
2196 	}
2197 	/* Ensure that we re-enable gisa if the non-PV guest used it but the PV guest did not. */
2198 	if (use_gisa)
2199 		kvm_s390_gisa_enable(kvm);
2200 	return ret;
2201 }
2202 
2203 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2204 {
2205 	unsigned long i;
2206 	int r = 0;
2207 	u16 dummy;
2208 
2209 	struct kvm_vcpu *vcpu;
2210 
2211 	/* Disable the GISA if the ultravisor does not support AIV. */
2212 	if (!test_bit_inv(BIT_UV_FEAT_AIV, &uv_info.uv_feature_indications))
2213 		kvm_s390_gisa_disable(kvm);
2214 
2215 	kvm_for_each_vcpu(i, vcpu, kvm) {
2216 		mutex_lock(&vcpu->mutex);
2217 		r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2218 		mutex_unlock(&vcpu->mutex);
2219 		if (r)
2220 			break;
2221 	}
2222 	if (r)
2223 		kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2224 	return r;
2225 }
2226 
2227 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2228 {
2229 	int r = 0;
2230 	u16 dummy;
2231 	void __user *argp = (void __user *)cmd->data;
2232 
2233 	switch (cmd->cmd) {
2234 	case KVM_PV_ENABLE: {
2235 		r = -EINVAL;
2236 		if (kvm_s390_pv_is_protected(kvm))
2237 			break;
2238 
2239 		/*
2240 		 *  FMT 4 SIE needs esca. As we never switch back to bsca from
2241 		 *  esca, we need no cleanup in the error cases below
2242 		 */
2243 		r = sca_switch_to_extended(kvm);
2244 		if (r)
2245 			break;
2246 
2247 		mmap_write_lock(current->mm);
2248 		r = gmap_mark_unmergeable();
2249 		mmap_write_unlock(current->mm);
2250 		if (r)
2251 			break;
2252 
2253 		r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2254 		if (r)
2255 			break;
2256 
2257 		r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2258 		if (r)
2259 			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2260 
2261 		/* we need to block service interrupts from now on */
2262 		set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2263 		break;
2264 	}
2265 	case KVM_PV_DISABLE: {
2266 		r = -EINVAL;
2267 		if (!kvm_s390_pv_is_protected(kvm))
2268 			break;
2269 
2270 		r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2271 		/*
2272 		 * If a CPU could not be destroyed, destroy VM will also fail.
2273 		 * There is no point in trying to destroy it. Instead return
2274 		 * the rc and rrc from the first CPU that failed destroying.
2275 		 */
2276 		if (r)
2277 			break;
2278 		r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2279 
2280 		/* no need to block service interrupts any more */
2281 		clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2282 		break;
2283 	}
2284 	case KVM_PV_SET_SEC_PARMS: {
2285 		struct kvm_s390_pv_sec_parm parms = {};
2286 		void *hdr;
2287 
2288 		r = -EINVAL;
2289 		if (!kvm_s390_pv_is_protected(kvm))
2290 			break;
2291 
2292 		r = -EFAULT;
2293 		if (copy_from_user(&parms, argp, sizeof(parms)))
2294 			break;
2295 
2296 		/* Currently restricted to 8KB */
2297 		r = -EINVAL;
2298 		if (parms.length > PAGE_SIZE * 2)
2299 			break;
2300 
2301 		r = -ENOMEM;
2302 		hdr = vmalloc(parms.length);
2303 		if (!hdr)
2304 			break;
2305 
2306 		r = -EFAULT;
2307 		if (!copy_from_user(hdr, (void __user *)parms.origin,
2308 				    parms.length))
2309 			r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2310 						      &cmd->rc, &cmd->rrc);
2311 
2312 		vfree(hdr);
2313 		break;
2314 	}
2315 	case KVM_PV_UNPACK: {
2316 		struct kvm_s390_pv_unp unp = {};
2317 
2318 		r = -EINVAL;
2319 		if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2320 			break;
2321 
2322 		r = -EFAULT;
2323 		if (copy_from_user(&unp, argp, sizeof(unp)))
2324 			break;
2325 
2326 		r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2327 				       &cmd->rc, &cmd->rrc);
2328 		break;
2329 	}
2330 	case KVM_PV_VERIFY: {
2331 		r = -EINVAL;
2332 		if (!kvm_s390_pv_is_protected(kvm))
2333 			break;
2334 
2335 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2336 				  UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2337 		KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2338 			     cmd->rrc);
2339 		break;
2340 	}
2341 	case KVM_PV_PREP_RESET: {
2342 		r = -EINVAL;
2343 		if (!kvm_s390_pv_is_protected(kvm))
2344 			break;
2345 
2346 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2347 				  UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2348 		KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2349 			     cmd->rc, cmd->rrc);
2350 		break;
2351 	}
2352 	case KVM_PV_UNSHARE_ALL: {
2353 		r = -EINVAL;
2354 		if (!kvm_s390_pv_is_protected(kvm))
2355 			break;
2356 
2357 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2358 				  UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2359 		KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2360 			     cmd->rc, cmd->rrc);
2361 		break;
2362 	}
2363 	default:
2364 		r = -ENOTTY;
2365 	}
2366 	return r;
2367 }
2368 
2369 static bool access_key_invalid(u8 access_key)
2370 {
2371 	return access_key > 0xf;
2372 }
2373 
2374 static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
2375 {
2376 	void __user *uaddr = (void __user *)mop->buf;
2377 	u64 supported_flags;
2378 	void *tmpbuf = NULL;
2379 	int r, srcu_idx;
2380 
2381 	supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION
2382 			  | KVM_S390_MEMOP_F_CHECK_ONLY;
2383 	if (mop->flags & ~supported_flags || !mop->size)
2384 		return -EINVAL;
2385 	if (mop->size > MEM_OP_MAX_SIZE)
2386 		return -E2BIG;
2387 	if (kvm_s390_pv_is_protected(kvm))
2388 		return -EINVAL;
2389 	if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
2390 		if (access_key_invalid(mop->key))
2391 			return -EINVAL;
2392 	} else {
2393 		mop->key = 0;
2394 	}
2395 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2396 		tmpbuf = vmalloc(mop->size);
2397 		if (!tmpbuf)
2398 			return -ENOMEM;
2399 	}
2400 
2401 	srcu_idx = srcu_read_lock(&kvm->srcu);
2402 
2403 	if (kvm_is_error_gpa(kvm, mop->gaddr)) {
2404 		r = PGM_ADDRESSING;
2405 		goto out_unlock;
2406 	}
2407 
2408 	switch (mop->op) {
2409 	case KVM_S390_MEMOP_ABSOLUTE_READ: {
2410 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2411 			r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_FETCH, mop->key);
2412 		} else {
2413 			r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
2414 						      mop->size, GACC_FETCH, mop->key);
2415 			if (r == 0) {
2416 				if (copy_to_user(uaddr, tmpbuf, mop->size))
2417 					r = -EFAULT;
2418 			}
2419 		}
2420 		break;
2421 	}
2422 	case KVM_S390_MEMOP_ABSOLUTE_WRITE: {
2423 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2424 			r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_STORE, mop->key);
2425 		} else {
2426 			if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2427 				r = -EFAULT;
2428 				break;
2429 			}
2430 			r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
2431 						      mop->size, GACC_STORE, mop->key);
2432 		}
2433 		break;
2434 	}
2435 	default:
2436 		r = -EINVAL;
2437 	}
2438 
2439 out_unlock:
2440 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2441 
2442 	vfree(tmpbuf);
2443 	return r;
2444 }
2445 
2446 long kvm_arch_vm_ioctl(struct file *filp,
2447 		       unsigned int ioctl, unsigned long arg)
2448 {
2449 	struct kvm *kvm = filp->private_data;
2450 	void __user *argp = (void __user *)arg;
2451 	struct kvm_device_attr attr;
2452 	int r;
2453 
2454 	switch (ioctl) {
2455 	case KVM_S390_INTERRUPT: {
2456 		struct kvm_s390_interrupt s390int;
2457 
2458 		r = -EFAULT;
2459 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
2460 			break;
2461 		r = kvm_s390_inject_vm(kvm, &s390int);
2462 		break;
2463 	}
2464 	case KVM_CREATE_IRQCHIP: {
2465 		struct kvm_irq_routing_entry routing;
2466 
2467 		r = -EINVAL;
2468 		if (kvm->arch.use_irqchip) {
2469 			/* Set up dummy routing. */
2470 			memset(&routing, 0, sizeof(routing));
2471 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2472 		}
2473 		break;
2474 	}
2475 	case KVM_SET_DEVICE_ATTR: {
2476 		r = -EFAULT;
2477 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2478 			break;
2479 		r = kvm_s390_vm_set_attr(kvm, &attr);
2480 		break;
2481 	}
2482 	case KVM_GET_DEVICE_ATTR: {
2483 		r = -EFAULT;
2484 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2485 			break;
2486 		r = kvm_s390_vm_get_attr(kvm, &attr);
2487 		break;
2488 	}
2489 	case KVM_HAS_DEVICE_ATTR: {
2490 		r = -EFAULT;
2491 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2492 			break;
2493 		r = kvm_s390_vm_has_attr(kvm, &attr);
2494 		break;
2495 	}
2496 	case KVM_S390_GET_SKEYS: {
2497 		struct kvm_s390_skeys args;
2498 
2499 		r = -EFAULT;
2500 		if (copy_from_user(&args, argp,
2501 				   sizeof(struct kvm_s390_skeys)))
2502 			break;
2503 		r = kvm_s390_get_skeys(kvm, &args);
2504 		break;
2505 	}
2506 	case KVM_S390_SET_SKEYS: {
2507 		struct kvm_s390_skeys args;
2508 
2509 		r = -EFAULT;
2510 		if (copy_from_user(&args, argp,
2511 				   sizeof(struct kvm_s390_skeys)))
2512 			break;
2513 		r = kvm_s390_set_skeys(kvm, &args);
2514 		break;
2515 	}
2516 	case KVM_S390_GET_CMMA_BITS: {
2517 		struct kvm_s390_cmma_log args;
2518 
2519 		r = -EFAULT;
2520 		if (copy_from_user(&args, argp, sizeof(args)))
2521 			break;
2522 		mutex_lock(&kvm->slots_lock);
2523 		r = kvm_s390_get_cmma_bits(kvm, &args);
2524 		mutex_unlock(&kvm->slots_lock);
2525 		if (!r) {
2526 			r = copy_to_user(argp, &args, sizeof(args));
2527 			if (r)
2528 				r = -EFAULT;
2529 		}
2530 		break;
2531 	}
2532 	case KVM_S390_SET_CMMA_BITS: {
2533 		struct kvm_s390_cmma_log args;
2534 
2535 		r = -EFAULT;
2536 		if (copy_from_user(&args, argp, sizeof(args)))
2537 			break;
2538 		mutex_lock(&kvm->slots_lock);
2539 		r = kvm_s390_set_cmma_bits(kvm, &args);
2540 		mutex_unlock(&kvm->slots_lock);
2541 		break;
2542 	}
2543 	case KVM_S390_PV_COMMAND: {
2544 		struct kvm_pv_cmd args;
2545 
2546 		/* protvirt means user cpu state */
2547 		kvm_s390_set_user_cpu_state_ctrl(kvm);
2548 		r = 0;
2549 		if (!is_prot_virt_host()) {
2550 			r = -EINVAL;
2551 			break;
2552 		}
2553 		if (copy_from_user(&args, argp, sizeof(args))) {
2554 			r = -EFAULT;
2555 			break;
2556 		}
2557 		if (args.flags) {
2558 			r = -EINVAL;
2559 			break;
2560 		}
2561 		mutex_lock(&kvm->lock);
2562 		r = kvm_s390_handle_pv(kvm, &args);
2563 		mutex_unlock(&kvm->lock);
2564 		if (copy_to_user(argp, &args, sizeof(args))) {
2565 			r = -EFAULT;
2566 			break;
2567 		}
2568 		break;
2569 	}
2570 	case KVM_S390_MEM_OP: {
2571 		struct kvm_s390_mem_op mem_op;
2572 
2573 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
2574 			r = kvm_s390_vm_mem_op(kvm, &mem_op);
2575 		else
2576 			r = -EFAULT;
2577 		break;
2578 	}
2579 	default:
2580 		r = -ENOTTY;
2581 	}
2582 
2583 	return r;
2584 }
2585 
2586 static int kvm_s390_apxa_installed(void)
2587 {
2588 	struct ap_config_info info;
2589 
2590 	if (ap_instructions_available()) {
2591 		if (ap_qci(&info) == 0)
2592 			return info.apxa;
2593 	}
2594 
2595 	return 0;
2596 }
2597 
2598 /*
2599  * The format of the crypto control block (CRYCB) is specified in the 3 low
2600  * order bits of the CRYCB designation (CRYCBD) field as follows:
2601  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2602  *	     AP extended addressing (APXA) facility are installed.
2603  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2604  * Format 2: Both the APXA and MSAX3 facilities are installed
2605  */
2606 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2607 {
2608 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2609 
2610 	/* Clear the CRYCB format bits - i.e., set format 0 by default */
2611 	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2612 
2613 	/* Check whether MSAX3 is installed */
2614 	if (!test_kvm_facility(kvm, 76))
2615 		return;
2616 
2617 	if (kvm_s390_apxa_installed())
2618 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2619 	else
2620 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2621 }
2622 
2623 /*
2624  * kvm_arch_crypto_set_masks
2625  *
2626  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2627  *	 to be set.
2628  * @apm: the mask identifying the accessible AP adapters
2629  * @aqm: the mask identifying the accessible AP domains
2630  * @adm: the mask identifying the accessible AP control domains
2631  *
2632  * Set the masks that identify the adapters, domains and control domains to
2633  * which the KVM guest is granted access.
2634  *
2635  * Note: The kvm->lock mutex must be locked by the caller before invoking this
2636  *	 function.
2637  */
2638 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2639 			       unsigned long *aqm, unsigned long *adm)
2640 {
2641 	struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2642 
2643 	kvm_s390_vcpu_block_all(kvm);
2644 
2645 	switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2646 	case CRYCB_FORMAT2: /* APCB1 use 256 bits */
2647 		memcpy(crycb->apcb1.apm, apm, 32);
2648 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2649 			 apm[0], apm[1], apm[2], apm[3]);
2650 		memcpy(crycb->apcb1.aqm, aqm, 32);
2651 		VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2652 			 aqm[0], aqm[1], aqm[2], aqm[3]);
2653 		memcpy(crycb->apcb1.adm, adm, 32);
2654 		VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2655 			 adm[0], adm[1], adm[2], adm[3]);
2656 		break;
2657 	case CRYCB_FORMAT1:
2658 	case CRYCB_FORMAT0: /* Fall through both use APCB0 */
2659 		memcpy(crycb->apcb0.apm, apm, 8);
2660 		memcpy(crycb->apcb0.aqm, aqm, 2);
2661 		memcpy(crycb->apcb0.adm, adm, 2);
2662 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2663 			 apm[0], *((unsigned short *)aqm),
2664 			 *((unsigned short *)adm));
2665 		break;
2666 	default:	/* Can not happen */
2667 		break;
2668 	}
2669 
2670 	/* recreate the shadow crycb for each vcpu */
2671 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2672 	kvm_s390_vcpu_unblock_all(kvm);
2673 }
2674 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2675 
2676 /*
2677  * kvm_arch_crypto_clear_masks
2678  *
2679  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2680  *	 to be cleared.
2681  *
2682  * Clear the masks that identify the adapters, domains and control domains to
2683  * which the KVM guest is granted access.
2684  *
2685  * Note: The kvm->lock mutex must be locked by the caller before invoking this
2686  *	 function.
2687  */
2688 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2689 {
2690 	kvm_s390_vcpu_block_all(kvm);
2691 
2692 	memset(&kvm->arch.crypto.crycb->apcb0, 0,
2693 	       sizeof(kvm->arch.crypto.crycb->apcb0));
2694 	memset(&kvm->arch.crypto.crycb->apcb1, 0,
2695 	       sizeof(kvm->arch.crypto.crycb->apcb1));
2696 
2697 	VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2698 	/* recreate the shadow crycb for each vcpu */
2699 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2700 	kvm_s390_vcpu_unblock_all(kvm);
2701 }
2702 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2703 
2704 static u64 kvm_s390_get_initial_cpuid(void)
2705 {
2706 	struct cpuid cpuid;
2707 
2708 	get_cpu_id(&cpuid);
2709 	cpuid.version = 0xff;
2710 	return *((u64 *) &cpuid);
2711 }
2712 
2713 static void kvm_s390_crypto_init(struct kvm *kvm)
2714 {
2715 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2716 	kvm_s390_set_crycb_format(kvm);
2717 	init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);
2718 
2719 	if (!test_kvm_facility(kvm, 76))
2720 		return;
2721 
2722 	/* Enable AES/DEA protected key functions by default */
2723 	kvm->arch.crypto.aes_kw = 1;
2724 	kvm->arch.crypto.dea_kw = 1;
2725 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2726 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2727 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2728 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2729 }
2730 
2731 static void sca_dispose(struct kvm *kvm)
2732 {
2733 	if (kvm->arch.use_esca)
2734 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2735 	else
2736 		free_page((unsigned long)(kvm->arch.sca));
2737 	kvm->arch.sca = NULL;
2738 }
2739 
2740 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2741 {
2742 	gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
2743 	int i, rc;
2744 	char debug_name[16];
2745 	static unsigned long sca_offset;
2746 
2747 	rc = -EINVAL;
2748 #ifdef CONFIG_KVM_S390_UCONTROL
2749 	if (type & ~KVM_VM_S390_UCONTROL)
2750 		goto out_err;
2751 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2752 		goto out_err;
2753 #else
2754 	if (type)
2755 		goto out_err;
2756 #endif
2757 
2758 	rc = s390_enable_sie();
2759 	if (rc)
2760 		goto out_err;
2761 
2762 	rc = -ENOMEM;
2763 
2764 	if (!sclp.has_64bscao)
2765 		alloc_flags |= GFP_DMA;
2766 	rwlock_init(&kvm->arch.sca_lock);
2767 	/* start with basic SCA */
2768 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2769 	if (!kvm->arch.sca)
2770 		goto out_err;
2771 	mutex_lock(&kvm_lock);
2772 	sca_offset += 16;
2773 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2774 		sca_offset = 0;
2775 	kvm->arch.sca = (struct bsca_block *)
2776 			((char *) kvm->arch.sca + sca_offset);
2777 	mutex_unlock(&kvm_lock);
2778 
2779 	sprintf(debug_name, "kvm-%u", current->pid);
2780 
2781 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2782 	if (!kvm->arch.dbf)
2783 		goto out_err;
2784 
2785 	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2786 	kvm->arch.sie_page2 =
2787 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
2788 	if (!kvm->arch.sie_page2)
2789 		goto out_err;
2790 
2791 	kvm->arch.sie_page2->kvm = kvm;
2792 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2793 
2794 	for (i = 0; i < kvm_s390_fac_size(); i++) {
2795 		kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
2796 					      (kvm_s390_fac_base[i] |
2797 					       kvm_s390_fac_ext[i]);
2798 		kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
2799 					      kvm_s390_fac_base[i];
2800 	}
2801 	kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2802 
2803 	/* we are always in czam mode - even on pre z14 machines */
2804 	set_kvm_facility(kvm->arch.model.fac_mask, 138);
2805 	set_kvm_facility(kvm->arch.model.fac_list, 138);
2806 	/* we emulate STHYI in kvm */
2807 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
2808 	set_kvm_facility(kvm->arch.model.fac_list, 74);
2809 	if (MACHINE_HAS_TLB_GUEST) {
2810 		set_kvm_facility(kvm->arch.model.fac_mask, 147);
2811 		set_kvm_facility(kvm->arch.model.fac_list, 147);
2812 	}
2813 
2814 	if (css_general_characteristics.aiv && test_facility(65))
2815 		set_kvm_facility(kvm->arch.model.fac_mask, 65);
2816 
2817 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2818 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2819 
2820 	kvm_s390_crypto_init(kvm);
2821 
2822 	mutex_init(&kvm->arch.float_int.ais_lock);
2823 	spin_lock_init(&kvm->arch.float_int.lock);
2824 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
2825 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2826 	init_waitqueue_head(&kvm->arch.ipte_wq);
2827 	mutex_init(&kvm->arch.ipte_mutex);
2828 
2829 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2830 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
2831 
2832 	if (type & KVM_VM_S390_UCONTROL) {
2833 		kvm->arch.gmap = NULL;
2834 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2835 	} else {
2836 		if (sclp.hamax == U64_MAX)
2837 			kvm->arch.mem_limit = TASK_SIZE_MAX;
2838 		else
2839 			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2840 						    sclp.hamax + 1);
2841 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2842 		if (!kvm->arch.gmap)
2843 			goto out_err;
2844 		kvm->arch.gmap->private = kvm;
2845 		kvm->arch.gmap->pfault_enabled = 0;
2846 	}
2847 
2848 	kvm->arch.use_pfmfi = sclp.has_pfmfi;
2849 	kvm->arch.use_skf = sclp.has_skey;
2850 	spin_lock_init(&kvm->arch.start_stop_lock);
2851 	kvm_s390_vsie_init(kvm);
2852 	if (use_gisa)
2853 		kvm_s390_gisa_init(kvm);
2854 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2855 
2856 	return 0;
2857 out_err:
2858 	free_page((unsigned long)kvm->arch.sie_page2);
2859 	debug_unregister(kvm->arch.dbf);
2860 	sca_dispose(kvm);
2861 	KVM_EVENT(3, "creation of vm failed: %d", rc);
2862 	return rc;
2863 }
2864 
2865 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2866 {
2867 	u16 rc, rrc;
2868 
2869 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2870 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2871 	kvm_s390_clear_local_irqs(vcpu);
2872 	kvm_clear_async_pf_completion_queue(vcpu);
2873 	if (!kvm_is_ucontrol(vcpu->kvm))
2874 		sca_del_vcpu(vcpu);
2875 
2876 	if (kvm_is_ucontrol(vcpu->kvm))
2877 		gmap_remove(vcpu->arch.gmap);
2878 
2879 	if (vcpu->kvm->arch.use_cmma)
2880 		kvm_s390_vcpu_unsetup_cmma(vcpu);
2881 	/* We can not hold the vcpu mutex here, we are already dying */
2882 	if (kvm_s390_pv_cpu_get_handle(vcpu))
2883 		kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2884 	free_page((unsigned long)(vcpu->arch.sie_block));
2885 }
2886 
2887 void kvm_arch_destroy_vm(struct kvm *kvm)
2888 {
2889 	u16 rc, rrc;
2890 
2891 	kvm_destroy_vcpus(kvm);
2892 	sca_dispose(kvm);
2893 	kvm_s390_gisa_destroy(kvm);
2894 	/*
2895 	 * We are already at the end of life and kvm->lock is not taken.
2896 	 * This is ok as the file descriptor is closed by now and nobody
2897 	 * can mess with the pv state. To avoid lockdep_assert_held from
2898 	 * complaining we do not use kvm_s390_pv_is_protected.
2899 	 */
2900 	if (kvm_s390_pv_get_handle(kvm))
2901 		kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2902 	debug_unregister(kvm->arch.dbf);
2903 	free_page((unsigned long)kvm->arch.sie_page2);
2904 	if (!kvm_is_ucontrol(kvm))
2905 		gmap_remove(kvm->arch.gmap);
2906 	kvm_s390_destroy_adapters(kvm);
2907 	kvm_s390_clear_float_irqs(kvm);
2908 	kvm_s390_vsie_destroy(kvm);
2909 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2910 }
2911 
2912 /* Section: vcpu related */
2913 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2914 {
2915 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2916 	if (!vcpu->arch.gmap)
2917 		return -ENOMEM;
2918 	vcpu->arch.gmap->private = vcpu->kvm;
2919 
2920 	return 0;
2921 }
2922 
2923 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2924 {
2925 	if (!kvm_s390_use_sca_entries())
2926 		return;
2927 	read_lock(&vcpu->kvm->arch.sca_lock);
2928 	if (vcpu->kvm->arch.use_esca) {
2929 		struct esca_block *sca = vcpu->kvm->arch.sca;
2930 
2931 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2932 		sca->cpu[vcpu->vcpu_id].sda = 0;
2933 	} else {
2934 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2935 
2936 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2937 		sca->cpu[vcpu->vcpu_id].sda = 0;
2938 	}
2939 	read_unlock(&vcpu->kvm->arch.sca_lock);
2940 }
2941 
2942 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2943 {
2944 	if (!kvm_s390_use_sca_entries()) {
2945 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2946 
2947 		/* we still need the basic sca for the ipte control */
2948 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2949 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2950 		return;
2951 	}
2952 	read_lock(&vcpu->kvm->arch.sca_lock);
2953 	if (vcpu->kvm->arch.use_esca) {
2954 		struct esca_block *sca = vcpu->kvm->arch.sca;
2955 
2956 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2957 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2958 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2959 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2960 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2961 	} else {
2962 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2963 
2964 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2965 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2966 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2967 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2968 	}
2969 	read_unlock(&vcpu->kvm->arch.sca_lock);
2970 }
2971 
2972 /* Basic SCA to Extended SCA data copy routines */
2973 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2974 {
2975 	d->sda = s->sda;
2976 	d->sigp_ctrl.c = s->sigp_ctrl.c;
2977 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2978 }
2979 
2980 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2981 {
2982 	int i;
2983 
2984 	d->ipte_control = s->ipte_control;
2985 	d->mcn[0] = s->mcn;
2986 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2987 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2988 }
2989 
2990 static int sca_switch_to_extended(struct kvm *kvm)
2991 {
2992 	struct bsca_block *old_sca = kvm->arch.sca;
2993 	struct esca_block *new_sca;
2994 	struct kvm_vcpu *vcpu;
2995 	unsigned long vcpu_idx;
2996 	u32 scaol, scaoh;
2997 
2998 	if (kvm->arch.use_esca)
2999 		return 0;
3000 
3001 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
3002 	if (!new_sca)
3003 		return -ENOMEM;
3004 
3005 	scaoh = (u32)((u64)(new_sca) >> 32);
3006 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
3007 
3008 	kvm_s390_vcpu_block_all(kvm);
3009 	write_lock(&kvm->arch.sca_lock);
3010 
3011 	sca_copy_b_to_e(new_sca, old_sca);
3012 
3013 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
3014 		vcpu->arch.sie_block->scaoh = scaoh;
3015 		vcpu->arch.sie_block->scaol = scaol;
3016 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
3017 	}
3018 	kvm->arch.sca = new_sca;
3019 	kvm->arch.use_esca = 1;
3020 
3021 	write_unlock(&kvm->arch.sca_lock);
3022 	kvm_s390_vcpu_unblock_all(kvm);
3023 
3024 	free_page((unsigned long)old_sca);
3025 
3026 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
3027 		 old_sca, kvm->arch.sca);
3028 	return 0;
3029 }
3030 
3031 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
3032 {
3033 	int rc;
3034 
3035 	if (!kvm_s390_use_sca_entries()) {
3036 		if (id < KVM_MAX_VCPUS)
3037 			return true;
3038 		return false;
3039 	}
3040 	if (id < KVM_S390_BSCA_CPU_SLOTS)
3041 		return true;
3042 	if (!sclp.has_esca || !sclp.has_64bscao)
3043 		return false;
3044 
3045 	mutex_lock(&kvm->lock);
3046 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
3047 	mutex_unlock(&kvm->lock);
3048 
3049 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
3050 }
3051 
3052 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3053 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3054 {
3055 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
3056 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3057 	vcpu->arch.cputm_start = get_tod_clock_fast();
3058 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3059 }
3060 
3061 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3062 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3063 {
3064 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
3065 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3066 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3067 	vcpu->arch.cputm_start = 0;
3068 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3069 }
3070 
3071 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3072 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3073 {
3074 	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
3075 	vcpu->arch.cputm_enabled = true;
3076 	__start_cpu_timer_accounting(vcpu);
3077 }
3078 
3079 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3080 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3081 {
3082 	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
3083 	__stop_cpu_timer_accounting(vcpu);
3084 	vcpu->arch.cputm_enabled = false;
3085 }
3086 
/* Preemption-safe wrapper around __enable_cpu_timer_accounting() */
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
3093 
/* Preemption-safe wrapper around __disable_cpu_timer_accounting() */
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
3100 
3101 /* set the cpu timer - may only be called from the VCPU thread itself */
3102 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3103 {
3104 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3105 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3106 	if (vcpu->arch.cputm_enabled)
3107 		vcpu->arch.cputm_start = get_tod_clock_fast();
3108 	vcpu->arch.sie_block->cputm = cputm;
3109 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3110 	preempt_enable();
3111 }
3112 
3113 /* update and get the cpu timer - can also be called from other VCPU threads */
3114 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3115 {
3116 	unsigned int seq;
3117 	__u64 value;
3118 
3119 	if (unlikely(!vcpu->arch.cputm_enabled))
3120 		return vcpu->arch.sie_block->cputm;
3121 
3122 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3123 	do {
3124 		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3125 		/*
3126 		 * If the writer would ever execute a read in the critical
3127 		 * section, e.g. in irq context, we have a deadlock.
3128 		 */
3129 		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3130 		value = vcpu->arch.sie_block->cputm;
3131 		/* if cputm_start is 0, accounting is being started/stopped */
3132 		if (likely(vcpu->arch.cputm_start))
3133 			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3134 	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3135 	preempt_enable();
3136 	return value;
3137 }
3138 
3139 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3140 {
3141 
3142 	gmap_enable(vcpu->arch.enabled_gmap);
3143 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3144 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3145 		__start_cpu_timer_accounting(vcpu);
3146 	vcpu->cpu = cpu;
3147 }
3148 
3149 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3150 {
3151 	vcpu->cpu = -1;
3152 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3153 		__stop_cpu_timer_accounting(vcpu);
3154 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3155 	vcpu->arch.enabled_gmap = gmap_get_enabled();
3156 	gmap_disable(vcpu->arch.enabled_gmap);
3157 
3158 }
3159 
3160 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3161 {
3162 	mutex_lock(&vcpu->kvm->lock);
3163 	preempt_disable();
3164 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3165 	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3166 	preempt_enable();
3167 	mutex_unlock(&vcpu->kvm->lock);
3168 	if (!kvm_is_ucontrol(vcpu->kvm)) {
3169 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3170 		sca_add_vcpu(vcpu);
3171 	}
3172 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3173 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3174 	/* make vcpu_load load the right gmap on the first trigger */
3175 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3176 }
3177 
3178 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3179 {
3180 	if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3181 	    test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3182 		return true;
3183 	return false;
3184 }
3185 
3186 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3187 {
3188 	/* At least one ECC subfunction must be present */
3189 	return kvm_has_pckmo_subfunc(kvm, 32) ||
3190 	       kvm_has_pckmo_subfunc(kvm, 33) ||
3191 	       kvm_has_pckmo_subfunc(kvm, 34) ||
3192 	       kvm_has_pckmo_subfunc(kvm, 40) ||
3193 	       kvm_has_pckmo_subfunc(kvm, 41);
3194 
3195 }
3196 
3197 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3198 {
3199 	/*
3200 	 * If the AP instructions are not being interpreted and the MSAX3
3201 	 * facility is not configured for the guest, there is nothing to set up.
3202 	 */
3203 	if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3204 		return;
3205 
3206 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3207 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3208 	vcpu->arch.sie_block->eca &= ~ECA_APIE;
3209 	vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3210 
3211 	if (vcpu->kvm->arch.crypto.apie)
3212 		vcpu->arch.sie_block->eca |= ECA_APIE;
3213 
3214 	/* Set up protected key support */
3215 	if (vcpu->kvm->arch.crypto.aes_kw) {
3216 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3217 		/* ecc is also wrapped with AES key */
3218 		if (kvm_has_pckmo_ecc(vcpu->kvm))
3219 			vcpu->arch.sie_block->ecd |= ECD_ECC;
3220 	}
3221 
3222 	if (vcpu->kvm->arch.crypto.dea_kw)
3223 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3224 }
3225 
3226 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3227 {
3228 	free_page(vcpu->arch.sie_block->cbrlo);
3229 	vcpu->arch.sie_block->cbrlo = 0;
3230 }
3231 
3232 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3233 {
3234 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
3235 	if (!vcpu->arch.sie_block->cbrlo)
3236 		return -ENOMEM;
3237 	return 0;
3238 }
3239 
3240 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3241 {
3242 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3243 
3244 	vcpu->arch.sie_block->ibc = model->ibc;
3245 	if (test_kvm_facility(vcpu->kvm, 7))
3246 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3247 }
3248 
3249 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3250 {
3251 	int rc = 0;
3252 	u16 uvrc, uvrrc;
3253 
3254 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3255 						    CPUSTAT_SM |
3256 						    CPUSTAT_STOPPED);
3257 
3258 	if (test_kvm_facility(vcpu->kvm, 78))
3259 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3260 	else if (test_kvm_facility(vcpu->kvm, 8))
3261 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3262 
3263 	kvm_s390_vcpu_setup_model(vcpu);
3264 
3265 	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3266 	if (MACHINE_HAS_ESOP)
3267 		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3268 	if (test_kvm_facility(vcpu->kvm, 9))
3269 		vcpu->arch.sie_block->ecb |= ECB_SRSI;
3270 	if (test_kvm_facility(vcpu->kvm, 73))
3271 		vcpu->arch.sie_block->ecb |= ECB_TE;
3272 	if (!kvm_is_ucontrol(vcpu->kvm))
3273 		vcpu->arch.sie_block->ecb |= ECB_SPECI;
3274 
3275 	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3276 		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3277 	if (test_kvm_facility(vcpu->kvm, 130))
3278 		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3279 	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3280 	if (sclp.has_cei)
3281 		vcpu->arch.sie_block->eca |= ECA_CEI;
3282 	if (sclp.has_ib)
3283 		vcpu->arch.sie_block->eca |= ECA_IB;
3284 	if (sclp.has_siif)
3285 		vcpu->arch.sie_block->eca |= ECA_SII;
3286 	if (sclp.has_sigpif)
3287 		vcpu->arch.sie_block->eca |= ECA_SIGPI;
3288 	if (test_kvm_facility(vcpu->kvm, 129)) {
3289 		vcpu->arch.sie_block->eca |= ECA_VX;
3290 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3291 	}
3292 	if (test_kvm_facility(vcpu->kvm, 139))
3293 		vcpu->arch.sie_block->ecd |= ECD_MEF;
3294 	if (test_kvm_facility(vcpu->kvm, 156))
3295 		vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3296 	if (vcpu->arch.sie_block->gd) {
3297 		vcpu->arch.sie_block->eca |= ECA_AIV;
3298 		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3299 			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3300 	}
3301 	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3302 					| SDNXC;
3303 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3304 
3305 	if (sclp.has_kss)
3306 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3307 	else
3308 		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3309 
3310 	if (vcpu->kvm->arch.use_cmma) {
3311 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
3312 		if (rc)
3313 			return rc;
3314 	}
3315 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3316 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3317 
3318 	vcpu->arch.sie_block->hpid = HPID_KVM;
3319 
3320 	kvm_s390_vcpu_crypto_setup(vcpu);
3321 
3322 	mutex_lock(&vcpu->kvm->lock);
3323 	if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3324 		rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3325 		if (rc)
3326 			kvm_s390_vcpu_unsetup_cmma(vcpu);
3327 	}
3328 	mutex_unlock(&vcpu->kvm->lock);
3329 
3330 	return rc;
3331 }
3332 
3333 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3334 {
3335 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3336 		return -EINVAL;
3337 	return 0;
3338 }
3339 
3340 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3341 {
3342 	struct sie_page *sie_page;
3343 	int rc;
3344 
3345 	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3346 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3347 	if (!sie_page)
3348 		return -ENOMEM;
3349 
3350 	vcpu->arch.sie_block = &sie_page->sie_block;
3351 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3352 
3353 	/* the real guest size will always be smaller than msl */
3354 	vcpu->arch.sie_block->mso = 0;
3355 	vcpu->arch.sie_block->msl = sclp.hamax;
3356 
3357 	vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3358 	spin_lock_init(&vcpu->arch.local_int.lock);
3359 	vcpu->arch.sie_block->gd = kvm_s390_get_gisa_desc(vcpu->kvm);
3360 	seqcount_init(&vcpu->arch.cputm_seqcount);
3361 
3362 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3363 	kvm_clear_async_pf_completion_queue(vcpu);
3364 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3365 				    KVM_SYNC_GPRS |
3366 				    KVM_SYNC_ACRS |
3367 				    KVM_SYNC_CRS |
3368 				    KVM_SYNC_ARCH0 |
3369 				    KVM_SYNC_PFAULT |
3370 				    KVM_SYNC_DIAG318;
3371 	kvm_s390_set_prefix(vcpu, 0);
3372 	if (test_kvm_facility(vcpu->kvm, 64))
3373 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3374 	if (test_kvm_facility(vcpu->kvm, 82))
3375 		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3376 	if (test_kvm_facility(vcpu->kvm, 133))
3377 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3378 	if (test_kvm_facility(vcpu->kvm, 156))
3379 		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3380 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
3381 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3382 	 */
3383 	if (MACHINE_HAS_VX)
3384 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3385 	else
3386 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3387 
3388 	if (kvm_is_ucontrol(vcpu->kvm)) {
3389 		rc = __kvm_ucontrol_vcpu_init(vcpu);
3390 		if (rc)
3391 			goto out_free_sie_block;
3392 	}
3393 
3394 	VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3395 		 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3396 	trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3397 
3398 	rc = kvm_s390_vcpu_setup(vcpu);
3399 	if (rc)
3400 		goto out_ucontrol_uninit;
3401 	return 0;
3402 
3403 out_ucontrol_uninit:
3404 	if (kvm_is_ucontrol(vcpu->kvm))
3405 		gmap_remove(vcpu->arch.gmap);
3406 out_free_sie_block:
3407 	free_page((unsigned long)(vcpu->arch.sie_block));
3408 	return rc;
3409 }
3410 
3411 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3412 {
3413 	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
3414 	return kvm_s390_vcpu_has_irq(vcpu, 0);
3415 }
3416 
3417 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3418 {
3419 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3420 }
3421 
3422 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3423 {
3424 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3425 	exit_sie(vcpu);
3426 }
3427 
3428 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3429 {
3430 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3431 }
3432 
3433 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3434 {
3435 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3436 	exit_sie(vcpu);
3437 }
3438 
3439 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3440 {
3441 	return atomic_read(&vcpu->arch.sie_block->prog20) &
3442 	       (PROG_BLOCK_SIE | PROG_REQUEST);
3443 }
3444 
3445 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3446 {
3447 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3448 }
3449 
3450 /*
3451  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3452  * If the CPU is not running (e.g. waiting as idle) the function will
3453  * return immediately. */
3454 void exit_sie(struct kvm_vcpu *vcpu)
3455 {
3456 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3457 	kvm_s390_vsie_kick(vcpu);
3458 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3459 		cpu_relax();
3460 }
3461 
/*
 * Kick a guest cpu out of SIE to process a request synchronously: the
 * request is recorded first, then PROG_REQUEST is set and SIE is left.
 */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	__kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
3468 
3469 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3470 			      unsigned long end)
3471 {
3472 	struct kvm *kvm = gmap->private;
3473 	struct kvm_vcpu *vcpu;
3474 	unsigned long prefix;
3475 	unsigned long i;
3476 
3477 	if (gmap_is_shadow(gmap))
3478 		return;
3479 	if (start >= 1UL << 31)
3480 		/* We are only interested in prefix pages */
3481 		return;
3482 	kvm_for_each_vcpu(i, vcpu, kvm) {
3483 		/* match against both prefix pages */
3484 		prefix = kvm_s390_get_prefix(vcpu);
3485 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3486 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3487 				   start, end);
3488 			kvm_s390_sync_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
3489 		}
3490 	}
3491 }
3492 
3493 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3494 {
3495 	/* do not poll with more than halt_poll_max_steal percent of steal time */
3496 	if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3497 	    READ_ONCE(halt_poll_max_steal)) {
3498 		vcpu->stat.halt_no_poll_steal++;
3499 		return true;
3500 	}
3501 	return false;
3502 }
3503 
/*
 * kvm common code refers to this, but never calls it - hence any call is
 * treated as a bug.
 */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	BUG();
	return 0;
}
3510 
3511 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3512 					   struct kvm_one_reg *reg)
3513 {
3514 	int r = -EINVAL;
3515 
3516 	switch (reg->id) {
3517 	case KVM_REG_S390_TODPR:
3518 		r = put_user(vcpu->arch.sie_block->todpr,
3519 			     (u32 __user *)reg->addr);
3520 		break;
3521 	case KVM_REG_S390_EPOCHDIFF:
3522 		r = put_user(vcpu->arch.sie_block->epoch,
3523 			     (u64 __user *)reg->addr);
3524 		break;
3525 	case KVM_REG_S390_CPU_TIMER:
3526 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
3527 			     (u64 __user *)reg->addr);
3528 		break;
3529 	case KVM_REG_S390_CLOCK_COMP:
3530 		r = put_user(vcpu->arch.sie_block->ckc,
3531 			     (u64 __user *)reg->addr);
3532 		break;
3533 	case KVM_REG_S390_PFTOKEN:
3534 		r = put_user(vcpu->arch.pfault_token,
3535 			     (u64 __user *)reg->addr);
3536 		break;
3537 	case KVM_REG_S390_PFCOMPARE:
3538 		r = put_user(vcpu->arch.pfault_compare,
3539 			     (u64 __user *)reg->addr);
3540 		break;
3541 	case KVM_REG_S390_PFSELECT:
3542 		r = put_user(vcpu->arch.pfault_select,
3543 			     (u64 __user *)reg->addr);
3544 		break;
3545 	case KVM_REG_S390_PP:
3546 		r = put_user(vcpu->arch.sie_block->pp,
3547 			     (u64 __user *)reg->addr);
3548 		break;
3549 	case KVM_REG_S390_GBEA:
3550 		r = put_user(vcpu->arch.sie_block->gbea,
3551 			     (u64 __user *)reg->addr);
3552 		break;
3553 	default:
3554 		break;
3555 	}
3556 
3557 	return r;
3558 }
3559 
3560 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3561 					   struct kvm_one_reg *reg)
3562 {
3563 	int r = -EINVAL;
3564 	__u64 val;
3565 
3566 	switch (reg->id) {
3567 	case KVM_REG_S390_TODPR:
3568 		r = get_user(vcpu->arch.sie_block->todpr,
3569 			     (u32 __user *)reg->addr);
3570 		break;
3571 	case KVM_REG_S390_EPOCHDIFF:
3572 		r = get_user(vcpu->arch.sie_block->epoch,
3573 			     (u64 __user *)reg->addr);
3574 		break;
3575 	case KVM_REG_S390_CPU_TIMER:
3576 		r = get_user(val, (u64 __user *)reg->addr);
3577 		if (!r)
3578 			kvm_s390_set_cpu_timer(vcpu, val);
3579 		break;
3580 	case KVM_REG_S390_CLOCK_COMP:
3581 		r = get_user(vcpu->arch.sie_block->ckc,
3582 			     (u64 __user *)reg->addr);
3583 		break;
3584 	case KVM_REG_S390_PFTOKEN:
3585 		r = get_user(vcpu->arch.pfault_token,
3586 			     (u64 __user *)reg->addr);
3587 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3588 			kvm_clear_async_pf_completion_queue(vcpu);
3589 		break;
3590 	case KVM_REG_S390_PFCOMPARE:
3591 		r = get_user(vcpu->arch.pfault_compare,
3592 			     (u64 __user *)reg->addr);
3593 		break;
3594 	case KVM_REG_S390_PFSELECT:
3595 		r = get_user(vcpu->arch.pfault_select,
3596 			     (u64 __user *)reg->addr);
3597 		break;
3598 	case KVM_REG_S390_PP:
3599 		r = get_user(vcpu->arch.sie_block->pp,
3600 			     (u64 __user *)reg->addr);
3601 		break;
3602 	case KVM_REG_S390_GBEA:
3603 		r = get_user(vcpu->arch.sie_block->gbea,
3604 			     (u64 __user *)reg->addr);
3605 		break;
3606 	default:
3607 		break;
3608 	}
3609 
3610 	return r;
3611 }
3612 
3613 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3614 {
3615 	vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3616 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3617 	memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3618 
3619 	kvm_clear_async_pf_completion_queue(vcpu);
3620 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3621 		kvm_s390_vcpu_stop(vcpu);
3622 	kvm_s390_clear_local_irqs(vcpu);
3623 }
3624 
3625 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3626 {
3627 	/* Initial reset is a superset of the normal reset */
3628 	kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3629 
3630 	/*
3631 	 * This equals initial cpu reset in pop, but we don't switch to ESA.
3632 	 * We do not only reset the internal data, but also ...
3633 	 */
3634 	vcpu->arch.sie_block->gpsw.mask = 0;
3635 	vcpu->arch.sie_block->gpsw.addr = 0;
3636 	kvm_s390_set_prefix(vcpu, 0);
3637 	kvm_s390_set_cpu_timer(vcpu, 0);
3638 	vcpu->arch.sie_block->ckc = 0;
3639 	memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3640 	vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3641 	vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3642 
3643 	/* ... the data in sync regs */
3644 	memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3645 	vcpu->run->s.regs.ckc = 0;
3646 	vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3647 	vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3648 	vcpu->run->psw_addr = 0;
3649 	vcpu->run->psw_mask = 0;
3650 	vcpu->run->s.regs.todpr = 0;
3651 	vcpu->run->s.regs.cputm = 0;
3652 	vcpu->run->s.regs.ckc = 0;
3653 	vcpu->run->s.regs.pp = 0;
3654 	vcpu->run->s.regs.gbea = 1;
3655 	vcpu->run->s.regs.fpc = 0;
3656 	/*
3657 	 * Do not reset these registers in the protected case, as some of
3658 	 * them are overlayed and they are not accessible in this case
3659 	 * anyway.
3660 	 */
3661 	if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3662 		vcpu->arch.sie_block->gbea = 1;
3663 		vcpu->arch.sie_block->pp = 0;
3664 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3665 		vcpu->arch.sie_block->todpr = 0;
3666 	}
3667 }
3668 
3669 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3670 {
3671 	struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3672 
3673 	/* Clear reset is a superset of the initial reset */
3674 	kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3675 
3676 	memset(&regs->gprs, 0, sizeof(regs->gprs));
3677 	memset(&regs->vrs, 0, sizeof(regs->vrs));
3678 	memset(&regs->acrs, 0, sizeof(regs->acrs));
3679 	memset(&regs->gscb, 0, sizeof(regs->gscb));
3680 
3681 	regs->etoken = 0;
3682 	regs->etoken_extension = 0;
3683 }
3684 
3685 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3686 {
3687 	vcpu_load(vcpu);
3688 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3689 	vcpu_put(vcpu);
3690 	return 0;
3691 }
3692 
3693 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3694 {
3695 	vcpu_load(vcpu);
3696 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3697 	vcpu_put(vcpu);
3698 	return 0;
3699 }
3700 
3701 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3702 				  struct kvm_sregs *sregs)
3703 {
3704 	vcpu_load(vcpu);
3705 
3706 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3707 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3708 
3709 	vcpu_put(vcpu);
3710 	return 0;
3711 }
3712 
3713 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3714 				  struct kvm_sregs *sregs)
3715 {
3716 	vcpu_load(vcpu);
3717 
3718 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3719 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3720 
3721 	vcpu_put(vcpu);
3722 	return 0;
3723 }
3724 
3725 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3726 {
3727 	int ret = 0;
3728 
3729 	vcpu_load(vcpu);
3730 
3731 	if (test_fp_ctl(fpu->fpc)) {
3732 		ret = -EINVAL;
3733 		goto out;
3734 	}
3735 	vcpu->run->s.regs.fpc = fpu->fpc;
3736 	if (MACHINE_HAS_VX)
3737 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3738 				 (freg_t *) fpu->fprs);
3739 	else
3740 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3741 
3742 out:
3743 	vcpu_put(vcpu);
3744 	return ret;
3745 }
3746 
3747 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3748 {
3749 	vcpu_load(vcpu);
3750 
3751 	/* make sure we have the latest values */
3752 	save_fpu_regs();
3753 	if (MACHINE_HAS_VX)
3754 		convert_vx_to_fp((freg_t *) fpu->fprs,
3755 				 (__vector128 *) vcpu->run->s.regs.vrs);
3756 	else
3757 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3758 	fpu->fpc = vcpu->run->s.regs.fpc;
3759 
3760 	vcpu_put(vcpu);
3761 	return 0;
3762 }
3763 
3764 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3765 {
3766 	int rc = 0;
3767 
3768 	if (!is_vcpu_stopped(vcpu))
3769 		rc = -EBUSY;
3770 	else {
3771 		vcpu->run->psw_mask = psw.mask;
3772 		vcpu->run->psw_addr = psw.addr;
3773 	}
3774 	return rc;
3775 }
3776 
3777 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3778 				  struct kvm_translation *tr)
3779 {
3780 	return -EINVAL; /* not implemented yet */
3781 }
3782 
3783 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3784 			      KVM_GUESTDBG_USE_HW_BP | \
3785 			      KVM_GUESTDBG_ENABLE)
3786 
3787 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3788 					struct kvm_guest_debug *dbg)
3789 {
3790 	int rc = 0;
3791 
3792 	vcpu_load(vcpu);
3793 
3794 	vcpu->guest_debug = 0;
3795 	kvm_s390_clear_bp_data(vcpu);
3796 
3797 	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3798 		rc = -EINVAL;
3799 		goto out;
3800 	}
3801 	if (!sclp.has_gpere) {
3802 		rc = -EINVAL;
3803 		goto out;
3804 	}
3805 
3806 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
3807 		vcpu->guest_debug = dbg->control;
3808 		/* enforce guest PER */
3809 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3810 
3811 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3812 			rc = kvm_s390_import_bp_data(vcpu, dbg);
3813 	} else {
3814 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3815 		vcpu->arch.guestdbg.last_bp = 0;
3816 	}
3817 
3818 	if (rc) {
3819 		vcpu->guest_debug = 0;
3820 		kvm_s390_clear_bp_data(vcpu);
3821 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3822 	}
3823 
3824 out:
3825 	vcpu_put(vcpu);
3826 	return rc;
3827 }
3828 
3829 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3830 				    struct kvm_mp_state *mp_state)
3831 {
3832 	int ret;
3833 
3834 	vcpu_load(vcpu);
3835 
3836 	/* CHECK_STOP and LOAD are not supported yet */
3837 	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3838 				      KVM_MP_STATE_OPERATING;
3839 
3840 	vcpu_put(vcpu);
3841 	return ret;
3842 }
3843 
3844 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3845 				    struct kvm_mp_state *mp_state)
3846 {
3847 	int rc = 0;
3848 
3849 	vcpu_load(vcpu);
3850 
3851 	/* user space knows about this interface - let it control the state */
3852 	kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm);
3853 
3854 	switch (mp_state->mp_state) {
3855 	case KVM_MP_STATE_STOPPED:
3856 		rc = kvm_s390_vcpu_stop(vcpu);
3857 		break;
3858 	case KVM_MP_STATE_OPERATING:
3859 		rc = kvm_s390_vcpu_start(vcpu);
3860 		break;
3861 	case KVM_MP_STATE_LOAD:
3862 		if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3863 			rc = -ENXIO;
3864 			break;
3865 		}
3866 		rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3867 		break;
3868 	case KVM_MP_STATE_CHECK_STOP:
3869 		fallthrough;	/* CHECK_STOP and LOAD are not supported yet */
3870 	default:
3871 		rc = -ENXIO;
3872 	}
3873 
3874 	vcpu_put(vcpu);
3875 	return rc;
3876 }
3877 
3878 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3879 {
3880 	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3881 }
3882 
3883 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3884 {
3885 retry:
3886 	kvm_s390_vcpu_request_handled(vcpu);
3887 	if (!kvm_request_pending(vcpu))
3888 		return 0;
3889 	/*
3890 	 * If the guest prefix changed, re-arm the ipte notifier for the
3891 	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3892 	 * This ensures that the ipte instruction for this request has
3893 	 * already finished. We might race against a second unmapper that
3894 	 * wants to set the blocking bit. Lets just retry the request loop.
3895 	 */
3896 	if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) {
3897 		int rc;
3898 		rc = gmap_mprotect_notify(vcpu->arch.gmap,
3899 					  kvm_s390_get_prefix(vcpu),
3900 					  PAGE_SIZE * 2, PROT_WRITE);
3901 		if (rc) {
3902 			kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
3903 			return rc;
3904 		}
3905 		goto retry;
3906 	}
3907 
3908 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3909 		vcpu->arch.sie_block->ihcpu = 0xffff;
3910 		goto retry;
3911 	}
3912 
3913 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3914 		if (!ibs_enabled(vcpu)) {
3915 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3916 			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3917 		}
3918 		goto retry;
3919 	}
3920 
3921 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3922 		if (ibs_enabled(vcpu)) {
3923 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3924 			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3925 		}
3926 		goto retry;
3927 	}
3928 
3929 	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3930 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3931 		goto retry;
3932 	}
3933 
3934 	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3935 		/*
3936 		 * Disable CMM virtualization; we will emulate the ESSA
3937 		 * instruction manually, in order to provide additional
3938 		 * functionalities needed for live migration.
3939 		 */
3940 		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3941 		goto retry;
3942 	}
3943 
3944 	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3945 		/*
3946 		 * Re-enable CMM virtualization if CMMA is available and
3947 		 * CMM has been used.
3948 		 */
3949 		if ((vcpu->kvm->arch.use_cmma) &&
3950 		    (vcpu->kvm->mm->context.uses_cmm))
3951 			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3952 		goto retry;
3953 	}
3954 
3955 	/* nothing to do, just clear the request */
3956 	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3957 	/* we left the vsie handler, nothing to do, just clear the request */
3958 	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3959 
3960 	return 0;
3961 }
3962 
3963 static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
3964 {
3965 	struct kvm_vcpu *vcpu;
3966 	union tod_clock clk;
3967 	unsigned long i;
3968 
3969 	preempt_disable();
3970 
3971 	store_tod_clock_ext(&clk);
3972 
3973 	kvm->arch.epoch = gtod->tod - clk.tod;
3974 	kvm->arch.epdx = 0;
3975 	if (test_kvm_facility(kvm, 139)) {
3976 		kvm->arch.epdx = gtod->epoch_idx - clk.ei;
3977 		if (kvm->arch.epoch > gtod->tod)
3978 			kvm->arch.epdx -= 1;
3979 	}
3980 
3981 	kvm_s390_vcpu_block_all(kvm);
3982 	kvm_for_each_vcpu(i, vcpu, kvm) {
3983 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3984 		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3985 	}
3986 
3987 	kvm_s390_vcpu_unblock_all(kvm);
3988 	preempt_enable();
3989 }
3990 
3991 void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
3992 {
3993 	mutex_lock(&kvm->lock);
3994 	__kvm_s390_set_tod_clock(kvm, gtod);
3995 	mutex_unlock(&kvm->lock);
3996 }
3997 
3998 int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
3999 {
4000 	if (!mutex_trylock(&kvm->lock))
4001 		return 0;
4002 	__kvm_s390_set_tod_clock(kvm, gtod);
4003 	mutex_unlock(&kvm->lock);
4004 	return 1;
4005 }
4006 
4007 /**
4008  * kvm_arch_fault_in_page - fault-in guest page if necessary
4009  * @vcpu: The corresponding virtual cpu
4010  * @gpa: Guest physical address
4011  * @writable: Whether the page should be writable or not
4012  *
4013  * Make sure that a guest page has been faulted-in on the host.
4014  *
4015  * Return: Zero on success, negative error code otherwise.
4016  */
4017 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
4018 {
4019 	return gmap_fault(vcpu->arch.gmap, gpa,
4020 			  writable ? FAULT_FLAG_WRITE : 0);
4021 }
4022 
4023 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
4024 				      unsigned long token)
4025 {
4026 	struct kvm_s390_interrupt inti;
4027 	struct kvm_s390_irq irq;
4028 
4029 	if (start_token) {
4030 		irq.u.ext.ext_params2 = token;
4031 		irq.type = KVM_S390_INT_PFAULT_INIT;
4032 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
4033 	} else {
4034 		inti.type = KVM_S390_INT_PFAULT_DONE;
4035 		inti.parm64 = token;
4036 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
4037 	}
4038 }
4039 
4040 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
4041 				     struct kvm_async_pf *work)
4042 {
4043 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
4044 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
4045 
4046 	return true;
4047 }
4048 
4049 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
4050 				 struct kvm_async_pf *work)
4051 {
4052 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
4053 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
4054 }
4055 
/*
 * Async pf hook, intentionally empty: s390 will always inject the page
 * directly, so there is nothing left to do here.
 */
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
}
4061 
4062 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
4063 {
4064 	/*
4065 	 * s390 will always inject the page directly,
4066 	 * but we still want check_async_completion to cleanup
4067 	 */
4068 	return true;
4069 }
4070 
4071 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
4072 {
4073 	hva_t hva;
4074 	struct kvm_arch_async_pf arch;
4075 
4076 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4077 		return false;
4078 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
4079 	    vcpu->arch.pfault_compare)
4080 		return false;
4081 	if (psw_extint_disabled(vcpu))
4082 		return false;
4083 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
4084 		return false;
4085 	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
4086 		return false;
4087 	if (!vcpu->arch.gmap->pfault_enabled)
4088 		return false;
4089 
4090 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
4091 	hva += current->thread.gmap_addr & ~PAGE_MASK;
4092 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
4093 		return false;
4094 
4095 	return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
4096 }
4097 
4098 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
4099 {
4100 	int rc, cpuflags;
4101 
4102 	/*
4103 	 * On s390 notifications for arriving pages will be delivered directly
4104 	 * to the guest but the house keeping for completed pfaults is
4105 	 * handled outside the worker.
4106 	 */
4107 	kvm_check_async_pf_completion(vcpu);
4108 
4109 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4110 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4111 
4112 	if (need_resched())
4113 		schedule();
4114 
4115 	if (!kvm_is_ucontrol(vcpu->kvm)) {
4116 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
4117 		if (rc)
4118 			return rc;
4119 	}
4120 
4121 	rc = kvm_s390_handle_requests(vcpu);
4122 	if (rc)
4123 		return rc;
4124 
4125 	if (guestdbg_enabled(vcpu)) {
4126 		kvm_s390_backup_guest_per_regs(vcpu);
4127 		kvm_s390_patch_guest_per_regs(vcpu);
4128 	}
4129 
4130 	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
4131 
4132 	vcpu->arch.sie_block->icptcode = 0;
4133 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4134 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4135 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
4136 
4137 	return 0;
4138 }
4139 
4140 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4141 {
4142 	struct kvm_s390_pgm_info pgm_info = {
4143 		.code = PGM_ADDRESSING,
4144 	};
4145 	u8 opcode, ilen;
4146 	int rc;
4147 
4148 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4149 	trace_kvm_s390_sie_fault(vcpu);
4150 
4151 	/*
4152 	 * We want to inject an addressing exception, which is defined as a
4153 	 * suppressing or terminating exception. However, since we came here
4154 	 * by a DAT access exception, the PSW still points to the faulting
4155 	 * instruction since DAT exceptions are nullifying. So we've got
4156 	 * to look up the current opcode to get the length of the instruction
4157 	 * to be able to forward the PSW.
4158 	 */
4159 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4160 	ilen = insn_length(opcode);
4161 	if (rc < 0) {
4162 		return rc;
4163 	} else if (rc) {
4164 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
4165 		 * Forward by arbitrary ilc, injection will take care of
4166 		 * nullification if necessary.
4167 		 */
4168 		pgm_info = vcpu->arch.pgm;
4169 		ilen = 4;
4170 	}
4171 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4172 	kvm_s390_forward_psw(vcpu, ilen);
4173 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4174 }
4175 
4176 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4177 {
4178 	struct mcck_volatile_info *mcck_info;
4179 	struct sie_page *sie_page;
4180 
4181 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4182 		   vcpu->arch.sie_block->icptcode);
4183 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4184 
4185 	if (guestdbg_enabled(vcpu))
4186 		kvm_s390_restore_guest_per_regs(vcpu);
4187 
4188 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4189 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4190 
4191 	if (exit_reason == -EINTR) {
4192 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
4193 		sie_page = container_of(vcpu->arch.sie_block,
4194 					struct sie_page, sie_block);
4195 		mcck_info = &sie_page->mcck_info;
4196 		kvm_s390_reinject_machine_check(vcpu, mcck_info);
4197 		return 0;
4198 	}
4199 
4200 	if (vcpu->arch.sie_block->icptcode > 0) {
4201 		int rc = kvm_handle_sie_intercept(vcpu);
4202 
4203 		if (rc != -EOPNOTSUPP)
4204 			return rc;
4205 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4206 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4207 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4208 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4209 		return -EREMOTE;
4210 	} else if (exit_reason != -EFAULT) {
4211 		vcpu->stat.exit_null++;
4212 		return 0;
4213 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
4214 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4215 		vcpu->run->s390_ucontrol.trans_exc_code =
4216 						current->thread.gmap_addr;
4217 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
4218 		return -EREMOTE;
4219 	} else if (current->thread.gmap_pfault) {
4220 		trace_kvm_s390_major_guest_pfault(vcpu);
4221 		current->thread.gmap_pfault = 0;
4222 		if (kvm_arch_setup_async_pf(vcpu))
4223 			return 0;
4224 		vcpu->stat.pfault_sync++;
4225 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4226 	}
4227 	return vcpu_post_run_fault_in_sie(vcpu);
4228 }
4229 
4230 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4231 static int __vcpu_run(struct kvm_vcpu *vcpu)
4232 {
4233 	int rc, exit_reason;
4234 	struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4235 
4236 	/*
4237 	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
4238 	 * ning the guest), so that memslots (and other stuff) are protected
4239 	 */
4240 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4241 
4242 	do {
4243 		rc = vcpu_pre_run(vcpu);
4244 		if (rc)
4245 			break;
4246 
4247 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4248 		/*
4249 		 * As PF_VCPU will be used in fault handler, between
4250 		 * guest_enter and guest_exit should be no uaccess.
4251 		 */
4252 		local_irq_disable();
4253 		guest_enter_irqoff();
4254 		__disable_cpu_timer_accounting(vcpu);
4255 		local_irq_enable();
4256 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4257 			memcpy(sie_page->pv_grregs,
4258 			       vcpu->run->s.regs.gprs,
4259 			       sizeof(sie_page->pv_grregs));
4260 		}
4261 		if (test_cpu_flag(CIF_FPU))
4262 			load_fpu_regs();
4263 		exit_reason = sie64a(vcpu->arch.sie_block,
4264 				     vcpu->run->s.regs.gprs);
4265 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4266 			memcpy(vcpu->run->s.regs.gprs,
4267 			       sie_page->pv_grregs,
4268 			       sizeof(sie_page->pv_grregs));
4269 			/*
4270 			 * We're not allowed to inject interrupts on intercepts
4271 			 * that leave the guest state in an "in-between" state
4272 			 * where the next SIE entry will do a continuation.
4273 			 * Fence interrupts in our "internal" PSW.
4274 			 */
4275 			if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4276 			    vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4277 				vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4278 			}
4279 		}
4280 		local_irq_disable();
4281 		__enable_cpu_timer_accounting(vcpu);
4282 		guest_exit_irqoff();
4283 		local_irq_enable();
4284 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4285 
4286 		rc = vcpu_post_run(vcpu, exit_reason);
4287 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4288 
4289 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4290 	return rc;
4291 }
4292 
4293 static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4294 {
4295 	struct kvm_run *kvm_run = vcpu->run;
4296 	struct runtime_instr_cb *riccb;
4297 	struct gs_cb *gscb;
4298 
4299 	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4300 	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4301 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4302 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4303 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4304 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4305 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4306 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4307 	}
4308 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4309 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4310 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4311 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4312 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4313 			kvm_clear_async_pf_completion_queue(vcpu);
4314 	}
4315 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4316 		vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4317 		vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4318 		VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc);
4319 	}
4320 	/*
4321 	 * If userspace sets the riccb (e.g. after migration) to a valid state,
4322 	 * we should enable RI here instead of doing the lazy enablement.
4323 	 */
4324 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4325 	    test_kvm_facility(vcpu->kvm, 64) &&
4326 	    riccb->v &&
4327 	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4328 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4329 		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4330 	}
4331 	/*
4332 	 * If userspace sets the gscb (e.g. after migration) to non-zero,
4333 	 * we should enable GS here instead of doing the lazy enablement.
4334 	 */
4335 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4336 	    test_kvm_facility(vcpu->kvm, 133) &&
4337 	    gscb->gssm &&
4338 	    !vcpu->arch.gs_enabled) {
4339 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4340 		vcpu->arch.sie_block->ecb |= ECB_GS;
4341 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4342 		vcpu->arch.gs_enabled = 1;
4343 	}
4344 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4345 	    test_kvm_facility(vcpu->kvm, 82)) {
4346 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4347 		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4348 	}
4349 	if (MACHINE_HAS_GS) {
4350 		preempt_disable();
4351 		__ctl_set_bit(2, 4);
4352 		if (current->thread.gs_cb) {
4353 			vcpu->arch.host_gscb = current->thread.gs_cb;
4354 			save_gs_cb(vcpu->arch.host_gscb);
4355 		}
4356 		if (vcpu->arch.gs_enabled) {
4357 			current->thread.gs_cb = (struct gs_cb *)
4358 						&vcpu->run->s.regs.gscb;
4359 			restore_gs_cb(current->thread.gs_cb);
4360 		}
4361 		preempt_enable();
4362 	}
4363 	/* SIE will load etoken directly from SDNX and therefore kvm_run */
4364 }
4365 
4366 static void sync_regs(struct kvm_vcpu *vcpu)
4367 {
4368 	struct kvm_run *kvm_run = vcpu->run;
4369 
4370 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4371 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4372 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4373 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4374 		/* some control register changes require a tlb flush */
4375 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4376 	}
4377 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4378 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4379 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4380 	}
4381 	save_access_regs(vcpu->arch.host_acrs);
4382 	restore_access_regs(vcpu->run->s.regs.acrs);
4383 	/* save host (userspace) fprs/vrs */
4384 	save_fpu_regs();
4385 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4386 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4387 	if (MACHINE_HAS_VX)
4388 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4389 	else
4390 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4391 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4392 	if (test_fp_ctl(current->thread.fpu.fpc))
4393 		/* User space provided an invalid FPC, let's clear it */
4394 		current->thread.fpu.fpc = 0;
4395 
4396 	/* Sync fmt2 only data */
4397 	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4398 		sync_regs_fmt2(vcpu);
4399 	} else {
4400 		/*
4401 		 * In several places we have to modify our internal view to
4402 		 * not do things that are disallowed by the ultravisor. For
4403 		 * example we must not inject interrupts after specific exits
4404 		 * (e.g. 112 prefix page not secure). We do this by turning
4405 		 * off the machine check, external and I/O interrupt bits
4406 		 * of our PSW copy. To avoid getting validity intercepts, we
4407 		 * do only accept the condition code from userspace.
4408 		 */
4409 		vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4410 		vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4411 						   PSW_MASK_CC;
4412 	}
4413 
4414 	kvm_run->kvm_dirty_regs = 0;
4415 }
4416 
4417 static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4418 {
4419 	struct kvm_run *kvm_run = vcpu->run;
4420 
4421 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4422 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4423 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4424 	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4425 	kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4426 	if (MACHINE_HAS_GS) {
4427 		preempt_disable();
4428 		__ctl_set_bit(2, 4);
4429 		if (vcpu->arch.gs_enabled)
4430 			save_gs_cb(current->thread.gs_cb);
4431 		current->thread.gs_cb = vcpu->arch.host_gscb;
4432 		restore_gs_cb(vcpu->arch.host_gscb);
4433 		if (!vcpu->arch.host_gscb)
4434 			__ctl_clear_bit(2, 4);
4435 		vcpu->arch.host_gscb = NULL;
4436 		preempt_enable();
4437 	}
4438 	/* SIE will save etoken directly into SDNX and therefore kvm_run */
4439 }
4440 
4441 static void store_regs(struct kvm_vcpu *vcpu)
4442 {
4443 	struct kvm_run *kvm_run = vcpu->run;
4444 
4445 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4446 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4447 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4448 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4449 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4450 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4451 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4452 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4453 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4454 	save_access_regs(vcpu->run->s.regs.acrs);
4455 	restore_access_regs(vcpu->arch.host_acrs);
4456 	/* Save guest register state */
4457 	save_fpu_regs();
4458 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4459 	/* Restore will be done lazily at return */
4460 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4461 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4462 	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4463 		store_regs_fmt2(vcpu);
4464 }
4465 
4466 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4467 {
4468 	struct kvm_run *kvm_run = vcpu->run;
4469 	int rc;
4470 
4471 	if (kvm_run->immediate_exit)
4472 		return -EINTR;
4473 
4474 	if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4475 	    kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4476 		return -EINVAL;
4477 
4478 	vcpu_load(vcpu);
4479 
4480 	if (guestdbg_exit_pending(vcpu)) {
4481 		kvm_s390_prepare_debug_exit(vcpu);
4482 		rc = 0;
4483 		goto out;
4484 	}
4485 
4486 	kvm_sigset_activate(vcpu);
4487 
4488 	/*
4489 	 * no need to check the return value of vcpu_start as it can only have
4490 	 * an error for protvirt, but protvirt means user cpu state
4491 	 */
4492 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4493 		kvm_s390_vcpu_start(vcpu);
4494 	} else if (is_vcpu_stopped(vcpu)) {
4495 		pr_err_ratelimited("can't run stopped vcpu %d\n",
4496 				   vcpu->vcpu_id);
4497 		rc = -EINVAL;
4498 		goto out;
4499 	}
4500 
4501 	sync_regs(vcpu);
4502 	enable_cpu_timer_accounting(vcpu);
4503 
4504 	might_fault();
4505 	rc = __vcpu_run(vcpu);
4506 
4507 	if (signal_pending(current) && !rc) {
4508 		kvm_run->exit_reason = KVM_EXIT_INTR;
4509 		rc = -EINTR;
4510 	}
4511 
4512 	if (guestdbg_exit_pending(vcpu) && !rc)  {
4513 		kvm_s390_prepare_debug_exit(vcpu);
4514 		rc = 0;
4515 	}
4516 
4517 	if (rc == -EREMOTE) {
4518 		/* userspace support is needed, kvm_run has been prepared */
4519 		rc = 0;
4520 	}
4521 
4522 	disable_cpu_timer_accounting(vcpu);
4523 	store_regs(vcpu);
4524 
4525 	kvm_sigset_deactivate(vcpu);
4526 
4527 	vcpu->stat.exit_userspace++;
4528 out:
4529 	vcpu_put(vcpu);
4530 	return rc;
4531 }
4532 
4533 /*
4534  * store status at address
4535  * we use have two special cases:
4536  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4537  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4538  */
4539 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4540 {
4541 	unsigned char archmode = 1;
4542 	freg_t fprs[NUM_FPRS];
4543 	unsigned int px;
4544 	u64 clkcomp, cputm;
4545 	int rc;
4546 
4547 	px = kvm_s390_get_prefix(vcpu);
4548 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4549 		if (write_guest_abs(vcpu, 163, &archmode, 1))
4550 			return -EFAULT;
4551 		gpa = 0;
4552 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4553 		if (write_guest_real(vcpu, 163, &archmode, 1))
4554 			return -EFAULT;
4555 		gpa = px;
4556 	} else
4557 		gpa -= __LC_FPREGS_SAVE_AREA;
4558 
4559 	/* manually convert vector registers if necessary */
4560 	if (MACHINE_HAS_VX) {
4561 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4562 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4563 				     fprs, 128);
4564 	} else {
4565 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4566 				     vcpu->run->s.regs.fprs, 128);
4567 	}
4568 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4569 			      vcpu->run->s.regs.gprs, 128);
4570 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4571 			      &vcpu->arch.sie_block->gpsw, 16);
4572 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4573 			      &px, 4);
4574 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4575 			      &vcpu->run->s.regs.fpc, 4);
4576 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4577 			      &vcpu->arch.sie_block->todpr, 4);
4578 	cputm = kvm_s390_get_cpu_timer(vcpu);
4579 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4580 			      &cputm, 8);
4581 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
4582 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4583 			      &clkcomp, 8);
4584 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4585 			      &vcpu->run->s.regs.acrs, 64);
4586 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4587 			      &vcpu->arch.sie_block->gcr, 128);
4588 	return rc ? -EFAULT : 0;
4589 }
4590 
4591 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4592 {
4593 	/*
4594 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4595 	 * switch in the run ioctl. Let's update our copies before we save
4596 	 * it into the save area
4597 	 */
4598 	save_fpu_regs();
4599 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4600 	save_access_regs(vcpu->run->s.regs.acrs);
4601 
4602 	return kvm_s390_store_status_unloaded(vcpu, addr);
4603 }
4604 
4605 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4606 {
4607 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4608 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4609 }
4610 
4611 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4612 {
4613 	unsigned long i;
4614 	struct kvm_vcpu *vcpu;
4615 
4616 	kvm_for_each_vcpu(i, vcpu, kvm) {
4617 		__disable_ibs_on_vcpu(vcpu);
4618 	}
4619 }
4620 
4621 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4622 {
4623 	if (!sclp.has_ibs)
4624 		return;
4625 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4626 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4627 }
4628 
4629 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4630 {
4631 	int i, online_vcpus, r = 0, started_vcpus = 0;
4632 
4633 	if (!is_vcpu_stopped(vcpu))
4634 		return 0;
4635 
4636 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4637 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4638 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4639 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4640 
4641 	/* Let's tell the UV that we want to change into the operating state */
4642 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4643 		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4644 		if (r) {
4645 			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4646 			return r;
4647 		}
4648 	}
4649 
4650 	for (i = 0; i < online_vcpus; i++) {
4651 		if (!is_vcpu_stopped(kvm_get_vcpu(vcpu->kvm, i)))
4652 			started_vcpus++;
4653 	}
4654 
4655 	if (started_vcpus == 0) {
4656 		/* we're the only active VCPU -> speed it up */
4657 		__enable_ibs_on_vcpu(vcpu);
4658 	} else if (started_vcpus == 1) {
4659 		/*
4660 		 * As we are starting a second VCPU, we have to disable
4661 		 * the IBS facility on all VCPUs to remove potentially
4662 		 * outstanding ENABLE requests.
4663 		 */
4664 		__disable_ibs_on_all_vcpus(vcpu->kvm);
4665 	}
4666 
4667 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4668 	/*
4669 	 * The real PSW might have changed due to a RESTART interpreted by the
4670 	 * ultravisor. We block all interrupts and let the next sie exit
4671 	 * refresh our view.
4672 	 */
4673 	if (kvm_s390_pv_cpu_is_protected(vcpu))
4674 		vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4675 	/*
4676 	 * Another VCPU might have used IBS while we were offline.
4677 	 * Let's play safe and flush the VCPU at startup.
4678 	 */
4679 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4680 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4681 	return 0;
4682 }
4683 
4684 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4685 {
4686 	int i, online_vcpus, r = 0, started_vcpus = 0;
4687 	struct kvm_vcpu *started_vcpu = NULL;
4688 
4689 	if (is_vcpu_stopped(vcpu))
4690 		return 0;
4691 
4692 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4693 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4694 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4695 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4696 
4697 	/* Let's tell the UV that we want to change into the stopped state */
4698 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4699 		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4700 		if (r) {
4701 			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4702 			return r;
4703 		}
4704 	}
4705 
4706 	/*
4707 	 * Set the VCPU to STOPPED and THEN clear the interrupt flag,
4708 	 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
4709 	 * have been fully processed. This will ensure that the VCPU
4710 	 * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
4711 	 */
4712 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4713 	kvm_s390_clear_stop_irq(vcpu);
4714 
4715 	__disable_ibs_on_vcpu(vcpu);
4716 
4717 	for (i = 0; i < online_vcpus; i++) {
4718 		struct kvm_vcpu *tmp = kvm_get_vcpu(vcpu->kvm, i);
4719 
4720 		if (!is_vcpu_stopped(tmp)) {
4721 			started_vcpus++;
4722 			started_vcpu = tmp;
4723 		}
4724 	}
4725 
4726 	if (started_vcpus == 1) {
4727 		/*
4728 		 * As we only have one VCPU left, we want to enable the
4729 		 * IBS facility for that VCPU to speed it up.
4730 		 */
4731 		__enable_ibs_on_vcpu(started_vcpu);
4732 	}
4733 
4734 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4735 	return 0;
4736 }
4737 
4738 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4739 				     struct kvm_enable_cap *cap)
4740 {
4741 	int r;
4742 
4743 	if (cap->flags)
4744 		return -EINVAL;
4745 
4746 	switch (cap->cap) {
4747 	case KVM_CAP_S390_CSS_SUPPORT:
4748 		if (!vcpu->kvm->arch.css_support) {
4749 			vcpu->kvm->arch.css_support = 1;
4750 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4751 			trace_kvm_s390_enable_css(vcpu->kvm);
4752 		}
4753 		r = 0;
4754 		break;
4755 	default:
4756 		r = -EINVAL;
4757 		break;
4758 	}
4759 	return r;
4760 }
4761 
4762 static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu,
4763 				  struct kvm_s390_mem_op *mop)
4764 {
4765 	void __user *uaddr = (void __user *)mop->buf;
4766 	int r = 0;
4767 
4768 	if (mop->flags || !mop->size)
4769 		return -EINVAL;
4770 	if (mop->size + mop->sida_offset < mop->size)
4771 		return -EINVAL;
4772 	if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4773 		return -E2BIG;
4774 	if (!kvm_s390_pv_cpu_is_protected(vcpu))
4775 		return -EINVAL;
4776 
4777 	switch (mop->op) {
4778 	case KVM_S390_MEMOP_SIDA_READ:
4779 		if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4780 				 mop->sida_offset), mop->size))
4781 			r = -EFAULT;
4782 
4783 		break;
4784 	case KVM_S390_MEMOP_SIDA_WRITE:
4785 		if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4786 				   mop->sida_offset), uaddr, mop->size))
4787 			r = -EFAULT;
4788 		break;
4789 	}
4790 	return r;
4791 }
4792 
4793 static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu,
4794 				 struct kvm_s390_mem_op *mop)
4795 {
4796 	void __user *uaddr = (void __user *)mop->buf;
4797 	void *tmpbuf = NULL;
4798 	int r = 0;
4799 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4800 				    | KVM_S390_MEMOP_F_CHECK_ONLY
4801 				    | KVM_S390_MEMOP_F_SKEY_PROTECTION;
4802 
4803 	if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4804 		return -EINVAL;
4805 	if (mop->size > MEM_OP_MAX_SIZE)
4806 		return -E2BIG;
4807 	if (kvm_s390_pv_cpu_is_protected(vcpu))
4808 		return -EINVAL;
4809 	if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
4810 		if (access_key_invalid(mop->key))
4811 			return -EINVAL;
4812 	} else {
4813 		mop->key = 0;
4814 	}
4815 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4816 		tmpbuf = vmalloc(mop->size);
4817 		if (!tmpbuf)
4818 			return -ENOMEM;
4819 	}
4820 
4821 	switch (mop->op) {
4822 	case KVM_S390_MEMOP_LOGICAL_READ:
4823 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4824 			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
4825 					    GACC_FETCH, mop->key);
4826 			break;
4827 		}
4828 		r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
4829 					mop->size, mop->key);
4830 		if (r == 0) {
4831 			if (copy_to_user(uaddr, tmpbuf, mop->size))
4832 				r = -EFAULT;
4833 		}
4834 		break;
4835 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4836 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4837 			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
4838 					    GACC_STORE, mop->key);
4839 			break;
4840 		}
4841 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4842 			r = -EFAULT;
4843 			break;
4844 		}
4845 		r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
4846 					 mop->size, mop->key);
4847 		break;
4848 	}
4849 
4850 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4851 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4852 
4853 	vfree(tmpbuf);
4854 	return r;
4855 }
4856 
4857 static long kvm_s390_vcpu_memsida_op(struct kvm_vcpu *vcpu,
4858 				     struct kvm_s390_mem_op *mop)
4859 {
4860 	int r, srcu_idx;
4861 
4862 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4863 
4864 	switch (mop->op) {
4865 	case KVM_S390_MEMOP_LOGICAL_READ:
4866 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4867 		r = kvm_s390_vcpu_mem_op(vcpu, mop);
4868 		break;
4869 	case KVM_S390_MEMOP_SIDA_READ:
4870 	case KVM_S390_MEMOP_SIDA_WRITE:
4871 		/* we are locked against sida going away by the vcpu->mutex */
4872 		r = kvm_s390_vcpu_sida_op(vcpu, mop);
4873 		break;
4874 	default:
4875 		r = -EINVAL;
4876 	}
4877 
4878 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4879 	return r;
4880 }
4881 
4882 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4883 			       unsigned int ioctl, unsigned long arg)
4884 {
4885 	struct kvm_vcpu *vcpu = filp->private_data;
4886 	void __user *argp = (void __user *)arg;
4887 
4888 	switch (ioctl) {
4889 	case KVM_S390_IRQ: {
4890 		struct kvm_s390_irq s390irq;
4891 
4892 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4893 			return -EFAULT;
4894 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4895 	}
4896 	case KVM_S390_INTERRUPT: {
4897 		struct kvm_s390_interrupt s390int;
4898 		struct kvm_s390_irq s390irq = {};
4899 
4900 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
4901 			return -EFAULT;
4902 		if (s390int_to_s390irq(&s390int, &s390irq))
4903 			return -EINVAL;
4904 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4905 	}
4906 	}
4907 	return -ENOIOCTLCMD;
4908 }
4909 
4910 long kvm_arch_vcpu_ioctl(struct file *filp,
4911 			 unsigned int ioctl, unsigned long arg)
4912 {
4913 	struct kvm_vcpu *vcpu = filp->private_data;
4914 	void __user *argp = (void __user *)arg;
4915 	int idx;
4916 	long r;
4917 	u16 rc, rrc;
4918 
4919 	vcpu_load(vcpu);
4920 
4921 	switch (ioctl) {
4922 	case KVM_S390_STORE_STATUS:
4923 		idx = srcu_read_lock(&vcpu->kvm->srcu);
4924 		r = kvm_s390_store_status_unloaded(vcpu, arg);
4925 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
4926 		break;
4927 	case KVM_S390_SET_INITIAL_PSW: {
4928 		psw_t psw;
4929 
4930 		r = -EFAULT;
4931 		if (copy_from_user(&psw, argp, sizeof(psw)))
4932 			break;
4933 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4934 		break;
4935 	}
4936 	case KVM_S390_CLEAR_RESET:
4937 		r = 0;
4938 		kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4939 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4940 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4941 					  UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4942 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4943 				   rc, rrc);
4944 		}
4945 		break;
4946 	case KVM_S390_INITIAL_RESET:
4947 		r = 0;
4948 		kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4949 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4950 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4951 					  UVC_CMD_CPU_RESET_INITIAL,
4952 					  &rc, &rrc);
4953 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4954 				   rc, rrc);
4955 		}
4956 		break;
4957 	case KVM_S390_NORMAL_RESET:
4958 		r = 0;
4959 		kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4960 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4961 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4962 					  UVC_CMD_CPU_RESET, &rc, &rrc);
4963 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4964 				   rc, rrc);
4965 		}
4966 		break;
4967 	case KVM_SET_ONE_REG:
4968 	case KVM_GET_ONE_REG: {
4969 		struct kvm_one_reg reg;
4970 		r = -EINVAL;
4971 		if (kvm_s390_pv_cpu_is_protected(vcpu))
4972 			break;
4973 		r = -EFAULT;
4974 		if (copy_from_user(&reg, argp, sizeof(reg)))
4975 			break;
4976 		if (ioctl == KVM_SET_ONE_REG)
4977 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4978 		else
4979 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4980 		break;
4981 	}
4982 #ifdef CONFIG_KVM_S390_UCONTROL
4983 	case KVM_S390_UCAS_MAP: {
4984 		struct kvm_s390_ucas_mapping ucasmap;
4985 
4986 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4987 			r = -EFAULT;
4988 			break;
4989 		}
4990 
4991 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4992 			r = -EINVAL;
4993 			break;
4994 		}
4995 
4996 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4997 				     ucasmap.vcpu_addr, ucasmap.length);
4998 		break;
4999 	}
5000 	case KVM_S390_UCAS_UNMAP: {
5001 		struct kvm_s390_ucas_mapping ucasmap;
5002 
5003 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
5004 			r = -EFAULT;
5005 			break;
5006 		}
5007 
5008 		if (!kvm_is_ucontrol(vcpu->kvm)) {
5009 			r = -EINVAL;
5010 			break;
5011 		}
5012 
5013 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
5014 			ucasmap.length);
5015 		break;
5016 	}
5017 #endif
5018 	case KVM_S390_VCPU_FAULT: {
5019 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
5020 		break;
5021 	}
5022 	case KVM_ENABLE_CAP:
5023 	{
5024 		struct kvm_enable_cap cap;
5025 		r = -EFAULT;
5026 		if (copy_from_user(&cap, argp, sizeof(cap)))
5027 			break;
5028 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
5029 		break;
5030 	}
5031 	case KVM_S390_MEM_OP: {
5032 		struct kvm_s390_mem_op mem_op;
5033 
5034 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
5035 			r = kvm_s390_vcpu_memsida_op(vcpu, &mem_op);
5036 		else
5037 			r = -EFAULT;
5038 		break;
5039 	}
5040 	case KVM_S390_SET_IRQ_STATE: {
5041 		struct kvm_s390_irq_state irq_state;
5042 
5043 		r = -EFAULT;
5044 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
5045 			break;
5046 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
5047 		    irq_state.len == 0 ||
5048 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
5049 			r = -EINVAL;
5050 			break;
5051 		}
5052 		/* do not use irq_state.flags, it will break old QEMUs */
5053 		r = kvm_s390_set_irq_state(vcpu,
5054 					   (void __user *) irq_state.buf,
5055 					   irq_state.len);
5056 		break;
5057 	}
5058 	case KVM_S390_GET_IRQ_STATE: {
5059 		struct kvm_s390_irq_state irq_state;
5060 
5061 		r = -EFAULT;
5062 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
5063 			break;
5064 		if (irq_state.len == 0) {
5065 			r = -EINVAL;
5066 			break;
5067 		}
5068 		/* do not use irq_state.flags, it will break old QEMUs */
5069 		r = kvm_s390_get_irq_state(vcpu,
5070 					   (__u8 __user *)  irq_state.buf,
5071 					   irq_state.len);
5072 		break;
5073 	}
5074 	default:
5075 		r = -ENOTTY;
5076 	}
5077 
5078 	vcpu_put(vcpu);
5079 	return r;
5080 }
5081 
5082 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
5083 {
5084 #ifdef CONFIG_KVM_S390_UCONTROL
5085 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
5086 		 && (kvm_is_ucontrol(vcpu->kvm))) {
5087 		vmf->page = virt_to_page(vcpu->arch.sie_block);
5088 		get_page(vmf->page);
5089 		return 0;
5090 	}
5091 #endif
5092 	return VM_FAULT_SIGBUS;
5093 }
5094 
5095 /* Section: memory related */
5096 int kvm_arch_prepare_memory_region(struct kvm *kvm,
5097 				   const struct kvm_memory_slot *old,
5098 				   struct kvm_memory_slot *new,
5099 				   enum kvm_mr_change change)
5100 {
5101 	gpa_t size;
5102 
5103 	/* When we are protected, we should not change the memory slots */
5104 	if (kvm_s390_pv_get_handle(kvm))
5105 		return -EINVAL;
5106 
5107 	if (change == KVM_MR_DELETE || change == KVM_MR_FLAGS_ONLY)
5108 		return 0;
5109 
5110 	/* A few sanity checks. We can have memory slots which have to be
5111 	   located/ended at a segment boundary (1MB). The memory in userland is
5112 	   ok to be fragmented into various different vmas. It is okay to mmap()
5113 	   and munmap() stuff in this slot after doing this call at any time */
5114 
5115 	if (new->userspace_addr & 0xffffful)
5116 		return -EINVAL;
5117 
5118 	size = new->npages * PAGE_SIZE;
5119 	if (size & 0xffffful)
5120 		return -EINVAL;
5121 
5122 	if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit)
5123 		return -EINVAL;
5124 
5125 	return 0;
5126 }
5127 
5128 void kvm_arch_commit_memory_region(struct kvm *kvm,
5129 				struct kvm_memory_slot *old,
5130 				const struct kvm_memory_slot *new,
5131 				enum kvm_mr_change change)
5132 {
5133 	int rc = 0;
5134 
5135 	switch (change) {
5136 	case KVM_MR_DELETE:
5137 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5138 					old->npages * PAGE_SIZE);
5139 		break;
5140 	case KVM_MR_MOVE:
5141 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5142 					old->npages * PAGE_SIZE);
5143 		if (rc)
5144 			break;
5145 		fallthrough;
5146 	case KVM_MR_CREATE:
5147 		rc = gmap_map_segment(kvm->arch.gmap, new->userspace_addr,
5148 				      new->base_gfn * PAGE_SIZE,
5149 				      new->npages * PAGE_SIZE);
5150 		break;
5151 	case KVM_MR_FLAGS_ONLY:
5152 		break;
5153 	default:
5154 		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5155 	}
5156 	if (rc)
5157 		pr_warn("failed to commit memory region\n");
5158 	return;
5159 }
5160 
5161 static inline unsigned long nonhyp_mask(int i)
5162 {
5163 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5164 
5165 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5166 }
5167 
5168 static int __init kvm_s390_init(void)
5169 {
5170 	int i;
5171 
5172 	if (!sclp.has_sief2) {
5173 		pr_info("SIE is not available\n");
5174 		return -ENODEV;
5175 	}
5176 
5177 	if (nested && hpage) {
5178 		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5179 		return -EINVAL;
5180 	}
5181 
5182 	for (i = 0; i < 16; i++)
5183 		kvm_s390_fac_base[i] |=
5184 			stfle_fac_list[i] & nonhyp_mask(i);
5185 
5186 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5187 }
5188 
/* Module exit: hands teardown over to kvm_exit(). */
static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

/* Register the module's init and exit entry points. */
module_init(kvm_s390_init);
module_exit(kvm_s390_exit);
5196 
/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
/* alias based on the KVM misc device minor number */
MODULE_ALIAS_MISCDEV(KVM_MINOR);
/* alias based on the device node name "kvm" */
MODULE_ALIAS("devname:kvm");
5205