xref: /openbmc/linux/arch/s390/kvm/kvm-s390.c (revision e7f127b2)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2020
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13 
14 #define KMSG_COMPONENT "kvm-s390"
15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
16 
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34 #include <linux/pgtable.h>
35 
36 #include <asm/asm-offsets.h>
37 #include <asm/lowcore.h>
38 #include <asm/stp.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include <asm/ap.h>
47 #include <asm/uv.h>
48 #include <asm/fpu/api.h>
49 #include "kvm-s390.h"
50 #include "gaccess.h"
51 
52 #define CREATE_TRACE_POINTS
53 #include "trace.h"
54 #include "trace-s390.h"
55 
56 #define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
57 #define LOCAL_IRQS 32
58 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
59 			   (KVM_MAX_VCPUS + LOCAL_IRQS))
60 
/*
 * Descriptor table for the per-VM statistics: the generic KVM VM entries
 * come first, followed by the s390-specific interrupt-injection counters.
 * kvm_vm_stats_header derives its descriptor count and data offset from
 * this array.
 */
const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
	KVM_GENERIC_VM_STATS(),
	STATS_DESC_COUNTER(VM, inject_io),
	STATS_DESC_COUNTER(VM, inject_float_mchk),
	STATS_DESC_COUNTER(VM, inject_pfault_done),
	STATS_DESC_COUNTER(VM, inject_service_signal),
	STATS_DESC_COUNTER(VM, inject_virtio)
};
69 
/*
 * Header describing the layout of the per-VM statistics data: the id
 * string directly follows the header, the descriptors follow the id string
 * (KVM_STATS_NAME_SIZE bytes), and the data follows the descriptor table.
 */
const struct kvm_stats_header kvm_vm_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vm_stats_desc),
};
78 
/*
 * Descriptor table for the per-vCPU statistics: the generic KVM vCPU
 * entries come first, followed by the s390-specific counters.
 * kvm_vcpu_stats_header derives its descriptor count and data offset from
 * this array.
 */
const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
	KVM_GENERIC_VCPU_STATS(),
	STATS_DESC_COUNTER(VCPU, exit_userspace),
	STATS_DESC_COUNTER(VCPU, exit_null),
	STATS_DESC_COUNTER(VCPU, exit_external_request),
	STATS_DESC_COUNTER(VCPU, exit_io_request),
	STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
	STATS_DESC_COUNTER(VCPU, exit_stop_request),
	STATS_DESC_COUNTER(VCPU, exit_validity),
	STATS_DESC_COUNTER(VCPU, exit_instruction),
	STATS_DESC_COUNTER(VCPU, exit_pei),
	STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
	STATS_DESC_COUNTER(VCPU, instruction_lctl),
	STATS_DESC_COUNTER(VCPU, instruction_lctlg),
	STATS_DESC_COUNTER(VCPU, instruction_stctl),
	STATS_DESC_COUNTER(VCPU, instruction_stctg),
	STATS_DESC_COUNTER(VCPU, exit_program_interruption),
	STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
	STATS_DESC_COUNTER(VCPU, exit_operation_exception),
	STATS_DESC_COUNTER(VCPU, deliver_ckc),
	STATS_DESC_COUNTER(VCPU, deliver_cputm),
	STATS_DESC_COUNTER(VCPU, deliver_external_call),
	STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
	STATS_DESC_COUNTER(VCPU, deliver_service_signal),
	STATS_DESC_COUNTER(VCPU, deliver_virtio),
	STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
	STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
	STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
	STATS_DESC_COUNTER(VCPU, deliver_program),
	STATS_DESC_COUNTER(VCPU, deliver_io),
	STATS_DESC_COUNTER(VCPU, deliver_machine_check),
	STATS_DESC_COUNTER(VCPU, exit_wait_state),
	STATS_DESC_COUNTER(VCPU, inject_ckc),
	STATS_DESC_COUNTER(VCPU, inject_cputm),
	STATS_DESC_COUNTER(VCPU, inject_external_call),
	STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
	STATS_DESC_COUNTER(VCPU, inject_mchk),
	STATS_DESC_COUNTER(VCPU, inject_pfault_init),
	STATS_DESC_COUNTER(VCPU, inject_program),
	STATS_DESC_COUNTER(VCPU, inject_restart),
	STATS_DESC_COUNTER(VCPU, inject_set_prefix),
	STATS_DESC_COUNTER(VCPU, inject_stop_signal),
	STATS_DESC_COUNTER(VCPU, instruction_epsw),
	STATS_DESC_COUNTER(VCPU, instruction_gs),
	STATS_DESC_COUNTER(VCPU, instruction_io_other),
	STATS_DESC_COUNTER(VCPU, instruction_lpsw),
	STATS_DESC_COUNTER(VCPU, instruction_lpswe),
	STATS_DESC_COUNTER(VCPU, instruction_pfmf),
	STATS_DESC_COUNTER(VCPU, instruction_ptff),
	STATS_DESC_COUNTER(VCPU, instruction_sck),
	STATS_DESC_COUNTER(VCPU, instruction_sckpf),
	STATS_DESC_COUNTER(VCPU, instruction_stidp),
	STATS_DESC_COUNTER(VCPU, instruction_spx),
	STATS_DESC_COUNTER(VCPU, instruction_stpx),
	STATS_DESC_COUNTER(VCPU, instruction_stap),
	STATS_DESC_COUNTER(VCPU, instruction_iske),
	STATS_DESC_COUNTER(VCPU, instruction_ri),
	STATS_DESC_COUNTER(VCPU, instruction_rrbe),
	STATS_DESC_COUNTER(VCPU, instruction_sske),
	STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
	STATS_DESC_COUNTER(VCPU, instruction_stsi),
	STATS_DESC_COUNTER(VCPU, instruction_stfl),
	STATS_DESC_COUNTER(VCPU, instruction_tb),
	STATS_DESC_COUNTER(VCPU, instruction_tpi),
	STATS_DESC_COUNTER(VCPU, instruction_tprot),
	STATS_DESC_COUNTER(VCPU, instruction_tsch),
	STATS_DESC_COUNTER(VCPU, instruction_sie),
	STATS_DESC_COUNTER(VCPU, instruction_essa),
	STATS_DESC_COUNTER(VCPU, instruction_sthyi),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
	STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
	STATS_DESC_COUNTER(VCPU, diag_9c_forward),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
	STATS_DESC_COUNTER(VCPU, pfault_sync)
};
175 
/*
 * Header describing the layout of the per-vCPU statistics data: the id
 * string directly follows the header, the descriptors follow the id string
 * (KVM_STATS_NAME_SIZE bytes), and the data follows the descriptor table.
 */
const struct kvm_stats_header kvm_vcpu_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vcpu_stats_desc),
};
184 
185 /* allow nested virtualization in KVM (if enabled by user space) */
186 static int nested;
187 module_param(nested, int, S_IRUGO);
188 MODULE_PARM_DESC(nested, "Nested virtualization support");
189 
190 /* allow 1m huge page guest backing, if !nested */
191 static int hpage;
192 module_param(hpage, int, 0444);
193 MODULE_PARM_DESC(hpage, "1m huge page backing support");
194 
195 /* maximum percentage of steal time for polling.  >100 is treated like 100 */
196 static u8 halt_poll_max_steal = 10;
197 module_param(halt_poll_max_steal, byte, 0644);
198 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
199 
200 /* if set to true, the GISA will be initialized and used if available */
201 static bool use_gisa  = true;
202 module_param(use_gisa, bool, 0644);
203 MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
204 
205 /* maximum diag9c forwarding per second */
206 unsigned int diag9c_forwarding_hz;
207 module_param(diag9c_forwarding_hz, uint, 0644);
208 MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
209 
/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, this requires changes to code, but the external uapi can stay.
 *
 * SIZE_INTERNAL is the number of unsigned long elements in each of the two
 * facility arrays below; kvm_s390_fac_size() checks it at build time
 * against the architecture list/mask sizes and stfle_fac_list.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
227 
228 static unsigned long kvm_s390_fac_size(void)
229 {
230 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
231 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
232 	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
233 		sizeof(stfle_fac_list));
234 
235 	return SIZE_INTERNAL;
236 }
237 
/* available cpu features supported by kvm (bits set via allow_cpu_feat()) */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

/* pte notifier that calls kvm_gmap_notifier; registered in kvm_arch_hardware_setup() */
static struct gmap_notifier gmap_notifier;
/* pte notifier that calls kvm_s390_vsie_gmap_notifier; registered alongside the one above */
static struct gmap_notifier vsie_gmap_notifier;
/* s390 debug feature area registered as "kvm-trace" in kvm_arch_init() */
debug_info_t *kvm_s390_dbf;
/* s390 debug feature area registered as "kvm-uv" in kvm_arch_init() */
debug_info_t *kvm_s390_dbf_uv;
247 
248 /* Section: not file related */
/*
 * Hardware-enable hook for the generic KVM code.
 *
 * Nothing needs to be switched on, so this always reports success (0).
 */
int kvm_arch_hardware_enable(void)
{
	return 0;	/* every s390 is virtualization enabled ;-) */
}
254 
/*
 * Processor compatibility hook for the generic KVM code.
 *
 * @opaque is not looked at; the check always succeeds and returns 0.
 */
int kvm_arch_check_processor_compat(void *opaque)
{
	(void)opaque;

	return 0;
}
259 
260 /* forward declarations */
261 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
262 			      unsigned long end);
263 static int sca_switch_to_extended(struct kvm *kvm);
264 
265 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
266 {
267 	u8 delta_idx = 0;
268 
269 	/*
270 	 * The TOD jumps by delta, we have to compensate this by adding
271 	 * -delta to the epoch.
272 	 */
273 	delta = -delta;
274 
275 	/* sign-extension - we're adding to signed values below */
276 	if ((s64)delta < 0)
277 		delta_idx = -1;
278 
279 	scb->epoch += delta;
280 	if (scb->ecd & ECD_MEF) {
281 		scb->epdx += delta_idx;
282 		if (scb->epoch < delta)
283 			scb->epdx += 1;
284 	}
285 }
286 
287 /*
288  * This callback is executed during stop_machine(). All CPUs are therefore
289  * temporarily stopped. In order not to change guest behavior, we have to
290  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
291  * so a CPU won't be stopped while calculating with the epoch.
292  */
293 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
294 			  void *v)
295 {
296 	struct kvm *kvm;
297 	struct kvm_vcpu *vcpu;
298 	unsigned long i;
299 	unsigned long long *delta = v;
300 
301 	list_for_each_entry(kvm, &vm_list, vm_list) {
302 		kvm_for_each_vcpu(i, vcpu, kvm) {
303 			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
304 			if (i == 0) {
305 				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
306 				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
307 			}
308 			if (vcpu->arch.cputm_enabled)
309 				vcpu->arch.cputm_start += *delta;
310 			if (vcpu->arch.vsie_block)
311 				kvm_clock_sync_scb(vcpu->arch.vsie_block,
312 						   *delta);
313 		}
314 	}
315 	return NOTIFY_OK;
316 }
317 
/*
 * Notifier block that invokes kvm_clock_sync(); it is added to and removed
 * from s390_epoch_delta_notifier in kvm_arch_hardware_setup() and
 * kvm_arch_hardware_unsetup().
 */
static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};
321 
322 int kvm_arch_hardware_setup(void *opaque)
323 {
324 	gmap_notifier.notifier_call = kvm_gmap_notifier;
325 	gmap_register_pte_notifier(&gmap_notifier);
326 	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
327 	gmap_register_pte_notifier(&vsie_gmap_notifier);
328 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
329 				       &kvm_clock_notifier);
330 	return 0;
331 }
332 
333 void kvm_arch_hardware_unsetup(void)
334 {
335 	gmap_unregister_pte_notifier(&gmap_notifier);
336 	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
337 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
338 					 &kvm_clock_notifier);
339 }
340 
341 static void allow_cpu_feat(unsigned long nr)
342 {
343 	set_bit_inv(nr, kvm_s390_available_cpu_feat);
344 }
345 
/*
 * Issue PLO with function code (@nr | 0x100) and report whether the
 * resulting condition code is 0.
 *
 * NOTE(review): 0x100 presumably selects the "test bit" form mentioned in
 * the comment below - confirm against the architecture documentation.
 *
 * Returns 1 if the condition code is 0, otherwise 0.
 */
static inline int plo_test_bit(unsigned char nr)
{
	unsigned long function = (unsigned long)nr | 0x100;
	int cc;

	asm volatile(
		/* the function code is passed in register 0 */
		"	lgr	0,%[function]\n"
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		/* extract the condition code into cc */
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: [function] "d" (function)
		: "cc", "0");
	return cc == 0;
}
362 
/*
 * Execute the RRF-format instruction with the 16-bit opcode @opcode with
 * register 0 set to 0 and register 1 holding the address of @query.
 *
 * @opcode must be a compile-time constant (it is an "i" operand).
 * NOTE(review): function code 0 is presumably the "query" function that
 * stores the list of available subfunctions at @query - confirm the
 * required buffer size against the callers / architecture documentation.
 */
static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
{
	asm volatile(
		/* function code 0 in register 0 */
		"	lghi	0,0\n"
		/* address of the result buffer in register 1 */
		"	lgr	1,%[query]\n"
		/* Parameter registers are ignored */
		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
		:
		: [query] "d" ((unsigned long)query), [opc] "i" (opcode)
		: "cc", "memory", "0", "1");
}
374 
375 #define INSN_SORTL 0xb938
376 #define INSN_DFLTCC 0xb939
377 
378 static void kvm_s390_cpu_feat_init(void)
379 {
380 	int i;
381 
382 	for (i = 0; i < 256; ++i) {
383 		if (plo_test_bit(i))
384 			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
385 	}
386 
387 	if (test_facility(28)) /* TOD-clock steering */
388 		ptff(kvm_s390_available_subfunc.ptff,
389 		     sizeof(kvm_s390_available_subfunc.ptff),
390 		     PTFF_QAF);
391 
392 	if (test_facility(17)) { /* MSA */
393 		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
394 			      kvm_s390_available_subfunc.kmac);
395 		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
396 			      kvm_s390_available_subfunc.kmc);
397 		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
398 			      kvm_s390_available_subfunc.km);
399 		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
400 			      kvm_s390_available_subfunc.kimd);
401 		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
402 			      kvm_s390_available_subfunc.klmd);
403 	}
404 	if (test_facility(76)) /* MSA3 */
405 		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
406 			      kvm_s390_available_subfunc.pckmo);
407 	if (test_facility(77)) { /* MSA4 */
408 		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
409 			      kvm_s390_available_subfunc.kmctr);
410 		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
411 			      kvm_s390_available_subfunc.kmf);
412 		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
413 			      kvm_s390_available_subfunc.kmo);
414 		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
415 			      kvm_s390_available_subfunc.pcc);
416 	}
417 	if (test_facility(57)) /* MSA5 */
418 		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
419 			      kvm_s390_available_subfunc.ppno);
420 
421 	if (test_facility(146)) /* MSA8 */
422 		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
423 			      kvm_s390_available_subfunc.kma);
424 
425 	if (test_facility(155)) /* MSA9 */
426 		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
427 			      kvm_s390_available_subfunc.kdsa);
428 
429 	if (test_facility(150)) /* SORTL */
430 		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
431 
432 	if (test_facility(151)) /* DFLTCC */
433 		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
434 
435 	if (MACHINE_HAS_ESOP)
436 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
437 	/*
438 	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
439 	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
440 	 */
441 	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
442 	    !test_facility(3) || !nested)
443 		return;
444 	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
445 	if (sclp.has_64bscao)
446 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
447 	if (sclp.has_siif)
448 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
449 	if (sclp.has_gpere)
450 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
451 	if (sclp.has_gsls)
452 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
453 	if (sclp.has_ib)
454 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
455 	if (sclp.has_cei)
456 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
457 	if (sclp.has_ibs)
458 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
459 	if (sclp.has_kss)
460 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
461 	/*
462 	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
463 	 * all skey handling functions read/set the skey from the PGSTE
464 	 * instead of the real storage key.
465 	 *
466 	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
467 	 * pages being detected as preserved although they are resident.
468 	 *
469 	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
470 	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
471 	 *
472 	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
473 	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
474 	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
475 	 *
476 	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
477 	 * cannot easily shadow the SCA because of the ipte lock.
478 	 */
479 }
480 
481 int kvm_arch_init(void *opaque)
482 {
483 	int rc = -ENOMEM;
484 
485 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
486 	if (!kvm_s390_dbf)
487 		return -ENOMEM;
488 
489 	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
490 	if (!kvm_s390_dbf_uv)
491 		goto out;
492 
493 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
494 	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
495 		goto out;
496 
497 	kvm_s390_cpu_feat_init();
498 
499 	/* Register floating interrupt controller interface. */
500 	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
501 	if (rc) {
502 		pr_err("A FLIC registration call failed with rc=%d\n", rc);
503 		goto out;
504 	}
505 
506 	rc = kvm_s390_gib_init(GAL_ISC);
507 	if (rc)
508 		goto out;
509 
510 	return 0;
511 
512 out:
513 	kvm_arch_exit();
514 	return rc;
515 }
516 
517 void kvm_arch_exit(void)
518 {
519 	kvm_s390_gib_destroy();
520 	debug_unregister(kvm_s390_dbf);
521 	debug_unregister(kvm_s390_dbf_uv);
522 }
523 
524 /* Section: device related */
525 long kvm_arch_dev_ioctl(struct file *filp,
526 			unsigned int ioctl, unsigned long arg)
527 {
528 	if (ioctl == KVM_S390_ENABLE_SIE)
529 		return s390_enable_sie();
530 	return -EINVAL;
531 }
532 
533 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
534 {
535 	int r;
536 
537 	switch (ext) {
538 	case KVM_CAP_S390_PSW:
539 	case KVM_CAP_S390_GMAP:
540 	case KVM_CAP_SYNC_MMU:
541 #ifdef CONFIG_KVM_S390_UCONTROL
542 	case KVM_CAP_S390_UCONTROL:
543 #endif
544 	case KVM_CAP_ASYNC_PF:
545 	case KVM_CAP_SYNC_REGS:
546 	case KVM_CAP_ONE_REG:
547 	case KVM_CAP_ENABLE_CAP:
548 	case KVM_CAP_S390_CSS_SUPPORT:
549 	case KVM_CAP_IOEVENTFD:
550 	case KVM_CAP_DEVICE_CTRL:
551 	case KVM_CAP_S390_IRQCHIP:
552 	case KVM_CAP_VM_ATTRIBUTES:
553 	case KVM_CAP_MP_STATE:
554 	case KVM_CAP_IMMEDIATE_EXIT:
555 	case KVM_CAP_S390_INJECT_IRQ:
556 	case KVM_CAP_S390_USER_SIGP:
557 	case KVM_CAP_S390_USER_STSI:
558 	case KVM_CAP_S390_SKEYS:
559 	case KVM_CAP_S390_IRQ_STATE:
560 	case KVM_CAP_S390_USER_INSTR0:
561 	case KVM_CAP_S390_CMMA_MIGRATION:
562 	case KVM_CAP_S390_AIS:
563 	case KVM_CAP_S390_AIS_MIGRATION:
564 	case KVM_CAP_S390_VCPU_RESETS:
565 	case KVM_CAP_SET_GUEST_DEBUG:
566 	case KVM_CAP_S390_DIAG318:
567 		r = 1;
568 		break;
569 	case KVM_CAP_SET_GUEST_DEBUG2:
570 		r = KVM_GUESTDBG_VALID_MASK;
571 		break;
572 	case KVM_CAP_S390_HPAGE_1M:
573 		r = 0;
574 		if (hpage && !kvm_is_ucontrol(kvm))
575 			r = 1;
576 		break;
577 	case KVM_CAP_S390_MEM_OP:
578 		r = MEM_OP_MAX_SIZE;
579 		break;
580 	case KVM_CAP_NR_VCPUS:
581 	case KVM_CAP_MAX_VCPUS:
582 	case KVM_CAP_MAX_VCPU_ID:
583 		r = KVM_S390_BSCA_CPU_SLOTS;
584 		if (!kvm_s390_use_sca_entries())
585 			r = KVM_MAX_VCPUS;
586 		else if (sclp.has_esca && sclp.has_64bscao)
587 			r = KVM_S390_ESCA_CPU_SLOTS;
588 		if (ext == KVM_CAP_NR_VCPUS)
589 			r = min_t(unsigned int, num_online_cpus(), r);
590 		break;
591 	case KVM_CAP_S390_COW:
592 		r = MACHINE_HAS_ESOP;
593 		break;
594 	case KVM_CAP_S390_VECTOR_REGISTERS:
595 		r = MACHINE_HAS_VX;
596 		break;
597 	case KVM_CAP_S390_RI:
598 		r = test_facility(64);
599 		break;
600 	case KVM_CAP_S390_GS:
601 		r = test_facility(133);
602 		break;
603 	case KVM_CAP_S390_BPB:
604 		r = test_facility(82);
605 		break;
606 	case KVM_CAP_S390_PROTECTED:
607 		r = is_prot_virt_host();
608 		break;
609 	default:
610 		r = 0;
611 	}
612 	return r;
613 }
614 
615 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
616 {
617 	int i;
618 	gfn_t cur_gfn, last_gfn;
619 	unsigned long gaddr, vmaddr;
620 	struct gmap *gmap = kvm->arch.gmap;
621 	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
622 
623 	/* Loop over all guest segments */
624 	cur_gfn = memslot->base_gfn;
625 	last_gfn = memslot->base_gfn + memslot->npages;
626 	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
627 		gaddr = gfn_to_gpa(cur_gfn);
628 		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
629 		if (kvm_is_error_hva(vmaddr))
630 			continue;
631 
632 		bitmap_zero(bitmap, _PAGE_ENTRIES);
633 		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
634 		for (i = 0; i < _PAGE_ENTRIES; i++) {
635 			if (test_bit(i, bitmap))
636 				mark_page_dirty(kvm, cur_gfn + i);
637 		}
638 
639 		if (fatal_signal_pending(current))
640 			return;
641 		cond_resched();
642 	}
643 }
644 
645 /* Section: vm related */
646 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
647 
648 /*
649  * Get (and clear) the dirty memory log for a memory slot.
650  */
651 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
652 			       struct kvm_dirty_log *log)
653 {
654 	int r;
655 	unsigned long n;
656 	struct kvm_memory_slot *memslot;
657 	int is_dirty;
658 
659 	if (kvm_is_ucontrol(kvm))
660 		return -EINVAL;
661 
662 	mutex_lock(&kvm->slots_lock);
663 
664 	r = -EINVAL;
665 	if (log->slot >= KVM_USER_MEM_SLOTS)
666 		goto out;
667 
668 	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
669 	if (r)
670 		goto out;
671 
672 	/* Clear the dirty log */
673 	if (is_dirty) {
674 		n = kvm_dirty_bitmap_bytes(memslot);
675 		memset(memslot->dirty_bitmap, 0, n);
676 	}
677 	r = 0;
678 out:
679 	mutex_unlock(&kvm->slots_lock);
680 	return r;
681 }
682 
683 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
684 {
685 	unsigned long i;
686 	struct kvm_vcpu *vcpu;
687 
688 	kvm_for_each_vcpu(i, vcpu, kvm) {
689 		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
690 	}
691 }
692 
693 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
694 {
695 	int r;
696 
697 	if (cap->flags)
698 		return -EINVAL;
699 
700 	switch (cap->cap) {
701 	case KVM_CAP_S390_IRQCHIP:
702 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
703 		kvm->arch.use_irqchip = 1;
704 		r = 0;
705 		break;
706 	case KVM_CAP_S390_USER_SIGP:
707 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
708 		kvm->arch.user_sigp = 1;
709 		r = 0;
710 		break;
711 	case KVM_CAP_S390_VECTOR_REGISTERS:
712 		mutex_lock(&kvm->lock);
713 		if (kvm->created_vcpus) {
714 			r = -EBUSY;
715 		} else if (MACHINE_HAS_VX) {
716 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
717 			set_kvm_facility(kvm->arch.model.fac_list, 129);
718 			if (test_facility(134)) {
719 				set_kvm_facility(kvm->arch.model.fac_mask, 134);
720 				set_kvm_facility(kvm->arch.model.fac_list, 134);
721 			}
722 			if (test_facility(135)) {
723 				set_kvm_facility(kvm->arch.model.fac_mask, 135);
724 				set_kvm_facility(kvm->arch.model.fac_list, 135);
725 			}
726 			if (test_facility(148)) {
727 				set_kvm_facility(kvm->arch.model.fac_mask, 148);
728 				set_kvm_facility(kvm->arch.model.fac_list, 148);
729 			}
730 			if (test_facility(152)) {
731 				set_kvm_facility(kvm->arch.model.fac_mask, 152);
732 				set_kvm_facility(kvm->arch.model.fac_list, 152);
733 			}
734 			if (test_facility(192)) {
735 				set_kvm_facility(kvm->arch.model.fac_mask, 192);
736 				set_kvm_facility(kvm->arch.model.fac_list, 192);
737 			}
738 			r = 0;
739 		} else
740 			r = -EINVAL;
741 		mutex_unlock(&kvm->lock);
742 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
743 			 r ? "(not available)" : "(success)");
744 		break;
745 	case KVM_CAP_S390_RI:
746 		r = -EINVAL;
747 		mutex_lock(&kvm->lock);
748 		if (kvm->created_vcpus) {
749 			r = -EBUSY;
750 		} else if (test_facility(64)) {
751 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
752 			set_kvm_facility(kvm->arch.model.fac_list, 64);
753 			r = 0;
754 		}
755 		mutex_unlock(&kvm->lock);
756 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
757 			 r ? "(not available)" : "(success)");
758 		break;
759 	case KVM_CAP_S390_AIS:
760 		mutex_lock(&kvm->lock);
761 		if (kvm->created_vcpus) {
762 			r = -EBUSY;
763 		} else {
764 			set_kvm_facility(kvm->arch.model.fac_mask, 72);
765 			set_kvm_facility(kvm->arch.model.fac_list, 72);
766 			r = 0;
767 		}
768 		mutex_unlock(&kvm->lock);
769 		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
770 			 r ? "(not available)" : "(success)");
771 		break;
772 	case KVM_CAP_S390_GS:
773 		r = -EINVAL;
774 		mutex_lock(&kvm->lock);
775 		if (kvm->created_vcpus) {
776 			r = -EBUSY;
777 		} else if (test_facility(133)) {
778 			set_kvm_facility(kvm->arch.model.fac_mask, 133);
779 			set_kvm_facility(kvm->arch.model.fac_list, 133);
780 			r = 0;
781 		}
782 		mutex_unlock(&kvm->lock);
783 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
784 			 r ? "(not available)" : "(success)");
785 		break;
786 	case KVM_CAP_S390_HPAGE_1M:
787 		mutex_lock(&kvm->lock);
788 		if (kvm->created_vcpus)
789 			r = -EBUSY;
790 		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
791 			r = -EINVAL;
792 		else {
793 			r = 0;
794 			mmap_write_lock(kvm->mm);
795 			kvm->mm->context.allow_gmap_hpage_1m = 1;
796 			mmap_write_unlock(kvm->mm);
797 			/*
798 			 * We might have to create fake 4k page
799 			 * tables. To avoid that the hardware works on
800 			 * stale PGSTEs, we emulate these instructions.
801 			 */
802 			kvm->arch.use_skf = 0;
803 			kvm->arch.use_pfmfi = 0;
804 		}
805 		mutex_unlock(&kvm->lock);
806 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
807 			 r ? "(not available)" : "(success)");
808 		break;
809 	case KVM_CAP_S390_USER_STSI:
810 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
811 		kvm->arch.user_stsi = 1;
812 		r = 0;
813 		break;
814 	case KVM_CAP_S390_USER_INSTR0:
815 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
816 		kvm->arch.user_instr0 = 1;
817 		icpt_operexc_on_all_vcpus(kvm);
818 		r = 0;
819 		break;
820 	default:
821 		r = -EINVAL;
822 		break;
823 	}
824 	return r;
825 }
826 
827 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
828 {
829 	int ret;
830 
831 	switch (attr->attr) {
832 	case KVM_S390_VM_MEM_LIMIT_SIZE:
833 		ret = 0;
834 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
835 			 kvm->arch.mem_limit);
836 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
837 			ret = -EFAULT;
838 		break;
839 	default:
840 		ret = -ENXIO;
841 		break;
842 	}
843 	return ret;
844 }
845 
846 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
847 {
848 	int ret;
849 	unsigned int idx;
850 	switch (attr->attr) {
851 	case KVM_S390_VM_MEM_ENABLE_CMMA:
852 		ret = -ENXIO;
853 		if (!sclp.has_cmma)
854 			break;
855 
856 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
857 		mutex_lock(&kvm->lock);
858 		if (kvm->created_vcpus)
859 			ret = -EBUSY;
860 		else if (kvm->mm->context.allow_gmap_hpage_1m)
861 			ret = -EINVAL;
862 		else {
863 			kvm->arch.use_cmma = 1;
864 			/* Not compatible with cmma. */
865 			kvm->arch.use_pfmfi = 0;
866 			ret = 0;
867 		}
868 		mutex_unlock(&kvm->lock);
869 		break;
870 	case KVM_S390_VM_MEM_CLR_CMMA:
871 		ret = -ENXIO;
872 		if (!sclp.has_cmma)
873 			break;
874 		ret = -EINVAL;
875 		if (!kvm->arch.use_cmma)
876 			break;
877 
878 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
879 		mutex_lock(&kvm->lock);
880 		idx = srcu_read_lock(&kvm->srcu);
881 		s390_reset_cmma(kvm->arch.gmap->mm);
882 		srcu_read_unlock(&kvm->srcu, idx);
883 		mutex_unlock(&kvm->lock);
884 		ret = 0;
885 		break;
886 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
887 		unsigned long new_limit;
888 
889 		if (kvm_is_ucontrol(kvm))
890 			return -EINVAL;
891 
892 		if (get_user(new_limit, (u64 __user *)attr->addr))
893 			return -EFAULT;
894 
895 		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
896 		    new_limit > kvm->arch.mem_limit)
897 			return -E2BIG;
898 
899 		if (!new_limit)
900 			return -EINVAL;
901 
902 		/* gmap_create takes last usable address */
903 		if (new_limit != KVM_S390_NO_MEM_LIMIT)
904 			new_limit -= 1;
905 
906 		ret = -EBUSY;
907 		mutex_lock(&kvm->lock);
908 		if (!kvm->created_vcpus) {
909 			/* gmap_create will round the limit up */
910 			struct gmap *new = gmap_create(current->mm, new_limit);
911 
912 			if (!new) {
913 				ret = -ENOMEM;
914 			} else {
915 				gmap_remove(kvm->arch.gmap);
916 				new->private = kvm;
917 				kvm->arch.gmap = new;
918 				ret = 0;
919 			}
920 		}
921 		mutex_unlock(&kvm->lock);
922 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
923 		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
924 			 (void *) kvm->arch.gmap->asce);
925 		break;
926 	}
927 	default:
928 		ret = -ENXIO;
929 		break;
930 	}
931 	return ret;
932 }
933 
934 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
935 
936 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
937 {
938 	struct kvm_vcpu *vcpu;
939 	unsigned long i;
940 
941 	kvm_s390_vcpu_block_all(kvm);
942 
943 	kvm_for_each_vcpu(i, vcpu, kvm) {
944 		kvm_s390_vcpu_crypto_setup(vcpu);
945 		/* recreate the shadow crycb by leaving the VSIE handler */
946 		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
947 	}
948 
949 	kvm_s390_vcpu_unblock_all(kvm);
950 }
951 
952 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
953 {
954 	mutex_lock(&kvm->lock);
955 	switch (attr->attr) {
956 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
957 		if (!test_kvm_facility(kvm, 76)) {
958 			mutex_unlock(&kvm->lock);
959 			return -EINVAL;
960 		}
961 		get_random_bytes(
962 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
963 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
964 		kvm->arch.crypto.aes_kw = 1;
965 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
966 		break;
967 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
968 		if (!test_kvm_facility(kvm, 76)) {
969 			mutex_unlock(&kvm->lock);
970 			return -EINVAL;
971 		}
972 		get_random_bytes(
973 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
974 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
975 		kvm->arch.crypto.dea_kw = 1;
976 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
977 		break;
978 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
979 		if (!test_kvm_facility(kvm, 76)) {
980 			mutex_unlock(&kvm->lock);
981 			return -EINVAL;
982 		}
983 		kvm->arch.crypto.aes_kw = 0;
984 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
985 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
986 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
987 		break;
988 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
989 		if (!test_kvm_facility(kvm, 76)) {
990 			mutex_unlock(&kvm->lock);
991 			return -EINVAL;
992 		}
993 		kvm->arch.crypto.dea_kw = 0;
994 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
995 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
996 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
997 		break;
998 	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
999 		if (!ap_instructions_available()) {
1000 			mutex_unlock(&kvm->lock);
1001 			return -EOPNOTSUPP;
1002 		}
1003 		kvm->arch.crypto.apie = 1;
1004 		break;
1005 	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1006 		if (!ap_instructions_available()) {
1007 			mutex_unlock(&kvm->lock);
1008 			return -EOPNOTSUPP;
1009 		}
1010 		kvm->arch.crypto.apie = 0;
1011 		break;
1012 	default:
1013 		mutex_unlock(&kvm->lock);
1014 		return -ENXIO;
1015 	}
1016 
1017 	kvm_s390_vcpu_crypto_reset_all(kvm);
1018 	mutex_unlock(&kvm->lock);
1019 	return 0;
1020 }
1021 
1022 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
1023 {
1024 	unsigned long cx;
1025 	struct kvm_vcpu *vcpu;
1026 
1027 	kvm_for_each_vcpu(cx, vcpu, kvm)
1028 		kvm_s390_sync_request(req, vcpu);
1029 }
1030 
1031 /*
1032  * Must be called with kvm->srcu held to avoid races on memslots, and with
1033  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1034  */
1035 static int kvm_s390_vm_start_migration(struct kvm *kvm)
1036 {
1037 	struct kvm_memory_slot *ms;
1038 	struct kvm_memslots *slots;
1039 	unsigned long ram_pages = 0;
1040 	int bkt;
1041 
1042 	/* migration mode already enabled */
1043 	if (kvm->arch.migration_mode)
1044 		return 0;
1045 	slots = kvm_memslots(kvm);
1046 	if (!slots || kvm_memslots_empty(slots))
1047 		return -EINVAL;
1048 
1049 	if (!kvm->arch.use_cmma) {
1050 		kvm->arch.migration_mode = 1;
1051 		return 0;
1052 	}
1053 	/* mark all the pages in active slots as dirty */
1054 	kvm_for_each_memslot(ms, bkt, slots) {
1055 		if (!ms->dirty_bitmap)
1056 			return -EINVAL;
1057 		/*
1058 		 * The second half of the bitmap is only used on x86,
1059 		 * and would be wasted otherwise, so we put it to good
1060 		 * use here to keep track of the state of the storage
1061 		 * attributes.
1062 		 */
1063 		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1064 		ram_pages += ms->npages;
1065 	}
1066 	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1067 	kvm->arch.migration_mode = 1;
1068 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1069 	return 0;
1070 }
1071 
1072 /*
1073  * Must be called with kvm->slots_lock to avoid races with ourselves and
1074  * kvm_s390_vm_start_migration.
1075  */
1076 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1077 {
1078 	/* migration mode already disabled */
1079 	if (!kvm->arch.migration_mode)
1080 		return 0;
1081 	kvm->arch.migration_mode = 0;
1082 	if (kvm->arch.use_cmma)
1083 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1084 	return 0;
1085 }
1086 
1087 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1088 				     struct kvm_device_attr *attr)
1089 {
1090 	int res = -ENXIO;
1091 
1092 	mutex_lock(&kvm->slots_lock);
1093 	switch (attr->attr) {
1094 	case KVM_S390_VM_MIGRATION_START:
1095 		res = kvm_s390_vm_start_migration(kvm);
1096 		break;
1097 	case KVM_S390_VM_MIGRATION_STOP:
1098 		res = kvm_s390_vm_stop_migration(kvm);
1099 		break;
1100 	default:
1101 		break;
1102 	}
1103 	mutex_unlock(&kvm->slots_lock);
1104 
1105 	return res;
1106 }
1107 
1108 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1109 				     struct kvm_device_attr *attr)
1110 {
1111 	u64 mig = kvm->arch.migration_mode;
1112 
1113 	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1114 		return -ENXIO;
1115 
1116 	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1117 		return -EFAULT;
1118 	return 0;
1119 }
1120 
1121 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1122 {
1123 	struct kvm_s390_vm_tod_clock gtod;
1124 
1125 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1126 		return -EFAULT;
1127 
1128 	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1129 		return -EINVAL;
1130 	kvm_s390_set_tod_clock(kvm, &gtod);
1131 
1132 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1133 		gtod.epoch_idx, gtod.tod);
1134 
1135 	return 0;
1136 }
1137 
1138 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1139 {
1140 	u8 gtod_high;
1141 
1142 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1143 					   sizeof(gtod_high)))
1144 		return -EFAULT;
1145 
1146 	if (gtod_high != 0)
1147 		return -EINVAL;
1148 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1149 
1150 	return 0;
1151 }
1152 
1153 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1154 {
1155 	struct kvm_s390_vm_tod_clock gtod = { 0 };
1156 
1157 	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1158 			   sizeof(gtod.tod)))
1159 		return -EFAULT;
1160 
1161 	kvm_s390_set_tod_clock(kvm, &gtod);
1162 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1163 	return 0;
1164 }
1165 
1166 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1167 {
1168 	int ret;
1169 
1170 	if (attr->flags)
1171 		return -EINVAL;
1172 
1173 	switch (attr->attr) {
1174 	case KVM_S390_VM_TOD_EXT:
1175 		ret = kvm_s390_set_tod_ext(kvm, attr);
1176 		break;
1177 	case KVM_S390_VM_TOD_HIGH:
1178 		ret = kvm_s390_set_tod_high(kvm, attr);
1179 		break;
1180 	case KVM_S390_VM_TOD_LOW:
1181 		ret = kvm_s390_set_tod_low(kvm, attr);
1182 		break;
1183 	default:
1184 		ret = -ENXIO;
1185 		break;
1186 	}
1187 	return ret;
1188 }
1189 
1190 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1191 				   struct kvm_s390_vm_tod_clock *gtod)
1192 {
1193 	union tod_clock clk;
1194 
1195 	preempt_disable();
1196 
1197 	store_tod_clock_ext(&clk);
1198 
1199 	gtod->tod = clk.tod + kvm->arch.epoch;
1200 	gtod->epoch_idx = 0;
1201 	if (test_kvm_facility(kvm, 139)) {
1202 		gtod->epoch_idx = clk.ei + kvm->arch.epdx;
1203 		if (gtod->tod < clk.tod)
1204 			gtod->epoch_idx += 1;
1205 	}
1206 
1207 	preempt_enable();
1208 }
1209 
1210 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1211 {
1212 	struct kvm_s390_vm_tod_clock gtod;
1213 
1214 	memset(&gtod, 0, sizeof(gtod));
1215 	kvm_s390_get_tod_clock(kvm, &gtod);
1216 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1217 		return -EFAULT;
1218 
1219 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1220 		gtod.epoch_idx, gtod.tod);
1221 	return 0;
1222 }
1223 
1224 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1225 {
1226 	u8 gtod_high = 0;
1227 
1228 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
1229 					 sizeof(gtod_high)))
1230 		return -EFAULT;
1231 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1232 
1233 	return 0;
1234 }
1235 
1236 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1237 {
1238 	u64 gtod;
1239 
1240 	gtod = kvm_s390_get_tod_clock_fast(kvm);
1241 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1242 		return -EFAULT;
1243 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1244 
1245 	return 0;
1246 }
1247 
1248 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1249 {
1250 	int ret;
1251 
1252 	if (attr->flags)
1253 		return -EINVAL;
1254 
1255 	switch (attr->attr) {
1256 	case KVM_S390_VM_TOD_EXT:
1257 		ret = kvm_s390_get_tod_ext(kvm, attr);
1258 		break;
1259 	case KVM_S390_VM_TOD_HIGH:
1260 		ret = kvm_s390_get_tod_high(kvm, attr);
1261 		break;
1262 	case KVM_S390_VM_TOD_LOW:
1263 		ret = kvm_s390_get_tod_low(kvm, attr);
1264 		break;
1265 	default:
1266 		ret = -ENXIO;
1267 		break;
1268 	}
1269 	return ret;
1270 }
1271 
1272 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1273 {
1274 	struct kvm_s390_vm_cpu_processor *proc;
1275 	u16 lowest_ibc, unblocked_ibc;
1276 	int ret = 0;
1277 
1278 	mutex_lock(&kvm->lock);
1279 	if (kvm->created_vcpus) {
1280 		ret = -EBUSY;
1281 		goto out;
1282 	}
1283 	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1284 	if (!proc) {
1285 		ret = -ENOMEM;
1286 		goto out;
1287 	}
1288 	if (!copy_from_user(proc, (void __user *)attr->addr,
1289 			    sizeof(*proc))) {
1290 		kvm->arch.model.cpuid = proc->cpuid;
1291 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
1292 		unblocked_ibc = sclp.ibc & 0xfff;
1293 		if (lowest_ibc && proc->ibc) {
1294 			if (proc->ibc > unblocked_ibc)
1295 				kvm->arch.model.ibc = unblocked_ibc;
1296 			else if (proc->ibc < lowest_ibc)
1297 				kvm->arch.model.ibc = lowest_ibc;
1298 			else
1299 				kvm->arch.model.ibc = proc->ibc;
1300 		}
1301 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
1302 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
1303 		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1304 			 kvm->arch.model.ibc,
1305 			 kvm->arch.model.cpuid);
1306 		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1307 			 kvm->arch.model.fac_list[0],
1308 			 kvm->arch.model.fac_list[1],
1309 			 kvm->arch.model.fac_list[2]);
1310 	} else
1311 		ret = -EFAULT;
1312 	kfree(proc);
1313 out:
1314 	mutex_unlock(&kvm->lock);
1315 	return ret;
1316 }
1317 
1318 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1319 				       struct kvm_device_attr *attr)
1320 {
1321 	struct kvm_s390_vm_cpu_feat data;
1322 
1323 	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1324 		return -EFAULT;
1325 	if (!bitmap_subset((unsigned long *) data.feat,
1326 			   kvm_s390_available_cpu_feat,
1327 			   KVM_S390_VM_CPU_FEAT_NR_BITS))
1328 		return -EINVAL;
1329 
1330 	mutex_lock(&kvm->lock);
1331 	if (kvm->created_vcpus) {
1332 		mutex_unlock(&kvm->lock);
1333 		return -EBUSY;
1334 	}
1335 	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1336 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1337 	mutex_unlock(&kvm->lock);
1338 	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1339 			 data.feat[0],
1340 			 data.feat[1],
1341 			 data.feat[2]);
1342 	return 0;
1343 }
1344 
1345 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1346 					  struct kvm_device_attr *attr)
1347 {
1348 	mutex_lock(&kvm->lock);
1349 	if (kvm->created_vcpus) {
1350 		mutex_unlock(&kvm->lock);
1351 		return -EBUSY;
1352 	}
1353 
1354 	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1355 			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1356 		mutex_unlock(&kvm->lock);
1357 		return -EFAULT;
1358 	}
1359 	mutex_unlock(&kvm->lock);
1360 
1361 	VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1362 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1363 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1364 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1365 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1366 	VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1367 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1368 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1369 	VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1370 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1371 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1372 	VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1373 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1374 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1375 	VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1376 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1377 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1378 	VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1379 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1380 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1381 	VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1382 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1383 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1384 	VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1385 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1386 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1387 	VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1388 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1389 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1390 	VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1391 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1392 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1393 	VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1394 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1395 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1396 	VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1397 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1398 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1399 	VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1400 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1401 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1402 	VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1403 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1404 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1405 	VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1406 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1407 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1408 	VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1409 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1410 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1411 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1412 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1413 	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1414 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1415 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1416 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1417 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1418 
1419 	return 0;
1420 }
1421 
1422 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1423 {
1424 	int ret = -ENXIO;
1425 
1426 	switch (attr->attr) {
1427 	case KVM_S390_VM_CPU_PROCESSOR:
1428 		ret = kvm_s390_set_processor(kvm, attr);
1429 		break;
1430 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1431 		ret = kvm_s390_set_processor_feat(kvm, attr);
1432 		break;
1433 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1434 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
1435 		break;
1436 	}
1437 	return ret;
1438 }
1439 
1440 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1441 {
1442 	struct kvm_s390_vm_cpu_processor *proc;
1443 	int ret = 0;
1444 
1445 	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1446 	if (!proc) {
1447 		ret = -ENOMEM;
1448 		goto out;
1449 	}
1450 	proc->cpuid = kvm->arch.model.cpuid;
1451 	proc->ibc = kvm->arch.model.ibc;
1452 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1453 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1454 	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1455 		 kvm->arch.model.ibc,
1456 		 kvm->arch.model.cpuid);
1457 	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1458 		 kvm->arch.model.fac_list[0],
1459 		 kvm->arch.model.fac_list[1],
1460 		 kvm->arch.model.fac_list[2]);
1461 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1462 		ret = -EFAULT;
1463 	kfree(proc);
1464 out:
1465 	return ret;
1466 }
1467 
1468 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1469 {
1470 	struct kvm_s390_vm_cpu_machine *mach;
1471 	int ret = 0;
1472 
1473 	mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1474 	if (!mach) {
1475 		ret = -ENOMEM;
1476 		goto out;
1477 	}
1478 	get_cpu_id((struct cpuid *) &mach->cpuid);
1479 	mach->ibc = sclp.ibc;
1480 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1481 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1482 	memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
1483 	       sizeof(stfle_fac_list));
1484 	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1485 		 kvm->arch.model.ibc,
1486 		 kvm->arch.model.cpuid);
1487 	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1488 		 mach->fac_mask[0],
1489 		 mach->fac_mask[1],
1490 		 mach->fac_mask[2]);
1491 	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1492 		 mach->fac_list[0],
1493 		 mach->fac_list[1],
1494 		 mach->fac_list[2]);
1495 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1496 		ret = -EFAULT;
1497 	kfree(mach);
1498 out:
1499 	return ret;
1500 }
1501 
1502 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1503 				       struct kvm_device_attr *attr)
1504 {
1505 	struct kvm_s390_vm_cpu_feat data;
1506 
1507 	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1508 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1509 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1510 		return -EFAULT;
1511 	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1512 			 data.feat[0],
1513 			 data.feat[1],
1514 			 data.feat[2]);
1515 	return 0;
1516 }
1517 
1518 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1519 				     struct kvm_device_attr *attr)
1520 {
1521 	struct kvm_s390_vm_cpu_feat data;
1522 
1523 	bitmap_copy((unsigned long *) data.feat,
1524 		    kvm_s390_available_cpu_feat,
1525 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1526 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1527 		return -EFAULT;
1528 	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1529 			 data.feat[0],
1530 			 data.feat[1],
1531 			 data.feat[2]);
1532 	return 0;
1533 }
1534 
1535 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1536 					  struct kvm_device_attr *attr)
1537 {
1538 	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1539 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1540 		return -EFAULT;
1541 
1542 	VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1543 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1544 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1545 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1546 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1547 	VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1548 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1549 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1550 	VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1551 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1552 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1553 	VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1554 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1555 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1556 	VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1557 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1558 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1559 	VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1560 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1561 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1562 	VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1563 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1564 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1565 	VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1566 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1567 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1568 	VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1569 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1570 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1571 	VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1572 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1573 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1574 	VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1575 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1576 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1577 	VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1578 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1579 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1580 	VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1581 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1582 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1583 	VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1584 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1585 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1586 	VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1587 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1588 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1589 	VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1590 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1591 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1592 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1593 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1594 	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1595 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1596 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1597 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1598 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1599 
1600 	return 0;
1601 }
1602 
1603 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1604 					struct kvm_device_attr *attr)
1605 {
1606 	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1607 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1608 		return -EFAULT;
1609 
1610 	VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1611 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1612 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1613 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1614 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1615 	VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1616 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1617 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1618 	VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1619 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1620 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1621 	VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1622 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1623 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1624 	VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1625 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1626 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1627 	VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1628 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1629 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1630 	VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1631 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1632 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1633 	VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1634 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1635 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1636 	VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1637 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1638 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1639 	VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1640 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1641 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1642 	VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1643 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1644 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1645 	VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1646 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1647 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1648 	VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1649 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1650 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1651 	VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1652 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1653 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1654 	VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1655 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1656 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1657 	VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1658 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1659 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1660 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1661 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1662 	VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1663 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1664 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1665 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1666 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1667 
1668 	return 0;
1669 }
1670 
1671 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1672 {
1673 	int ret = -ENXIO;
1674 
1675 	switch (attr->attr) {
1676 	case KVM_S390_VM_CPU_PROCESSOR:
1677 		ret = kvm_s390_get_processor(kvm, attr);
1678 		break;
1679 	case KVM_S390_VM_CPU_MACHINE:
1680 		ret = kvm_s390_get_machine(kvm, attr);
1681 		break;
1682 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1683 		ret = kvm_s390_get_processor_feat(kvm, attr);
1684 		break;
1685 	case KVM_S390_VM_CPU_MACHINE_FEAT:
1686 		ret = kvm_s390_get_machine_feat(kvm, attr);
1687 		break;
1688 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1689 		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1690 		break;
1691 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1692 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1693 		break;
1694 	}
1695 	return ret;
1696 }
1697 
1698 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1699 {
1700 	int ret;
1701 
1702 	switch (attr->group) {
1703 	case KVM_S390_VM_MEM_CTRL:
1704 		ret = kvm_s390_set_mem_control(kvm, attr);
1705 		break;
1706 	case KVM_S390_VM_TOD:
1707 		ret = kvm_s390_set_tod(kvm, attr);
1708 		break;
1709 	case KVM_S390_VM_CPU_MODEL:
1710 		ret = kvm_s390_set_cpu_model(kvm, attr);
1711 		break;
1712 	case KVM_S390_VM_CRYPTO:
1713 		ret = kvm_s390_vm_set_crypto(kvm, attr);
1714 		break;
1715 	case KVM_S390_VM_MIGRATION:
1716 		ret = kvm_s390_vm_set_migration(kvm, attr);
1717 		break;
1718 	default:
1719 		ret = -ENXIO;
1720 		break;
1721 	}
1722 
1723 	return ret;
1724 }
1725 
1726 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1727 {
1728 	int ret;
1729 
1730 	switch (attr->group) {
1731 	case KVM_S390_VM_MEM_CTRL:
1732 		ret = kvm_s390_get_mem_control(kvm, attr);
1733 		break;
1734 	case KVM_S390_VM_TOD:
1735 		ret = kvm_s390_get_tod(kvm, attr);
1736 		break;
1737 	case KVM_S390_VM_CPU_MODEL:
1738 		ret = kvm_s390_get_cpu_model(kvm, attr);
1739 		break;
1740 	case KVM_S390_VM_MIGRATION:
1741 		ret = kvm_s390_vm_get_migration(kvm, attr);
1742 		break;
1743 	default:
1744 		ret = -ENXIO;
1745 		break;
1746 	}
1747 
1748 	return ret;
1749 }
1750 
1751 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1752 {
1753 	int ret;
1754 
1755 	switch (attr->group) {
1756 	case KVM_S390_VM_MEM_CTRL:
1757 		switch (attr->attr) {
1758 		case KVM_S390_VM_MEM_ENABLE_CMMA:
1759 		case KVM_S390_VM_MEM_CLR_CMMA:
1760 			ret = sclp.has_cmma ? 0 : -ENXIO;
1761 			break;
1762 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1763 			ret = 0;
1764 			break;
1765 		default:
1766 			ret = -ENXIO;
1767 			break;
1768 		}
1769 		break;
1770 	case KVM_S390_VM_TOD:
1771 		switch (attr->attr) {
1772 		case KVM_S390_VM_TOD_LOW:
1773 		case KVM_S390_VM_TOD_HIGH:
1774 			ret = 0;
1775 			break;
1776 		default:
1777 			ret = -ENXIO;
1778 			break;
1779 		}
1780 		break;
1781 	case KVM_S390_VM_CPU_MODEL:
1782 		switch (attr->attr) {
1783 		case KVM_S390_VM_CPU_PROCESSOR:
1784 		case KVM_S390_VM_CPU_MACHINE:
1785 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1786 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1787 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1788 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1789 			ret = 0;
1790 			break;
1791 		default:
1792 			ret = -ENXIO;
1793 			break;
1794 		}
1795 		break;
1796 	case KVM_S390_VM_CRYPTO:
1797 		switch (attr->attr) {
1798 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1799 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1800 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1801 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1802 			ret = 0;
1803 			break;
1804 		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1805 		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1806 			ret = ap_instructions_available() ? 0 : -ENXIO;
1807 			break;
1808 		default:
1809 			ret = -ENXIO;
1810 			break;
1811 		}
1812 		break;
1813 	case KVM_S390_VM_MIGRATION:
1814 		ret = 0;
1815 		break;
1816 	default:
1817 		ret = -ENXIO;
1818 		break;
1819 	}
1820 
1821 	return ret;
1822 }
1823 
1824 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1825 {
1826 	uint8_t *keys;
1827 	uint64_t hva;
1828 	int srcu_idx, i, r = 0;
1829 
1830 	if (args->flags != 0)
1831 		return -EINVAL;
1832 
1833 	/* Is this guest using storage keys? */
1834 	if (!mm_uses_skeys(current->mm))
1835 		return KVM_S390_GET_SKEYS_NONE;
1836 
1837 	/* Enforce sane limit on memory allocation */
1838 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1839 		return -EINVAL;
1840 
1841 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1842 	if (!keys)
1843 		return -ENOMEM;
1844 
1845 	mmap_read_lock(current->mm);
1846 	srcu_idx = srcu_read_lock(&kvm->srcu);
1847 	for (i = 0; i < args->count; i++) {
1848 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1849 		if (kvm_is_error_hva(hva)) {
1850 			r = -EFAULT;
1851 			break;
1852 		}
1853 
1854 		r = get_guest_storage_key(current->mm, hva, &keys[i]);
1855 		if (r)
1856 			break;
1857 	}
1858 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1859 	mmap_read_unlock(current->mm);
1860 
1861 	if (!r) {
1862 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1863 				 sizeof(uint8_t) * args->count);
1864 		if (r)
1865 			r = -EFAULT;
1866 	}
1867 
1868 	kvfree(keys);
1869 	return r;
1870 }
1871 
1872 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1873 {
1874 	uint8_t *keys;
1875 	uint64_t hva;
1876 	int srcu_idx, i, r = 0;
1877 	bool unlocked;
1878 
1879 	if (args->flags != 0)
1880 		return -EINVAL;
1881 
1882 	/* Enforce sane limit on memory allocation */
1883 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1884 		return -EINVAL;
1885 
1886 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1887 	if (!keys)
1888 		return -ENOMEM;
1889 
1890 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1891 			   sizeof(uint8_t) * args->count);
1892 	if (r) {
1893 		r = -EFAULT;
1894 		goto out;
1895 	}
1896 
1897 	/* Enable storage key handling for the guest */
1898 	r = s390_enable_skey();
1899 	if (r)
1900 		goto out;
1901 
1902 	i = 0;
1903 	mmap_read_lock(current->mm);
1904 	srcu_idx = srcu_read_lock(&kvm->srcu);
1905         while (i < args->count) {
1906 		unlocked = false;
1907 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1908 		if (kvm_is_error_hva(hva)) {
1909 			r = -EFAULT;
1910 			break;
1911 		}
1912 
1913 		/* Lowest order bit is reserved */
1914 		if (keys[i] & 0x01) {
1915 			r = -EINVAL;
1916 			break;
1917 		}
1918 
1919 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1920 		if (r) {
1921 			r = fixup_user_fault(current->mm, hva,
1922 					     FAULT_FLAG_WRITE, &unlocked);
1923 			if (r)
1924 				break;
1925 		}
1926 		if (!r)
1927 			i++;
1928 	}
1929 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1930 	mmap_read_unlock(current->mm);
1931 out:
1932 	kvfree(keys);
1933 	return r;
1934 }
1935 
1936 /*
1937  * Base address and length must be sent at the start of each block, therefore
1938  * it's cheaper to send some clean data, as long as it's less than the size of
1939  * two longs.
1940  */
1941 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1942 /* for consistency */
1943 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1944 
1945 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1946 			      u8 *res, unsigned long bufsize)
1947 {
1948 	unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1949 
1950 	args->count = 0;
1951 	while (args->count < bufsize) {
1952 		hva = gfn_to_hva(kvm, cur_gfn);
1953 		/*
1954 		 * We return an error if the first value was invalid, but we
1955 		 * return successfully if at least one value was copied.
1956 		 */
1957 		if (kvm_is_error_hva(hva))
1958 			return args->count ? 0 : -EFAULT;
1959 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1960 			pgstev = 0;
1961 		res[args->count++] = (pgstev >> 24) & 0x43;
1962 		cur_gfn++;
1963 	}
1964 
1965 	return 0;
1966 }
1967 
/*
 * Thin wrapper that looks up the memslot for @gfn in @slots through
 * ____gfn_to_memslot() with its third argument set to true.
 * NOTE(review): presumably this selects the "approximate" lookup that may
 * return a nearby slot when @gfn is not covered by any slot - confirm
 * against the definition of ____gfn_to_memslot().
 */
static struct kvm_memory_slot *gfn_to_memslot_approx(struct kvm_memslots *slots,
						     gfn_t gfn)
{
	return ____gfn_to_memslot(slots, gfn, true);
}
1973 
/*
 * Find the next page whose bit is set in a memslot's second dirty bitmap
 * (kvm_second_dirty_bitmap()), starting the search at @cur_gfn.
 *
 * The search begins in the slot returned by gfn_to_memslot_approx(). If
 * @cur_gfn lies at or past the end of that slot, the search continues at
 * the start of the next slot in gfn_tree order, wrapping around to the
 * first slot when there is no next one. Subsequent slots are then scanned
 * until a set bit is found or the last slot has been examined.
 *
 * Returns the gfn of the set bit. If no bit is found, the returned value is
 * the base gfn of the last slot examined plus the offset returned by the
 * bitmap search, i.e. a gfn that is not below the end of that slot.
 *
 * The caller must make sure @slots is not empty (ms is dereferenced
 * unconditionally).
 */
static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
					      unsigned long cur_gfn)
{
	struct kvm_memory_slot *ms = gfn_to_memslot_approx(slots, cur_gfn);
	/*
	 * NOTE(review): if the approximate lookup can return a slot whose
	 * base_gfn is above cur_gfn, this subtraction wraps - confirm.
	 */
	unsigned long ofs = cur_gfn - ms->base_gfn;
	struct rb_node *mnode = &ms->gfn_node[slots->node_idx];

	/* cur_gfn is beyond the end of the slot we got: move to the next one */
	if (ms->base_gfn + ms->npages <= cur_gfn) {
		mnode = rb_next(mnode);
		/* If we are above the highest slot, wrap around */
		if (!mnode)
			mnode = rb_first(&slots->gfn_tree);

		ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
		ofs = 0;
	}
	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
	/* Nothing found in this slot: keep scanning the following slots */
	while (ofs >= ms->npages && (mnode = rb_next(mnode))) {
		ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
		ofs = find_first_bit(kvm_second_dirty_bitmap(ms), ms->npages);
	}
	return ms->base_gfn + ofs;
}
1997 
/*
 * Collect the CMMA values of a run of pages beginning at the first dirty
 * page found at or after args->start_gfn (see kvm_s390_next_dirty_cmma()).
 *
 * On return args->start_gfn holds the gfn of the first value stored in @res
 * and args->count the number of values stored (at most @bufsize). The dirty
 * bit of every page that is reported is cleared and the VM-wide
 * cmma_dirty_pages counter is decremented for each bit that was actually
 * set.
 *
 * The function always returns 0; an empty result is signalled through
 * args->count == 0.
 */
static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
			     u8 *res, unsigned long bufsize)
{
	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
	struct kvm_memslots *slots = kvm_memslots(kvm);
	struct kvm_memory_slot *ms;

	/* kvm_s390_next_dirty_cmma() requires at least one memslot */
	if (unlikely(kvm_memslots_empty(slots)))
		return 0;

	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
	ms = gfn_to_memslot(kvm, cur_gfn);
	args->count = 0;
	args->start_gfn = cur_gfn;
	/* The search ended outside of any memslot: nothing to report */
	if (!ms)
		return 0;
	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
	mem_end = kvm_s390_get_gfn_end(slots);

	while (args->count < bufsize) {
		hva = gfn_to_hva(kvm, cur_gfn);
		if (kvm_is_error_hva(hva))
			return 0;
		/* Decrement only if we actually flipped the bit to 0 */
		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
			atomic64_dec(&kvm->arch.cmma_dirty_pages);
		/* A page whose PGSTE cannot be read is reported as 0 */
		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
			pgstev = 0;
		/* Save the value */
		res[args->count++] = (pgstev >> 24) & 0x43;
		/* If the next bit is too far away, stop. */
		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
			return 0;
		/* If we reached the previous "next", find the next one */
		if (cur_gfn == next_gfn)
			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
		/* Reached the end of memory or of the buffer, stop */
		if ((next_gfn >= mem_end) ||
		    (next_gfn - args->start_gfn >= bufsize))
			return 0;
		cur_gfn++;
		/* Reached the end of the current memslot, take the next one. */
		if (cur_gfn - ms->base_gfn >= ms->npages) {
			ms = gfn_to_memslot(kvm, cur_gfn);
			if (!ms)
				return 0;
		}
	}
	return 0;
}
2048 
2049 /*
2050  * This function searches for the next page with dirty CMMA attributes, and
2051  * saves the attributes in the buffer up to either the end of the buffer or
2052  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2053  * no trailing clean bytes are saved.
2054  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2055  * output buffer will indicate 0 as length.
2056  */
2057 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2058 				  struct kvm_s390_cmma_log *args)
2059 {
2060 	unsigned long bufsize;
2061 	int srcu_idx, peek, ret;
2062 	u8 *values;
2063 
2064 	if (!kvm->arch.use_cmma)
2065 		return -ENXIO;
2066 	/* Invalid/unsupported flags were specified */
2067 	if (args->flags & ~KVM_S390_CMMA_PEEK)
2068 		return -EINVAL;
2069 	/* Migration mode query, and we are not doing a migration */
2070 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2071 	if (!peek && !kvm->arch.migration_mode)
2072 		return -EINVAL;
2073 	/* CMMA is disabled or was not used, or the buffer has length zero */
2074 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2075 	if (!bufsize || !kvm->mm->context.uses_cmm) {
2076 		memset(args, 0, sizeof(*args));
2077 		return 0;
2078 	}
2079 	/* We are not peeking, and there are no dirty pages */
2080 	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2081 		memset(args, 0, sizeof(*args));
2082 		return 0;
2083 	}
2084 
2085 	values = vmalloc(bufsize);
2086 	if (!values)
2087 		return -ENOMEM;
2088 
2089 	mmap_read_lock(kvm->mm);
2090 	srcu_idx = srcu_read_lock(&kvm->srcu);
2091 	if (peek)
2092 		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2093 	else
2094 		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2095 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2096 	mmap_read_unlock(kvm->mm);
2097 
2098 	if (kvm->arch.migration_mode)
2099 		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2100 	else
2101 		args->remaining = 0;
2102 
2103 	if (copy_to_user((void __user *)args->values, values, args->count))
2104 		ret = -EFAULT;
2105 
2106 	vfree(values);
2107 	return ret;
2108 }
2109 
2110 /*
2111  * This function sets the CMMA attributes for the given pages. If the input
2112  * buffer has zero length, no action is taken, otherwise the attributes are
2113  * set and the mm->context.uses_cmm flag is set.
2114  */
2115 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2116 				  const struct kvm_s390_cmma_log *args)
2117 {
2118 	unsigned long hva, mask, pgstev, i;
2119 	uint8_t *bits;
2120 	int srcu_idx, r = 0;
2121 
2122 	mask = args->mask;
2123 
2124 	if (!kvm->arch.use_cmma)
2125 		return -ENXIO;
2126 	/* invalid/unsupported flags */
2127 	if (args->flags != 0)
2128 		return -EINVAL;
2129 	/* Enforce sane limit on memory allocation */
2130 	if (args->count > KVM_S390_CMMA_SIZE_MAX)
2131 		return -EINVAL;
2132 	/* Nothing to do */
2133 	if (args->count == 0)
2134 		return 0;
2135 
2136 	bits = vmalloc(array_size(sizeof(*bits), args->count));
2137 	if (!bits)
2138 		return -ENOMEM;
2139 
2140 	r = copy_from_user(bits, (void __user *)args->values, args->count);
2141 	if (r) {
2142 		r = -EFAULT;
2143 		goto out;
2144 	}
2145 
2146 	mmap_read_lock(kvm->mm);
2147 	srcu_idx = srcu_read_lock(&kvm->srcu);
2148 	for (i = 0; i < args->count; i++) {
2149 		hva = gfn_to_hva(kvm, args->start_gfn + i);
2150 		if (kvm_is_error_hva(hva)) {
2151 			r = -EFAULT;
2152 			break;
2153 		}
2154 
2155 		pgstev = bits[i];
2156 		pgstev = pgstev << 24;
2157 		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2158 		set_pgste_bits(kvm->mm, hva, mask, pgstev);
2159 	}
2160 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2161 	mmap_read_unlock(kvm->mm);
2162 
2163 	if (!kvm->mm->context.uses_cmm) {
2164 		mmap_write_lock(kvm->mm);
2165 		kvm->mm->context.uses_cmm = 1;
2166 		mmap_write_unlock(kvm->mm);
2167 	}
2168 out:
2169 	vfree(bits);
2170 	return r;
2171 }
2172 
2173 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2174 {
2175 	struct kvm_vcpu *vcpu;
2176 	u16 rc, rrc;
2177 	int ret = 0;
2178 	unsigned long i;
2179 
2180 	/*
2181 	 * We ignore failures and try to destroy as many CPUs as possible.
2182 	 * At the same time we must not free the assigned resources when
2183 	 * this fails, as the ultravisor has still access to that memory.
2184 	 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2185 	 * behind.
2186 	 * We want to return the first failure rc and rrc, though.
2187 	 */
2188 	kvm_for_each_vcpu(i, vcpu, kvm) {
2189 		mutex_lock(&vcpu->mutex);
2190 		if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2191 			*rcp = rc;
2192 			*rrcp = rrc;
2193 			ret = -EIO;
2194 		}
2195 		mutex_unlock(&vcpu->mutex);
2196 	}
2197 	return ret;
2198 }
2199 
2200 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2201 {
2202 	unsigned long i;
2203 	int r = 0;
2204 	u16 dummy;
2205 
2206 	struct kvm_vcpu *vcpu;
2207 
2208 	kvm_for_each_vcpu(i, vcpu, kvm) {
2209 		mutex_lock(&vcpu->mutex);
2210 		r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2211 		mutex_unlock(&vcpu->mutex);
2212 		if (r)
2213 			break;
2214 	}
2215 	if (r)
2216 		kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2217 	return r;
2218 }
2219 
/*
 * Execute one protected-virtualization command (KVM_S390_PV_COMMAND).
 *
 * @cmd->cmd selects the operation, @cmd->data is a userspace pointer to
 * command specific parameters, and @cmd->rc / @cmd->rrc receive the return
 * codes of the ultravisor calls that are issued.
 *
 * Every command except KVM_PV_ENABLE fails with -EINVAL unless the VM is
 * currently protected; KVM_PV_ENABLE fails with -EINVAL if it already is.
 * Unknown commands yield -ENOTTY.
 */
static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
{
	int r = 0;
	u16 dummy;
	void __user *argp = (void __user *)cmd->data;

	switch (cmd->cmd) {
	case KVM_PV_ENABLE: {
		r = -EINVAL;
		if (kvm_s390_pv_is_protected(kvm))
			break;

		/*
		 *  FMT 4 SIE needs esca. As we never switch back to bsca from
		 *  esca, we need no cleanup in the error cases below
		 */
		r = sca_switch_to_extended(kvm);
		if (r)
			break;

		mmap_write_lock(current->mm);
		r = gmap_mark_unmergeable();
		mmap_write_unlock(current->mm);
		if (r)
			break;

		r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
		if (r)
			break;

		/* Convert all vcpus; undo the VM setup if any of them fails */
		r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
		if (r)
			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);

		/*
		 * we need to block service interrupts from now on
		 *
		 * NOTE(review): this also runs when kvm_s390_cpus_to_pv()
		 * failed and the VM was deinitialized again - confirm that
		 * masking service interrupts on the failure path is intended.
		 */
		set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
		break;
	}
	case KVM_PV_DISABLE: {
		r = -EINVAL;
		if (!kvm_s390_pv_is_protected(kvm))
			break;

		r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
		/*
		 * If a CPU could not be destroyed, destroy VM will also fail.
		 * There is no point in trying to destroy it. Instead return
		 * the rc and rrc from the first CPU that failed destroying.
		 */
		if (r)
			break;
		r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);

		/*
		 * no need to block service interrupts any more
		 *
		 * NOTE(review): the bit is cleared regardless of the result
		 * of kvm_s390_pv_deinit_vm() - confirm this is intended.
		 */
		clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
		break;
	}
	case KVM_PV_SET_SEC_PARMS: {
		struct kvm_s390_pv_sec_parm parms = {};
		void *hdr;

		r = -EINVAL;
		if (!kvm_s390_pv_is_protected(kvm))
			break;

		r = -EFAULT;
		if (copy_from_user(&parms, argp, sizeof(parms)))
			break;

		/* Currently restricted to 8KB */
		r = -EINVAL;
		if (parms.length > PAGE_SIZE * 2)
			break;

		/* Bounce the header through a kernel buffer */
		r = -ENOMEM;
		hdr = vmalloc(parms.length);
		if (!hdr)
			break;

		/* r stays -EFAULT unless the header could be copied in */
		r = -EFAULT;
		if (!copy_from_user(hdr, (void __user *)parms.origin,
				    parms.length))
			r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
						      &cmd->rc, &cmd->rrc);

		vfree(hdr);
		break;
	}
	case KVM_PV_UNPACK: {
		struct kvm_s390_pv_unp unp = {};

		/* Both the VM and its mm must be in protected state */
		r = -EINVAL;
		if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
			break;

		r = -EFAULT;
		if (copy_from_user(&unp, argp, sizeof(unp)))
			break;

		r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
				       &cmd->rc, &cmd->rrc);
		break;
	}
	case KVM_PV_VERIFY: {
		r = -EINVAL;
		if (!kvm_s390_pv_is_protected(kvm))
			break;

		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
				  UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
		KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
			     cmd->rrc);
		break;
	}
	case KVM_PV_PREP_RESET: {
		r = -EINVAL;
		if (!kvm_s390_pv_is_protected(kvm))
			break;

		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
				  UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
		KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
			     cmd->rc, cmd->rrc);
		break;
	}
	case KVM_PV_UNSHARE_ALL: {
		r = -EINVAL;
		if (!kvm_s390_pv_is_protected(kvm))
			break;

		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
				  UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
		KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
			     cmd->rc, cmd->rrc);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}
2361 
/*
 * Dispatcher for the s390 specific VM ioctls.
 *
 * @filp: file of the VM; its private_data is the struct kvm.
 * @ioctl: ioctl number.
 * @arg: ioctl argument, used as a userspace pointer to the request
 *	 structure by every command that takes one.
 *
 * Each case copies its request structure from userspace, calls the matching
 * handler and, where the handler updates the request, copies it back.
 * Returns the handler's result, -EFAULT if a userspace copy fails, or
 * -ENOTTY for an ioctl that is not handled here.
 */
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		/* Only valid if the VM was set up to use the in-kernel irqchip */
		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_get_skeys(kvm, &args);
		break;
	}
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	case KVM_S390_GET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		/* The handler runs with slots_lock held */
		mutex_lock(&kvm->slots_lock);
		r = kvm_s390_get_cmma_bits(kvm, &args);
		mutex_unlock(&kvm->slots_lock);
		/* On success, return the updated request to userspace */
		if (!r) {
			r = copy_to_user(argp, &args, sizeof(args));
			if (r)
				r = -EFAULT;
		}
		break;
	}
	case KVM_S390_SET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		mutex_lock(&kvm->slots_lock);
		r = kvm_s390_set_cmma_bits(kvm, &args);
		mutex_unlock(&kvm->slots_lock);
		break;
	}
	case KVM_S390_PV_COMMAND: {
		struct kvm_pv_cmd args;

		/* protvirt means user cpu state */
		kvm_s390_set_user_cpu_state_ctrl(kvm);
		r = 0;
		if (!is_prot_virt_host()) {
			r = -EINVAL;
			break;
		}
		if (copy_from_user(&args, argp, sizeof(args))) {
			r = -EFAULT;
			break;
		}
		/* No flags are defined for PV commands */
		if (args.flags) {
			r = -EINVAL;
			break;
		}
		mutex_lock(&kvm->lock);
		r = kvm_s390_handle_pv(kvm, &args);
		mutex_unlock(&kvm->lock);
		/*
		 * The request is copied back even if the command failed, so
		 * that userspace can inspect rc/rrc; a failing copy replaces
		 * the command's result with -EFAULT.
		 */
		if (copy_to_user(argp, &args, sizeof(args))) {
			r = -EFAULT;
			break;
		}
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}
2492 
2493 static int kvm_s390_apxa_installed(void)
2494 {
2495 	struct ap_config_info info;
2496 
2497 	if (ap_instructions_available()) {
2498 		if (ap_qci(&info) == 0)
2499 			return info.apxa;
2500 	}
2501 
2502 	return 0;
2503 }
2504 
2505 /*
2506  * The format of the crypto control block (CRYCB) is specified in the 3 low
2507  * order bits of the CRYCB designation (CRYCBD) field as follows:
2508  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2509  *	     AP extended addressing (APXA) facility are installed.
2510  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2511  * Format 2: Both the APXA and MSAX3 facilities are installed
2512  */
2513 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2514 {
2515 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2516 
2517 	/* Clear the CRYCB format bits - i.e., set format 0 by default */
2518 	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2519 
2520 	/* Check whether MSAX3 is installed */
2521 	if (!test_kvm_facility(kvm, 76))
2522 		return;
2523 
2524 	if (kvm_s390_apxa_installed())
2525 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2526 	else
2527 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2528 }
2529 
2530 /*
2531  * kvm_arch_crypto_set_masks
2532  *
2533  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2534  *	 to be set.
2535  * @apm: the mask identifying the accessible AP adapters
2536  * @aqm: the mask identifying the accessible AP domains
2537  * @adm: the mask identifying the accessible AP control domains
2538  *
2539  * Set the masks that identify the adapters, domains and control domains to
2540  * which the KVM guest is granted access.
2541  *
2542  * Note: The kvm->lock mutex must be locked by the caller before invoking this
2543  *	 function.
2544  */
2545 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2546 			       unsigned long *aqm, unsigned long *adm)
2547 {
2548 	struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2549 
2550 	kvm_s390_vcpu_block_all(kvm);
2551 
2552 	switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2553 	case CRYCB_FORMAT2: /* APCB1 use 256 bits */
2554 		memcpy(crycb->apcb1.apm, apm, 32);
2555 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2556 			 apm[0], apm[1], apm[2], apm[3]);
2557 		memcpy(crycb->apcb1.aqm, aqm, 32);
2558 		VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2559 			 aqm[0], aqm[1], aqm[2], aqm[3]);
2560 		memcpy(crycb->apcb1.adm, adm, 32);
2561 		VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2562 			 adm[0], adm[1], adm[2], adm[3]);
2563 		break;
2564 	case CRYCB_FORMAT1:
2565 	case CRYCB_FORMAT0: /* Fall through both use APCB0 */
2566 		memcpy(crycb->apcb0.apm, apm, 8);
2567 		memcpy(crycb->apcb0.aqm, aqm, 2);
2568 		memcpy(crycb->apcb0.adm, adm, 2);
2569 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2570 			 apm[0], *((unsigned short *)aqm),
2571 			 *((unsigned short *)adm));
2572 		break;
2573 	default:	/* Can not happen */
2574 		break;
2575 	}
2576 
2577 	/* recreate the shadow crycb for each vcpu */
2578 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2579 	kvm_s390_vcpu_unblock_all(kvm);
2580 }
2581 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2582 
2583 /*
2584  * kvm_arch_crypto_clear_masks
2585  *
2586  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2587  *	 to be cleared.
2588  *
2589  * Clear the masks that identify the adapters, domains and control domains to
2590  * which the KVM guest is granted access.
2591  *
2592  * Note: The kvm->lock mutex must be locked by the caller before invoking this
2593  *	 function.
2594  */
2595 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2596 {
2597 	kvm_s390_vcpu_block_all(kvm);
2598 
2599 	memset(&kvm->arch.crypto.crycb->apcb0, 0,
2600 	       sizeof(kvm->arch.crypto.crycb->apcb0));
2601 	memset(&kvm->arch.crypto.crycb->apcb1, 0,
2602 	       sizeof(kvm->arch.crypto.crycb->apcb1));
2603 
2604 	VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2605 	/* recreate the shadow crycb for each vcpu */
2606 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2607 	kvm_s390_vcpu_unblock_all(kvm);
2608 }
2609 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2610 
2611 static u64 kvm_s390_get_initial_cpuid(void)
2612 {
2613 	struct cpuid cpuid;
2614 
2615 	get_cpu_id(&cpuid);
2616 	cpuid.version = 0xff;
2617 	return *((u64 *) &cpuid);
2618 }
2619 
2620 static void kvm_s390_crypto_init(struct kvm *kvm)
2621 {
2622 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2623 	kvm_s390_set_crycb_format(kvm);
2624 	init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);
2625 
2626 	if (!test_kvm_facility(kvm, 76))
2627 		return;
2628 
2629 	/* Enable AES/DEA protected key functions by default */
2630 	kvm->arch.crypto.aes_kw = 1;
2631 	kvm->arch.crypto.dea_kw = 1;
2632 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2633 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2634 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2635 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2636 }
2637 
2638 static void sca_dispose(struct kvm *kvm)
2639 {
2640 	if (kvm->arch.use_esca)
2641 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2642 	else
2643 		free_page((unsigned long)(kvm->arch.sca));
2644 	kvm->arch.sca = NULL;
2645 }
2646 
2647 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2648 {
2649 	gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
2650 	int i, rc;
2651 	char debug_name[16];
2652 	static unsigned long sca_offset;
2653 
2654 	rc = -EINVAL;
2655 #ifdef CONFIG_KVM_S390_UCONTROL
2656 	if (type & ~KVM_VM_S390_UCONTROL)
2657 		goto out_err;
2658 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2659 		goto out_err;
2660 #else
2661 	if (type)
2662 		goto out_err;
2663 #endif
2664 
2665 	rc = s390_enable_sie();
2666 	if (rc)
2667 		goto out_err;
2668 
2669 	rc = -ENOMEM;
2670 
2671 	if (!sclp.has_64bscao)
2672 		alloc_flags |= GFP_DMA;
2673 	rwlock_init(&kvm->arch.sca_lock);
2674 	/* start with basic SCA */
2675 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2676 	if (!kvm->arch.sca)
2677 		goto out_err;
2678 	mutex_lock(&kvm_lock);
2679 	sca_offset += 16;
2680 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2681 		sca_offset = 0;
2682 	kvm->arch.sca = (struct bsca_block *)
2683 			((char *) kvm->arch.sca + sca_offset);
2684 	mutex_unlock(&kvm_lock);
2685 
2686 	sprintf(debug_name, "kvm-%u", current->pid);
2687 
2688 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2689 	if (!kvm->arch.dbf)
2690 		goto out_err;
2691 
2692 	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2693 	kvm->arch.sie_page2 =
2694 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
2695 	if (!kvm->arch.sie_page2)
2696 		goto out_err;
2697 
2698 	kvm->arch.sie_page2->kvm = kvm;
2699 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2700 
2701 	for (i = 0; i < kvm_s390_fac_size(); i++) {
2702 		kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
2703 					      (kvm_s390_fac_base[i] |
2704 					       kvm_s390_fac_ext[i]);
2705 		kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
2706 					      kvm_s390_fac_base[i];
2707 	}
2708 	kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2709 
2710 	/* we are always in czam mode - even on pre z14 machines */
2711 	set_kvm_facility(kvm->arch.model.fac_mask, 138);
2712 	set_kvm_facility(kvm->arch.model.fac_list, 138);
2713 	/* we emulate STHYI in kvm */
2714 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
2715 	set_kvm_facility(kvm->arch.model.fac_list, 74);
2716 	if (MACHINE_HAS_TLB_GUEST) {
2717 		set_kvm_facility(kvm->arch.model.fac_mask, 147);
2718 		set_kvm_facility(kvm->arch.model.fac_list, 147);
2719 	}
2720 
2721 	if (css_general_characteristics.aiv && test_facility(65))
2722 		set_kvm_facility(kvm->arch.model.fac_mask, 65);
2723 
2724 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2725 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2726 
2727 	kvm_s390_crypto_init(kvm);
2728 
2729 	mutex_init(&kvm->arch.float_int.ais_lock);
2730 	spin_lock_init(&kvm->arch.float_int.lock);
2731 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
2732 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2733 	init_waitqueue_head(&kvm->arch.ipte_wq);
2734 	mutex_init(&kvm->arch.ipte_mutex);
2735 
2736 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2737 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
2738 
2739 	if (type & KVM_VM_S390_UCONTROL) {
2740 		kvm->arch.gmap = NULL;
2741 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2742 	} else {
2743 		if (sclp.hamax == U64_MAX)
2744 			kvm->arch.mem_limit = TASK_SIZE_MAX;
2745 		else
2746 			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2747 						    sclp.hamax + 1);
2748 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2749 		if (!kvm->arch.gmap)
2750 			goto out_err;
2751 		kvm->arch.gmap->private = kvm;
2752 		kvm->arch.gmap->pfault_enabled = 0;
2753 	}
2754 
2755 	kvm->arch.use_pfmfi = sclp.has_pfmfi;
2756 	kvm->arch.use_skf = sclp.has_skey;
2757 	spin_lock_init(&kvm->arch.start_stop_lock);
2758 	kvm_s390_vsie_init(kvm);
2759 	if (use_gisa)
2760 		kvm_s390_gisa_init(kvm);
2761 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2762 
2763 	return 0;
2764 out_err:
2765 	free_page((unsigned long)kvm->arch.sie_page2);
2766 	debug_unregister(kvm->arch.dbf);
2767 	sca_dispose(kvm);
2768 	KVM_EVENT(3, "creation of vm failed: %d", rc);
2769 	return rc;
2770 }
2771 
2772 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2773 {
2774 	u16 rc, rrc;
2775 
2776 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2777 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2778 	kvm_s390_clear_local_irqs(vcpu);
2779 	kvm_clear_async_pf_completion_queue(vcpu);
2780 	if (!kvm_is_ucontrol(vcpu->kvm))
2781 		sca_del_vcpu(vcpu);
2782 
2783 	if (kvm_is_ucontrol(vcpu->kvm))
2784 		gmap_remove(vcpu->arch.gmap);
2785 
2786 	if (vcpu->kvm->arch.use_cmma)
2787 		kvm_s390_vcpu_unsetup_cmma(vcpu);
2788 	/* We can not hold the vcpu mutex here, we are already dying */
2789 	if (kvm_s390_pv_cpu_get_handle(vcpu))
2790 		kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2791 	free_page((unsigned long)(vcpu->arch.sie_block));
2792 }
2793 
/*
 * Architecture specific part of VM destruction: destroy all vcpus and then
 * release the VM-wide resources (SCA, GISA, protected VM state, debug area,
 * sie_page2, gmap, adapters, floating interrupts, vsie state).
 */
void kvm_arch_destroy_vm(struct kvm *kvm)
{
	/* rc/rrc of the ultravisor call are not evaluated here */
	u16 rc, rrc;

	kvm_destroy_vcpus(kvm);
	sca_dispose(kvm);
	kvm_s390_gisa_destroy(kvm);
	/*
	 * We are already at the end of life and kvm->lock is not taken.
	 * This is ok as the file descriptor is closed by now and nobody
	 * can mess with the pv state. To avoid lockdep_assert_held from
	 * complaining we do not use kvm_s390_pv_is_protected.
	 */
	if (kvm_s390_pv_get_handle(kvm))
		kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
	debug_unregister(kvm->arch.dbf);
	free_page((unsigned long)kvm->arch.sie_page2);
	/* User controlled VMs have no VM-wide gmap to remove */
	if (!kvm_is_ucontrol(kvm))
		gmap_remove(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	kvm_s390_vsie_destroy(kvm);
	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
2818 
2819 /* Section: vcpu related */
2820 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2821 {
2822 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2823 	if (!vcpu->arch.gmap)
2824 		return -ENOMEM;
2825 	vcpu->arch.gmap->private = vcpu->kvm;
2826 
2827 	return 0;
2828 }
2829 
2830 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2831 {
2832 	if (!kvm_s390_use_sca_entries())
2833 		return;
2834 	read_lock(&vcpu->kvm->arch.sca_lock);
2835 	if (vcpu->kvm->arch.use_esca) {
2836 		struct esca_block *sca = vcpu->kvm->arch.sca;
2837 
2838 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2839 		sca->cpu[vcpu->vcpu_id].sda = 0;
2840 	} else {
2841 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2842 
2843 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2844 		sca->cpu[vcpu->vcpu_id].sda = 0;
2845 	}
2846 	read_unlock(&vcpu->kvm->arch.sca_lock);
2847 }
2848 
2849 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2850 {
2851 	if (!kvm_s390_use_sca_entries()) {
2852 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2853 
2854 		/* we still need the basic sca for the ipte control */
2855 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2856 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2857 		return;
2858 	}
2859 	read_lock(&vcpu->kvm->arch.sca_lock);
2860 	if (vcpu->kvm->arch.use_esca) {
2861 		struct esca_block *sca = vcpu->kvm->arch.sca;
2862 
2863 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2864 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2865 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2866 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2867 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2868 	} else {
2869 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2870 
2871 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2872 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2873 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2874 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2875 	}
2876 	read_unlock(&vcpu->kvm->arch.sca_lock);
2877 }
2878 
2879 /* Basic SCA to Extended SCA data copy routines */
2880 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2881 {
2882 	d->sda = s->sda;
2883 	d->sigp_ctrl.c = s->sigp_ctrl.c;
2884 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2885 }
2886 
2887 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2888 {
2889 	int i;
2890 
2891 	d->ipte_control = s->ipte_control;
2892 	d->mcn[0] = s->mcn;
2893 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2894 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2895 }
2896 
2897 static int sca_switch_to_extended(struct kvm *kvm)
2898 {
2899 	struct bsca_block *old_sca = kvm->arch.sca;
2900 	struct esca_block *new_sca;
2901 	struct kvm_vcpu *vcpu;
2902 	unsigned long vcpu_idx;
2903 	u32 scaol, scaoh;
2904 
2905 	if (kvm->arch.use_esca)
2906 		return 0;
2907 
2908 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
2909 	if (!new_sca)
2910 		return -ENOMEM;
2911 
2912 	scaoh = (u32)((u64)(new_sca) >> 32);
2913 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
2914 
2915 	kvm_s390_vcpu_block_all(kvm);
2916 	write_lock(&kvm->arch.sca_lock);
2917 
2918 	sca_copy_b_to_e(new_sca, old_sca);
2919 
2920 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2921 		vcpu->arch.sie_block->scaoh = scaoh;
2922 		vcpu->arch.sie_block->scaol = scaol;
2923 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2924 	}
2925 	kvm->arch.sca = new_sca;
2926 	kvm->arch.use_esca = 1;
2927 
2928 	write_unlock(&kvm->arch.sca_lock);
2929 	kvm_s390_vcpu_unblock_all(kvm);
2930 
2931 	free_page((unsigned long)old_sca);
2932 
2933 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2934 		 old_sca, kvm->arch.sca);
2935 	return 0;
2936 }
2937 
2938 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2939 {
2940 	int rc;
2941 
2942 	if (!kvm_s390_use_sca_entries()) {
2943 		if (id < KVM_MAX_VCPUS)
2944 			return true;
2945 		return false;
2946 	}
2947 	if (id < KVM_S390_BSCA_CPU_SLOTS)
2948 		return true;
2949 	if (!sclp.has_esca || !sclp.has_64bscao)
2950 		return false;
2951 
2952 	mutex_lock(&kvm->lock);
2953 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2954 	mutex_unlock(&kvm->lock);
2955 
2956 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2957 }
2958 
2959 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2960 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2961 {
2962 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2963 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2964 	vcpu->arch.cputm_start = get_tod_clock_fast();
2965 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2966 }
2967 
2968 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2969 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2970 {
2971 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2972 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2973 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2974 	vcpu->arch.cputm_start = 0;
2975 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2976 }
2977 
2978 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2979 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2980 {
2981 	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2982 	vcpu->arch.cputm_enabled = true;
2983 	__start_cpu_timer_accounting(vcpu);
2984 }
2985 
2986 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2987 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2988 {
2989 	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2990 	__stop_cpu_timer_accounting(vcpu);
2991 	vcpu->arch.cputm_enabled = false;
2992 }
2993 
/* Preemption-safe wrapper around __enable_cpu_timer_accounting(). */
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
3000 
/* Preemption-safe wrapper around __disable_cpu_timer_accounting(). */
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
3007 
3008 /* set the cpu timer - may only be called from the VCPU thread itself */
3009 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3010 {
3011 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3012 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3013 	if (vcpu->arch.cputm_enabled)
3014 		vcpu->arch.cputm_start = get_tod_clock_fast();
3015 	vcpu->arch.sie_block->cputm = cputm;
3016 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3017 	preempt_enable();
3018 }
3019 
3020 /* update and get the cpu timer - can also be called from other VCPU threads */
3021 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3022 {
3023 	unsigned int seq;
3024 	__u64 value;
3025 
3026 	if (unlikely(!vcpu->arch.cputm_enabled))
3027 		return vcpu->arch.sie_block->cputm;
3028 
3029 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3030 	do {
3031 		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3032 		/*
3033 		 * If the writer would ever execute a read in the critical
3034 		 * section, e.g. in irq context, we have a deadlock.
3035 		 */
3036 		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3037 		value = vcpu->arch.sie_block->cputm;
3038 		/* if cputm_start is 0, accounting is being started/stopped */
3039 		if (likely(vcpu->arch.cputm_start))
3040 			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3041 	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3042 	preempt_enable();
3043 	return value;
3044 }
3045 
3046 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3047 {
3048 
3049 	gmap_enable(vcpu->arch.enabled_gmap);
3050 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3051 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3052 		__start_cpu_timer_accounting(vcpu);
3053 	vcpu->cpu = cpu;
3054 }
3055 
3056 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3057 {
3058 	vcpu->cpu = -1;
3059 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3060 		__stop_cpu_timer_accounting(vcpu);
3061 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3062 	vcpu->arch.enabled_gmap = gmap_get_enabled();
3063 	gmap_disable(vcpu->arch.enabled_gmap);
3064 
3065 }
3066 
3067 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3068 {
3069 	mutex_lock(&vcpu->kvm->lock);
3070 	preempt_disable();
3071 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3072 	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3073 	preempt_enable();
3074 	mutex_unlock(&vcpu->kvm->lock);
3075 	if (!kvm_is_ucontrol(vcpu->kvm)) {
3076 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3077 		sca_add_vcpu(vcpu);
3078 	}
3079 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3080 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3081 	/* make vcpu_load load the right gmap on the first trigger */
3082 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3083 }
3084 
3085 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3086 {
3087 	if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3088 	    test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3089 		return true;
3090 	return false;
3091 }
3092 
3093 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3094 {
3095 	/* At least one ECC subfunction must be present */
3096 	return kvm_has_pckmo_subfunc(kvm, 32) ||
3097 	       kvm_has_pckmo_subfunc(kvm, 33) ||
3098 	       kvm_has_pckmo_subfunc(kvm, 34) ||
3099 	       kvm_has_pckmo_subfunc(kvm, 40) ||
3100 	       kvm_has_pckmo_subfunc(kvm, 41);
3101 
3102 }
3103 
3104 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3105 {
3106 	/*
3107 	 * If the AP instructions are not being interpreted and the MSAX3
3108 	 * facility is not configured for the guest, there is nothing to set up.
3109 	 */
3110 	if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3111 		return;
3112 
3113 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3114 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3115 	vcpu->arch.sie_block->eca &= ~ECA_APIE;
3116 	vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3117 
3118 	if (vcpu->kvm->arch.crypto.apie)
3119 		vcpu->arch.sie_block->eca |= ECA_APIE;
3120 
3121 	/* Set up protected key support */
3122 	if (vcpu->kvm->arch.crypto.aes_kw) {
3123 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3124 		/* ecc is also wrapped with AES key */
3125 		if (kvm_has_pckmo_ecc(vcpu->kvm))
3126 			vcpu->arch.sie_block->ecd |= ECD_ECC;
3127 	}
3128 
3129 	if (vcpu->kvm->arch.crypto.dea_kw)
3130 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3131 }
3132 
3133 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3134 {
3135 	free_page(vcpu->arch.sie_block->cbrlo);
3136 	vcpu->arch.sie_block->cbrlo = 0;
3137 }
3138 
3139 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3140 {
3141 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
3142 	if (!vcpu->arch.sie_block->cbrlo)
3143 		return -ENOMEM;
3144 	return 0;
3145 }
3146 
3147 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3148 {
3149 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3150 
3151 	vcpu->arch.sie_block->ibc = model->ibc;
3152 	if (test_kvm_facility(vcpu->kvm, 7))
3153 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3154 }
3155 
3156 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3157 {
3158 	int rc = 0;
3159 	u16 uvrc, uvrrc;
3160 
3161 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3162 						    CPUSTAT_SM |
3163 						    CPUSTAT_STOPPED);
3164 
3165 	if (test_kvm_facility(vcpu->kvm, 78))
3166 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3167 	else if (test_kvm_facility(vcpu->kvm, 8))
3168 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3169 
3170 	kvm_s390_vcpu_setup_model(vcpu);
3171 
3172 	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3173 	if (MACHINE_HAS_ESOP)
3174 		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3175 	if (test_kvm_facility(vcpu->kvm, 9))
3176 		vcpu->arch.sie_block->ecb |= ECB_SRSI;
3177 	if (test_kvm_facility(vcpu->kvm, 73))
3178 		vcpu->arch.sie_block->ecb |= ECB_TE;
3179 	if (!kvm_is_ucontrol(vcpu->kvm))
3180 		vcpu->arch.sie_block->ecb |= ECB_SPECI;
3181 
3182 	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3183 		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3184 	if (test_kvm_facility(vcpu->kvm, 130))
3185 		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3186 	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3187 	if (sclp.has_cei)
3188 		vcpu->arch.sie_block->eca |= ECA_CEI;
3189 	if (sclp.has_ib)
3190 		vcpu->arch.sie_block->eca |= ECA_IB;
3191 	if (sclp.has_siif)
3192 		vcpu->arch.sie_block->eca |= ECA_SII;
3193 	if (sclp.has_sigpif)
3194 		vcpu->arch.sie_block->eca |= ECA_SIGPI;
3195 	if (test_kvm_facility(vcpu->kvm, 129)) {
3196 		vcpu->arch.sie_block->eca |= ECA_VX;
3197 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3198 	}
3199 	if (test_kvm_facility(vcpu->kvm, 139))
3200 		vcpu->arch.sie_block->ecd |= ECD_MEF;
3201 	if (test_kvm_facility(vcpu->kvm, 156))
3202 		vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3203 	if (vcpu->arch.sie_block->gd) {
3204 		vcpu->arch.sie_block->eca |= ECA_AIV;
3205 		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3206 			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3207 	}
3208 	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3209 					| SDNXC;
3210 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3211 
3212 	if (sclp.has_kss)
3213 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3214 	else
3215 		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3216 
3217 	if (vcpu->kvm->arch.use_cmma) {
3218 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
3219 		if (rc)
3220 			return rc;
3221 	}
3222 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3223 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3224 
3225 	vcpu->arch.sie_block->hpid = HPID_KVM;
3226 
3227 	kvm_s390_vcpu_crypto_setup(vcpu);
3228 
3229 	mutex_lock(&vcpu->kvm->lock);
3230 	if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3231 		rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3232 		if (rc)
3233 			kvm_s390_vcpu_unsetup_cmma(vcpu);
3234 	}
3235 	mutex_unlock(&vcpu->kvm->lock);
3236 
3237 	return rc;
3238 }
3239 
3240 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3241 {
3242 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3243 		return -EINVAL;
3244 	return 0;
3245 }
3246 
3247 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3248 {
3249 	struct sie_page *sie_page;
3250 	int rc;
3251 
3252 	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3253 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3254 	if (!sie_page)
3255 		return -ENOMEM;
3256 
3257 	vcpu->arch.sie_block = &sie_page->sie_block;
3258 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3259 
3260 	/* the real guest size will always be smaller than msl */
3261 	vcpu->arch.sie_block->mso = 0;
3262 	vcpu->arch.sie_block->msl = sclp.hamax;
3263 
3264 	vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3265 	spin_lock_init(&vcpu->arch.local_int.lock);
3266 	vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3267 	if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3268 		vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3269 	seqcount_init(&vcpu->arch.cputm_seqcount);
3270 
3271 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3272 	kvm_clear_async_pf_completion_queue(vcpu);
3273 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3274 				    KVM_SYNC_GPRS |
3275 				    KVM_SYNC_ACRS |
3276 				    KVM_SYNC_CRS |
3277 				    KVM_SYNC_ARCH0 |
3278 				    KVM_SYNC_PFAULT |
3279 				    KVM_SYNC_DIAG318;
3280 	kvm_s390_set_prefix(vcpu, 0);
3281 	if (test_kvm_facility(vcpu->kvm, 64))
3282 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3283 	if (test_kvm_facility(vcpu->kvm, 82))
3284 		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3285 	if (test_kvm_facility(vcpu->kvm, 133))
3286 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3287 	if (test_kvm_facility(vcpu->kvm, 156))
3288 		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3289 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
3290 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3291 	 */
3292 	if (MACHINE_HAS_VX)
3293 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3294 	else
3295 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3296 
3297 	if (kvm_is_ucontrol(vcpu->kvm)) {
3298 		rc = __kvm_ucontrol_vcpu_init(vcpu);
3299 		if (rc)
3300 			goto out_free_sie_block;
3301 	}
3302 
3303 	VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3304 		 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3305 	trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3306 
3307 	rc = kvm_s390_vcpu_setup(vcpu);
3308 	if (rc)
3309 		goto out_ucontrol_uninit;
3310 	return 0;
3311 
3312 out_ucontrol_uninit:
3313 	if (kvm_is_ucontrol(vcpu->kvm))
3314 		gmap_remove(vcpu->arch.gmap);
3315 out_free_sie_block:
3316 	free_page((unsigned long)(vcpu->arch.sie_block));
3317 	return rc;
3318 }
3319 
3320 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3321 {
3322 	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
3323 	return kvm_s390_vcpu_has_irq(vcpu, 0);
3324 }
3325 
3326 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3327 {
3328 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3329 }
3330 
3331 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3332 {
3333 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3334 	exit_sie(vcpu);
3335 }
3336 
3337 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3338 {
3339 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3340 }
3341 
3342 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3343 {
3344 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3345 	exit_sie(vcpu);
3346 }
3347 
3348 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3349 {
3350 	return atomic_read(&vcpu->arch.sie_block->prog20) &
3351 	       (PROG_BLOCK_SIE | PROG_REQUEST);
3352 }
3353 
3354 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3355 {
3356 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3357 }
3358 
3359 /*
3360  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3361  * If the CPU is not running (e.g. waiting as idle) the function will
3362  * return immediately. */
3363 void exit_sie(struct kvm_vcpu *vcpu)
3364 {
3365 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3366 	kvm_s390_vsie_kick(vcpu);
3367 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3368 		cpu_relax();
3369 }
3370 
/*
 * Kick a guest cpu out of SIE to process a request synchronously: the
 * request is made first and the vcpu is forced out of SIE afterwards.
 */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
3377 
3378 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3379 			      unsigned long end)
3380 {
3381 	struct kvm *kvm = gmap->private;
3382 	struct kvm_vcpu *vcpu;
3383 	unsigned long prefix;
3384 	unsigned long i;
3385 
3386 	if (gmap_is_shadow(gmap))
3387 		return;
3388 	if (start >= 1UL << 31)
3389 		/* We are only interested in prefix pages */
3390 		return;
3391 	kvm_for_each_vcpu(i, vcpu, kvm) {
3392 		/* match against both prefix pages */
3393 		prefix = kvm_s390_get_prefix(vcpu);
3394 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3395 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3396 				   start, end);
3397 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3398 		}
3399 	}
3400 }
3401 
3402 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3403 {
3404 	/* do not poll with more than halt_poll_max_steal percent of steal time */
3405 	if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3406 	    READ_ONCE(halt_poll_max_steal)) {
3407 		vcpu->stat.halt_no_poll_steal++;
3408 		return true;
3409 	}
3410 	return false;
3411 }
3412 
/*
 * Must never be reached on s390: kvm common code refers to this, but
 * never calls it.
 */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	BUG();
	return 0;
}
3419 
3420 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3421 					   struct kvm_one_reg *reg)
3422 {
3423 	int r = -EINVAL;
3424 
3425 	switch (reg->id) {
3426 	case KVM_REG_S390_TODPR:
3427 		r = put_user(vcpu->arch.sie_block->todpr,
3428 			     (u32 __user *)reg->addr);
3429 		break;
3430 	case KVM_REG_S390_EPOCHDIFF:
3431 		r = put_user(vcpu->arch.sie_block->epoch,
3432 			     (u64 __user *)reg->addr);
3433 		break;
3434 	case KVM_REG_S390_CPU_TIMER:
3435 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
3436 			     (u64 __user *)reg->addr);
3437 		break;
3438 	case KVM_REG_S390_CLOCK_COMP:
3439 		r = put_user(vcpu->arch.sie_block->ckc,
3440 			     (u64 __user *)reg->addr);
3441 		break;
3442 	case KVM_REG_S390_PFTOKEN:
3443 		r = put_user(vcpu->arch.pfault_token,
3444 			     (u64 __user *)reg->addr);
3445 		break;
3446 	case KVM_REG_S390_PFCOMPARE:
3447 		r = put_user(vcpu->arch.pfault_compare,
3448 			     (u64 __user *)reg->addr);
3449 		break;
3450 	case KVM_REG_S390_PFSELECT:
3451 		r = put_user(vcpu->arch.pfault_select,
3452 			     (u64 __user *)reg->addr);
3453 		break;
3454 	case KVM_REG_S390_PP:
3455 		r = put_user(vcpu->arch.sie_block->pp,
3456 			     (u64 __user *)reg->addr);
3457 		break;
3458 	case KVM_REG_S390_GBEA:
3459 		r = put_user(vcpu->arch.sie_block->gbea,
3460 			     (u64 __user *)reg->addr);
3461 		break;
3462 	default:
3463 		break;
3464 	}
3465 
3466 	return r;
3467 }
3468 
3469 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3470 					   struct kvm_one_reg *reg)
3471 {
3472 	int r = -EINVAL;
3473 	__u64 val;
3474 
3475 	switch (reg->id) {
3476 	case KVM_REG_S390_TODPR:
3477 		r = get_user(vcpu->arch.sie_block->todpr,
3478 			     (u32 __user *)reg->addr);
3479 		break;
3480 	case KVM_REG_S390_EPOCHDIFF:
3481 		r = get_user(vcpu->arch.sie_block->epoch,
3482 			     (u64 __user *)reg->addr);
3483 		break;
3484 	case KVM_REG_S390_CPU_TIMER:
3485 		r = get_user(val, (u64 __user *)reg->addr);
3486 		if (!r)
3487 			kvm_s390_set_cpu_timer(vcpu, val);
3488 		break;
3489 	case KVM_REG_S390_CLOCK_COMP:
3490 		r = get_user(vcpu->arch.sie_block->ckc,
3491 			     (u64 __user *)reg->addr);
3492 		break;
3493 	case KVM_REG_S390_PFTOKEN:
3494 		r = get_user(vcpu->arch.pfault_token,
3495 			     (u64 __user *)reg->addr);
3496 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3497 			kvm_clear_async_pf_completion_queue(vcpu);
3498 		break;
3499 	case KVM_REG_S390_PFCOMPARE:
3500 		r = get_user(vcpu->arch.pfault_compare,
3501 			     (u64 __user *)reg->addr);
3502 		break;
3503 	case KVM_REG_S390_PFSELECT:
3504 		r = get_user(vcpu->arch.pfault_select,
3505 			     (u64 __user *)reg->addr);
3506 		break;
3507 	case KVM_REG_S390_PP:
3508 		r = get_user(vcpu->arch.sie_block->pp,
3509 			     (u64 __user *)reg->addr);
3510 		break;
3511 	case KVM_REG_S390_GBEA:
3512 		r = get_user(vcpu->arch.sie_block->gbea,
3513 			     (u64 __user *)reg->addr);
3514 		break;
3515 	default:
3516 		break;
3517 	}
3518 
3519 	return r;
3520 }
3521 
3522 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3523 {
3524 	vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3525 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3526 	memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3527 
3528 	kvm_clear_async_pf_completion_queue(vcpu);
3529 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3530 		kvm_s390_vcpu_stop(vcpu);
3531 	kvm_s390_clear_local_irqs(vcpu);
3532 }
3533 
3534 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3535 {
3536 	/* Initial reset is a superset of the normal reset */
3537 	kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3538 
3539 	/*
3540 	 * This equals initial cpu reset in pop, but we don't switch to ESA.
3541 	 * We do not only reset the internal data, but also ...
3542 	 */
3543 	vcpu->arch.sie_block->gpsw.mask = 0;
3544 	vcpu->arch.sie_block->gpsw.addr = 0;
3545 	kvm_s390_set_prefix(vcpu, 0);
3546 	kvm_s390_set_cpu_timer(vcpu, 0);
3547 	vcpu->arch.sie_block->ckc = 0;
3548 	memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3549 	vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3550 	vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3551 
3552 	/* ... the data in sync regs */
3553 	memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3554 	vcpu->run->s.regs.ckc = 0;
3555 	vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3556 	vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3557 	vcpu->run->psw_addr = 0;
3558 	vcpu->run->psw_mask = 0;
3559 	vcpu->run->s.regs.todpr = 0;
3560 	vcpu->run->s.regs.cputm = 0;
3561 	vcpu->run->s.regs.ckc = 0;
3562 	vcpu->run->s.regs.pp = 0;
3563 	vcpu->run->s.regs.gbea = 1;
3564 	vcpu->run->s.regs.fpc = 0;
3565 	/*
3566 	 * Do not reset these registers in the protected case, as some of
3567 	 * them are overlayed and they are not accessible in this case
3568 	 * anyway.
3569 	 */
3570 	if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3571 		vcpu->arch.sie_block->gbea = 1;
3572 		vcpu->arch.sie_block->pp = 0;
3573 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3574 		vcpu->arch.sie_block->todpr = 0;
3575 	}
3576 }
3577 
3578 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3579 {
3580 	struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3581 
3582 	/* Clear reset is a superset of the initial reset */
3583 	kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3584 
3585 	memset(&regs->gprs, 0, sizeof(regs->gprs));
3586 	memset(&regs->vrs, 0, sizeof(regs->vrs));
3587 	memset(&regs->acrs, 0, sizeof(regs->acrs));
3588 	memset(&regs->gscb, 0, sizeof(regs->gscb));
3589 
3590 	regs->etoken = 0;
3591 	regs->etoken_extension = 0;
3592 }
3593 
3594 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3595 {
3596 	vcpu_load(vcpu);
3597 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3598 	vcpu_put(vcpu);
3599 	return 0;
3600 }
3601 
3602 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3603 {
3604 	vcpu_load(vcpu);
3605 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3606 	vcpu_put(vcpu);
3607 	return 0;
3608 }
3609 
3610 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3611 				  struct kvm_sregs *sregs)
3612 {
3613 	vcpu_load(vcpu);
3614 
3615 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3616 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3617 
3618 	vcpu_put(vcpu);
3619 	return 0;
3620 }
3621 
3622 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3623 				  struct kvm_sregs *sregs)
3624 {
3625 	vcpu_load(vcpu);
3626 
3627 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3628 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3629 
3630 	vcpu_put(vcpu);
3631 	return 0;
3632 }
3633 
3634 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3635 {
3636 	int ret = 0;
3637 
3638 	vcpu_load(vcpu);
3639 
3640 	if (test_fp_ctl(fpu->fpc)) {
3641 		ret = -EINVAL;
3642 		goto out;
3643 	}
3644 	vcpu->run->s.regs.fpc = fpu->fpc;
3645 	if (MACHINE_HAS_VX)
3646 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3647 				 (freg_t *) fpu->fprs);
3648 	else
3649 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3650 
3651 out:
3652 	vcpu_put(vcpu);
3653 	return ret;
3654 }
3655 
3656 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3657 {
3658 	vcpu_load(vcpu);
3659 
3660 	/* make sure we have the latest values */
3661 	save_fpu_regs();
3662 	if (MACHINE_HAS_VX)
3663 		convert_vx_to_fp((freg_t *) fpu->fprs,
3664 				 (__vector128 *) vcpu->run->s.regs.vrs);
3665 	else
3666 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3667 	fpu->fpc = vcpu->run->s.regs.fpc;
3668 
3669 	vcpu_put(vcpu);
3670 	return 0;
3671 }
3672 
3673 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3674 {
3675 	int rc = 0;
3676 
3677 	if (!is_vcpu_stopped(vcpu))
3678 		rc = -EBUSY;
3679 	else {
3680 		vcpu->run->psw_mask = psw.mask;
3681 		vcpu->run->psw_addr = psw.addr;
3682 	}
3683 	return rc;
3684 }
3685 
/* Address translation is not implemented yet; always returns -EINVAL. */
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL;
}
3691 
3692 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3693 			      KVM_GUESTDBG_USE_HW_BP | \
3694 			      KVM_GUESTDBG_ENABLE)
3695 
3696 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3697 					struct kvm_guest_debug *dbg)
3698 {
3699 	int rc = 0;
3700 
3701 	vcpu_load(vcpu);
3702 
3703 	vcpu->guest_debug = 0;
3704 	kvm_s390_clear_bp_data(vcpu);
3705 
3706 	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3707 		rc = -EINVAL;
3708 		goto out;
3709 	}
3710 	if (!sclp.has_gpere) {
3711 		rc = -EINVAL;
3712 		goto out;
3713 	}
3714 
3715 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
3716 		vcpu->guest_debug = dbg->control;
3717 		/* enforce guest PER */
3718 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3719 
3720 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3721 			rc = kvm_s390_import_bp_data(vcpu, dbg);
3722 	} else {
3723 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3724 		vcpu->arch.guestdbg.last_bp = 0;
3725 	}
3726 
3727 	if (rc) {
3728 		vcpu->guest_debug = 0;
3729 		kvm_s390_clear_bp_data(vcpu);
3730 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3731 	}
3732 
3733 out:
3734 	vcpu_put(vcpu);
3735 	return rc;
3736 }
3737 
3738 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3739 				    struct kvm_mp_state *mp_state)
3740 {
3741 	int ret;
3742 
3743 	vcpu_load(vcpu);
3744 
3745 	/* CHECK_STOP and LOAD are not supported yet */
3746 	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3747 				      KVM_MP_STATE_OPERATING;
3748 
3749 	vcpu_put(vcpu);
3750 	return ret;
3751 }
3752 
3753 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3754 				    struct kvm_mp_state *mp_state)
3755 {
3756 	int rc = 0;
3757 
3758 	vcpu_load(vcpu);
3759 
3760 	/* user space knows about this interface - let it control the state */
3761 	kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm);
3762 
3763 	switch (mp_state->mp_state) {
3764 	case KVM_MP_STATE_STOPPED:
3765 		rc = kvm_s390_vcpu_stop(vcpu);
3766 		break;
3767 	case KVM_MP_STATE_OPERATING:
3768 		rc = kvm_s390_vcpu_start(vcpu);
3769 		break;
3770 	case KVM_MP_STATE_LOAD:
3771 		if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3772 			rc = -ENXIO;
3773 			break;
3774 		}
3775 		rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3776 		break;
3777 	case KVM_MP_STATE_CHECK_STOP:
3778 		fallthrough;	/* CHECK_STOP and LOAD are not supported yet */
3779 	default:
3780 		rc = -ENXIO;
3781 	}
3782 
3783 	vcpu_put(vcpu);
3784 	return rc;
3785 }
3786 
3787 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3788 {
3789 	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3790 }
3791 
3792 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3793 {
3794 retry:
3795 	kvm_s390_vcpu_request_handled(vcpu);
3796 	if (!kvm_request_pending(vcpu))
3797 		return 0;
3798 	/*
3799 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3800 	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3801 	 * This ensures that the ipte instruction for this request has
3802 	 * already finished. We might race against a second unmapper that
3803 	 * wants to set the blocking bit. Lets just retry the request loop.
3804 	 */
3805 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3806 		int rc;
3807 		rc = gmap_mprotect_notify(vcpu->arch.gmap,
3808 					  kvm_s390_get_prefix(vcpu),
3809 					  PAGE_SIZE * 2, PROT_WRITE);
3810 		if (rc) {
3811 			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3812 			return rc;
3813 		}
3814 		goto retry;
3815 	}
3816 
3817 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3818 		vcpu->arch.sie_block->ihcpu = 0xffff;
3819 		goto retry;
3820 	}
3821 
3822 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3823 		if (!ibs_enabled(vcpu)) {
3824 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3825 			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3826 		}
3827 		goto retry;
3828 	}
3829 
3830 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3831 		if (ibs_enabled(vcpu)) {
3832 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3833 			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3834 		}
3835 		goto retry;
3836 	}
3837 
3838 	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3839 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3840 		goto retry;
3841 	}
3842 
3843 	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3844 		/*
3845 		 * Disable CMM virtualization; we will emulate the ESSA
3846 		 * instruction manually, in order to provide additional
3847 		 * functionalities needed for live migration.
3848 		 */
3849 		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3850 		goto retry;
3851 	}
3852 
3853 	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3854 		/*
3855 		 * Re-enable CMM virtualization if CMMA is available and
3856 		 * CMM has been used.
3857 		 */
3858 		if ((vcpu->kvm->arch.use_cmma) &&
3859 		    (vcpu->kvm->mm->context.uses_cmm))
3860 			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3861 		goto retry;
3862 	}
3863 
3864 	/* nothing to do, just clear the request */
3865 	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3866 	/* we left the vsie handler, nothing to do, just clear the request */
3867 	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3868 
3869 	return 0;
3870 }
3871 
3872 void kvm_s390_set_tod_clock(struct kvm *kvm,
3873 			    const struct kvm_s390_vm_tod_clock *gtod)
3874 {
3875 	struct kvm_vcpu *vcpu;
3876 	union tod_clock clk;
3877 	unsigned long i;
3878 
3879 	mutex_lock(&kvm->lock);
3880 	preempt_disable();
3881 
3882 	store_tod_clock_ext(&clk);
3883 
3884 	kvm->arch.epoch = gtod->tod - clk.tod;
3885 	kvm->arch.epdx = 0;
3886 	if (test_kvm_facility(kvm, 139)) {
3887 		kvm->arch.epdx = gtod->epoch_idx - clk.ei;
3888 		if (kvm->arch.epoch > gtod->tod)
3889 			kvm->arch.epdx -= 1;
3890 	}
3891 
3892 	kvm_s390_vcpu_block_all(kvm);
3893 	kvm_for_each_vcpu(i, vcpu, kvm) {
3894 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3895 		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3896 	}
3897 
3898 	kvm_s390_vcpu_unblock_all(kvm);
3899 	preempt_enable();
3900 	mutex_unlock(&kvm->lock);
3901 }
3902 
3903 /**
3904  * kvm_arch_fault_in_page - fault-in guest page if necessary
3905  * @vcpu: The corresponding virtual cpu
3906  * @gpa: Guest physical address
3907  * @writable: Whether the page should be writable or not
3908  *
3909  * Make sure that a guest page has been faulted-in on the host.
3910  *
3911  * Return: Zero on success, negative error code otherwise.
3912  */
3913 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3914 {
3915 	return gmap_fault(vcpu->arch.gmap, gpa,
3916 			  writable ? FAULT_FLAG_WRITE : 0);
3917 }
3918 
3919 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3920 				      unsigned long token)
3921 {
3922 	struct kvm_s390_interrupt inti;
3923 	struct kvm_s390_irq irq;
3924 
3925 	if (start_token) {
3926 		irq.u.ext.ext_params2 = token;
3927 		irq.type = KVM_S390_INT_PFAULT_INIT;
3928 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3929 	} else {
3930 		inti.type = KVM_S390_INT_PFAULT_DONE;
3931 		inti.parm64 = token;
3932 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3933 	}
3934 }
3935 
3936 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3937 				     struct kvm_async_pf *work)
3938 {
3939 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3940 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3941 
3942 	return true;
3943 }
3944 
3945 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3946 				 struct kvm_async_pf *work)
3947 {
3948 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3949 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3950 }
3951 
/*
 * Async-pf hook: intentionally empty, because s390 always injects the page
 * directly and has nothing left to do here.
 */
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
}
3957 
3958 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
3959 {
3960 	/*
3961 	 * s390 will always inject the page directly,
3962 	 * but we still want check_async_completion to cleanup
3963 	 */
3964 	return true;
3965 }
3966 
3967 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3968 {
3969 	hva_t hva;
3970 	struct kvm_arch_async_pf arch;
3971 
3972 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3973 		return false;
3974 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3975 	    vcpu->arch.pfault_compare)
3976 		return false;
3977 	if (psw_extint_disabled(vcpu))
3978 		return false;
3979 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
3980 		return false;
3981 	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3982 		return false;
3983 	if (!vcpu->arch.gmap->pfault_enabled)
3984 		return false;
3985 
3986 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3987 	hva += current->thread.gmap_addr & ~PAGE_MASK;
3988 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3989 		return false;
3990 
3991 	return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3992 }
3993 
3994 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3995 {
3996 	int rc, cpuflags;
3997 
3998 	/*
3999 	 * On s390 notifications for arriving pages will be delivered directly
4000 	 * to the guest but the house keeping for completed pfaults is
4001 	 * handled outside the worker.
4002 	 */
4003 	kvm_check_async_pf_completion(vcpu);
4004 
4005 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4006 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4007 
4008 	if (need_resched())
4009 		schedule();
4010 
4011 	if (!kvm_is_ucontrol(vcpu->kvm)) {
4012 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
4013 		if (rc)
4014 			return rc;
4015 	}
4016 
4017 	rc = kvm_s390_handle_requests(vcpu);
4018 	if (rc)
4019 		return rc;
4020 
4021 	if (guestdbg_enabled(vcpu)) {
4022 		kvm_s390_backup_guest_per_regs(vcpu);
4023 		kvm_s390_patch_guest_per_regs(vcpu);
4024 	}
4025 
4026 	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
4027 
4028 	vcpu->arch.sie_block->icptcode = 0;
4029 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4030 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4031 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
4032 
4033 	return 0;
4034 }
4035 
4036 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4037 {
4038 	struct kvm_s390_pgm_info pgm_info = {
4039 		.code = PGM_ADDRESSING,
4040 	};
4041 	u8 opcode, ilen;
4042 	int rc;
4043 
4044 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4045 	trace_kvm_s390_sie_fault(vcpu);
4046 
4047 	/*
4048 	 * We want to inject an addressing exception, which is defined as a
4049 	 * suppressing or terminating exception. However, since we came here
4050 	 * by a DAT access exception, the PSW still points to the faulting
4051 	 * instruction since DAT exceptions are nullifying. So we've got
4052 	 * to look up the current opcode to get the length of the instruction
4053 	 * to be able to forward the PSW.
4054 	 */
4055 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4056 	ilen = insn_length(opcode);
4057 	if (rc < 0) {
4058 		return rc;
4059 	} else if (rc) {
4060 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
4061 		 * Forward by arbitrary ilc, injection will take care of
4062 		 * nullification if necessary.
4063 		 */
4064 		pgm_info = vcpu->arch.pgm;
4065 		ilen = 4;
4066 	}
4067 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4068 	kvm_s390_forward_psw(vcpu, ilen);
4069 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4070 }
4071 
4072 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4073 {
4074 	struct mcck_volatile_info *mcck_info;
4075 	struct sie_page *sie_page;
4076 
4077 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4078 		   vcpu->arch.sie_block->icptcode);
4079 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4080 
4081 	if (guestdbg_enabled(vcpu))
4082 		kvm_s390_restore_guest_per_regs(vcpu);
4083 
4084 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4085 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4086 
4087 	if (exit_reason == -EINTR) {
4088 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
4089 		sie_page = container_of(vcpu->arch.sie_block,
4090 					struct sie_page, sie_block);
4091 		mcck_info = &sie_page->mcck_info;
4092 		kvm_s390_reinject_machine_check(vcpu, mcck_info);
4093 		return 0;
4094 	}
4095 
4096 	if (vcpu->arch.sie_block->icptcode > 0) {
4097 		int rc = kvm_handle_sie_intercept(vcpu);
4098 
4099 		if (rc != -EOPNOTSUPP)
4100 			return rc;
4101 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4102 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4103 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4104 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4105 		return -EREMOTE;
4106 	} else if (exit_reason != -EFAULT) {
4107 		vcpu->stat.exit_null++;
4108 		return 0;
4109 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
4110 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4111 		vcpu->run->s390_ucontrol.trans_exc_code =
4112 						current->thread.gmap_addr;
4113 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
4114 		return -EREMOTE;
4115 	} else if (current->thread.gmap_pfault) {
4116 		trace_kvm_s390_major_guest_pfault(vcpu);
4117 		current->thread.gmap_pfault = 0;
4118 		if (kvm_arch_setup_async_pf(vcpu))
4119 			return 0;
4120 		vcpu->stat.pfault_sync++;
4121 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4122 	}
4123 	return vcpu_post_run_fault_in_sie(vcpu);
4124 }
4125 
4126 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4127 static int __vcpu_run(struct kvm_vcpu *vcpu)
4128 {
4129 	int rc, exit_reason;
4130 	struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4131 
4132 	/*
4133 	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
4134 	 * ning the guest), so that memslots (and other stuff) are protected
4135 	 */
4136 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4137 
4138 	do {
4139 		rc = vcpu_pre_run(vcpu);
4140 		if (rc)
4141 			break;
4142 
4143 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4144 		/*
4145 		 * As PF_VCPU will be used in fault handler, between
4146 		 * guest_enter and guest_exit should be no uaccess.
4147 		 */
4148 		local_irq_disable();
4149 		guest_enter_irqoff();
4150 		__disable_cpu_timer_accounting(vcpu);
4151 		local_irq_enable();
4152 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4153 			memcpy(sie_page->pv_grregs,
4154 			       vcpu->run->s.regs.gprs,
4155 			       sizeof(sie_page->pv_grregs));
4156 		}
4157 		if (test_cpu_flag(CIF_FPU))
4158 			load_fpu_regs();
4159 		exit_reason = sie64a(vcpu->arch.sie_block,
4160 				     vcpu->run->s.regs.gprs);
4161 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4162 			memcpy(vcpu->run->s.regs.gprs,
4163 			       sie_page->pv_grregs,
4164 			       sizeof(sie_page->pv_grregs));
4165 			/*
4166 			 * We're not allowed to inject interrupts on intercepts
4167 			 * that leave the guest state in an "in-between" state
4168 			 * where the next SIE entry will do a continuation.
4169 			 * Fence interrupts in our "internal" PSW.
4170 			 */
4171 			if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4172 			    vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4173 				vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4174 			}
4175 		}
4176 		local_irq_disable();
4177 		__enable_cpu_timer_accounting(vcpu);
4178 		guest_exit_irqoff();
4179 		local_irq_enable();
4180 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4181 
4182 		rc = vcpu_post_run(vcpu, exit_reason);
4183 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4184 
4185 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4186 	return rc;
4187 }
4188 
4189 static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4190 {
4191 	struct kvm_run *kvm_run = vcpu->run;
4192 	struct runtime_instr_cb *riccb;
4193 	struct gs_cb *gscb;
4194 
4195 	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4196 	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4197 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4198 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4199 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4200 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4201 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4202 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4203 	}
4204 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4205 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4206 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4207 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4208 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4209 			kvm_clear_async_pf_completion_queue(vcpu);
4210 	}
4211 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4212 		vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4213 		vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4214 		VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc);
4215 	}
4216 	/*
4217 	 * If userspace sets the riccb (e.g. after migration) to a valid state,
4218 	 * we should enable RI here instead of doing the lazy enablement.
4219 	 */
4220 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4221 	    test_kvm_facility(vcpu->kvm, 64) &&
4222 	    riccb->v &&
4223 	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4224 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4225 		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4226 	}
4227 	/*
4228 	 * If userspace sets the gscb (e.g. after migration) to non-zero,
4229 	 * we should enable GS here instead of doing the lazy enablement.
4230 	 */
4231 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4232 	    test_kvm_facility(vcpu->kvm, 133) &&
4233 	    gscb->gssm &&
4234 	    !vcpu->arch.gs_enabled) {
4235 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4236 		vcpu->arch.sie_block->ecb |= ECB_GS;
4237 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4238 		vcpu->arch.gs_enabled = 1;
4239 	}
4240 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4241 	    test_kvm_facility(vcpu->kvm, 82)) {
4242 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4243 		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4244 	}
4245 	if (MACHINE_HAS_GS) {
4246 		preempt_disable();
4247 		__ctl_set_bit(2, 4);
4248 		if (current->thread.gs_cb) {
4249 			vcpu->arch.host_gscb = current->thread.gs_cb;
4250 			save_gs_cb(vcpu->arch.host_gscb);
4251 		}
4252 		if (vcpu->arch.gs_enabled) {
4253 			current->thread.gs_cb = (struct gs_cb *)
4254 						&vcpu->run->s.regs.gscb;
4255 			restore_gs_cb(current->thread.gs_cb);
4256 		}
4257 		preempt_enable();
4258 	}
4259 	/* SIE will load etoken directly from SDNX and therefore kvm_run */
4260 }
4261 
4262 static void sync_regs(struct kvm_vcpu *vcpu)
4263 {
4264 	struct kvm_run *kvm_run = vcpu->run;
4265 
4266 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4267 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4268 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4269 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4270 		/* some control register changes require a tlb flush */
4271 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4272 	}
4273 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4274 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4275 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4276 	}
4277 	save_access_regs(vcpu->arch.host_acrs);
4278 	restore_access_regs(vcpu->run->s.regs.acrs);
4279 	/* save host (userspace) fprs/vrs */
4280 	save_fpu_regs();
4281 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4282 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4283 	if (MACHINE_HAS_VX)
4284 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4285 	else
4286 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4287 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4288 	if (test_fp_ctl(current->thread.fpu.fpc))
4289 		/* User space provided an invalid FPC, let's clear it */
4290 		current->thread.fpu.fpc = 0;
4291 
4292 	/* Sync fmt2 only data */
4293 	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4294 		sync_regs_fmt2(vcpu);
4295 	} else {
4296 		/*
4297 		 * In several places we have to modify our internal view to
4298 		 * not do things that are disallowed by the ultravisor. For
4299 		 * example we must not inject interrupts after specific exits
4300 		 * (e.g. 112 prefix page not secure). We do this by turning
4301 		 * off the machine check, external and I/O interrupt bits
4302 		 * of our PSW copy. To avoid getting validity intercepts, we
4303 		 * do only accept the condition code from userspace.
4304 		 */
4305 		vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4306 		vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4307 						   PSW_MASK_CC;
4308 	}
4309 
4310 	kvm_run->kvm_dirty_regs = 0;
4311 }
4312 
4313 static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4314 {
4315 	struct kvm_run *kvm_run = vcpu->run;
4316 
4317 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4318 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4319 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4320 	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4321 	kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4322 	if (MACHINE_HAS_GS) {
4323 		preempt_disable();
4324 		__ctl_set_bit(2, 4);
4325 		if (vcpu->arch.gs_enabled)
4326 			save_gs_cb(current->thread.gs_cb);
4327 		current->thread.gs_cb = vcpu->arch.host_gscb;
4328 		restore_gs_cb(vcpu->arch.host_gscb);
4329 		if (!vcpu->arch.host_gscb)
4330 			__ctl_clear_bit(2, 4);
4331 		vcpu->arch.host_gscb = NULL;
4332 		preempt_enable();
4333 	}
4334 	/* SIE will save etoken directly into SDNX and therefore kvm_run */
4335 }
4336 
4337 static void store_regs(struct kvm_vcpu *vcpu)
4338 {
4339 	struct kvm_run *kvm_run = vcpu->run;
4340 
4341 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4342 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4343 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4344 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4345 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4346 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4347 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4348 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4349 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4350 	save_access_regs(vcpu->run->s.regs.acrs);
4351 	restore_access_regs(vcpu->arch.host_acrs);
4352 	/* Save guest register state */
4353 	save_fpu_regs();
4354 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4355 	/* Restore will be done lazily at return */
4356 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4357 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4358 	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4359 		store_regs_fmt2(vcpu);
4360 }
4361 
4362 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4363 {
4364 	struct kvm_run *kvm_run = vcpu->run;
4365 	int rc;
4366 
4367 	if (kvm_run->immediate_exit)
4368 		return -EINTR;
4369 
4370 	if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4371 	    kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4372 		return -EINVAL;
4373 
4374 	vcpu_load(vcpu);
4375 
4376 	if (guestdbg_exit_pending(vcpu)) {
4377 		kvm_s390_prepare_debug_exit(vcpu);
4378 		rc = 0;
4379 		goto out;
4380 	}
4381 
4382 	kvm_sigset_activate(vcpu);
4383 
4384 	/*
4385 	 * no need to check the return value of vcpu_start as it can only have
4386 	 * an error for protvirt, but protvirt means user cpu state
4387 	 */
4388 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4389 		kvm_s390_vcpu_start(vcpu);
4390 	} else if (is_vcpu_stopped(vcpu)) {
4391 		pr_err_ratelimited("can't run stopped vcpu %d\n",
4392 				   vcpu->vcpu_id);
4393 		rc = -EINVAL;
4394 		goto out;
4395 	}
4396 
4397 	sync_regs(vcpu);
4398 	enable_cpu_timer_accounting(vcpu);
4399 
4400 	might_fault();
4401 	rc = __vcpu_run(vcpu);
4402 
4403 	if (signal_pending(current) && !rc) {
4404 		kvm_run->exit_reason = KVM_EXIT_INTR;
4405 		rc = -EINTR;
4406 	}
4407 
4408 	if (guestdbg_exit_pending(vcpu) && !rc)  {
4409 		kvm_s390_prepare_debug_exit(vcpu);
4410 		rc = 0;
4411 	}
4412 
4413 	if (rc == -EREMOTE) {
4414 		/* userspace support is needed, kvm_run has been prepared */
4415 		rc = 0;
4416 	}
4417 
4418 	disable_cpu_timer_accounting(vcpu);
4419 	store_regs(vcpu);
4420 
4421 	kvm_sigset_deactivate(vcpu);
4422 
4423 	vcpu->stat.exit_userspace++;
4424 out:
4425 	vcpu_put(vcpu);
4426 	return rc;
4427 }
4428 
4429 /*
4430  * store status at address
4431  * we use have two special cases:
4432  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4433  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4434  */
4435 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4436 {
4437 	unsigned char archmode = 1;
4438 	freg_t fprs[NUM_FPRS];
4439 	unsigned int px;
4440 	u64 clkcomp, cputm;
4441 	int rc;
4442 
4443 	px = kvm_s390_get_prefix(vcpu);
4444 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4445 		if (write_guest_abs(vcpu, 163, &archmode, 1))
4446 			return -EFAULT;
4447 		gpa = 0;
4448 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4449 		if (write_guest_real(vcpu, 163, &archmode, 1))
4450 			return -EFAULT;
4451 		gpa = px;
4452 	} else
4453 		gpa -= __LC_FPREGS_SAVE_AREA;
4454 
4455 	/* manually convert vector registers if necessary */
4456 	if (MACHINE_HAS_VX) {
4457 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4458 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4459 				     fprs, 128);
4460 	} else {
4461 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4462 				     vcpu->run->s.regs.fprs, 128);
4463 	}
4464 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4465 			      vcpu->run->s.regs.gprs, 128);
4466 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4467 			      &vcpu->arch.sie_block->gpsw, 16);
4468 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4469 			      &px, 4);
4470 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4471 			      &vcpu->run->s.regs.fpc, 4);
4472 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4473 			      &vcpu->arch.sie_block->todpr, 4);
4474 	cputm = kvm_s390_get_cpu_timer(vcpu);
4475 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4476 			      &cputm, 8);
4477 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
4478 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4479 			      &clkcomp, 8);
4480 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4481 			      &vcpu->run->s.regs.acrs, 64);
4482 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4483 			      &vcpu->arch.sie_block->gcr, 128);
4484 	return rc ? -EFAULT : 0;
4485 }
4486 
4487 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4488 {
4489 	/*
4490 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4491 	 * switch in the run ioctl. Let's update our copies before we save
4492 	 * it into the save area
4493 	 */
4494 	save_fpu_regs();
4495 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4496 	save_access_regs(vcpu->run->s.regs.acrs);
4497 
4498 	return kvm_s390_store_status_unloaded(vcpu, addr);
4499 }
4500 
4501 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4502 {
4503 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4504 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4505 }
4506 
4507 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4508 {
4509 	unsigned long i;
4510 	struct kvm_vcpu *vcpu;
4511 
4512 	kvm_for_each_vcpu(i, vcpu, kvm) {
4513 		__disable_ibs_on_vcpu(vcpu);
4514 	}
4515 }
4516 
4517 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4518 {
4519 	if (!sclp.has_ibs)
4520 		return;
4521 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4522 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4523 }
4524 
4525 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4526 {
4527 	int i, online_vcpus, r = 0, started_vcpus = 0;
4528 
4529 	if (!is_vcpu_stopped(vcpu))
4530 		return 0;
4531 
4532 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4533 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4534 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4535 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4536 
4537 	/* Let's tell the UV that we want to change into the operating state */
4538 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4539 		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4540 		if (r) {
4541 			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4542 			return r;
4543 		}
4544 	}
4545 
4546 	for (i = 0; i < online_vcpus; i++) {
4547 		if (!is_vcpu_stopped(kvm_get_vcpu(vcpu->kvm, i)))
4548 			started_vcpus++;
4549 	}
4550 
4551 	if (started_vcpus == 0) {
4552 		/* we're the only active VCPU -> speed it up */
4553 		__enable_ibs_on_vcpu(vcpu);
4554 	} else if (started_vcpus == 1) {
4555 		/*
4556 		 * As we are starting a second VCPU, we have to disable
4557 		 * the IBS facility on all VCPUs to remove potentially
4558 		 * outstanding ENABLE requests.
4559 		 */
4560 		__disable_ibs_on_all_vcpus(vcpu->kvm);
4561 	}
4562 
4563 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4564 	/*
4565 	 * The real PSW might have changed due to a RESTART interpreted by the
4566 	 * ultravisor. We block all interrupts and let the next sie exit
4567 	 * refresh our view.
4568 	 */
4569 	if (kvm_s390_pv_cpu_is_protected(vcpu))
4570 		vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4571 	/*
4572 	 * Another VCPU might have used IBS while we were offline.
4573 	 * Let's play safe and flush the VCPU at startup.
4574 	 */
4575 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4576 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4577 	return 0;
4578 }
4579 
4580 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4581 {
4582 	int i, online_vcpus, r = 0, started_vcpus = 0;
4583 	struct kvm_vcpu *started_vcpu = NULL;
4584 
4585 	if (is_vcpu_stopped(vcpu))
4586 		return 0;
4587 
4588 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4589 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4590 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4591 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4592 
4593 	/* Let's tell the UV that we want to change into the stopped state */
4594 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4595 		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4596 		if (r) {
4597 			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4598 			return r;
4599 		}
4600 	}
4601 
4602 	/*
4603 	 * Set the VCPU to STOPPED and THEN clear the interrupt flag,
4604 	 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
4605 	 * have been fully processed. This will ensure that the VCPU
4606 	 * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
4607 	 */
4608 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4609 	kvm_s390_clear_stop_irq(vcpu);
4610 
4611 	__disable_ibs_on_vcpu(vcpu);
4612 
4613 	for (i = 0; i < online_vcpus; i++) {
4614 		struct kvm_vcpu *tmp = kvm_get_vcpu(vcpu->kvm, i);
4615 
4616 		if (!is_vcpu_stopped(tmp)) {
4617 			started_vcpus++;
4618 			started_vcpu = tmp;
4619 		}
4620 	}
4621 
4622 	if (started_vcpus == 1) {
4623 		/*
4624 		 * As we only have one VCPU left, we want to enable the
4625 		 * IBS facility for that VCPU to speed it up.
4626 		 */
4627 		__enable_ibs_on_vcpu(started_vcpu);
4628 	}
4629 
4630 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4631 	return 0;
4632 }
4633 
4634 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4635 				     struct kvm_enable_cap *cap)
4636 {
4637 	int r;
4638 
4639 	if (cap->flags)
4640 		return -EINVAL;
4641 
4642 	switch (cap->cap) {
4643 	case KVM_CAP_S390_CSS_SUPPORT:
4644 		if (!vcpu->kvm->arch.css_support) {
4645 			vcpu->kvm->arch.css_support = 1;
4646 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4647 			trace_kvm_s390_enable_css(vcpu->kvm);
4648 		}
4649 		r = 0;
4650 		break;
4651 	default:
4652 		r = -EINVAL;
4653 		break;
4654 	}
4655 	return r;
4656 }
4657 
4658 static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
4659 				   struct kvm_s390_mem_op *mop)
4660 {
4661 	void __user *uaddr = (void __user *)mop->buf;
4662 	int r = 0;
4663 
4664 	if (mop->flags || !mop->size)
4665 		return -EINVAL;
4666 	if (mop->size + mop->sida_offset < mop->size)
4667 		return -EINVAL;
4668 	if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4669 		return -E2BIG;
4670 
4671 	switch (mop->op) {
4672 	case KVM_S390_MEMOP_SIDA_READ:
4673 		if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4674 				 mop->sida_offset), mop->size))
4675 			r = -EFAULT;
4676 
4677 		break;
4678 	case KVM_S390_MEMOP_SIDA_WRITE:
4679 		if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4680 				   mop->sida_offset), uaddr, mop->size))
4681 			r = -EFAULT;
4682 		break;
4683 	}
4684 	return r;
4685 }
4686 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4687 				  struct kvm_s390_mem_op *mop)
4688 {
4689 	void __user *uaddr = (void __user *)mop->buf;
4690 	void *tmpbuf = NULL;
4691 	int r = 0;
4692 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4693 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
4694 
4695 	if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4696 		return -EINVAL;
4697 
4698 	if (mop->size > MEM_OP_MAX_SIZE)
4699 		return -E2BIG;
4700 
4701 	if (kvm_s390_pv_cpu_is_protected(vcpu))
4702 		return -EINVAL;
4703 
4704 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4705 		tmpbuf = vmalloc(mop->size);
4706 		if (!tmpbuf)
4707 			return -ENOMEM;
4708 	}
4709 
4710 	switch (mop->op) {
4711 	case KVM_S390_MEMOP_LOGICAL_READ:
4712 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4713 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4714 					    mop->size, GACC_FETCH);
4715 			break;
4716 		}
4717 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4718 		if (r == 0) {
4719 			if (copy_to_user(uaddr, tmpbuf, mop->size))
4720 				r = -EFAULT;
4721 		}
4722 		break;
4723 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4724 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4725 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4726 					    mop->size, GACC_STORE);
4727 			break;
4728 		}
4729 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4730 			r = -EFAULT;
4731 			break;
4732 		}
4733 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4734 		break;
4735 	}
4736 
4737 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4738 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4739 
4740 	vfree(tmpbuf);
4741 	return r;
4742 }
4743 
4744 static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
4745 				      struct kvm_s390_mem_op *mop)
4746 {
4747 	int r, srcu_idx;
4748 
4749 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4750 
4751 	switch (mop->op) {
4752 	case KVM_S390_MEMOP_LOGICAL_READ:
4753 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4754 		r = kvm_s390_guest_mem_op(vcpu, mop);
4755 		break;
4756 	case KVM_S390_MEMOP_SIDA_READ:
4757 	case KVM_S390_MEMOP_SIDA_WRITE:
4758 		/* we are locked against sida going away by the vcpu->mutex */
4759 		r = kvm_s390_guest_sida_op(vcpu, mop);
4760 		break;
4761 	default:
4762 		r = -EINVAL;
4763 	}
4764 
4765 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4766 	return r;
4767 }
4768 
4769 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4770 			       unsigned int ioctl, unsigned long arg)
4771 {
4772 	struct kvm_vcpu *vcpu = filp->private_data;
4773 	void __user *argp = (void __user *)arg;
4774 
4775 	switch (ioctl) {
4776 	case KVM_S390_IRQ: {
4777 		struct kvm_s390_irq s390irq;
4778 
4779 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4780 			return -EFAULT;
4781 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4782 	}
4783 	case KVM_S390_INTERRUPT: {
4784 		struct kvm_s390_interrupt s390int;
4785 		struct kvm_s390_irq s390irq = {};
4786 
4787 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
4788 			return -EFAULT;
4789 		if (s390int_to_s390irq(&s390int, &s390irq))
4790 			return -EINVAL;
4791 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4792 	}
4793 	}
4794 	return -ENOIOCTLCMD;
4795 }
4796 
4797 long kvm_arch_vcpu_ioctl(struct file *filp,
4798 			 unsigned int ioctl, unsigned long arg)
4799 {
4800 	struct kvm_vcpu *vcpu = filp->private_data;
4801 	void __user *argp = (void __user *)arg;
4802 	int idx;
4803 	long r;
4804 	u16 rc, rrc;
4805 
4806 	vcpu_load(vcpu);
4807 
4808 	switch (ioctl) {
4809 	case KVM_S390_STORE_STATUS:
4810 		idx = srcu_read_lock(&vcpu->kvm->srcu);
4811 		r = kvm_s390_store_status_unloaded(vcpu, arg);
4812 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
4813 		break;
4814 	case KVM_S390_SET_INITIAL_PSW: {
4815 		psw_t psw;
4816 
4817 		r = -EFAULT;
4818 		if (copy_from_user(&psw, argp, sizeof(psw)))
4819 			break;
4820 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4821 		break;
4822 	}
4823 	case KVM_S390_CLEAR_RESET:
4824 		r = 0;
4825 		kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4826 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4827 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4828 					  UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4829 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4830 				   rc, rrc);
4831 		}
4832 		break;
4833 	case KVM_S390_INITIAL_RESET:
4834 		r = 0;
4835 		kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4836 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4837 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4838 					  UVC_CMD_CPU_RESET_INITIAL,
4839 					  &rc, &rrc);
4840 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4841 				   rc, rrc);
4842 		}
4843 		break;
4844 	case KVM_S390_NORMAL_RESET:
4845 		r = 0;
4846 		kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4847 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4848 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4849 					  UVC_CMD_CPU_RESET, &rc, &rrc);
4850 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4851 				   rc, rrc);
4852 		}
4853 		break;
4854 	case KVM_SET_ONE_REG:
4855 	case KVM_GET_ONE_REG: {
4856 		struct kvm_one_reg reg;
4857 		r = -EINVAL;
4858 		if (kvm_s390_pv_cpu_is_protected(vcpu))
4859 			break;
4860 		r = -EFAULT;
4861 		if (copy_from_user(&reg, argp, sizeof(reg)))
4862 			break;
4863 		if (ioctl == KVM_SET_ONE_REG)
4864 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4865 		else
4866 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4867 		break;
4868 	}
4869 #ifdef CONFIG_KVM_S390_UCONTROL
4870 	case KVM_S390_UCAS_MAP: {
4871 		struct kvm_s390_ucas_mapping ucasmap;
4872 
4873 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4874 			r = -EFAULT;
4875 			break;
4876 		}
4877 
4878 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4879 			r = -EINVAL;
4880 			break;
4881 		}
4882 
4883 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4884 				     ucasmap.vcpu_addr, ucasmap.length);
4885 		break;
4886 	}
4887 	case KVM_S390_UCAS_UNMAP: {
4888 		struct kvm_s390_ucas_mapping ucasmap;
4889 
4890 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4891 			r = -EFAULT;
4892 			break;
4893 		}
4894 
4895 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4896 			r = -EINVAL;
4897 			break;
4898 		}
4899 
4900 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4901 			ucasmap.length);
4902 		break;
4903 	}
4904 #endif
4905 	case KVM_S390_VCPU_FAULT: {
4906 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
4907 		break;
4908 	}
4909 	case KVM_ENABLE_CAP:
4910 	{
4911 		struct kvm_enable_cap cap;
4912 		r = -EFAULT;
4913 		if (copy_from_user(&cap, argp, sizeof(cap)))
4914 			break;
4915 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4916 		break;
4917 	}
4918 	case KVM_S390_MEM_OP: {
4919 		struct kvm_s390_mem_op mem_op;
4920 
4921 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4922 			r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
4923 		else
4924 			r = -EFAULT;
4925 		break;
4926 	}
4927 	case KVM_S390_SET_IRQ_STATE: {
4928 		struct kvm_s390_irq_state irq_state;
4929 
4930 		r = -EFAULT;
4931 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4932 			break;
4933 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4934 		    irq_state.len == 0 ||
4935 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4936 			r = -EINVAL;
4937 			break;
4938 		}
4939 		/* do not use irq_state.flags, it will break old QEMUs */
4940 		r = kvm_s390_set_irq_state(vcpu,
4941 					   (void __user *) irq_state.buf,
4942 					   irq_state.len);
4943 		break;
4944 	}
4945 	case KVM_S390_GET_IRQ_STATE: {
4946 		struct kvm_s390_irq_state irq_state;
4947 
4948 		r = -EFAULT;
4949 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4950 			break;
4951 		if (irq_state.len == 0) {
4952 			r = -EINVAL;
4953 			break;
4954 		}
4955 		/* do not use irq_state.flags, it will break old QEMUs */
4956 		r = kvm_s390_get_irq_state(vcpu,
4957 					   (__u8 __user *)  irq_state.buf,
4958 					   irq_state.len);
4959 		break;
4960 	}
4961 	default:
4962 		r = -ENOTTY;
4963 	}
4964 
4965 	vcpu_put(vcpu);
4966 	return r;
4967 }
4968 
4969 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4970 {
4971 #ifdef CONFIG_KVM_S390_UCONTROL
4972 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4973 		 && (kvm_is_ucontrol(vcpu->kvm))) {
4974 		vmf->page = virt_to_page(vcpu->arch.sie_block);
4975 		get_page(vmf->page);
4976 		return 0;
4977 	}
4978 #endif
4979 	return VM_FAULT_SIGBUS;
4980 }
4981 
4982 /* Section: memory related */
4983 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4984 				   const struct kvm_memory_slot *old,
4985 				   struct kvm_memory_slot *new,
4986 				   enum kvm_mr_change change)
4987 {
4988 	gpa_t size;
4989 
4990 	/* When we are protected, we should not change the memory slots */
4991 	if (kvm_s390_pv_get_handle(kvm))
4992 		return -EINVAL;
4993 
4994 	if (change == KVM_MR_DELETE || change == KVM_MR_FLAGS_ONLY)
4995 		return 0;
4996 
4997 	/* A few sanity checks. We can have memory slots which have to be
4998 	   located/ended at a segment boundary (1MB). The memory in userland is
4999 	   ok to be fragmented into various different vmas. It is okay to mmap()
5000 	   and munmap() stuff in this slot after doing this call at any time */
5001 
5002 	if (new->userspace_addr & 0xffffful)
5003 		return -EINVAL;
5004 
5005 	size = new->npages * PAGE_SIZE;
5006 	if (size & 0xffffful)
5007 		return -EINVAL;
5008 
5009 	if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit)
5010 		return -EINVAL;
5011 
5012 	return 0;
5013 }
5014 
5015 void kvm_arch_commit_memory_region(struct kvm *kvm,
5016 				struct kvm_memory_slot *old,
5017 				const struct kvm_memory_slot *new,
5018 				enum kvm_mr_change change)
5019 {
5020 	int rc = 0;
5021 
5022 	switch (change) {
5023 	case KVM_MR_DELETE:
5024 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5025 					old->npages * PAGE_SIZE);
5026 		break;
5027 	case KVM_MR_MOVE:
5028 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5029 					old->npages * PAGE_SIZE);
5030 		if (rc)
5031 			break;
5032 		fallthrough;
5033 	case KVM_MR_CREATE:
5034 		rc = gmap_map_segment(kvm->arch.gmap, new->userspace_addr,
5035 				      new->base_gfn * PAGE_SIZE,
5036 				      new->npages * PAGE_SIZE);
5037 		break;
5038 	case KVM_MR_FLAGS_ONLY:
5039 		break;
5040 	default:
5041 		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5042 	}
5043 	if (rc)
5044 		pr_warn("failed to commit memory region\n");
5045 	return;
5046 }
5047 
5048 static inline unsigned long nonhyp_mask(int i)
5049 {
5050 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5051 
5052 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5053 }
5054 
5055 static int __init kvm_s390_init(void)
5056 {
5057 	int i;
5058 
5059 	if (!sclp.has_sief2) {
5060 		pr_info("SIE is not available\n");
5061 		return -ENODEV;
5062 	}
5063 
5064 	if (nested && hpage) {
5065 		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5066 		return -EINVAL;
5067 	}
5068 
5069 	for (i = 0; i < 16; i++)
5070 		kvm_s390_fac_base[i] |=
5071 			stfle_fac_list[i] & nonhyp_mask(i);
5072 
5073 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5074 }
5075 
5076 static void __exit kvm_s390_exit(void)
5077 {
5078 	kvm_exit();
5079 }
5080 
5081 module_init(kvm_s390_init);
5082 module_exit(kvm_s390_exit);
5083 
5084 /*
5085  * Enable autoloading of the kvm module.
5086  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5087  * since x86 takes a different approach.
5088  */
5089 #include <linux/miscdevice.h>
5090 MODULE_ALIAS_MISCDEV(KVM_MINOR);
5091 MODULE_ALIAS("devname:kvm");
5092