xref: /openbmc/linux/arch/s390/kvm/kvm-s390.c (revision a5d46d9a)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2020
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13 
14 #define KMSG_COMPONENT "kvm-s390"
15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
16 
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34 #include <linux/pgtable.h>
35 
36 #include <asm/asm-offsets.h>
37 #include <asm/lowcore.h>
38 #include <asm/stp.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include <asm/ap.h>
47 #include <asm/uv.h>
48 #include <asm/fpu/api.h>
49 #include "kvm-s390.h"
50 #include "gaccess.h"
51 
52 #define CREATE_TRACE_POINTS
53 #include "trace.h"
54 #include "trace-s390.h"
55 
#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
/* NOTE(review): presumably the number of local interrupt slots - confirm */
#define LOCAL_IRQS 32
/* Byte size of KVM_MAX_VCPUS + LOCAL_IRQS entries of struct kvm_s390_irq */
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))
60 
/*
 * Descriptors of the per-VM statistics: the generic KVM VM statistics
 * followed by the s390 specific injection counters.
 *
 * The static_assert below ties the number of descriptors to the number of
 * u64 sized slots in struct kvm_vm_stat, so both must be changed together.
 */
const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
	KVM_GENERIC_VM_STATS(),
	STATS_DESC_COUNTER(VM, inject_io),
	STATS_DESC_COUNTER(VM, inject_float_mchk),
	STATS_DESC_COUNTER(VM, inject_pfault_done),
	STATS_DESC_COUNTER(VM, inject_service_signal),
	STATS_DESC_COUNTER(VM, inject_virtio)
};
static_assert(ARRAY_SIZE(kvm_vm_stats_desc) ==
		sizeof(struct kvm_vm_stat) / sizeof(u64));
71 
/*
 * Header for the per-VM statistics. The offsets describe consecutive
 * regions: the id directly after the header (KVM_STATS_NAME_SIZE bytes),
 * then the descriptor array, then the data.
 */
const struct kvm_stats_header kvm_vm_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vm_stats_desc),
};
80 
/*
 * Descriptors of the per-vcpu statistics: the generic KVM vcpu statistics
 * followed by the s390 specific counters.
 *
 * The static_assert below ties the number of descriptors to the number of
 * u64 sized slots in struct kvm_vcpu_stat, so both must be changed together.
 */
const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
	KVM_GENERIC_VCPU_STATS(),
	STATS_DESC_COUNTER(VCPU, exit_userspace),
	STATS_DESC_COUNTER(VCPU, exit_null),
	STATS_DESC_COUNTER(VCPU, exit_external_request),
	STATS_DESC_COUNTER(VCPU, exit_io_request),
	STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
	STATS_DESC_COUNTER(VCPU, exit_stop_request),
	STATS_DESC_COUNTER(VCPU, exit_validity),
	STATS_DESC_COUNTER(VCPU, exit_instruction),
	STATS_DESC_COUNTER(VCPU, exit_pei),
	STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
	STATS_DESC_COUNTER(VCPU, instruction_lctl),
	STATS_DESC_COUNTER(VCPU, instruction_lctlg),
	STATS_DESC_COUNTER(VCPU, instruction_stctl),
	STATS_DESC_COUNTER(VCPU, instruction_stctg),
	STATS_DESC_COUNTER(VCPU, exit_program_interruption),
	STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
	STATS_DESC_COUNTER(VCPU, exit_operation_exception),
	STATS_DESC_COUNTER(VCPU, deliver_ckc),
	STATS_DESC_COUNTER(VCPU, deliver_cputm),
	STATS_DESC_COUNTER(VCPU, deliver_external_call),
	STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
	STATS_DESC_COUNTER(VCPU, deliver_service_signal),
	STATS_DESC_COUNTER(VCPU, deliver_virtio),
	STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
	STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
	STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
	STATS_DESC_COUNTER(VCPU, deliver_program),
	STATS_DESC_COUNTER(VCPU, deliver_io),
	STATS_DESC_COUNTER(VCPU, deliver_machine_check),
	STATS_DESC_COUNTER(VCPU, exit_wait_state),
	STATS_DESC_COUNTER(VCPU, inject_ckc),
	STATS_DESC_COUNTER(VCPU, inject_cputm),
	STATS_DESC_COUNTER(VCPU, inject_external_call),
	STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
	STATS_DESC_COUNTER(VCPU, inject_mchk),
	STATS_DESC_COUNTER(VCPU, inject_pfault_init),
	STATS_DESC_COUNTER(VCPU, inject_program),
	STATS_DESC_COUNTER(VCPU, inject_restart),
	STATS_DESC_COUNTER(VCPU, inject_set_prefix),
	STATS_DESC_COUNTER(VCPU, inject_stop_signal),
	STATS_DESC_COUNTER(VCPU, instruction_epsw),
	STATS_DESC_COUNTER(VCPU, instruction_gs),
	STATS_DESC_COUNTER(VCPU, instruction_io_other),
	STATS_DESC_COUNTER(VCPU, instruction_lpsw),
	STATS_DESC_COUNTER(VCPU, instruction_lpswe),
	STATS_DESC_COUNTER(VCPU, instruction_pfmf),
	STATS_DESC_COUNTER(VCPU, instruction_ptff),
	STATS_DESC_COUNTER(VCPU, instruction_sck),
	STATS_DESC_COUNTER(VCPU, instruction_sckpf),
	STATS_DESC_COUNTER(VCPU, instruction_stidp),
	STATS_DESC_COUNTER(VCPU, instruction_spx),
	STATS_DESC_COUNTER(VCPU, instruction_stpx),
	STATS_DESC_COUNTER(VCPU, instruction_stap),
	STATS_DESC_COUNTER(VCPU, instruction_iske),
	STATS_DESC_COUNTER(VCPU, instruction_ri),
	STATS_DESC_COUNTER(VCPU, instruction_rrbe),
	STATS_DESC_COUNTER(VCPU, instruction_sske),
	STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
	STATS_DESC_COUNTER(VCPU, instruction_stsi),
	STATS_DESC_COUNTER(VCPU, instruction_stfl),
	STATS_DESC_COUNTER(VCPU, instruction_tb),
	STATS_DESC_COUNTER(VCPU, instruction_tpi),
	STATS_DESC_COUNTER(VCPU, instruction_tprot),
	STATS_DESC_COUNTER(VCPU, instruction_tsch),
	STATS_DESC_COUNTER(VCPU, instruction_sie),
	STATS_DESC_COUNTER(VCPU, instruction_essa),
	STATS_DESC_COUNTER(VCPU, instruction_sthyi),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
	STATS_DESC_COUNTER(VCPU, diagnose_10),
	STATS_DESC_COUNTER(VCPU, diagnose_44),
	STATS_DESC_COUNTER(VCPU, diagnose_9c),
	STATS_DESC_COUNTER(VCPU, diagnose_9c_ignored),
	STATS_DESC_COUNTER(VCPU, diagnose_9c_forward),
	STATS_DESC_COUNTER(VCPU, diagnose_258),
	STATS_DESC_COUNTER(VCPU, diagnose_308),
	STATS_DESC_COUNTER(VCPU, diagnose_500),
	STATS_DESC_COUNTER(VCPU, diagnose_other),
	STATS_DESC_COUNTER(VCPU, pfault_sync)
};
static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) ==
		sizeof(struct kvm_vcpu_stat) / sizeof(u64));
179 
/*
 * Header for the per-vcpu statistics. The offsets describe consecutive
 * regions: the id directly after the header (KVM_STATS_NAME_SIZE bytes),
 * then the descriptor array, then the data.
 */
const struct kvm_stats_header kvm_vcpu_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vcpu_stats_desc),
};
188 
189 /* allow nested virtualization in KVM (if enabled by user space) */
190 static int nested;
191 module_param(nested, int, S_IRUGO);
192 MODULE_PARM_DESC(nested, "Nested virtualization support");
193 
/*
 * allow 1m huge page guest backing, if !nested
 * (read-only parameter, mode 0444; gates KVM_CAP_S390_HPAGE_1M)
 */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");

/*
 * maximum percentage of steal time for polling.  >100 is treated like 100
 * (mode 0644; default is 10)
 */
static u8 halt_poll_max_steal = 10;
module_param(halt_poll_max_steal, byte, 0644);
MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");

/*
 * if set to true, the GISA will be initialized and used if available
 * (mode 0644; enabled by default)
 */
static bool use_gisa  = true;
module_param(use_gisa, bool, 0644);
MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");

/*
 * maximum diag9c forwarding per second
 * (mode 0644; 0, the default, turns forwarding off; the variable is not
 * static, so it is visible outside this file)
 */
unsigned int diag9c_forwarding_hz;
module_param(diag9c_forwarding_hz, uint, 0644);
MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
213 
/*
 * Number of facility words handled internally.
 *
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, this requires changes to code, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
231 
232 static unsigned long kvm_s390_fac_size(void)
233 {
234 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
235 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
236 	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
237 		sizeof(stfle_fac_list));
238 
239 	return SIZE_INTERNAL;
240 }
241 
/* available cpu features supported by kvm (bits set via allow_cpu_feat()) */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

/* gmap notifiers, set up and registered in kvm_arch_hardware_setup() */
static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
/* debug areas "kvm-trace" and "kvm-uv", registered in kvm_arch_init() */
debug_info_t *kvm_s390_dbf;
debug_info_t *kvm_s390_dbf_uv;
251 
/* Section: not file related */

/*
 * Hardware enable hook. Nothing has to be done on s390, so this always
 * reports success (0).
 */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}
258 
/*
 * Processor compatibility hook. No check is performed here; @opaque is
 * unused and the function always returns 0.
 */
int kvm_arch_check_processor_compat(void *opaque)
{
	return 0;
}
263 
/*
 * forward declarations
 *
 * kvm_gmap_notifier is installed as the callback of gmap_notifier in
 * kvm_arch_hardware_setup().
 */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
static int sca_switch_to_extended(struct kvm *kvm);
268 
269 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
270 {
271 	u8 delta_idx = 0;
272 
273 	/*
274 	 * The TOD jumps by delta, we have to compensate this by adding
275 	 * -delta to the epoch.
276 	 */
277 	delta = -delta;
278 
279 	/* sign-extension - we're adding to signed values below */
280 	if ((s64)delta < 0)
281 		delta_idx = -1;
282 
283 	scb->epoch += delta;
284 	if (scb->ecd & ECD_MEF) {
285 		scb->epdx += delta_idx;
286 		if (scb->epoch < delta)
287 			scb->epdx += 1;
288 	}
289 }
290 
291 /*
292  * This callback is executed during stop_machine(). All CPUs are therefore
293  * temporarily stopped. In order not to change guest behavior, we have to
294  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
295  * so a CPU won't be stopped while calculating with the epoch.
296  */
297 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
298 			  void *v)
299 {
300 	struct kvm *kvm;
301 	struct kvm_vcpu *vcpu;
302 	int i;
303 	unsigned long long *delta = v;
304 
305 	list_for_each_entry(kvm, &vm_list, vm_list) {
306 		kvm_for_each_vcpu(i, vcpu, kvm) {
307 			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
308 			if (i == 0) {
309 				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
310 				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
311 			}
312 			if (vcpu->arch.cputm_enabled)
313 				vcpu->arch.cputm_start += *delta;
314 			if (vcpu->arch.vsie_block)
315 				kvm_clock_sync_scb(vcpu->arch.vsie_block,
316 						   *delta);
317 		}
318 	}
319 	return NOTIFY_OK;
320 }
321 
/*
 * Notifier block invoking kvm_clock_sync(); it is added to
 * s390_epoch_delta_notifier in kvm_arch_hardware_setup().
 */
static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};
325 
326 int kvm_arch_hardware_setup(void *opaque)
327 {
328 	gmap_notifier.notifier_call = kvm_gmap_notifier;
329 	gmap_register_pte_notifier(&gmap_notifier);
330 	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
331 	gmap_register_pte_notifier(&vsie_gmap_notifier);
332 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
333 				       &kvm_clock_notifier);
334 	return 0;
335 }
336 
/*
 * Counterpart of kvm_arch_hardware_setup(): unregister both gmap pte
 * notifiers and the epoch delta notifier.
 */
void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}
344 
/*
 * Mark cpu feature @nr as available by setting its bit (via set_bit_inv())
 * in kvm_s390_available_cpu_feat.
 */
static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}
349 
/*
 * Issue PLO in "test bit" mode for function code @nr.
 *
 * Returns 1 if the resulting condition code is 0, otherwise 0.
 */
static inline int plo_test_bit(unsigned char nr)
{
	/* function code for register 0: @nr with bit 0x100 set ("test bit") */
	unsigned long function = (unsigned long)nr | 0x100;
	int cc;

	asm volatile(
		"	lgr	0,%[function]\n"
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		/* extract the condition code into the low bits of cc */
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: [function] "d" (function)
		: "cc", "0");
	return cc == 0;
}
366 
/*
 * Execute the RRF format instruction @opcode with register 0 set to 0 and
 * register 1 holding the address @query.
 *
 * NOTE(review): presumably function code 0 is the instruction's query
 * function and the result is stored at @query - confirm. The "memory"
 * clobber covers any store done by the instruction.
 */
static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
{
	asm volatile(
		"	lghi	0,0\n"
		"	lgr	1,%[query]\n"
		/* Parameter registers are ignored */
		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
		:
		: [query] "d" ((unsigned long)query), [opc] "i" (opcode)
		: "cc", "memory", "0", "1");
}
378 
/* opcodes handed to __insn32_query() in kvm_s390_cpu_feat_init() */
#define INSN_SORTL 0xb938
#define INSN_DFLTCC 0xb939
381 
382 static void kvm_s390_cpu_feat_init(void)
383 {
384 	int i;
385 
386 	for (i = 0; i < 256; ++i) {
387 		if (plo_test_bit(i))
388 			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
389 	}
390 
391 	if (test_facility(28)) /* TOD-clock steering */
392 		ptff(kvm_s390_available_subfunc.ptff,
393 		     sizeof(kvm_s390_available_subfunc.ptff),
394 		     PTFF_QAF);
395 
396 	if (test_facility(17)) { /* MSA */
397 		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
398 			      kvm_s390_available_subfunc.kmac);
399 		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
400 			      kvm_s390_available_subfunc.kmc);
401 		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
402 			      kvm_s390_available_subfunc.km);
403 		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
404 			      kvm_s390_available_subfunc.kimd);
405 		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
406 			      kvm_s390_available_subfunc.klmd);
407 	}
408 	if (test_facility(76)) /* MSA3 */
409 		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
410 			      kvm_s390_available_subfunc.pckmo);
411 	if (test_facility(77)) { /* MSA4 */
412 		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
413 			      kvm_s390_available_subfunc.kmctr);
414 		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
415 			      kvm_s390_available_subfunc.kmf);
416 		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
417 			      kvm_s390_available_subfunc.kmo);
418 		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
419 			      kvm_s390_available_subfunc.pcc);
420 	}
421 	if (test_facility(57)) /* MSA5 */
422 		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
423 			      kvm_s390_available_subfunc.ppno);
424 
425 	if (test_facility(146)) /* MSA8 */
426 		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
427 			      kvm_s390_available_subfunc.kma);
428 
429 	if (test_facility(155)) /* MSA9 */
430 		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
431 			      kvm_s390_available_subfunc.kdsa);
432 
433 	if (test_facility(150)) /* SORTL */
434 		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
435 
436 	if (test_facility(151)) /* DFLTCC */
437 		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
438 
439 	if (MACHINE_HAS_ESOP)
440 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
441 	/*
442 	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
443 	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
444 	 */
445 	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
446 	    !test_facility(3) || !nested)
447 		return;
448 	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
449 	if (sclp.has_64bscao)
450 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
451 	if (sclp.has_siif)
452 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
453 	if (sclp.has_gpere)
454 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
455 	if (sclp.has_gsls)
456 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
457 	if (sclp.has_ib)
458 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
459 	if (sclp.has_cei)
460 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
461 	if (sclp.has_ibs)
462 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
463 	if (sclp.has_kss)
464 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
465 	/*
466 	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
467 	 * all skey handling functions read/set the skey from the PGSTE
468 	 * instead of the real storage key.
469 	 *
470 	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
471 	 * pages being detected as preserved although they are resident.
472 	 *
473 	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
474 	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
475 	 *
476 	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
477 	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
478 	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
479 	 *
480 	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
481 	 * cannot easily shadow the SCA because of the ipte lock.
482 	 */
483 }
484 
485 int kvm_arch_init(void *opaque)
486 {
487 	int rc = -ENOMEM;
488 
489 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
490 	if (!kvm_s390_dbf)
491 		return -ENOMEM;
492 
493 	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
494 	if (!kvm_s390_dbf_uv)
495 		goto out;
496 
497 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
498 	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
499 		goto out;
500 
501 	kvm_s390_cpu_feat_init();
502 
503 	/* Register floating interrupt controller interface. */
504 	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
505 	if (rc) {
506 		pr_err("A FLIC registration call failed with rc=%d\n", rc);
507 		goto out;
508 	}
509 
510 	rc = kvm_s390_gib_init(GAL_ISC);
511 	if (rc)
512 		goto out;
513 
514 	return 0;
515 
516 out:
517 	kvm_arch_exit();
518 	return rc;
519 }
520 
/*
 * Counterpart of kvm_arch_init(), which also uses it as its error path:
 * destroy the GIB and unregister both debug areas.
 *
 * NOTE(review): kvm_arch_init() can get here with kvm_s390_dbf_uv still
 * NULL and before kvm_s390_gib_init() was called, so the callees presumably
 * tolerate that - confirm.
 */
void kvm_arch_exit(void)
{
	kvm_s390_gib_destroy();
	debug_unregister(kvm_s390_dbf);
	debug_unregister(kvm_s390_dbf_uv);
}
527 
528 /* Section: device related */
529 long kvm_arch_dev_ioctl(struct file *filp,
530 			unsigned int ioctl, unsigned long arg)
531 {
532 	if (ioctl == KVM_S390_ENABLE_SIE)
533 		return s390_enable_sie();
534 	return -EINVAL;
535 }
536 
537 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
538 {
539 	int r;
540 
541 	switch (ext) {
542 	case KVM_CAP_S390_PSW:
543 	case KVM_CAP_S390_GMAP:
544 	case KVM_CAP_SYNC_MMU:
545 #ifdef CONFIG_KVM_S390_UCONTROL
546 	case KVM_CAP_S390_UCONTROL:
547 #endif
548 	case KVM_CAP_ASYNC_PF:
549 	case KVM_CAP_SYNC_REGS:
550 	case KVM_CAP_ONE_REG:
551 	case KVM_CAP_ENABLE_CAP:
552 	case KVM_CAP_S390_CSS_SUPPORT:
553 	case KVM_CAP_IOEVENTFD:
554 	case KVM_CAP_DEVICE_CTRL:
555 	case KVM_CAP_S390_IRQCHIP:
556 	case KVM_CAP_VM_ATTRIBUTES:
557 	case KVM_CAP_MP_STATE:
558 	case KVM_CAP_IMMEDIATE_EXIT:
559 	case KVM_CAP_S390_INJECT_IRQ:
560 	case KVM_CAP_S390_USER_SIGP:
561 	case KVM_CAP_S390_USER_STSI:
562 	case KVM_CAP_S390_SKEYS:
563 	case KVM_CAP_S390_IRQ_STATE:
564 	case KVM_CAP_S390_USER_INSTR0:
565 	case KVM_CAP_S390_CMMA_MIGRATION:
566 	case KVM_CAP_S390_AIS:
567 	case KVM_CAP_S390_AIS_MIGRATION:
568 	case KVM_CAP_S390_VCPU_RESETS:
569 	case KVM_CAP_SET_GUEST_DEBUG:
570 	case KVM_CAP_S390_DIAG318:
571 		r = 1;
572 		break;
573 	case KVM_CAP_SET_GUEST_DEBUG2:
574 		r = KVM_GUESTDBG_VALID_MASK;
575 		break;
576 	case KVM_CAP_S390_HPAGE_1M:
577 		r = 0;
578 		if (hpage && !kvm_is_ucontrol(kvm))
579 			r = 1;
580 		break;
581 	case KVM_CAP_S390_MEM_OP:
582 		r = MEM_OP_MAX_SIZE;
583 		break;
584 	case KVM_CAP_NR_VCPUS:
585 	case KVM_CAP_MAX_VCPUS:
586 	case KVM_CAP_MAX_VCPU_ID:
587 		r = KVM_S390_BSCA_CPU_SLOTS;
588 		if (!kvm_s390_use_sca_entries())
589 			r = KVM_MAX_VCPUS;
590 		else if (sclp.has_esca && sclp.has_64bscao)
591 			r = KVM_S390_ESCA_CPU_SLOTS;
592 		break;
593 	case KVM_CAP_S390_COW:
594 		r = MACHINE_HAS_ESOP;
595 		break;
596 	case KVM_CAP_S390_VECTOR_REGISTERS:
597 		r = MACHINE_HAS_VX;
598 		break;
599 	case KVM_CAP_S390_RI:
600 		r = test_facility(64);
601 		break;
602 	case KVM_CAP_S390_GS:
603 		r = test_facility(133);
604 		break;
605 	case KVM_CAP_S390_BPB:
606 		r = test_facility(82);
607 		break;
608 	case KVM_CAP_S390_PROTECTED:
609 		r = is_prot_virt_host();
610 		break;
611 	default:
612 		r = 0;
613 	}
614 	return r;
615 }
616 
617 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
618 {
619 	int i;
620 	gfn_t cur_gfn, last_gfn;
621 	unsigned long gaddr, vmaddr;
622 	struct gmap *gmap = kvm->arch.gmap;
623 	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
624 
625 	/* Loop over all guest segments */
626 	cur_gfn = memslot->base_gfn;
627 	last_gfn = memslot->base_gfn + memslot->npages;
628 	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
629 		gaddr = gfn_to_gpa(cur_gfn);
630 		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
631 		if (kvm_is_error_hva(vmaddr))
632 			continue;
633 
634 		bitmap_zero(bitmap, _PAGE_ENTRIES);
635 		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
636 		for (i = 0; i < _PAGE_ENTRIES; i++) {
637 			if (test_bit(i, bitmap))
638 				mark_page_dirty(kvm, cur_gfn + i);
639 		}
640 
641 		if (fatal_signal_pending(current))
642 			return;
643 		cond_resched();
644 	}
645 }
646 
/* Section: vm related */

/* forward declaration */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);
649 
650 /*
651  * Get (and clear) the dirty memory log for a memory slot.
652  */
653 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
654 			       struct kvm_dirty_log *log)
655 {
656 	int r;
657 	unsigned long n;
658 	struct kvm_memory_slot *memslot;
659 	int is_dirty;
660 
661 	if (kvm_is_ucontrol(kvm))
662 		return -EINVAL;
663 
664 	mutex_lock(&kvm->slots_lock);
665 
666 	r = -EINVAL;
667 	if (log->slot >= KVM_USER_MEM_SLOTS)
668 		goto out;
669 
670 	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
671 	if (r)
672 		goto out;
673 
674 	/* Clear the dirty log */
675 	if (is_dirty) {
676 		n = kvm_dirty_bitmap_bytes(memslot);
677 		memset(memslot->dirty_bitmap, 0, n);
678 	}
679 	r = 0;
680 out:
681 	mutex_unlock(&kvm->slots_lock);
682 	return r;
683 }
684 
685 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
686 {
687 	unsigned int i;
688 	struct kvm_vcpu *vcpu;
689 
690 	kvm_for_each_vcpu(i, vcpu, kvm) {
691 		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
692 	}
693 }
694 
695 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
696 {
697 	int r;
698 
699 	if (cap->flags)
700 		return -EINVAL;
701 
702 	switch (cap->cap) {
703 	case KVM_CAP_S390_IRQCHIP:
704 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
705 		kvm->arch.use_irqchip = 1;
706 		r = 0;
707 		break;
708 	case KVM_CAP_S390_USER_SIGP:
709 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
710 		kvm->arch.user_sigp = 1;
711 		r = 0;
712 		break;
713 	case KVM_CAP_S390_VECTOR_REGISTERS:
714 		mutex_lock(&kvm->lock);
715 		if (kvm->created_vcpus) {
716 			r = -EBUSY;
717 		} else if (MACHINE_HAS_VX) {
718 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
719 			set_kvm_facility(kvm->arch.model.fac_list, 129);
720 			if (test_facility(134)) {
721 				set_kvm_facility(kvm->arch.model.fac_mask, 134);
722 				set_kvm_facility(kvm->arch.model.fac_list, 134);
723 			}
724 			if (test_facility(135)) {
725 				set_kvm_facility(kvm->arch.model.fac_mask, 135);
726 				set_kvm_facility(kvm->arch.model.fac_list, 135);
727 			}
728 			if (test_facility(148)) {
729 				set_kvm_facility(kvm->arch.model.fac_mask, 148);
730 				set_kvm_facility(kvm->arch.model.fac_list, 148);
731 			}
732 			if (test_facility(152)) {
733 				set_kvm_facility(kvm->arch.model.fac_mask, 152);
734 				set_kvm_facility(kvm->arch.model.fac_list, 152);
735 			}
736 			if (test_facility(192)) {
737 				set_kvm_facility(kvm->arch.model.fac_mask, 192);
738 				set_kvm_facility(kvm->arch.model.fac_list, 192);
739 			}
740 			r = 0;
741 		} else
742 			r = -EINVAL;
743 		mutex_unlock(&kvm->lock);
744 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
745 			 r ? "(not available)" : "(success)");
746 		break;
747 	case KVM_CAP_S390_RI:
748 		r = -EINVAL;
749 		mutex_lock(&kvm->lock);
750 		if (kvm->created_vcpus) {
751 			r = -EBUSY;
752 		} else if (test_facility(64)) {
753 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
754 			set_kvm_facility(kvm->arch.model.fac_list, 64);
755 			r = 0;
756 		}
757 		mutex_unlock(&kvm->lock);
758 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
759 			 r ? "(not available)" : "(success)");
760 		break;
761 	case KVM_CAP_S390_AIS:
762 		mutex_lock(&kvm->lock);
763 		if (kvm->created_vcpus) {
764 			r = -EBUSY;
765 		} else {
766 			set_kvm_facility(kvm->arch.model.fac_mask, 72);
767 			set_kvm_facility(kvm->arch.model.fac_list, 72);
768 			r = 0;
769 		}
770 		mutex_unlock(&kvm->lock);
771 		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
772 			 r ? "(not available)" : "(success)");
773 		break;
774 	case KVM_CAP_S390_GS:
775 		r = -EINVAL;
776 		mutex_lock(&kvm->lock);
777 		if (kvm->created_vcpus) {
778 			r = -EBUSY;
779 		} else if (test_facility(133)) {
780 			set_kvm_facility(kvm->arch.model.fac_mask, 133);
781 			set_kvm_facility(kvm->arch.model.fac_list, 133);
782 			r = 0;
783 		}
784 		mutex_unlock(&kvm->lock);
785 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
786 			 r ? "(not available)" : "(success)");
787 		break;
788 	case KVM_CAP_S390_HPAGE_1M:
789 		mutex_lock(&kvm->lock);
790 		if (kvm->created_vcpus)
791 			r = -EBUSY;
792 		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
793 			r = -EINVAL;
794 		else {
795 			r = 0;
796 			mmap_write_lock(kvm->mm);
797 			kvm->mm->context.allow_gmap_hpage_1m = 1;
798 			mmap_write_unlock(kvm->mm);
799 			/*
800 			 * We might have to create fake 4k page
801 			 * tables. To avoid that the hardware works on
802 			 * stale PGSTEs, we emulate these instructions.
803 			 */
804 			kvm->arch.use_skf = 0;
805 			kvm->arch.use_pfmfi = 0;
806 		}
807 		mutex_unlock(&kvm->lock);
808 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
809 			 r ? "(not available)" : "(success)");
810 		break;
811 	case KVM_CAP_S390_USER_STSI:
812 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
813 		kvm->arch.user_stsi = 1;
814 		r = 0;
815 		break;
816 	case KVM_CAP_S390_USER_INSTR0:
817 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
818 		kvm->arch.user_instr0 = 1;
819 		icpt_operexc_on_all_vcpus(kvm);
820 		r = 0;
821 		break;
822 	default:
823 		r = -EINVAL;
824 		break;
825 	}
826 	return r;
827 }
828 
829 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
830 {
831 	int ret;
832 
833 	switch (attr->attr) {
834 	case KVM_S390_VM_MEM_LIMIT_SIZE:
835 		ret = 0;
836 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
837 			 kvm->arch.mem_limit);
838 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
839 			ret = -EFAULT;
840 		break;
841 	default:
842 		ret = -ENXIO;
843 		break;
844 	}
845 	return ret;
846 }
847 
848 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
849 {
850 	int ret;
851 	unsigned int idx;
852 	switch (attr->attr) {
853 	case KVM_S390_VM_MEM_ENABLE_CMMA:
854 		ret = -ENXIO;
855 		if (!sclp.has_cmma)
856 			break;
857 
858 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
859 		mutex_lock(&kvm->lock);
860 		if (kvm->created_vcpus)
861 			ret = -EBUSY;
862 		else if (kvm->mm->context.allow_gmap_hpage_1m)
863 			ret = -EINVAL;
864 		else {
865 			kvm->arch.use_cmma = 1;
866 			/* Not compatible with cmma. */
867 			kvm->arch.use_pfmfi = 0;
868 			ret = 0;
869 		}
870 		mutex_unlock(&kvm->lock);
871 		break;
872 	case KVM_S390_VM_MEM_CLR_CMMA:
873 		ret = -ENXIO;
874 		if (!sclp.has_cmma)
875 			break;
876 		ret = -EINVAL;
877 		if (!kvm->arch.use_cmma)
878 			break;
879 
880 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
881 		mutex_lock(&kvm->lock);
882 		idx = srcu_read_lock(&kvm->srcu);
883 		s390_reset_cmma(kvm->arch.gmap->mm);
884 		srcu_read_unlock(&kvm->srcu, idx);
885 		mutex_unlock(&kvm->lock);
886 		ret = 0;
887 		break;
888 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
889 		unsigned long new_limit;
890 
891 		if (kvm_is_ucontrol(kvm))
892 			return -EINVAL;
893 
894 		if (get_user(new_limit, (u64 __user *)attr->addr))
895 			return -EFAULT;
896 
897 		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
898 		    new_limit > kvm->arch.mem_limit)
899 			return -E2BIG;
900 
901 		if (!new_limit)
902 			return -EINVAL;
903 
904 		/* gmap_create takes last usable address */
905 		if (new_limit != KVM_S390_NO_MEM_LIMIT)
906 			new_limit -= 1;
907 
908 		ret = -EBUSY;
909 		mutex_lock(&kvm->lock);
910 		if (!kvm->created_vcpus) {
911 			/* gmap_create will round the limit up */
912 			struct gmap *new = gmap_create(current->mm, new_limit);
913 
914 			if (!new) {
915 				ret = -ENOMEM;
916 			} else {
917 				gmap_remove(kvm->arch.gmap);
918 				new->private = kvm;
919 				kvm->arch.gmap = new;
920 				ret = 0;
921 			}
922 		}
923 		mutex_unlock(&kvm->lock);
924 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
925 		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
926 			 (void *) kvm->arch.gmap->asce);
927 		break;
928 	}
929 	default:
930 		ret = -ENXIO;
931 		break;
932 	}
933 	return ret;
934 }
935 
/* forward declaration, used by kvm_s390_vcpu_crypto_reset_all() */
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
937 
938 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
939 {
940 	struct kvm_vcpu *vcpu;
941 	int i;
942 
943 	kvm_s390_vcpu_block_all(kvm);
944 
945 	kvm_for_each_vcpu(i, vcpu, kvm) {
946 		kvm_s390_vcpu_crypto_setup(vcpu);
947 		/* recreate the shadow crycb by leaving the VSIE handler */
948 		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
949 	}
950 
951 	kvm_s390_vcpu_unblock_all(kvm);
952 }
953 
954 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
955 {
956 	mutex_lock(&kvm->lock);
957 	switch (attr->attr) {
958 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
959 		if (!test_kvm_facility(kvm, 76)) {
960 			mutex_unlock(&kvm->lock);
961 			return -EINVAL;
962 		}
963 		get_random_bytes(
964 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
965 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
966 		kvm->arch.crypto.aes_kw = 1;
967 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
968 		break;
969 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
970 		if (!test_kvm_facility(kvm, 76)) {
971 			mutex_unlock(&kvm->lock);
972 			return -EINVAL;
973 		}
974 		get_random_bytes(
975 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
976 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
977 		kvm->arch.crypto.dea_kw = 1;
978 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
979 		break;
980 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
981 		if (!test_kvm_facility(kvm, 76)) {
982 			mutex_unlock(&kvm->lock);
983 			return -EINVAL;
984 		}
985 		kvm->arch.crypto.aes_kw = 0;
986 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
987 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
988 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
989 		break;
990 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
991 		if (!test_kvm_facility(kvm, 76)) {
992 			mutex_unlock(&kvm->lock);
993 			return -EINVAL;
994 		}
995 		kvm->arch.crypto.dea_kw = 0;
996 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
997 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
998 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
999 		break;
1000 	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1001 		if (!ap_instructions_available()) {
1002 			mutex_unlock(&kvm->lock);
1003 			return -EOPNOTSUPP;
1004 		}
1005 		kvm->arch.crypto.apie = 1;
1006 		break;
1007 	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1008 		if (!ap_instructions_available()) {
1009 			mutex_unlock(&kvm->lock);
1010 			return -EOPNOTSUPP;
1011 		}
1012 		kvm->arch.crypto.apie = 0;
1013 		break;
1014 	default:
1015 		mutex_unlock(&kvm->lock);
1016 		return -ENXIO;
1017 	}
1018 
1019 	kvm_s390_vcpu_crypto_reset_all(kvm);
1020 	mutex_unlock(&kvm->lock);
1021 	return 0;
1022 }
1023 
1024 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
1025 {
1026 	int cx;
1027 	struct kvm_vcpu *vcpu;
1028 
1029 	kvm_for_each_vcpu(cx, vcpu, kvm)
1030 		kvm_s390_sync_request(req, vcpu);
1031 }
1032 
1033 /*
1034  * Must be called with kvm->srcu held to avoid races on memslots, and with
1035  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1036  */
1037 static int kvm_s390_vm_start_migration(struct kvm *kvm)
1038 {
1039 	struct kvm_memory_slot *ms;
1040 	struct kvm_memslots *slots;
1041 	unsigned long ram_pages = 0;
1042 	int slotnr;
1043 
1044 	/* migration mode already enabled */
1045 	if (kvm->arch.migration_mode)
1046 		return 0;
1047 	slots = kvm_memslots(kvm);
1048 	if (!slots || !slots->used_slots)
1049 		return -EINVAL;
1050 
1051 	if (!kvm->arch.use_cmma) {
1052 		kvm->arch.migration_mode = 1;
1053 		return 0;
1054 	}
1055 	/* mark all the pages in active slots as dirty */
1056 	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1057 		ms = slots->memslots + slotnr;
1058 		if (!ms->dirty_bitmap)
1059 			return -EINVAL;
1060 		/*
1061 		 * The second half of the bitmap is only used on x86,
1062 		 * and would be wasted otherwise, so we put it to good
1063 		 * use here to keep track of the state of the storage
1064 		 * attributes.
1065 		 */
1066 		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1067 		ram_pages += ms->npages;
1068 	}
1069 	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1070 	kvm->arch.migration_mode = 1;
1071 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1072 	return 0;
1073 }
1074 
1075 /*
1076  * Must be called with kvm->slots_lock to avoid races with ourselves and
1077  * kvm_s390_vm_start_migration.
1078  */
1079 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1080 {
1081 	/* migration mode already disabled */
1082 	if (!kvm->arch.migration_mode)
1083 		return 0;
1084 	kvm->arch.migration_mode = 0;
1085 	if (kvm->arch.use_cmma)
1086 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1087 	return 0;
1088 }
1089 
1090 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1091 				     struct kvm_device_attr *attr)
1092 {
1093 	int res = -ENXIO;
1094 
1095 	mutex_lock(&kvm->slots_lock);
1096 	switch (attr->attr) {
1097 	case KVM_S390_VM_MIGRATION_START:
1098 		res = kvm_s390_vm_start_migration(kvm);
1099 		break;
1100 	case KVM_S390_VM_MIGRATION_STOP:
1101 		res = kvm_s390_vm_stop_migration(kvm);
1102 		break;
1103 	default:
1104 		break;
1105 	}
1106 	mutex_unlock(&kvm->slots_lock);
1107 
1108 	return res;
1109 }
1110 
1111 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1112 				     struct kvm_device_attr *attr)
1113 {
1114 	u64 mig = kvm->arch.migration_mode;
1115 
1116 	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1117 		return -ENXIO;
1118 
1119 	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1120 		return -EFAULT;
1121 	return 0;
1122 }
1123 
1124 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1125 {
1126 	struct kvm_s390_vm_tod_clock gtod;
1127 
1128 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1129 		return -EFAULT;
1130 
1131 	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1132 		return -EINVAL;
1133 	kvm_s390_set_tod_clock(kvm, &gtod);
1134 
1135 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1136 		gtod.epoch_idx, gtod.tod);
1137 
1138 	return 0;
1139 }
1140 
1141 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1142 {
1143 	u8 gtod_high;
1144 
1145 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1146 					   sizeof(gtod_high)))
1147 		return -EFAULT;
1148 
1149 	if (gtod_high != 0)
1150 		return -EINVAL;
1151 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1152 
1153 	return 0;
1154 }
1155 
1156 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1157 {
1158 	struct kvm_s390_vm_tod_clock gtod = { 0 };
1159 
1160 	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1161 			   sizeof(gtod.tod)))
1162 		return -EFAULT;
1163 
1164 	kvm_s390_set_tod_clock(kvm, &gtod);
1165 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1166 	return 0;
1167 }
1168 
1169 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1170 {
1171 	int ret;
1172 
1173 	if (attr->flags)
1174 		return -EINVAL;
1175 
1176 	switch (attr->attr) {
1177 	case KVM_S390_VM_TOD_EXT:
1178 		ret = kvm_s390_set_tod_ext(kvm, attr);
1179 		break;
1180 	case KVM_S390_VM_TOD_HIGH:
1181 		ret = kvm_s390_set_tod_high(kvm, attr);
1182 		break;
1183 	case KVM_S390_VM_TOD_LOW:
1184 		ret = kvm_s390_set_tod_low(kvm, attr);
1185 		break;
1186 	default:
1187 		ret = -ENXIO;
1188 		break;
1189 	}
1190 	return ret;
1191 }
1192 
1193 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1194 				   struct kvm_s390_vm_tod_clock *gtod)
1195 {
1196 	union tod_clock clk;
1197 
1198 	preempt_disable();
1199 
1200 	store_tod_clock_ext(&clk);
1201 
1202 	gtod->tod = clk.tod + kvm->arch.epoch;
1203 	gtod->epoch_idx = 0;
1204 	if (test_kvm_facility(kvm, 139)) {
1205 		gtod->epoch_idx = clk.ei + kvm->arch.epdx;
1206 		if (gtod->tod < clk.tod)
1207 			gtod->epoch_idx += 1;
1208 	}
1209 
1210 	preempt_enable();
1211 }
1212 
1213 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1214 {
1215 	struct kvm_s390_vm_tod_clock gtod;
1216 
1217 	memset(&gtod, 0, sizeof(gtod));
1218 	kvm_s390_get_tod_clock(kvm, &gtod);
1219 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1220 		return -EFAULT;
1221 
1222 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1223 		gtod.epoch_idx, gtod.tod);
1224 	return 0;
1225 }
1226 
1227 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1228 {
1229 	u8 gtod_high = 0;
1230 
1231 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
1232 					 sizeof(gtod_high)))
1233 		return -EFAULT;
1234 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1235 
1236 	return 0;
1237 }
1238 
1239 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1240 {
1241 	u64 gtod;
1242 
1243 	gtod = kvm_s390_get_tod_clock_fast(kvm);
1244 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1245 		return -EFAULT;
1246 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1247 
1248 	return 0;
1249 }
1250 
1251 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1252 {
1253 	int ret;
1254 
1255 	if (attr->flags)
1256 		return -EINVAL;
1257 
1258 	switch (attr->attr) {
1259 	case KVM_S390_VM_TOD_EXT:
1260 		ret = kvm_s390_get_tod_ext(kvm, attr);
1261 		break;
1262 	case KVM_S390_VM_TOD_HIGH:
1263 		ret = kvm_s390_get_tod_high(kvm, attr);
1264 		break;
1265 	case KVM_S390_VM_TOD_LOW:
1266 		ret = kvm_s390_get_tod_low(kvm, attr);
1267 		break;
1268 	default:
1269 		ret = -ENXIO;
1270 		break;
1271 	}
1272 	return ret;
1273 }
1274 
1275 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1276 {
1277 	struct kvm_s390_vm_cpu_processor *proc;
1278 	u16 lowest_ibc, unblocked_ibc;
1279 	int ret = 0;
1280 
1281 	mutex_lock(&kvm->lock);
1282 	if (kvm->created_vcpus) {
1283 		ret = -EBUSY;
1284 		goto out;
1285 	}
1286 	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1287 	if (!proc) {
1288 		ret = -ENOMEM;
1289 		goto out;
1290 	}
1291 	if (!copy_from_user(proc, (void __user *)attr->addr,
1292 			    sizeof(*proc))) {
1293 		kvm->arch.model.cpuid = proc->cpuid;
1294 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
1295 		unblocked_ibc = sclp.ibc & 0xfff;
1296 		if (lowest_ibc && proc->ibc) {
1297 			if (proc->ibc > unblocked_ibc)
1298 				kvm->arch.model.ibc = unblocked_ibc;
1299 			else if (proc->ibc < lowest_ibc)
1300 				kvm->arch.model.ibc = lowest_ibc;
1301 			else
1302 				kvm->arch.model.ibc = proc->ibc;
1303 		}
1304 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
1305 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
1306 		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1307 			 kvm->arch.model.ibc,
1308 			 kvm->arch.model.cpuid);
1309 		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1310 			 kvm->arch.model.fac_list[0],
1311 			 kvm->arch.model.fac_list[1],
1312 			 kvm->arch.model.fac_list[2]);
1313 	} else
1314 		ret = -EFAULT;
1315 	kfree(proc);
1316 out:
1317 	mutex_unlock(&kvm->lock);
1318 	return ret;
1319 }
1320 
1321 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1322 				       struct kvm_device_attr *attr)
1323 {
1324 	struct kvm_s390_vm_cpu_feat data;
1325 
1326 	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1327 		return -EFAULT;
1328 	if (!bitmap_subset((unsigned long *) data.feat,
1329 			   kvm_s390_available_cpu_feat,
1330 			   KVM_S390_VM_CPU_FEAT_NR_BITS))
1331 		return -EINVAL;
1332 
1333 	mutex_lock(&kvm->lock);
1334 	if (kvm->created_vcpus) {
1335 		mutex_unlock(&kvm->lock);
1336 		return -EBUSY;
1337 	}
1338 	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1339 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1340 	mutex_unlock(&kvm->lock);
1341 	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1342 			 data.feat[0],
1343 			 data.feat[1],
1344 			 data.feat[2]);
1345 	return 0;
1346 }
1347 
1348 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1349 					  struct kvm_device_attr *attr)
1350 {
1351 	mutex_lock(&kvm->lock);
1352 	if (kvm->created_vcpus) {
1353 		mutex_unlock(&kvm->lock);
1354 		return -EBUSY;
1355 	}
1356 
1357 	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1358 			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1359 		mutex_unlock(&kvm->lock);
1360 		return -EFAULT;
1361 	}
1362 	mutex_unlock(&kvm->lock);
1363 
1364 	VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1365 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1366 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1367 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1368 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1369 	VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1370 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1371 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1372 	VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1373 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1374 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1375 	VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1376 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1377 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1378 	VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1379 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1380 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1381 	VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1382 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1383 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1384 	VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1385 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1386 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1387 	VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1388 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1389 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1390 	VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1391 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1392 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1393 	VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1394 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1395 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1396 	VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1397 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1398 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1399 	VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1400 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1401 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1402 	VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1403 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1404 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1405 	VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1406 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1407 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1408 	VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1409 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1410 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1411 	VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1412 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1413 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1414 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1415 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1416 	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1417 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1418 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1419 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1420 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1421 
1422 	return 0;
1423 }
1424 
1425 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1426 {
1427 	int ret = -ENXIO;
1428 
1429 	switch (attr->attr) {
1430 	case KVM_S390_VM_CPU_PROCESSOR:
1431 		ret = kvm_s390_set_processor(kvm, attr);
1432 		break;
1433 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1434 		ret = kvm_s390_set_processor_feat(kvm, attr);
1435 		break;
1436 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1437 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
1438 		break;
1439 	}
1440 	return ret;
1441 }
1442 
1443 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1444 {
1445 	struct kvm_s390_vm_cpu_processor *proc;
1446 	int ret = 0;
1447 
1448 	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1449 	if (!proc) {
1450 		ret = -ENOMEM;
1451 		goto out;
1452 	}
1453 	proc->cpuid = kvm->arch.model.cpuid;
1454 	proc->ibc = kvm->arch.model.ibc;
1455 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1456 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1457 	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1458 		 kvm->arch.model.ibc,
1459 		 kvm->arch.model.cpuid);
1460 	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1461 		 kvm->arch.model.fac_list[0],
1462 		 kvm->arch.model.fac_list[1],
1463 		 kvm->arch.model.fac_list[2]);
1464 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1465 		ret = -EFAULT;
1466 	kfree(proc);
1467 out:
1468 	return ret;
1469 }
1470 
1471 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1472 {
1473 	struct kvm_s390_vm_cpu_machine *mach;
1474 	int ret = 0;
1475 
1476 	mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1477 	if (!mach) {
1478 		ret = -ENOMEM;
1479 		goto out;
1480 	}
1481 	get_cpu_id((struct cpuid *) &mach->cpuid);
1482 	mach->ibc = sclp.ibc;
1483 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1484 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1485 	memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
1486 	       sizeof(stfle_fac_list));
1487 	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1488 		 kvm->arch.model.ibc,
1489 		 kvm->arch.model.cpuid);
1490 	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1491 		 mach->fac_mask[0],
1492 		 mach->fac_mask[1],
1493 		 mach->fac_mask[2]);
1494 	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1495 		 mach->fac_list[0],
1496 		 mach->fac_list[1],
1497 		 mach->fac_list[2]);
1498 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1499 		ret = -EFAULT;
1500 	kfree(mach);
1501 out:
1502 	return ret;
1503 }
1504 
1505 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1506 				       struct kvm_device_attr *attr)
1507 {
1508 	struct kvm_s390_vm_cpu_feat data;
1509 
1510 	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1511 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1512 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1513 		return -EFAULT;
1514 	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1515 			 data.feat[0],
1516 			 data.feat[1],
1517 			 data.feat[2]);
1518 	return 0;
1519 }
1520 
1521 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1522 				     struct kvm_device_attr *attr)
1523 {
1524 	struct kvm_s390_vm_cpu_feat data;
1525 
1526 	bitmap_copy((unsigned long *) data.feat,
1527 		    kvm_s390_available_cpu_feat,
1528 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1529 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1530 		return -EFAULT;
1531 	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1532 			 data.feat[0],
1533 			 data.feat[1],
1534 			 data.feat[2]);
1535 	return 0;
1536 }
1537 
1538 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1539 					  struct kvm_device_attr *attr)
1540 {
1541 	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1542 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1543 		return -EFAULT;
1544 
1545 	VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1546 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1547 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1548 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1549 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1550 	VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1551 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1552 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1553 	VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1554 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1555 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1556 	VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1557 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1558 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1559 	VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1560 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1561 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1562 	VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1563 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1564 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1565 	VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1566 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1567 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1568 	VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1569 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1570 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1571 	VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1572 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1573 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1574 	VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1575 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1576 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1577 	VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1578 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1579 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1580 	VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1581 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1582 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1583 	VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1584 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1585 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1586 	VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1587 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1588 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1589 	VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1590 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1591 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1592 	VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1593 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1594 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1595 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1596 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1597 	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1598 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1599 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1600 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1601 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1602 
1603 	return 0;
1604 }
1605 
1606 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1607 					struct kvm_device_attr *attr)
1608 {
1609 	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1610 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1611 		return -EFAULT;
1612 
1613 	VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1614 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1615 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1616 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1617 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1618 	VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1619 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1620 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1621 	VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1622 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1623 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1624 	VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1625 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1626 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1627 	VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1628 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1629 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1630 	VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1631 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1632 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1633 	VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1634 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1635 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1636 	VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1637 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1638 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1639 	VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1640 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1641 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1642 	VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1643 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1644 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1645 	VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1646 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1647 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1648 	VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1649 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1650 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1651 	VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1652 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1653 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1654 	VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1655 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1656 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1657 	VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1658 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1659 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1660 	VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1661 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1662 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1663 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1664 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1665 	VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1666 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1667 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1668 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1669 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1670 
1671 	return 0;
1672 }
1673 
1674 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1675 {
1676 	int ret = -ENXIO;
1677 
1678 	switch (attr->attr) {
1679 	case KVM_S390_VM_CPU_PROCESSOR:
1680 		ret = kvm_s390_get_processor(kvm, attr);
1681 		break;
1682 	case KVM_S390_VM_CPU_MACHINE:
1683 		ret = kvm_s390_get_machine(kvm, attr);
1684 		break;
1685 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1686 		ret = kvm_s390_get_processor_feat(kvm, attr);
1687 		break;
1688 	case KVM_S390_VM_CPU_MACHINE_FEAT:
1689 		ret = kvm_s390_get_machine_feat(kvm, attr);
1690 		break;
1691 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1692 		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1693 		break;
1694 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1695 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1696 		break;
1697 	}
1698 	return ret;
1699 }
1700 
1701 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1702 {
1703 	int ret;
1704 
1705 	switch (attr->group) {
1706 	case KVM_S390_VM_MEM_CTRL:
1707 		ret = kvm_s390_set_mem_control(kvm, attr);
1708 		break;
1709 	case KVM_S390_VM_TOD:
1710 		ret = kvm_s390_set_tod(kvm, attr);
1711 		break;
1712 	case KVM_S390_VM_CPU_MODEL:
1713 		ret = kvm_s390_set_cpu_model(kvm, attr);
1714 		break;
1715 	case KVM_S390_VM_CRYPTO:
1716 		ret = kvm_s390_vm_set_crypto(kvm, attr);
1717 		break;
1718 	case KVM_S390_VM_MIGRATION:
1719 		ret = kvm_s390_vm_set_migration(kvm, attr);
1720 		break;
1721 	default:
1722 		ret = -ENXIO;
1723 		break;
1724 	}
1725 
1726 	return ret;
1727 }
1728 
1729 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1730 {
1731 	int ret;
1732 
1733 	switch (attr->group) {
1734 	case KVM_S390_VM_MEM_CTRL:
1735 		ret = kvm_s390_get_mem_control(kvm, attr);
1736 		break;
1737 	case KVM_S390_VM_TOD:
1738 		ret = kvm_s390_get_tod(kvm, attr);
1739 		break;
1740 	case KVM_S390_VM_CPU_MODEL:
1741 		ret = kvm_s390_get_cpu_model(kvm, attr);
1742 		break;
1743 	case KVM_S390_VM_MIGRATION:
1744 		ret = kvm_s390_vm_get_migration(kvm, attr);
1745 		break;
1746 	default:
1747 		ret = -ENXIO;
1748 		break;
1749 	}
1750 
1751 	return ret;
1752 }
1753 
1754 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1755 {
1756 	int ret;
1757 
1758 	switch (attr->group) {
1759 	case KVM_S390_VM_MEM_CTRL:
1760 		switch (attr->attr) {
1761 		case KVM_S390_VM_MEM_ENABLE_CMMA:
1762 		case KVM_S390_VM_MEM_CLR_CMMA:
1763 			ret = sclp.has_cmma ? 0 : -ENXIO;
1764 			break;
1765 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1766 			ret = 0;
1767 			break;
1768 		default:
1769 			ret = -ENXIO;
1770 			break;
1771 		}
1772 		break;
1773 	case KVM_S390_VM_TOD:
1774 		switch (attr->attr) {
1775 		case KVM_S390_VM_TOD_LOW:
1776 		case KVM_S390_VM_TOD_HIGH:
1777 			ret = 0;
1778 			break;
1779 		default:
1780 			ret = -ENXIO;
1781 			break;
1782 		}
1783 		break;
1784 	case KVM_S390_VM_CPU_MODEL:
1785 		switch (attr->attr) {
1786 		case KVM_S390_VM_CPU_PROCESSOR:
1787 		case KVM_S390_VM_CPU_MACHINE:
1788 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1789 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1790 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1791 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1792 			ret = 0;
1793 			break;
1794 		default:
1795 			ret = -ENXIO;
1796 			break;
1797 		}
1798 		break;
1799 	case KVM_S390_VM_CRYPTO:
1800 		switch (attr->attr) {
1801 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1802 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1803 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1804 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1805 			ret = 0;
1806 			break;
1807 		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1808 		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1809 			ret = ap_instructions_available() ? 0 : -ENXIO;
1810 			break;
1811 		default:
1812 			ret = -ENXIO;
1813 			break;
1814 		}
1815 		break;
1816 	case KVM_S390_VM_MIGRATION:
1817 		ret = 0;
1818 		break;
1819 	default:
1820 		ret = -ENXIO;
1821 		break;
1822 	}
1823 
1824 	return ret;
1825 }
1826 
1827 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1828 {
1829 	uint8_t *keys;
1830 	uint64_t hva;
1831 	int srcu_idx, i, r = 0;
1832 
1833 	if (args->flags != 0)
1834 		return -EINVAL;
1835 
1836 	/* Is this guest using storage keys? */
1837 	if (!mm_uses_skeys(current->mm))
1838 		return KVM_S390_GET_SKEYS_NONE;
1839 
1840 	/* Enforce sane limit on memory allocation */
1841 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1842 		return -EINVAL;
1843 
1844 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1845 	if (!keys)
1846 		return -ENOMEM;
1847 
1848 	mmap_read_lock(current->mm);
1849 	srcu_idx = srcu_read_lock(&kvm->srcu);
1850 	for (i = 0; i < args->count; i++) {
1851 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1852 		if (kvm_is_error_hva(hva)) {
1853 			r = -EFAULT;
1854 			break;
1855 		}
1856 
1857 		r = get_guest_storage_key(current->mm, hva, &keys[i]);
1858 		if (r)
1859 			break;
1860 	}
1861 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1862 	mmap_read_unlock(current->mm);
1863 
1864 	if (!r) {
1865 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1866 				 sizeof(uint8_t) * args->count);
1867 		if (r)
1868 			r = -EFAULT;
1869 	}
1870 
1871 	kvfree(keys);
1872 	return r;
1873 }
1874 
1875 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1876 {
1877 	uint8_t *keys;
1878 	uint64_t hva;
1879 	int srcu_idx, i, r = 0;
1880 	bool unlocked;
1881 
1882 	if (args->flags != 0)
1883 		return -EINVAL;
1884 
1885 	/* Enforce sane limit on memory allocation */
1886 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1887 		return -EINVAL;
1888 
1889 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1890 	if (!keys)
1891 		return -ENOMEM;
1892 
1893 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1894 			   sizeof(uint8_t) * args->count);
1895 	if (r) {
1896 		r = -EFAULT;
1897 		goto out;
1898 	}
1899 
1900 	/* Enable storage key handling for the guest */
1901 	r = s390_enable_skey();
1902 	if (r)
1903 		goto out;
1904 
1905 	i = 0;
1906 	mmap_read_lock(current->mm);
1907 	srcu_idx = srcu_read_lock(&kvm->srcu);
1908         while (i < args->count) {
1909 		unlocked = false;
1910 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1911 		if (kvm_is_error_hva(hva)) {
1912 			r = -EFAULT;
1913 			break;
1914 		}
1915 
1916 		/* Lowest order bit is reserved */
1917 		if (keys[i] & 0x01) {
1918 			r = -EINVAL;
1919 			break;
1920 		}
1921 
1922 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1923 		if (r) {
1924 			r = fixup_user_fault(current->mm, hva,
1925 					     FAULT_FLAG_WRITE, &unlocked);
1926 			if (r)
1927 				break;
1928 		}
1929 		if (!r)
1930 			i++;
1931 	}
1932 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1933 	mmap_read_unlock(current->mm);
1934 out:
1935 	kvfree(keys);
1936 	return r;
1937 }
1938 
1939 /*
1940  * Base address and length must be sent at the start of each block, therefore
1941  * it's cheaper to send some clean data, as long as it's less than the size of
1942  * two longs.
1943  */
1944 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/* Use the same upper bound as for storage key buffers, for consistency */
1946 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1947 
1948 /*
1949  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1950  * address falls in a hole. In that case the index of one of the memslots
1951  * bordering the hole is returned.
1952  */
1953 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1954 {
1955 	int start = 0, end = slots->used_slots;
1956 	int slot = atomic_read(&slots->lru_slot);
1957 	struct kvm_memory_slot *memslots = slots->memslots;
1958 
1959 	if (gfn >= memslots[slot].base_gfn &&
1960 	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
1961 		return slot;
1962 
1963 	while (start < end) {
1964 		slot = start + (end - start) / 2;
1965 
1966 		if (gfn >= memslots[slot].base_gfn)
1967 			end = slot;
1968 		else
1969 			start = slot + 1;
1970 	}
1971 
1972 	if (start >= slots->used_slots)
1973 		return slots->used_slots - 1;
1974 
1975 	if (gfn >= memslots[start].base_gfn &&
1976 	    gfn < memslots[start].base_gfn + memslots[start].npages) {
1977 		atomic_set(&slots->lru_slot, start);
1978 	}
1979 
1980 	return start;
1981 }
1982 
1983 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1984 			      u8 *res, unsigned long bufsize)
1985 {
1986 	unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1987 
1988 	args->count = 0;
1989 	while (args->count < bufsize) {
1990 		hva = gfn_to_hva(kvm, cur_gfn);
1991 		/*
1992 		 * We return an error if the first value was invalid, but we
1993 		 * return successfully if at least one value was copied.
1994 		 */
1995 		if (kvm_is_error_hva(hva))
1996 			return args->count ? 0 : -EFAULT;
1997 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1998 			pgstev = 0;
1999 		res[args->count++] = (pgstev >> 24) & 0x43;
2000 		cur_gfn++;
2001 	}
2002 
2003 	return 0;
2004 }
2005 
2006 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
2007 					      unsigned long cur_gfn)
2008 {
2009 	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
2010 	struct kvm_memory_slot *ms = slots->memslots + slotidx;
2011 	unsigned long ofs = cur_gfn - ms->base_gfn;
2012 
2013 	if (ms->base_gfn + ms->npages <= cur_gfn) {
2014 		slotidx--;
2015 		/* If we are above the highest slot, wrap around */
2016 		if (slotidx < 0)
2017 			slotidx = slots->used_slots - 1;
2018 
2019 		ms = slots->memslots + slotidx;
2020 		ofs = 0;
2021 	}
2022 	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
2023 	while ((slotidx > 0) && (ofs >= ms->npages)) {
2024 		slotidx--;
2025 		ms = slots->memslots + slotidx;
2026 		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
2027 	}
2028 	return ms->base_gfn + ofs;
2029 }
2030 
2031 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2032 			     u8 *res, unsigned long bufsize)
2033 {
2034 	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2035 	struct kvm_memslots *slots = kvm_memslots(kvm);
2036 	struct kvm_memory_slot *ms;
2037 
2038 	if (unlikely(!slots->used_slots))
2039 		return 0;
2040 
2041 	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2042 	ms = gfn_to_memslot(kvm, cur_gfn);
2043 	args->count = 0;
2044 	args->start_gfn = cur_gfn;
2045 	if (!ms)
2046 		return 0;
2047 	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2048 	mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2049 
2050 	while (args->count < bufsize) {
2051 		hva = gfn_to_hva(kvm, cur_gfn);
2052 		if (kvm_is_error_hva(hva))
2053 			return 0;
2054 		/* Decrement only if we actually flipped the bit to 0 */
2055 		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2056 			atomic64_dec(&kvm->arch.cmma_dirty_pages);
2057 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2058 			pgstev = 0;
2059 		/* Save the value */
2060 		res[args->count++] = (pgstev >> 24) & 0x43;
2061 		/* If the next bit is too far away, stop. */
2062 		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2063 			return 0;
2064 		/* If we reached the previous "next", find the next one */
2065 		if (cur_gfn == next_gfn)
2066 			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2067 		/* Reached the end of memory or of the buffer, stop */
2068 		if ((next_gfn >= mem_end) ||
2069 		    (next_gfn - args->start_gfn >= bufsize))
2070 			return 0;
2071 		cur_gfn++;
2072 		/* Reached the end of the current memslot, take the next one. */
2073 		if (cur_gfn - ms->base_gfn >= ms->npages) {
2074 			ms = gfn_to_memslot(kvm, cur_gfn);
2075 			if (!ms)
2076 				return 0;
2077 		}
2078 	}
2079 	return 0;
2080 }
2081 
2082 /*
2083  * This function searches for the next page with dirty CMMA attributes, and
2084  * saves the attributes in the buffer up to either the end of the buffer or
2085  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2086  * no trailing clean bytes are saved.
2087  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2088  * output buffer will indicate 0 as length.
2089  */
2090 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2091 				  struct kvm_s390_cmma_log *args)
2092 {
2093 	unsigned long bufsize;
2094 	int srcu_idx, peek, ret;
2095 	u8 *values;
2096 
2097 	if (!kvm->arch.use_cmma)
2098 		return -ENXIO;
2099 	/* Invalid/unsupported flags were specified */
2100 	if (args->flags & ~KVM_S390_CMMA_PEEK)
2101 		return -EINVAL;
2102 	/* Migration mode query, and we are not doing a migration */
2103 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2104 	if (!peek && !kvm->arch.migration_mode)
2105 		return -EINVAL;
2106 	/* CMMA is disabled or was not used, or the buffer has length zero */
2107 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2108 	if (!bufsize || !kvm->mm->context.uses_cmm) {
2109 		memset(args, 0, sizeof(*args));
2110 		return 0;
2111 	}
2112 	/* We are not peeking, and there are no dirty pages */
2113 	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2114 		memset(args, 0, sizeof(*args));
2115 		return 0;
2116 	}
2117 
2118 	values = vmalloc(bufsize);
2119 	if (!values)
2120 		return -ENOMEM;
2121 
2122 	mmap_read_lock(kvm->mm);
2123 	srcu_idx = srcu_read_lock(&kvm->srcu);
2124 	if (peek)
2125 		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2126 	else
2127 		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2128 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2129 	mmap_read_unlock(kvm->mm);
2130 
2131 	if (kvm->arch.migration_mode)
2132 		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2133 	else
2134 		args->remaining = 0;
2135 
2136 	if (copy_to_user((void __user *)args->values, values, args->count))
2137 		ret = -EFAULT;
2138 
2139 	vfree(values);
2140 	return ret;
2141 }
2142 
2143 /*
2144  * This function sets the CMMA attributes for the given pages. If the input
2145  * buffer has zero length, no action is taken, otherwise the attributes are
2146  * set and the mm->context.uses_cmm flag is set.
2147  */
2148 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2149 				  const struct kvm_s390_cmma_log *args)
2150 {
2151 	unsigned long hva, mask, pgstev, i;
2152 	uint8_t *bits;
2153 	int srcu_idx, r = 0;
2154 
2155 	mask = args->mask;
2156 
2157 	if (!kvm->arch.use_cmma)
2158 		return -ENXIO;
2159 	/* invalid/unsupported flags */
2160 	if (args->flags != 0)
2161 		return -EINVAL;
2162 	/* Enforce sane limit on memory allocation */
2163 	if (args->count > KVM_S390_CMMA_SIZE_MAX)
2164 		return -EINVAL;
2165 	/* Nothing to do */
2166 	if (args->count == 0)
2167 		return 0;
2168 
2169 	bits = vmalloc(array_size(sizeof(*bits), args->count));
2170 	if (!bits)
2171 		return -ENOMEM;
2172 
2173 	r = copy_from_user(bits, (void __user *)args->values, args->count);
2174 	if (r) {
2175 		r = -EFAULT;
2176 		goto out;
2177 	}
2178 
2179 	mmap_read_lock(kvm->mm);
2180 	srcu_idx = srcu_read_lock(&kvm->srcu);
2181 	for (i = 0; i < args->count; i++) {
2182 		hva = gfn_to_hva(kvm, args->start_gfn + i);
2183 		if (kvm_is_error_hva(hva)) {
2184 			r = -EFAULT;
2185 			break;
2186 		}
2187 
2188 		pgstev = bits[i];
2189 		pgstev = pgstev << 24;
2190 		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2191 		set_pgste_bits(kvm->mm, hva, mask, pgstev);
2192 	}
2193 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2194 	mmap_read_unlock(kvm->mm);
2195 
2196 	if (!kvm->mm->context.uses_cmm) {
2197 		mmap_write_lock(kvm->mm);
2198 		kvm->mm->context.uses_cmm = 1;
2199 		mmap_write_unlock(kvm->mm);
2200 	}
2201 out:
2202 	vfree(bits);
2203 	return r;
2204 }
2205 
2206 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2207 {
2208 	struct kvm_vcpu *vcpu;
2209 	u16 rc, rrc;
2210 	int ret = 0;
2211 	int i;
2212 
2213 	/*
2214 	 * We ignore failures and try to destroy as many CPUs as possible.
2215 	 * At the same time we must not free the assigned resources when
2216 	 * this fails, as the ultravisor has still access to that memory.
2217 	 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2218 	 * behind.
2219 	 * We want to return the first failure rc and rrc, though.
2220 	 */
2221 	kvm_for_each_vcpu(i, vcpu, kvm) {
2222 		mutex_lock(&vcpu->mutex);
2223 		if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2224 			*rcp = rc;
2225 			*rrcp = rrc;
2226 			ret = -EIO;
2227 		}
2228 		mutex_unlock(&vcpu->mutex);
2229 	}
2230 	return ret;
2231 }
2232 
2233 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2234 {
2235 	int i, r = 0;
2236 	u16 dummy;
2237 
2238 	struct kvm_vcpu *vcpu;
2239 
2240 	kvm_for_each_vcpu(i, vcpu, kvm) {
2241 		mutex_lock(&vcpu->mutex);
2242 		r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2243 		mutex_unlock(&vcpu->mutex);
2244 		if (r)
2245 			break;
2246 	}
2247 	if (r)
2248 		kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2249 	return r;
2250 }
2251 
2252 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2253 {
2254 	int r = 0;
2255 	u16 dummy;
2256 	void __user *argp = (void __user *)cmd->data;
2257 
2258 	switch (cmd->cmd) {
2259 	case KVM_PV_ENABLE: {
2260 		r = -EINVAL;
2261 		if (kvm_s390_pv_is_protected(kvm))
2262 			break;
2263 
2264 		/*
2265 		 *  FMT 4 SIE needs esca. As we never switch back to bsca from
2266 		 *  esca, we need no cleanup in the error cases below
2267 		 */
2268 		r = sca_switch_to_extended(kvm);
2269 		if (r)
2270 			break;
2271 
2272 		mmap_write_lock(current->mm);
2273 		r = gmap_mark_unmergeable();
2274 		mmap_write_unlock(current->mm);
2275 		if (r)
2276 			break;
2277 
2278 		r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2279 		if (r)
2280 			break;
2281 
2282 		r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2283 		if (r)
2284 			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2285 
2286 		/* we need to block service interrupts from now on */
2287 		set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2288 		break;
2289 	}
2290 	case KVM_PV_DISABLE: {
2291 		r = -EINVAL;
2292 		if (!kvm_s390_pv_is_protected(kvm))
2293 			break;
2294 
2295 		r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2296 		/*
2297 		 * If a CPU could not be destroyed, destroy VM will also fail.
2298 		 * There is no point in trying to destroy it. Instead return
2299 		 * the rc and rrc from the first CPU that failed destroying.
2300 		 */
2301 		if (r)
2302 			break;
2303 		r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2304 
2305 		/* no need to block service interrupts any more */
2306 		clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2307 		break;
2308 	}
2309 	case KVM_PV_SET_SEC_PARMS: {
2310 		struct kvm_s390_pv_sec_parm parms = {};
2311 		void *hdr;
2312 
2313 		r = -EINVAL;
2314 		if (!kvm_s390_pv_is_protected(kvm))
2315 			break;
2316 
2317 		r = -EFAULT;
2318 		if (copy_from_user(&parms, argp, sizeof(parms)))
2319 			break;
2320 
2321 		/* Currently restricted to 8KB */
2322 		r = -EINVAL;
2323 		if (parms.length > PAGE_SIZE * 2)
2324 			break;
2325 
2326 		r = -ENOMEM;
2327 		hdr = vmalloc(parms.length);
2328 		if (!hdr)
2329 			break;
2330 
2331 		r = -EFAULT;
2332 		if (!copy_from_user(hdr, (void __user *)parms.origin,
2333 				    parms.length))
2334 			r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2335 						      &cmd->rc, &cmd->rrc);
2336 
2337 		vfree(hdr);
2338 		break;
2339 	}
2340 	case KVM_PV_UNPACK: {
2341 		struct kvm_s390_pv_unp unp = {};
2342 
2343 		r = -EINVAL;
2344 		if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2345 			break;
2346 
2347 		r = -EFAULT;
2348 		if (copy_from_user(&unp, argp, sizeof(unp)))
2349 			break;
2350 
2351 		r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2352 				       &cmd->rc, &cmd->rrc);
2353 		break;
2354 	}
2355 	case KVM_PV_VERIFY: {
2356 		r = -EINVAL;
2357 		if (!kvm_s390_pv_is_protected(kvm))
2358 			break;
2359 
2360 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2361 				  UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2362 		KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2363 			     cmd->rrc);
2364 		break;
2365 	}
2366 	case KVM_PV_PREP_RESET: {
2367 		r = -EINVAL;
2368 		if (!kvm_s390_pv_is_protected(kvm))
2369 			break;
2370 
2371 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2372 				  UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2373 		KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2374 			     cmd->rc, cmd->rrc);
2375 		break;
2376 	}
2377 	case KVM_PV_UNSHARE_ALL: {
2378 		r = -EINVAL;
2379 		if (!kvm_s390_pv_is_protected(kvm))
2380 			break;
2381 
2382 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2383 				  UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2384 		KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2385 			     cmd->rc, cmd->rrc);
2386 		break;
2387 	}
2388 	default:
2389 		r = -ENOTTY;
2390 	}
2391 	return r;
2392 }
2393 
2394 long kvm_arch_vm_ioctl(struct file *filp,
2395 		       unsigned int ioctl, unsigned long arg)
2396 {
2397 	struct kvm *kvm = filp->private_data;
2398 	void __user *argp = (void __user *)arg;
2399 	struct kvm_device_attr attr;
2400 	int r;
2401 
2402 	switch (ioctl) {
2403 	case KVM_S390_INTERRUPT: {
2404 		struct kvm_s390_interrupt s390int;
2405 
2406 		r = -EFAULT;
2407 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
2408 			break;
2409 		r = kvm_s390_inject_vm(kvm, &s390int);
2410 		break;
2411 	}
2412 	case KVM_CREATE_IRQCHIP: {
2413 		struct kvm_irq_routing_entry routing;
2414 
2415 		r = -EINVAL;
2416 		if (kvm->arch.use_irqchip) {
2417 			/* Set up dummy routing. */
2418 			memset(&routing, 0, sizeof(routing));
2419 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2420 		}
2421 		break;
2422 	}
2423 	case KVM_SET_DEVICE_ATTR: {
2424 		r = -EFAULT;
2425 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2426 			break;
2427 		r = kvm_s390_vm_set_attr(kvm, &attr);
2428 		break;
2429 	}
2430 	case KVM_GET_DEVICE_ATTR: {
2431 		r = -EFAULT;
2432 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2433 			break;
2434 		r = kvm_s390_vm_get_attr(kvm, &attr);
2435 		break;
2436 	}
2437 	case KVM_HAS_DEVICE_ATTR: {
2438 		r = -EFAULT;
2439 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2440 			break;
2441 		r = kvm_s390_vm_has_attr(kvm, &attr);
2442 		break;
2443 	}
2444 	case KVM_S390_GET_SKEYS: {
2445 		struct kvm_s390_skeys args;
2446 
2447 		r = -EFAULT;
2448 		if (copy_from_user(&args, argp,
2449 				   sizeof(struct kvm_s390_skeys)))
2450 			break;
2451 		r = kvm_s390_get_skeys(kvm, &args);
2452 		break;
2453 	}
2454 	case KVM_S390_SET_SKEYS: {
2455 		struct kvm_s390_skeys args;
2456 
2457 		r = -EFAULT;
2458 		if (copy_from_user(&args, argp,
2459 				   sizeof(struct kvm_s390_skeys)))
2460 			break;
2461 		r = kvm_s390_set_skeys(kvm, &args);
2462 		break;
2463 	}
2464 	case KVM_S390_GET_CMMA_BITS: {
2465 		struct kvm_s390_cmma_log args;
2466 
2467 		r = -EFAULT;
2468 		if (copy_from_user(&args, argp, sizeof(args)))
2469 			break;
2470 		mutex_lock(&kvm->slots_lock);
2471 		r = kvm_s390_get_cmma_bits(kvm, &args);
2472 		mutex_unlock(&kvm->slots_lock);
2473 		if (!r) {
2474 			r = copy_to_user(argp, &args, sizeof(args));
2475 			if (r)
2476 				r = -EFAULT;
2477 		}
2478 		break;
2479 	}
2480 	case KVM_S390_SET_CMMA_BITS: {
2481 		struct kvm_s390_cmma_log args;
2482 
2483 		r = -EFAULT;
2484 		if (copy_from_user(&args, argp, sizeof(args)))
2485 			break;
2486 		mutex_lock(&kvm->slots_lock);
2487 		r = kvm_s390_set_cmma_bits(kvm, &args);
2488 		mutex_unlock(&kvm->slots_lock);
2489 		break;
2490 	}
2491 	case KVM_S390_PV_COMMAND: {
2492 		struct kvm_pv_cmd args;
2493 
2494 		/* protvirt means user sigp */
2495 		kvm->arch.user_cpu_state_ctrl = 1;
2496 		r = 0;
2497 		if (!is_prot_virt_host()) {
2498 			r = -EINVAL;
2499 			break;
2500 		}
2501 		if (copy_from_user(&args, argp, sizeof(args))) {
2502 			r = -EFAULT;
2503 			break;
2504 		}
2505 		if (args.flags) {
2506 			r = -EINVAL;
2507 			break;
2508 		}
2509 		mutex_lock(&kvm->lock);
2510 		r = kvm_s390_handle_pv(kvm, &args);
2511 		mutex_unlock(&kvm->lock);
2512 		if (copy_to_user(argp, &args, sizeof(args))) {
2513 			r = -EFAULT;
2514 			break;
2515 		}
2516 		break;
2517 	}
2518 	default:
2519 		r = -ENOTTY;
2520 	}
2521 
2522 	return r;
2523 }
2524 
2525 static int kvm_s390_apxa_installed(void)
2526 {
2527 	struct ap_config_info info;
2528 
2529 	if (ap_instructions_available()) {
2530 		if (ap_qci(&info) == 0)
2531 			return info.apxa;
2532 	}
2533 
2534 	return 0;
2535 }
2536 
2537 /*
2538  * The format of the crypto control block (CRYCB) is specified in the 3 low
2539  * order bits of the CRYCB designation (CRYCBD) field as follows:
2540  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2541  *	     AP extended addressing (APXA) facility are installed.
2542  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2543  * Format 2: Both the APXA and MSAX3 facilities are installed
2544  */
2545 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2546 {
2547 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2548 
2549 	/* Clear the CRYCB format bits - i.e., set format 0 by default */
2550 	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2551 
2552 	/* Check whether MSAX3 is installed */
2553 	if (!test_kvm_facility(kvm, 76))
2554 		return;
2555 
2556 	if (kvm_s390_apxa_installed())
2557 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2558 	else
2559 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2560 }
2561 
2562 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2563 			       unsigned long *aqm, unsigned long *adm)
2564 {
2565 	struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2566 
2567 	mutex_lock(&kvm->lock);
2568 	kvm_s390_vcpu_block_all(kvm);
2569 
2570 	switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2571 	case CRYCB_FORMAT2: /* APCB1 use 256 bits */
2572 		memcpy(crycb->apcb1.apm, apm, 32);
2573 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2574 			 apm[0], apm[1], apm[2], apm[3]);
2575 		memcpy(crycb->apcb1.aqm, aqm, 32);
2576 		VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2577 			 aqm[0], aqm[1], aqm[2], aqm[3]);
2578 		memcpy(crycb->apcb1.adm, adm, 32);
2579 		VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2580 			 adm[0], adm[1], adm[2], adm[3]);
2581 		break;
2582 	case CRYCB_FORMAT1:
2583 	case CRYCB_FORMAT0: /* Fall through both use APCB0 */
2584 		memcpy(crycb->apcb0.apm, apm, 8);
2585 		memcpy(crycb->apcb0.aqm, aqm, 2);
2586 		memcpy(crycb->apcb0.adm, adm, 2);
2587 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2588 			 apm[0], *((unsigned short *)aqm),
2589 			 *((unsigned short *)adm));
2590 		break;
2591 	default:	/* Can not happen */
2592 		break;
2593 	}
2594 
2595 	/* recreate the shadow crycb for each vcpu */
2596 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2597 	kvm_s390_vcpu_unblock_all(kvm);
2598 	mutex_unlock(&kvm->lock);
2599 }
2600 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2601 
2602 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2603 {
2604 	mutex_lock(&kvm->lock);
2605 	kvm_s390_vcpu_block_all(kvm);
2606 
2607 	memset(&kvm->arch.crypto.crycb->apcb0, 0,
2608 	       sizeof(kvm->arch.crypto.crycb->apcb0));
2609 	memset(&kvm->arch.crypto.crycb->apcb1, 0,
2610 	       sizeof(kvm->arch.crypto.crycb->apcb1));
2611 
2612 	VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2613 	/* recreate the shadow crycb for each vcpu */
2614 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2615 	kvm_s390_vcpu_unblock_all(kvm);
2616 	mutex_unlock(&kvm->lock);
2617 }
2618 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2619 
2620 static u64 kvm_s390_get_initial_cpuid(void)
2621 {
2622 	struct cpuid cpuid;
2623 
2624 	get_cpu_id(&cpuid);
2625 	cpuid.version = 0xff;
2626 	return *((u64 *) &cpuid);
2627 }
2628 
2629 static void kvm_s390_crypto_init(struct kvm *kvm)
2630 {
2631 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2632 	kvm_s390_set_crycb_format(kvm);
2633 
2634 	if (!test_kvm_facility(kvm, 76))
2635 		return;
2636 
2637 	/* Enable AES/DEA protected key functions by default */
2638 	kvm->arch.crypto.aes_kw = 1;
2639 	kvm->arch.crypto.dea_kw = 1;
2640 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2641 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2642 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2643 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2644 }
2645 
2646 static void sca_dispose(struct kvm *kvm)
2647 {
2648 	if (kvm->arch.use_esca)
2649 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2650 	else
2651 		free_page((unsigned long)(kvm->arch.sca));
2652 	kvm->arch.sca = NULL;
2653 }
2654 
2655 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2656 {
2657 	gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
2658 	int i, rc;
2659 	char debug_name[16];
2660 	static unsigned long sca_offset;
2661 
2662 	rc = -EINVAL;
2663 #ifdef CONFIG_KVM_S390_UCONTROL
2664 	if (type & ~KVM_VM_S390_UCONTROL)
2665 		goto out_err;
2666 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2667 		goto out_err;
2668 #else
2669 	if (type)
2670 		goto out_err;
2671 #endif
2672 
2673 	rc = s390_enable_sie();
2674 	if (rc)
2675 		goto out_err;
2676 
2677 	rc = -ENOMEM;
2678 
2679 	if (!sclp.has_64bscao)
2680 		alloc_flags |= GFP_DMA;
2681 	rwlock_init(&kvm->arch.sca_lock);
2682 	/* start with basic SCA */
2683 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2684 	if (!kvm->arch.sca)
2685 		goto out_err;
2686 	mutex_lock(&kvm_lock);
2687 	sca_offset += 16;
2688 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2689 		sca_offset = 0;
2690 	kvm->arch.sca = (struct bsca_block *)
2691 			((char *) kvm->arch.sca + sca_offset);
2692 	mutex_unlock(&kvm_lock);
2693 
2694 	sprintf(debug_name, "kvm-%u", current->pid);
2695 
2696 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2697 	if (!kvm->arch.dbf)
2698 		goto out_err;
2699 
2700 	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2701 	kvm->arch.sie_page2 =
2702 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
2703 	if (!kvm->arch.sie_page2)
2704 		goto out_err;
2705 
2706 	kvm->arch.sie_page2->kvm = kvm;
2707 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2708 
2709 	for (i = 0; i < kvm_s390_fac_size(); i++) {
2710 		kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
2711 					      (kvm_s390_fac_base[i] |
2712 					       kvm_s390_fac_ext[i]);
2713 		kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
2714 					      kvm_s390_fac_base[i];
2715 	}
2716 	kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2717 
2718 	/* we are always in czam mode - even on pre z14 machines */
2719 	set_kvm_facility(kvm->arch.model.fac_mask, 138);
2720 	set_kvm_facility(kvm->arch.model.fac_list, 138);
2721 	/* we emulate STHYI in kvm */
2722 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
2723 	set_kvm_facility(kvm->arch.model.fac_list, 74);
2724 	if (MACHINE_HAS_TLB_GUEST) {
2725 		set_kvm_facility(kvm->arch.model.fac_mask, 147);
2726 		set_kvm_facility(kvm->arch.model.fac_list, 147);
2727 	}
2728 
2729 	if (css_general_characteristics.aiv && test_facility(65))
2730 		set_kvm_facility(kvm->arch.model.fac_mask, 65);
2731 
2732 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2733 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2734 
2735 	kvm_s390_crypto_init(kvm);
2736 
2737 	mutex_init(&kvm->arch.float_int.ais_lock);
2738 	spin_lock_init(&kvm->arch.float_int.lock);
2739 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
2740 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2741 	init_waitqueue_head(&kvm->arch.ipte_wq);
2742 	mutex_init(&kvm->arch.ipte_mutex);
2743 
2744 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2745 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
2746 
2747 	if (type & KVM_VM_S390_UCONTROL) {
2748 		kvm->arch.gmap = NULL;
2749 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2750 	} else {
2751 		if (sclp.hamax == U64_MAX)
2752 			kvm->arch.mem_limit = TASK_SIZE_MAX;
2753 		else
2754 			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2755 						    sclp.hamax + 1);
2756 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2757 		if (!kvm->arch.gmap)
2758 			goto out_err;
2759 		kvm->arch.gmap->private = kvm;
2760 		kvm->arch.gmap->pfault_enabled = 0;
2761 	}
2762 
2763 	kvm->arch.use_pfmfi = sclp.has_pfmfi;
2764 	kvm->arch.use_skf = sclp.has_skey;
2765 	spin_lock_init(&kvm->arch.start_stop_lock);
2766 	kvm_s390_vsie_init(kvm);
2767 	if (use_gisa)
2768 		kvm_s390_gisa_init(kvm);
2769 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2770 
2771 	return 0;
2772 out_err:
2773 	free_page((unsigned long)kvm->arch.sie_page2);
2774 	debug_unregister(kvm->arch.dbf);
2775 	sca_dispose(kvm);
2776 	KVM_EVENT(3, "creation of vm failed: %d", rc);
2777 	return rc;
2778 }
2779 
2780 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2781 {
2782 	u16 rc, rrc;
2783 
2784 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2785 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2786 	kvm_s390_clear_local_irqs(vcpu);
2787 	kvm_clear_async_pf_completion_queue(vcpu);
2788 	if (!kvm_is_ucontrol(vcpu->kvm))
2789 		sca_del_vcpu(vcpu);
2790 
2791 	if (kvm_is_ucontrol(vcpu->kvm))
2792 		gmap_remove(vcpu->arch.gmap);
2793 
2794 	if (vcpu->kvm->arch.use_cmma)
2795 		kvm_s390_vcpu_unsetup_cmma(vcpu);
2796 	/* We can not hold the vcpu mutex here, we are already dying */
2797 	if (kvm_s390_pv_cpu_get_handle(vcpu))
2798 		kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2799 	free_page((unsigned long)(vcpu->arch.sie_block));
2800 }
2801 
2802 static void kvm_free_vcpus(struct kvm *kvm)
2803 {
2804 	unsigned int i;
2805 	struct kvm_vcpu *vcpu;
2806 
2807 	kvm_for_each_vcpu(i, vcpu, kvm)
2808 		kvm_vcpu_destroy(vcpu);
2809 
2810 	mutex_lock(&kvm->lock);
2811 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2812 		kvm->vcpus[i] = NULL;
2813 
2814 	atomic_set(&kvm->online_vcpus, 0);
2815 	mutex_unlock(&kvm->lock);
2816 }
2817 
2818 void kvm_arch_destroy_vm(struct kvm *kvm)
2819 {
2820 	u16 rc, rrc;
2821 
2822 	kvm_free_vcpus(kvm);
2823 	sca_dispose(kvm);
2824 	kvm_s390_gisa_destroy(kvm);
2825 	/*
2826 	 * We are already at the end of life and kvm->lock is not taken.
2827 	 * This is ok as the file descriptor is closed by now and nobody
2828 	 * can mess with the pv state. To avoid lockdep_assert_held from
2829 	 * complaining we do not use kvm_s390_pv_is_protected.
2830 	 */
2831 	if (kvm_s390_pv_get_handle(kvm))
2832 		kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2833 	debug_unregister(kvm->arch.dbf);
2834 	free_page((unsigned long)kvm->arch.sie_page2);
2835 	if (!kvm_is_ucontrol(kvm))
2836 		gmap_remove(kvm->arch.gmap);
2837 	kvm_s390_destroy_adapters(kvm);
2838 	kvm_s390_clear_float_irqs(kvm);
2839 	kvm_s390_vsie_destroy(kvm);
2840 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2841 }
2842 
2843 /* Section: vcpu related */
2844 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2845 {
2846 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2847 	if (!vcpu->arch.gmap)
2848 		return -ENOMEM;
2849 	vcpu->arch.gmap->private = vcpu->kvm;
2850 
2851 	return 0;
2852 }
2853 
2854 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2855 {
2856 	if (!kvm_s390_use_sca_entries())
2857 		return;
2858 	read_lock(&vcpu->kvm->arch.sca_lock);
2859 	if (vcpu->kvm->arch.use_esca) {
2860 		struct esca_block *sca = vcpu->kvm->arch.sca;
2861 
2862 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2863 		sca->cpu[vcpu->vcpu_id].sda = 0;
2864 	} else {
2865 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2866 
2867 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2868 		sca->cpu[vcpu->vcpu_id].sda = 0;
2869 	}
2870 	read_unlock(&vcpu->kvm->arch.sca_lock);
2871 }
2872 
2873 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2874 {
2875 	if (!kvm_s390_use_sca_entries()) {
2876 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2877 
2878 		/* we still need the basic sca for the ipte control */
2879 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2880 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2881 		return;
2882 	}
2883 	read_lock(&vcpu->kvm->arch.sca_lock);
2884 	if (vcpu->kvm->arch.use_esca) {
2885 		struct esca_block *sca = vcpu->kvm->arch.sca;
2886 
2887 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2888 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2889 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2890 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2891 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2892 	} else {
2893 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2894 
2895 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2896 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2897 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2898 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2899 	}
2900 	read_unlock(&vcpu->kvm->arch.sca_lock);
2901 }
2902 
2903 /* Basic SCA to Extended SCA data copy routines */
2904 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2905 {
2906 	d->sda = s->sda;
2907 	d->sigp_ctrl.c = s->sigp_ctrl.c;
2908 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2909 }
2910 
2911 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2912 {
2913 	int i;
2914 
2915 	d->ipte_control = s->ipte_control;
2916 	d->mcn[0] = s->mcn;
2917 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2918 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2919 }
2920 
2921 static int sca_switch_to_extended(struct kvm *kvm)
2922 {
2923 	struct bsca_block *old_sca = kvm->arch.sca;
2924 	struct esca_block *new_sca;
2925 	struct kvm_vcpu *vcpu;
2926 	unsigned int vcpu_idx;
2927 	u32 scaol, scaoh;
2928 
2929 	if (kvm->arch.use_esca)
2930 		return 0;
2931 
2932 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
2933 	if (!new_sca)
2934 		return -ENOMEM;
2935 
2936 	scaoh = (u32)((u64)(new_sca) >> 32);
2937 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
2938 
2939 	kvm_s390_vcpu_block_all(kvm);
2940 	write_lock(&kvm->arch.sca_lock);
2941 
2942 	sca_copy_b_to_e(new_sca, old_sca);
2943 
2944 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2945 		vcpu->arch.sie_block->scaoh = scaoh;
2946 		vcpu->arch.sie_block->scaol = scaol;
2947 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2948 	}
2949 	kvm->arch.sca = new_sca;
2950 	kvm->arch.use_esca = 1;
2951 
2952 	write_unlock(&kvm->arch.sca_lock);
2953 	kvm_s390_vcpu_unblock_all(kvm);
2954 
2955 	free_page((unsigned long)old_sca);
2956 
2957 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2958 		 old_sca, kvm->arch.sca);
2959 	return 0;
2960 }
2961 
2962 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2963 {
2964 	int rc;
2965 
2966 	if (!kvm_s390_use_sca_entries()) {
2967 		if (id < KVM_MAX_VCPUS)
2968 			return true;
2969 		return false;
2970 	}
2971 	if (id < KVM_S390_BSCA_CPU_SLOTS)
2972 		return true;
2973 	if (!sclp.has_esca || !sclp.has_64bscao)
2974 		return false;
2975 
2976 	mutex_lock(&kvm->lock);
2977 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2978 	mutex_unlock(&kvm->lock);
2979 
2980 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2981 }
2982 
2983 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2984 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2985 {
2986 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2987 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2988 	vcpu->arch.cputm_start = get_tod_clock_fast();
2989 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2990 }
2991 
2992 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2993 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2994 {
2995 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2996 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2997 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2998 	vcpu->arch.cputm_start = 0;
2999 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3000 }
3001 
3002 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3003 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3004 {
3005 	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
3006 	vcpu->arch.cputm_enabled = true;
3007 	__start_cpu_timer_accounting(vcpu);
3008 }
3009 
3010 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3011 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3012 {
3013 	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
3014 	__stop_cpu_timer_accounting(vcpu);
3015 	vcpu->arch.cputm_enabled = false;
3016 }
3017 
/* Preemption-safe wrapper around __enable_cpu_timer_accounting(). */
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
3024 
/* Preemption-safe wrapper around __disable_cpu_timer_accounting(). */
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
3031 
3032 /* set the cpu timer - may only be called from the VCPU thread itself */
3033 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3034 {
3035 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3036 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3037 	if (vcpu->arch.cputm_enabled)
3038 		vcpu->arch.cputm_start = get_tod_clock_fast();
3039 	vcpu->arch.sie_block->cputm = cputm;
3040 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3041 	preempt_enable();
3042 }
3043 
/*
 * update and get the cpu timer - can also be called from other VCPU threads
 *
 * If cpu timer accounting is disabled, the value stored in the SIE block is
 * returned as is. Otherwise the stored value is reduced by the TOD time that
 * elapsed since cputm_start; the computation is repeated until it was not
 * disturbed by a concurrent cputm_seqcount writer.
 */
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
{
	unsigned int seq;
	__u64 value;

	/* without accounting there is no running interval to subtract */
	if (unlikely(!vcpu->arch.cputm_enabled))
		return vcpu->arch.sie_block->cputm;

	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	do {
		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
		/*
		 * If the writer would ever execute a read in the critical
		 * section, e.g. in irq context, we have a deadlock.
		 */
		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
		value = vcpu->arch.sie_block->cputm;
		/* if cputm_start is 0, accounting is being started/stopped */
		if (likely(vcpu->arch.cputm_start))
			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
		/*
		 * The low bit of seq is masked before the retry check;
		 * presumably so that a read which started while a write was
		 * in progress (odd count) never matches and is retried.
		 */
	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
	preempt_enable();
	return value;
}
3069 
3070 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3071 {
3072 
3073 	gmap_enable(vcpu->arch.enabled_gmap);
3074 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3075 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3076 		__start_cpu_timer_accounting(vcpu);
3077 	vcpu->cpu = cpu;
3078 }
3079 
3080 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3081 {
3082 	vcpu->cpu = -1;
3083 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3084 		__stop_cpu_timer_accounting(vcpu);
3085 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3086 	vcpu->arch.enabled_gmap = gmap_get_enabled();
3087 	gmap_disable(vcpu->arch.enabled_gmap);
3088 
3089 }
3090 
3091 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3092 {
3093 	mutex_lock(&vcpu->kvm->lock);
3094 	preempt_disable();
3095 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3096 	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3097 	preempt_enable();
3098 	mutex_unlock(&vcpu->kvm->lock);
3099 	if (!kvm_is_ucontrol(vcpu->kvm)) {
3100 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3101 		sca_add_vcpu(vcpu);
3102 	}
3103 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3104 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3105 	/* make vcpu_load load the right gmap on the first trigger */
3106 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3107 }
3108 
3109 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3110 {
3111 	if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3112 	    test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3113 		return true;
3114 	return false;
3115 }
3116 
3117 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3118 {
3119 	/* At least one ECC subfunction must be present */
3120 	return kvm_has_pckmo_subfunc(kvm, 32) ||
3121 	       kvm_has_pckmo_subfunc(kvm, 33) ||
3122 	       kvm_has_pckmo_subfunc(kvm, 34) ||
3123 	       kvm_has_pckmo_subfunc(kvm, 40) ||
3124 	       kvm_has_pckmo_subfunc(kvm, 41);
3125 
3126 }
3127 
3128 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3129 {
3130 	/*
3131 	 * If the AP instructions are not being interpreted and the MSAX3
3132 	 * facility is not configured for the guest, there is nothing to set up.
3133 	 */
3134 	if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3135 		return;
3136 
3137 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3138 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3139 	vcpu->arch.sie_block->eca &= ~ECA_APIE;
3140 	vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3141 
3142 	if (vcpu->kvm->arch.crypto.apie)
3143 		vcpu->arch.sie_block->eca |= ECA_APIE;
3144 
3145 	/* Set up protected key support */
3146 	if (vcpu->kvm->arch.crypto.aes_kw) {
3147 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3148 		/* ecc is also wrapped with AES key */
3149 		if (kvm_has_pckmo_ecc(vcpu->kvm))
3150 			vcpu->arch.sie_block->ecd |= ECD_ECC;
3151 	}
3152 
3153 	if (vcpu->kvm->arch.crypto.dea_kw)
3154 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3155 }
3156 
3157 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3158 {
3159 	free_page(vcpu->arch.sie_block->cbrlo);
3160 	vcpu->arch.sie_block->cbrlo = 0;
3161 }
3162 
3163 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3164 {
3165 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
3166 	if (!vcpu->arch.sie_block->cbrlo)
3167 		return -ENOMEM;
3168 	return 0;
3169 }
3170 
3171 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3172 {
3173 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3174 
3175 	vcpu->arch.sie_block->ibc = model->ibc;
3176 	if (test_kvm_facility(vcpu->kvm, 7))
3177 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3178 }
3179 
3180 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3181 {
3182 	int rc = 0;
3183 	u16 uvrc, uvrrc;
3184 
3185 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3186 						    CPUSTAT_SM |
3187 						    CPUSTAT_STOPPED);
3188 
3189 	if (test_kvm_facility(vcpu->kvm, 78))
3190 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3191 	else if (test_kvm_facility(vcpu->kvm, 8))
3192 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3193 
3194 	kvm_s390_vcpu_setup_model(vcpu);
3195 
3196 	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3197 	if (MACHINE_HAS_ESOP)
3198 		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3199 	if (test_kvm_facility(vcpu->kvm, 9))
3200 		vcpu->arch.sie_block->ecb |= ECB_SRSI;
3201 	if (test_kvm_facility(vcpu->kvm, 73))
3202 		vcpu->arch.sie_block->ecb |= ECB_TE;
3203 
3204 	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3205 		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3206 	if (test_kvm_facility(vcpu->kvm, 130))
3207 		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3208 	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3209 	if (sclp.has_cei)
3210 		vcpu->arch.sie_block->eca |= ECA_CEI;
3211 	if (sclp.has_ib)
3212 		vcpu->arch.sie_block->eca |= ECA_IB;
3213 	if (sclp.has_siif)
3214 		vcpu->arch.sie_block->eca |= ECA_SII;
3215 	if (sclp.has_sigpif)
3216 		vcpu->arch.sie_block->eca |= ECA_SIGPI;
3217 	if (test_kvm_facility(vcpu->kvm, 129)) {
3218 		vcpu->arch.sie_block->eca |= ECA_VX;
3219 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3220 	}
3221 	if (test_kvm_facility(vcpu->kvm, 139))
3222 		vcpu->arch.sie_block->ecd |= ECD_MEF;
3223 	if (test_kvm_facility(vcpu->kvm, 156))
3224 		vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3225 	if (vcpu->arch.sie_block->gd) {
3226 		vcpu->arch.sie_block->eca |= ECA_AIV;
3227 		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3228 			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3229 	}
3230 	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3231 					| SDNXC;
3232 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3233 
3234 	if (sclp.has_kss)
3235 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3236 	else
3237 		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3238 
3239 	if (vcpu->kvm->arch.use_cmma) {
3240 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
3241 		if (rc)
3242 			return rc;
3243 	}
3244 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3245 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3246 
3247 	vcpu->arch.sie_block->hpid = HPID_KVM;
3248 
3249 	kvm_s390_vcpu_crypto_setup(vcpu);
3250 
3251 	mutex_lock(&vcpu->kvm->lock);
3252 	if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3253 		rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3254 		if (rc)
3255 			kvm_s390_vcpu_unsetup_cmma(vcpu);
3256 	}
3257 	mutex_unlock(&vcpu->kvm->lock);
3258 
3259 	return rc;
3260 }
3261 
3262 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3263 {
3264 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3265 		return -EINVAL;
3266 	return 0;
3267 }
3268 
3269 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3270 {
3271 	struct sie_page *sie_page;
3272 	int rc;
3273 
3274 	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3275 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3276 	if (!sie_page)
3277 		return -ENOMEM;
3278 
3279 	vcpu->arch.sie_block = &sie_page->sie_block;
3280 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3281 
3282 	/* the real guest size will always be smaller than msl */
3283 	vcpu->arch.sie_block->mso = 0;
3284 	vcpu->arch.sie_block->msl = sclp.hamax;
3285 
3286 	vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3287 	spin_lock_init(&vcpu->arch.local_int.lock);
3288 	vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3289 	if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3290 		vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3291 	seqcount_init(&vcpu->arch.cputm_seqcount);
3292 
3293 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3294 	kvm_clear_async_pf_completion_queue(vcpu);
3295 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3296 				    KVM_SYNC_GPRS |
3297 				    KVM_SYNC_ACRS |
3298 				    KVM_SYNC_CRS |
3299 				    KVM_SYNC_ARCH0 |
3300 				    KVM_SYNC_PFAULT |
3301 				    KVM_SYNC_DIAG318;
3302 	kvm_s390_set_prefix(vcpu, 0);
3303 	if (test_kvm_facility(vcpu->kvm, 64))
3304 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3305 	if (test_kvm_facility(vcpu->kvm, 82))
3306 		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3307 	if (test_kvm_facility(vcpu->kvm, 133))
3308 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3309 	if (test_kvm_facility(vcpu->kvm, 156))
3310 		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3311 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
3312 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3313 	 */
3314 	if (MACHINE_HAS_VX)
3315 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3316 	else
3317 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3318 
3319 	if (kvm_is_ucontrol(vcpu->kvm)) {
3320 		rc = __kvm_ucontrol_vcpu_init(vcpu);
3321 		if (rc)
3322 			goto out_free_sie_block;
3323 	}
3324 
3325 	VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3326 		 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3327 	trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3328 
3329 	rc = kvm_s390_vcpu_setup(vcpu);
3330 	if (rc)
3331 		goto out_ucontrol_uninit;
3332 	return 0;
3333 
3334 out_ucontrol_uninit:
3335 	if (kvm_is_ucontrol(vcpu->kvm))
3336 		gmap_remove(vcpu->arch.gmap);
3337 out_free_sie_block:
3338 	free_page((unsigned long)(vcpu->arch.sie_block));
3339 	return rc;
3340 }
3341 
/* A vcpu is reported as runnable when kvm_s390_vcpu_has_irq() says so. */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	const int has_irq = kvm_s390_vcpu_has_irq(vcpu, 0);

	return has_irq;
}
3346 
3347 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3348 {
3349 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3350 }
3351 
3352 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3353 {
3354 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3355 	exit_sie(vcpu);
3356 }
3357 
3358 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3359 {
3360 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3361 }
3362 
3363 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3364 {
3365 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3366 	exit_sie(vcpu);
3367 }
3368 
3369 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3370 {
3371 	return atomic_read(&vcpu->arch.sie_block->prog20) &
3372 	       (PROG_BLOCK_SIE | PROG_REQUEST);
3373 }
3374 
3375 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3376 {
3377 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3378 }
3379 
/*
 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 */
void exit_sie(struct kvm_vcpu *vcpu)
{
	/* raise the stop interrupt flag and kick a possibly running vsie */
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
	kvm_s390_vsie_kick(vcpu);
	/* busy-wait until PROG_IN_SIE is no longer set in the SIE block */
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}
3391 
/*
 * Kick a guest cpu out of SIE to process a request synchronously
 *
 * The request is queued first and the vcpu is kicked afterwards.
 */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	/* queue the request ... */
	kvm_make_request(req, vcpu);
	/* ... then kick the vcpu out of SIE */
	kvm_s390_vcpu_request(vcpu);
}
3398 
3399 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3400 			      unsigned long end)
3401 {
3402 	struct kvm *kvm = gmap->private;
3403 	struct kvm_vcpu *vcpu;
3404 	unsigned long prefix;
3405 	int i;
3406 
3407 	if (gmap_is_shadow(gmap))
3408 		return;
3409 	if (start >= 1UL << 31)
3410 		/* We are only interested in prefix pages */
3411 		return;
3412 	kvm_for_each_vcpu(i, vcpu, kvm) {
3413 		/* match against both prefix pages */
3414 		prefix = kvm_s390_get_prefix(vcpu);
3415 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3416 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3417 				   start, end);
3418 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3419 		}
3420 	}
3421 }
3422 
3423 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3424 {
3425 	/* do not poll with more than halt_poll_max_steal percent of steal time */
3426 	if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3427 	    halt_poll_max_steal) {
3428 		vcpu->stat.halt_no_poll_steal++;
3429 		return true;
3430 	}
3431 	return false;
3432 }
3433 
/*
 * kvm common code refers to this, but never calls it - reaching this
 * function is therefore treated as a bug.
 */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	BUG();
	return 0;
}
3440 
3441 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3442 					   struct kvm_one_reg *reg)
3443 {
3444 	int r = -EINVAL;
3445 
3446 	switch (reg->id) {
3447 	case KVM_REG_S390_TODPR:
3448 		r = put_user(vcpu->arch.sie_block->todpr,
3449 			     (u32 __user *)reg->addr);
3450 		break;
3451 	case KVM_REG_S390_EPOCHDIFF:
3452 		r = put_user(vcpu->arch.sie_block->epoch,
3453 			     (u64 __user *)reg->addr);
3454 		break;
3455 	case KVM_REG_S390_CPU_TIMER:
3456 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
3457 			     (u64 __user *)reg->addr);
3458 		break;
3459 	case KVM_REG_S390_CLOCK_COMP:
3460 		r = put_user(vcpu->arch.sie_block->ckc,
3461 			     (u64 __user *)reg->addr);
3462 		break;
3463 	case KVM_REG_S390_PFTOKEN:
3464 		r = put_user(vcpu->arch.pfault_token,
3465 			     (u64 __user *)reg->addr);
3466 		break;
3467 	case KVM_REG_S390_PFCOMPARE:
3468 		r = put_user(vcpu->arch.pfault_compare,
3469 			     (u64 __user *)reg->addr);
3470 		break;
3471 	case KVM_REG_S390_PFSELECT:
3472 		r = put_user(vcpu->arch.pfault_select,
3473 			     (u64 __user *)reg->addr);
3474 		break;
3475 	case KVM_REG_S390_PP:
3476 		r = put_user(vcpu->arch.sie_block->pp,
3477 			     (u64 __user *)reg->addr);
3478 		break;
3479 	case KVM_REG_S390_GBEA:
3480 		r = put_user(vcpu->arch.sie_block->gbea,
3481 			     (u64 __user *)reg->addr);
3482 		break;
3483 	default:
3484 		break;
3485 	}
3486 
3487 	return r;
3488 }
3489 
3490 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3491 					   struct kvm_one_reg *reg)
3492 {
3493 	int r = -EINVAL;
3494 	__u64 val;
3495 
3496 	switch (reg->id) {
3497 	case KVM_REG_S390_TODPR:
3498 		r = get_user(vcpu->arch.sie_block->todpr,
3499 			     (u32 __user *)reg->addr);
3500 		break;
3501 	case KVM_REG_S390_EPOCHDIFF:
3502 		r = get_user(vcpu->arch.sie_block->epoch,
3503 			     (u64 __user *)reg->addr);
3504 		break;
3505 	case KVM_REG_S390_CPU_TIMER:
3506 		r = get_user(val, (u64 __user *)reg->addr);
3507 		if (!r)
3508 			kvm_s390_set_cpu_timer(vcpu, val);
3509 		break;
3510 	case KVM_REG_S390_CLOCK_COMP:
3511 		r = get_user(vcpu->arch.sie_block->ckc,
3512 			     (u64 __user *)reg->addr);
3513 		break;
3514 	case KVM_REG_S390_PFTOKEN:
3515 		r = get_user(vcpu->arch.pfault_token,
3516 			     (u64 __user *)reg->addr);
3517 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3518 			kvm_clear_async_pf_completion_queue(vcpu);
3519 		break;
3520 	case KVM_REG_S390_PFCOMPARE:
3521 		r = get_user(vcpu->arch.pfault_compare,
3522 			     (u64 __user *)reg->addr);
3523 		break;
3524 	case KVM_REG_S390_PFSELECT:
3525 		r = get_user(vcpu->arch.pfault_select,
3526 			     (u64 __user *)reg->addr);
3527 		break;
3528 	case KVM_REG_S390_PP:
3529 		r = get_user(vcpu->arch.sie_block->pp,
3530 			     (u64 __user *)reg->addr);
3531 		break;
3532 	case KVM_REG_S390_GBEA:
3533 		r = get_user(vcpu->arch.sie_block->gbea,
3534 			     (u64 __user *)reg->addr);
3535 		break;
3536 	default:
3537 		break;
3538 	}
3539 
3540 	return r;
3541 }
3542 
3543 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3544 {
3545 	vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3546 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3547 	memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3548 
3549 	kvm_clear_async_pf_completion_queue(vcpu);
3550 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3551 		kvm_s390_vcpu_stop(vcpu);
3552 	kvm_s390_clear_local_irqs(vcpu);
3553 }
3554 
3555 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3556 {
3557 	/* Initial reset is a superset of the normal reset */
3558 	kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3559 
3560 	/*
3561 	 * This equals initial cpu reset in pop, but we don't switch to ESA.
3562 	 * We do not only reset the internal data, but also ...
3563 	 */
3564 	vcpu->arch.sie_block->gpsw.mask = 0;
3565 	vcpu->arch.sie_block->gpsw.addr = 0;
3566 	kvm_s390_set_prefix(vcpu, 0);
3567 	kvm_s390_set_cpu_timer(vcpu, 0);
3568 	vcpu->arch.sie_block->ckc = 0;
3569 	memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3570 	vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3571 	vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3572 
3573 	/* ... the data in sync regs */
3574 	memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3575 	vcpu->run->s.regs.ckc = 0;
3576 	vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3577 	vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3578 	vcpu->run->psw_addr = 0;
3579 	vcpu->run->psw_mask = 0;
3580 	vcpu->run->s.regs.todpr = 0;
3581 	vcpu->run->s.regs.cputm = 0;
3582 	vcpu->run->s.regs.ckc = 0;
3583 	vcpu->run->s.regs.pp = 0;
3584 	vcpu->run->s.regs.gbea = 1;
3585 	vcpu->run->s.regs.fpc = 0;
3586 	/*
3587 	 * Do not reset these registers in the protected case, as some of
3588 	 * them are overlayed and they are not accessible in this case
3589 	 * anyway.
3590 	 */
3591 	if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3592 		vcpu->arch.sie_block->gbea = 1;
3593 		vcpu->arch.sie_block->pp = 0;
3594 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3595 		vcpu->arch.sie_block->todpr = 0;
3596 	}
3597 }
3598 
3599 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3600 {
3601 	struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3602 
3603 	/* Clear reset is a superset of the initial reset */
3604 	kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3605 
3606 	memset(&regs->gprs, 0, sizeof(regs->gprs));
3607 	memset(&regs->vrs, 0, sizeof(regs->vrs));
3608 	memset(&regs->acrs, 0, sizeof(regs->acrs));
3609 	memset(&regs->gscb, 0, sizeof(regs->gscb));
3610 
3611 	regs->etoken = 0;
3612 	regs->etoken_extension = 0;
3613 }
3614 
3615 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3616 {
3617 	vcpu_load(vcpu);
3618 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3619 	vcpu_put(vcpu);
3620 	return 0;
3621 }
3622 
3623 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3624 {
3625 	vcpu_load(vcpu);
3626 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3627 	vcpu_put(vcpu);
3628 	return 0;
3629 }
3630 
3631 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3632 				  struct kvm_sregs *sregs)
3633 {
3634 	vcpu_load(vcpu);
3635 
3636 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3637 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3638 
3639 	vcpu_put(vcpu);
3640 	return 0;
3641 }
3642 
3643 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3644 				  struct kvm_sregs *sregs)
3645 {
3646 	vcpu_load(vcpu);
3647 
3648 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3649 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3650 
3651 	vcpu_put(vcpu);
3652 	return 0;
3653 }
3654 
3655 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3656 {
3657 	int ret = 0;
3658 
3659 	vcpu_load(vcpu);
3660 
3661 	if (test_fp_ctl(fpu->fpc)) {
3662 		ret = -EINVAL;
3663 		goto out;
3664 	}
3665 	vcpu->run->s.regs.fpc = fpu->fpc;
3666 	if (MACHINE_HAS_VX)
3667 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3668 				 (freg_t *) fpu->fprs);
3669 	else
3670 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3671 
3672 out:
3673 	vcpu_put(vcpu);
3674 	return ret;
3675 }
3676 
3677 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3678 {
3679 	vcpu_load(vcpu);
3680 
3681 	/* make sure we have the latest values */
3682 	save_fpu_regs();
3683 	if (MACHINE_HAS_VX)
3684 		convert_vx_to_fp((freg_t *) fpu->fprs,
3685 				 (__vector128 *) vcpu->run->s.regs.vrs);
3686 	else
3687 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3688 	fpu->fpc = vcpu->run->s.regs.fpc;
3689 
3690 	vcpu_put(vcpu);
3691 	return 0;
3692 }
3693 
3694 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3695 {
3696 	int rc = 0;
3697 
3698 	if (!is_vcpu_stopped(vcpu))
3699 		rc = -EBUSY;
3700 	else {
3701 		vcpu->run->psw_mask = psw.mask;
3702 		vcpu->run->psw_addr = psw.addr;
3703 	}
3704 	return rc;
3705 }
3706 
3707 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3708 				  struct kvm_translation *tr)
3709 {
3710 	return -EINVAL; /* not implemented yet */
3711 }
3712 
3713 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3714 			      KVM_GUESTDBG_USE_HW_BP | \
3715 			      KVM_GUESTDBG_ENABLE)
3716 
3717 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3718 					struct kvm_guest_debug *dbg)
3719 {
3720 	int rc = 0;
3721 
3722 	vcpu_load(vcpu);
3723 
3724 	vcpu->guest_debug = 0;
3725 	kvm_s390_clear_bp_data(vcpu);
3726 
3727 	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3728 		rc = -EINVAL;
3729 		goto out;
3730 	}
3731 	if (!sclp.has_gpere) {
3732 		rc = -EINVAL;
3733 		goto out;
3734 	}
3735 
3736 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
3737 		vcpu->guest_debug = dbg->control;
3738 		/* enforce guest PER */
3739 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3740 
3741 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3742 			rc = kvm_s390_import_bp_data(vcpu, dbg);
3743 	} else {
3744 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3745 		vcpu->arch.guestdbg.last_bp = 0;
3746 	}
3747 
3748 	if (rc) {
3749 		vcpu->guest_debug = 0;
3750 		kvm_s390_clear_bp_data(vcpu);
3751 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3752 	}
3753 
3754 out:
3755 	vcpu_put(vcpu);
3756 	return rc;
3757 }
3758 
3759 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3760 				    struct kvm_mp_state *mp_state)
3761 {
3762 	int ret;
3763 
3764 	vcpu_load(vcpu);
3765 
3766 	/* CHECK_STOP and LOAD are not supported yet */
3767 	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3768 				      KVM_MP_STATE_OPERATING;
3769 
3770 	vcpu_put(vcpu);
3771 	return ret;
3772 }
3773 
3774 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3775 				    struct kvm_mp_state *mp_state)
3776 {
3777 	int rc = 0;
3778 
3779 	vcpu_load(vcpu);
3780 
3781 	/* user space knows about this interface - let it control the state */
3782 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3783 
3784 	switch (mp_state->mp_state) {
3785 	case KVM_MP_STATE_STOPPED:
3786 		rc = kvm_s390_vcpu_stop(vcpu);
3787 		break;
3788 	case KVM_MP_STATE_OPERATING:
3789 		rc = kvm_s390_vcpu_start(vcpu);
3790 		break;
3791 	case KVM_MP_STATE_LOAD:
3792 		if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3793 			rc = -ENXIO;
3794 			break;
3795 		}
3796 		rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3797 		break;
3798 	case KVM_MP_STATE_CHECK_STOP:
3799 		fallthrough;	/* CHECK_STOP and LOAD are not supported yet */
3800 	default:
3801 		rc = -ENXIO;
3802 	}
3803 
3804 	vcpu_put(vcpu);
3805 	return rc;
3806 }
3807 
3808 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3809 {
3810 	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3811 }
3812 
/*
 * Process all pending requests of a vcpu.
 *
 * After each handled request the function jumps back to the top, which
 * first clears PROG_REQUEST via kvm_s390_vcpu_request_handled() and then
 * checks whether further requests are pending.
 *
 * Returns 0 when nothing is left to do, or the error reported by
 * gmap_mprotect_notify(); in the latter case KVM_REQ_MMU_RELOAD is queued
 * again before returning.
 */
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	kvm_s390_vcpu_request_handled(vcpu);
	if (!kvm_request_pending(vcpu))
		return 0;
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Lets just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;
		rc = gmap_mprotect_notify(vcpu->arch.gmap,
					  kvm_s390_get_prefix(vcpu),
					  PAGE_SIZE * 2, PROT_WRITE);
		if (rc) {
			/* keep the request pending so that it is not lost */
			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
			return rc;
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		/* NOTE(review): 0xffff presumably invalidates ihcpu - confirm */
		vcpu->arch.sie_block->ihcpu = 0xffff;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		/* only trace and set the flag on an actual state change */
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		/* only trace and clear the flag on an actual state change */
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
		/* turn on the operation exception intercept */
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
		/*
		 * Disable CMM virtualization; we will emulate the ESSA
		 * instruction manually, in order to provide additional
		 * functionalities needed for live migration.
		 */
		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
		/*
		 * Re-enable CMM virtualization if CMMA is available and
		 * CMM has been used.
		 */
		if ((vcpu->kvm->arch.use_cmma) &&
		    (vcpu->kvm->mm->context.uses_cmm))
			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
		goto retry;
	}

	/* nothing to do, just clear the request */
	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
	/* we left the vsie handler, nothing to do, just clear the request */
	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);

	return 0;
}
3892 
/*
 * kvm_s390_set_tod_clock - set the guest TOD clock of a VM
 * @kvm:  the VM whose clock is changed
 * @gtod: requested guest clock value (tod) and epoch index (epoch_idx)
 *
 * Computes the epoch as the difference between the requested guest TOD and
 * the host clock read with store_tod_clock_ext(), stores it (plus the epoch
 * index when facility 139 is available) in kvm->arch and copies both values
 * into the SIE control block of every vcpu.
 *
 * Takes kvm->lock and performs the whole update with preemption disabled.
 */
void kvm_s390_set_tod_clock(struct kvm *kvm,
			    const struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_vcpu *vcpu;
	union tod_clock clk;
	int i;

	mutex_lock(&kvm->lock);
	preempt_disable();

	/* snapshot of the host clock that the epoch is relative to */
	store_tod_clock_ext(&clk);

	kvm->arch.epoch = gtod->tod - clk.tod;
	kvm->arch.epdx = 0;
	if (test_kvm_facility(kvm, 139)) {
		kvm->arch.epdx = gtod->epoch_idx - clk.ei;
		/*
		 * NOTE(review): presumably detects that the subtraction for
		 * the epoch wrapped and borrows one from the epoch index.
		 */
		if (kvm->arch.epoch > gtod->tod)
			kvm->arch.epdx -= 1;
	}

	/* update the per-vcpu copies between block_all and unblock_all */
	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(i, vcpu, kvm) {
		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
	}

	kvm_s390_vcpu_unblock_all(kvm);
	preempt_enable();
	mutex_unlock(&kvm->lock);
}
3923 
3924 /**
3925  * kvm_arch_fault_in_page - fault-in guest page if necessary
3926  * @vcpu: The corresponding virtual cpu
3927  * @gpa: Guest physical address
3928  * @writable: Whether the page should be writable or not
3929  *
3930  * Make sure that a guest page has been faulted-in on the host.
3931  *
3932  * Return: Zero on success, negative error code otherwise.
3933  */
3934 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3935 {
3936 	return gmap_fault(vcpu->arch.gmap, gpa,
3937 			  writable ? FAULT_FLAG_WRITE : 0);
3938 }
3939 
3940 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3941 				      unsigned long token)
3942 {
3943 	struct kvm_s390_interrupt inti;
3944 	struct kvm_s390_irq irq;
3945 
3946 	if (start_token) {
3947 		irq.u.ext.ext_params2 = token;
3948 		irq.type = KVM_S390_INT_PFAULT_INIT;
3949 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3950 	} else {
3951 		inti.type = KVM_S390_INT_PFAULT_DONE;
3952 		inti.parm64 = token;
3953 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3954 	}
3955 }
3956 
3957 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3958 				     struct kvm_async_pf *work)
3959 {
3960 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3961 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3962 
3963 	return true;
3964 }
3965 
3966 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3967 				 struct kvm_async_pf *work)
3968 {
3969 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3970 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3971 }
3972 
/*
 * Async-pf "page ready" hook. Intentionally empty on s390: nothing is done
 * with @vcpu or @work here.
 */
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}
3978 
/*
 * Tell the async-pf code whether completed work may be dequeued for @vcpu.
 *
 * Return: always true.
 */
bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to cleanup
	 */
	return true;
}
3987 
3988 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3989 {
3990 	hva_t hva;
3991 	struct kvm_arch_async_pf arch;
3992 
3993 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3994 		return false;
3995 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3996 	    vcpu->arch.pfault_compare)
3997 		return false;
3998 	if (psw_extint_disabled(vcpu))
3999 		return false;
4000 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
4001 		return false;
4002 	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
4003 		return false;
4004 	if (!vcpu->arch.gmap->pfault_enabled)
4005 		return false;
4006 
4007 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
4008 	hva += current->thread.gmap_addr & ~PAGE_MASK;
4009 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
4010 		return false;
4011 
4012 	return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
4013 }
4014 
/*
 * Per-iteration preparation before entering SIE.
 *
 * Runs the async-pf completion housekeeping, copies guest gprs 14 and 15
 * from kvm_run into the SIE control block, reschedules if required,
 * delivers pending interrupts (skipped for ucontrol VMs), handles
 * outstanding vcpu requests and, with guest debugging enabled, backs up and
 * patches the guest PER registers.
 *
 * Return: 0 if the vcpu may enter SIE, otherwise the non-zero value returned
 * by interrupt delivery or request handling.
 */
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the house keeping for completed pfaults is
	 * handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	/* gprs 14 and 15 are passed to SIE through the control block */
	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];

	if (need_resched())
		schedule();

	if (!kvm_is_ucontrol(vcpu->kvm)) {
		rc = kvm_s390_deliver_pending_interrupts(vcpu);
		if (rc)
			return rc;
	}

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	if (guestdbg_enabled(vcpu)) {
		kvm_s390_backup_guest_per_regs(vcpu);
		kvm_s390_patch_guest_per_regs(vcpu);
	}

	clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);

	/* start with a clean intercept code; vcpu_post_run() evaluates it */
	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);

	return 0;
}
4056 
4057 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4058 {
4059 	struct kvm_s390_pgm_info pgm_info = {
4060 		.code = PGM_ADDRESSING,
4061 	};
4062 	u8 opcode, ilen;
4063 	int rc;
4064 
4065 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4066 	trace_kvm_s390_sie_fault(vcpu);
4067 
4068 	/*
4069 	 * We want to inject an addressing exception, which is defined as a
4070 	 * suppressing or terminating exception. However, since we came here
4071 	 * by a DAT access exception, the PSW still points to the faulting
4072 	 * instruction since DAT exceptions are nullifying. So we've got
4073 	 * to look up the current opcode to get the length of the instruction
4074 	 * to be able to forward the PSW.
4075 	 */
4076 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4077 	ilen = insn_length(opcode);
4078 	if (rc < 0) {
4079 		return rc;
4080 	} else if (rc) {
4081 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
4082 		 * Forward by arbitrary ilc, injection will take care of
4083 		 * nullification if necessary.
4084 		 */
4085 		pgm_info = vcpu->arch.pgm;
4086 		ilen = 4;
4087 	}
4088 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4089 	kvm_s390_forward_psw(vcpu, ilen);
4090 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4091 }
4092 
4093 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4094 {
4095 	struct mcck_volatile_info *mcck_info;
4096 	struct sie_page *sie_page;
4097 
4098 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4099 		   vcpu->arch.sie_block->icptcode);
4100 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4101 
4102 	if (guestdbg_enabled(vcpu))
4103 		kvm_s390_restore_guest_per_regs(vcpu);
4104 
4105 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4106 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4107 
4108 	if (exit_reason == -EINTR) {
4109 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
4110 		sie_page = container_of(vcpu->arch.sie_block,
4111 					struct sie_page, sie_block);
4112 		mcck_info = &sie_page->mcck_info;
4113 		kvm_s390_reinject_machine_check(vcpu, mcck_info);
4114 		return 0;
4115 	}
4116 
4117 	if (vcpu->arch.sie_block->icptcode > 0) {
4118 		int rc = kvm_handle_sie_intercept(vcpu);
4119 
4120 		if (rc != -EOPNOTSUPP)
4121 			return rc;
4122 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4123 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4124 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4125 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4126 		return -EREMOTE;
4127 	} else if (exit_reason != -EFAULT) {
4128 		vcpu->stat.exit_null++;
4129 		return 0;
4130 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
4131 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4132 		vcpu->run->s390_ucontrol.trans_exc_code =
4133 						current->thread.gmap_addr;
4134 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
4135 		return -EREMOTE;
4136 	} else if (current->thread.gmap_pfault) {
4137 		trace_kvm_s390_major_guest_pfault(vcpu);
4138 		current->thread.gmap_pfault = 0;
4139 		if (kvm_arch_setup_async_pf(vcpu))
4140 			return 0;
4141 		vcpu->stat.pfault_sync++;
4142 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4143 	}
4144 	return vcpu_post_run_fault_in_sie(vcpu);
4145 }
4146 
/* PSW mask bits for external, I/O and machine-check interrupts */
#define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
/*
 * Main run loop of a vcpu: vcpu_pre_run(), sie64a(), vcpu_post_run(),
 * repeated until a signal is pending for current, a guest-debug exit is
 * pending, or one of the steps returns non-zero.
 *
 * Return: 0 or the non-zero value that terminated the loop.
 */
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;
	/*
	 * NOTE(review): the cast presumably relies on sie_block being the
	 * first member of struct sie_page - confirm against the definition.
	 */
	struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
	 * ning the guest), so that memslots (and other stuff) are protected
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in fault handler, between
		 * guest_enter and guest_exit should be no uaccess.
		 */
		local_irq_disable();
		guest_enter_irqoff();
		__disable_cpu_timer_accounting(vcpu);
		local_irq_enable();
		/* protected vcpu: SIE gets the gprs from the sie_page copy */
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			memcpy(sie_page->pv_grregs,
			       vcpu->run->s.regs.gprs,
			       sizeof(sie_page->pv_grregs));
		}
		if (test_cpu_flag(CIF_FPU))
			load_fpu_regs();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			/* copy the gprs back from the sie_page into kvm_run */
			memcpy(vcpu->run->s.regs.gprs,
			       sie_page->pv_grregs,
			       sizeof(sie_page->pv_grregs));
			/*
			 * We're not allowed to inject interrupts on intercepts
			 * that leave the guest state in an "in-between" state
			 * where the next SIE entry will do a continuation.
			 * Fence interrupts in our "internal" PSW.
			 */
			if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
			    vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
				vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
			}
		}
		local_irq_disable();
		__enable_cpu_timer_accounting(vcpu);
		guest_exit_irqoff();
		local_irq_enable();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}
4209 
/*
 * Transfer the part of the kvm_run register state that sync_regs() only
 * applies to non-protected vcpus into the vcpu / SIE control block.
 *
 * The PSW is always taken over; the remaining fields are only applied when
 * the matching KVM_SYNC_* bit is set in kvm_run->kvm_dirty_regs. On machines
 * with guarded storage the host guarded-storage control block is saved and,
 * if enabled for the guest, the guest one is loaded.
 */
static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;
	struct runtime_instr_cb *riccb;
	struct gs_cb *gscb;

	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		/* an invalid token also clears the async-pf completion queue */
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
		vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
		vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
	}
	/*
	 * If userspace sets the riccb (e.g. after migration) to a valid state,
	 * we should enable RI here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
	    test_kvm_facility(vcpu->kvm, 64) &&
	    riccb->v &&
	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
	}
	/*
	 * If userspace sets the gscb (e.g. after migration) to non-zero,
	 * we should enable GS here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
	    test_kvm_facility(vcpu->kvm, 133) &&
	    gscb->gssm &&
	    !vcpu->arch.gs_enabled) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
		vcpu->arch.sie_block->ecb |= ECB_GS;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
		vcpu->arch.gs_enabled = 1;
	}
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
	    test_kvm_facility(vcpu->kvm, 82)) {
		/* replace the BPBC bit with the value supplied by userspace */
		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
	}
	if (MACHINE_HAS_GS) {
		preempt_disable();
		__ctl_set_bit(2, 4);
		/* remember and save the host gs_cb; store_regs_fmt2() restores it */
		if (current->thread.gs_cb) {
			vcpu->arch.host_gscb = current->thread.gs_cb;
			save_gs_cb(vcpu->arch.host_gscb);
		}
		/* run with the guest gs_cb that lives in kvm_run */
		if (vcpu->arch.gs_enabled) {
			current->thread.gs_cb = (struct gs_cb *)
						&vcpu->run->s.regs.gscb;
			restore_gs_cb(current->thread.gs_cb);
		}
		preempt_enable();
	}
	/* SIE will load etoken directly from SDNX and therefore kvm_run */
}
4281 
/*
 * Load the register state supplied by userspace in kvm_run into the vcpu
 * before running it.
 *
 * Fields are taken over according to the KVM_SYNC_* bits set in
 * kvm_run->kvm_dirty_regs. Host access registers and the host fpu state
 * description are saved in vcpu->arch and replaced by the guest ones from
 * kvm_run. kvm_dirty_regs is cleared at the end.
 */
static void sync_regs(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;

	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
	}
	/* swap host access registers for the guest ones */
	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	/* save host (userspace) fprs/vrs */
	save_fpu_regs();
	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
	/* point the thread's fpu register save area at the guest registers */
	if (MACHINE_HAS_VX)
		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
	else
		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;

	/* Sync fmt2 only data */
	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
		sync_regs_fmt2(vcpu);
	} else {
		/*
		 * In several places we have to modify our internal view to
		 * not do things that are disallowed by the ultravisor. For
		 * example we must not inject interrupts after specific exits
		 * (e.g. 112 prefix page not secure). We do this by turning
		 * off the machine check, external and I/O interrupt bits
		 * of our PSW copy. To avoid getting validity intercepts, we
		 * do only accept the condition code from userspace.
		 */
		vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
		vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
						   PSW_MASK_CC;
	}

	/* everything userspace marked dirty has been consumed */
	kvm_run->kvm_dirty_regs = 0;
}
4332 
/*
 * Copy the register state that store_regs() only exports for non-protected
 * vcpus back into kvm_run, and switch the guarded-storage control block
 * back to the host one on machines with guarded storage.
 */
static void store_regs_fmt2(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;

	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	/* bpbc is reported as 0/1 depending on the FPF_BPBC bit */
	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
	kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
	if (MACHINE_HAS_GS) {
		preempt_disable();
		__ctl_set_bit(2, 4);
		/* save the guest gs_cb (current->thread.gs_cb while running) */
		if (vcpu->arch.gs_enabled)
			save_gs_cb(current->thread.gs_cb);
		/* hand the host gs_cb remembered by sync_regs_fmt2() back */
		current->thread.gs_cb = vcpu->arch.host_gscb;
		restore_gs_cb(vcpu->arch.host_gscb);
		/* no host gs_cb: undo the control bit set above */
		if (!vcpu->arch.host_gscb)
			__ctl_clear_bit(2, 4);
		vcpu->arch.host_gscb = NULL;
		preempt_enable();
	}
	/* SIE will save etoken directly into SDNX and therefore kvm_run */
}
4356 
/*
 * Export the vcpu register state into kvm_run after running the guest and
 * switch access registers and fpu state description back to the host values
 * saved by sync_regs().
 */
static void store_regs(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;

	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
	/* guest access registers go to kvm_run, host ones are reloaded */
	save_access_regs(vcpu->run->s.regs.acrs);
	restore_access_regs(vcpu->arch.host_acrs);
	/* Save guest register state */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	/* Restore will be done lazily at return */
	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
		store_regs_fmt2(vcpu);
}
4381 
4382 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4383 {
4384 	struct kvm_run *kvm_run = vcpu->run;
4385 	int rc;
4386 
4387 	if (kvm_run->immediate_exit)
4388 		return -EINTR;
4389 
4390 	if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4391 	    kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4392 		return -EINVAL;
4393 
4394 	vcpu_load(vcpu);
4395 
4396 	if (guestdbg_exit_pending(vcpu)) {
4397 		kvm_s390_prepare_debug_exit(vcpu);
4398 		rc = 0;
4399 		goto out;
4400 	}
4401 
4402 	kvm_sigset_activate(vcpu);
4403 
4404 	/*
4405 	 * no need to check the return value of vcpu_start as it can only have
4406 	 * an error for protvirt, but protvirt means user cpu state
4407 	 */
4408 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4409 		kvm_s390_vcpu_start(vcpu);
4410 	} else if (is_vcpu_stopped(vcpu)) {
4411 		pr_err_ratelimited("can't run stopped vcpu %d\n",
4412 				   vcpu->vcpu_id);
4413 		rc = -EINVAL;
4414 		goto out;
4415 	}
4416 
4417 	sync_regs(vcpu);
4418 	enable_cpu_timer_accounting(vcpu);
4419 
4420 	might_fault();
4421 	rc = __vcpu_run(vcpu);
4422 
4423 	if (signal_pending(current) && !rc) {
4424 		kvm_run->exit_reason = KVM_EXIT_INTR;
4425 		rc = -EINTR;
4426 	}
4427 
4428 	if (guestdbg_exit_pending(vcpu) && !rc)  {
4429 		kvm_s390_prepare_debug_exit(vcpu);
4430 		rc = 0;
4431 	}
4432 
4433 	if (rc == -EREMOTE) {
4434 		/* userspace support is needed, kvm_run has been prepared */
4435 		rc = 0;
4436 	}
4437 
4438 	disable_cpu_timer_accounting(vcpu);
4439 	store_regs(vcpu);
4440 
4441 	kvm_sigset_deactivate(vcpu);
4442 
4443 	vcpu->stat.exit_userspace++;
4444 out:
4445 	vcpu_put(vcpu);
4446 	return rc;
4447 }
4448 
4449 /*
4450  * store status at address
4451  * we use have two special cases:
4452  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4453  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4454  */
4455 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4456 {
4457 	unsigned char archmode = 1;
4458 	freg_t fprs[NUM_FPRS];
4459 	unsigned int px;
4460 	u64 clkcomp, cputm;
4461 	int rc;
4462 
4463 	px = kvm_s390_get_prefix(vcpu);
4464 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4465 		if (write_guest_abs(vcpu, 163, &archmode, 1))
4466 			return -EFAULT;
4467 		gpa = 0;
4468 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4469 		if (write_guest_real(vcpu, 163, &archmode, 1))
4470 			return -EFAULT;
4471 		gpa = px;
4472 	} else
4473 		gpa -= __LC_FPREGS_SAVE_AREA;
4474 
4475 	/* manually convert vector registers if necessary */
4476 	if (MACHINE_HAS_VX) {
4477 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4478 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4479 				     fprs, 128);
4480 	} else {
4481 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4482 				     vcpu->run->s.regs.fprs, 128);
4483 	}
4484 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4485 			      vcpu->run->s.regs.gprs, 128);
4486 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4487 			      &vcpu->arch.sie_block->gpsw, 16);
4488 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4489 			      &px, 4);
4490 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4491 			      &vcpu->run->s.regs.fpc, 4);
4492 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4493 			      &vcpu->arch.sie_block->todpr, 4);
4494 	cputm = kvm_s390_get_cpu_timer(vcpu);
4495 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4496 			      &cputm, 8);
4497 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
4498 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4499 			      &clkcomp, 8);
4500 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4501 			      &vcpu->run->s.regs.acrs, 64);
4502 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4503 			      &vcpu->arch.sie_block->gcr, 128);
4504 	return rc ? -EFAULT : 0;
4505 }
4506 
4507 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4508 {
4509 	/*
4510 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4511 	 * switch in the run ioctl. Let's update our copies before we save
4512 	 * it into the save area
4513 	 */
4514 	save_fpu_regs();
4515 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4516 	save_access_regs(vcpu->run->s.regs.acrs);
4517 
4518 	return kvm_s390_store_status_unloaded(vcpu, addr);
4519 }
4520 
/*
 * Request that IBS gets disabled on @vcpu.
 */
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	/* consume a pending ENABLE request so only the DISABLE remains */
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}
4526 
4527 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4528 {
4529 	unsigned int i;
4530 	struct kvm_vcpu *vcpu;
4531 
4532 	kvm_for_each_vcpu(i, vcpu, kvm) {
4533 		__disable_ibs_on_vcpu(vcpu);
4534 	}
4535 }
4536 
4537 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4538 {
4539 	if (!sclp.has_ibs)
4540 		return;
4541 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4542 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4543 }
4544 
4545 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4546 {
4547 	int i, online_vcpus, r = 0, started_vcpus = 0;
4548 
4549 	if (!is_vcpu_stopped(vcpu))
4550 		return 0;
4551 
4552 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4553 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4554 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4555 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4556 
4557 	/* Let's tell the UV that we want to change into the operating state */
4558 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4559 		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4560 		if (r) {
4561 			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4562 			return r;
4563 		}
4564 	}
4565 
4566 	for (i = 0; i < online_vcpus; i++) {
4567 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4568 			started_vcpus++;
4569 	}
4570 
4571 	if (started_vcpus == 0) {
4572 		/* we're the only active VCPU -> speed it up */
4573 		__enable_ibs_on_vcpu(vcpu);
4574 	} else if (started_vcpus == 1) {
4575 		/*
4576 		 * As we are starting a second VCPU, we have to disable
4577 		 * the IBS facility on all VCPUs to remove potentially
4578 		 * outstanding ENABLE requests.
4579 		 */
4580 		__disable_ibs_on_all_vcpus(vcpu->kvm);
4581 	}
4582 
4583 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4584 	/*
4585 	 * The real PSW might have changed due to a RESTART interpreted by the
4586 	 * ultravisor. We block all interrupts and let the next sie exit
4587 	 * refresh our view.
4588 	 */
4589 	if (kvm_s390_pv_cpu_is_protected(vcpu))
4590 		vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4591 	/*
4592 	 * Another VCPU might have used IBS while we were offline.
4593 	 * Let's play safe and flush the VCPU at startup.
4594 	 */
4595 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4596 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4597 	return 0;
4598 }
4599 
4600 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4601 {
4602 	int i, online_vcpus, r = 0, started_vcpus = 0;
4603 	struct kvm_vcpu *started_vcpu = NULL;
4604 
4605 	if (is_vcpu_stopped(vcpu))
4606 		return 0;
4607 
4608 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4609 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4610 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4611 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4612 
4613 	/* Let's tell the UV that we want to change into the stopped state */
4614 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4615 		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4616 		if (r) {
4617 			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4618 			return r;
4619 		}
4620 	}
4621 
4622 	/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
4623 	kvm_s390_clear_stop_irq(vcpu);
4624 
4625 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4626 	__disable_ibs_on_vcpu(vcpu);
4627 
4628 	for (i = 0; i < online_vcpus; i++) {
4629 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4630 			started_vcpus++;
4631 			started_vcpu = vcpu->kvm->vcpus[i];
4632 		}
4633 	}
4634 
4635 	if (started_vcpus == 1) {
4636 		/*
4637 		 * As we only have one VCPU left, we want to enable the
4638 		 * IBS facility for that VCPU to speed it up.
4639 		 */
4640 		__enable_ibs_on_vcpu(started_vcpu);
4641 	}
4642 
4643 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4644 	return 0;
4645 }
4646 
4647 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4648 				     struct kvm_enable_cap *cap)
4649 {
4650 	int r;
4651 
4652 	if (cap->flags)
4653 		return -EINVAL;
4654 
4655 	switch (cap->cap) {
4656 	case KVM_CAP_S390_CSS_SUPPORT:
4657 		if (!vcpu->kvm->arch.css_support) {
4658 			vcpu->kvm->arch.css_support = 1;
4659 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4660 			trace_kvm_s390_enable_css(vcpu->kvm);
4661 		}
4662 		r = 0;
4663 		break;
4664 	default:
4665 		r = -EINVAL;
4666 		break;
4667 	}
4668 	return r;
4669 }
4670 
4671 static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
4672 				   struct kvm_s390_mem_op *mop)
4673 {
4674 	void __user *uaddr = (void __user *)mop->buf;
4675 	int r = 0;
4676 
4677 	if (mop->flags || !mop->size)
4678 		return -EINVAL;
4679 	if (mop->size + mop->sida_offset < mop->size)
4680 		return -EINVAL;
4681 	if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4682 		return -E2BIG;
4683 
4684 	switch (mop->op) {
4685 	case KVM_S390_MEMOP_SIDA_READ:
4686 		if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4687 				 mop->sida_offset), mop->size))
4688 			r = -EFAULT;
4689 
4690 		break;
4691 	case KVM_S390_MEMOP_SIDA_WRITE:
4692 		if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4693 				   mop->sida_offset), uaddr, mop->size))
4694 			r = -EFAULT;
4695 		break;
4696 	}
4697 	return r;
4698 }
4699 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4700 				  struct kvm_s390_mem_op *mop)
4701 {
4702 	void __user *uaddr = (void __user *)mop->buf;
4703 	void *tmpbuf = NULL;
4704 	int r = 0;
4705 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4706 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
4707 
4708 	if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4709 		return -EINVAL;
4710 
4711 	if (mop->size > MEM_OP_MAX_SIZE)
4712 		return -E2BIG;
4713 
4714 	if (kvm_s390_pv_cpu_is_protected(vcpu))
4715 		return -EINVAL;
4716 
4717 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4718 		tmpbuf = vmalloc(mop->size);
4719 		if (!tmpbuf)
4720 			return -ENOMEM;
4721 	}
4722 
4723 	switch (mop->op) {
4724 	case KVM_S390_MEMOP_LOGICAL_READ:
4725 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4726 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4727 					    mop->size, GACC_FETCH);
4728 			break;
4729 		}
4730 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4731 		if (r == 0) {
4732 			if (copy_to_user(uaddr, tmpbuf, mop->size))
4733 				r = -EFAULT;
4734 		}
4735 		break;
4736 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4737 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4738 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4739 					    mop->size, GACC_STORE);
4740 			break;
4741 		}
4742 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4743 			r = -EFAULT;
4744 			break;
4745 		}
4746 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4747 		break;
4748 	}
4749 
4750 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4751 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4752 
4753 	vfree(tmpbuf);
4754 	return r;
4755 }
4756 
4757 static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
4758 				      struct kvm_s390_mem_op *mop)
4759 {
4760 	int r, srcu_idx;
4761 
4762 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4763 
4764 	switch (mop->op) {
4765 	case KVM_S390_MEMOP_LOGICAL_READ:
4766 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4767 		r = kvm_s390_guest_mem_op(vcpu, mop);
4768 		break;
4769 	case KVM_S390_MEMOP_SIDA_READ:
4770 	case KVM_S390_MEMOP_SIDA_WRITE:
4771 		/* we are locked against sida going away by the vcpu->mutex */
4772 		r = kvm_s390_guest_sida_op(vcpu, mop);
4773 		break;
4774 	default:
4775 		r = -EINVAL;
4776 	}
4777 
4778 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4779 	return r;
4780 }
4781 
4782 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4783 			       unsigned int ioctl, unsigned long arg)
4784 {
4785 	struct kvm_vcpu *vcpu = filp->private_data;
4786 	void __user *argp = (void __user *)arg;
4787 
4788 	switch (ioctl) {
4789 	case KVM_S390_IRQ: {
4790 		struct kvm_s390_irq s390irq;
4791 
4792 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4793 			return -EFAULT;
4794 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4795 	}
4796 	case KVM_S390_INTERRUPT: {
4797 		struct kvm_s390_interrupt s390int;
4798 		struct kvm_s390_irq s390irq = {};
4799 
4800 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
4801 			return -EFAULT;
4802 		if (s390int_to_s390irq(&s390int, &s390irq))
4803 			return -EINVAL;
4804 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4805 	}
4806 	}
4807 	return -ENOIOCTLCMD;
4808 }
4809 
/*
 * Dispatch the vcpu ioctls that are executed with the vcpu loaded.
 *
 * vcpu_load() is called before the switch and vcpu_put() after it, so every
 * case has to leave through "break" with its result in r instead of
 * returning directly.
 *
 * Returns the result of the selected handler, -EFAULT if the argument
 * structure cannot be copied from userspace, -EINVAL for requests rejected
 * by the checks below and -ENOTTY for ioctl numbers not handled here.
 */
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;
	/* passed to uv_cmd_nodata() and reported through VCPU_EVENT() */
	u16 rc, rrc;

	vcpu_load(vcpu);

	switch (ioctl) {
	case KVM_S390_STORE_STATUS:
		/* arg is handed over as a plain value; the call runs under kvm->srcu */
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_store_status_unloaded(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	/*
	 * The three reset flavours share one pattern: the reset helper is
	 * always run first; for a protected vcpu the matching UV command is
	 * issued afterwards and its return value becomes the ioctl result.
	 */
	case KVM_S390_CLEAR_RESET:
		r = 0;
		kvm_arch_vcpu_ioctl_clear_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_S390_INITIAL_RESET:
		r = 0;
		kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET_INITIAL,
					  &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_S390_NORMAL_RESET:
		r = 0;
		kvm_arch_vcpu_ioctl_normal_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET, &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;
		/*
		 * Register access is refused for protected vcpus; this check
		 * comes before the copy, so -EINVAL wins over -EFAULT.
		 */
		r = -EINVAL;
		if (kvm_s390_pv_cpu_is_protected(vcpu))
			break;
		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	/*
	 * Both UCAS requests copy the argument first and only then check for
	 * a user controlled VM, so a bad pointer yields -EFAULT even when the
	 * VM is not ucontrol.
	 */
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		/* user_addr of the mapping structure is not used for unmapping */
		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
			ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		/* arg is the address itself, not a pointer to a structure */
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		/*
		 * The buffer must be non-empty, at most VCPU_IRQS_MAX_BUF
		 * bytes and a whole number of struct kvm_s390_irq entries.
		 */
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		/*
		 * Only an empty buffer is rejected here.
		 * NOTE(review): no upper bound or alignment check on len at
		 * this level - presumably kvm_s390_get_irq_state() copes with
		 * arbitrary lengths; confirm.
		 */
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *)  irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}

	vcpu_put(vcpu);
	return r;
}
4981 
4982 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4983 {
4984 #ifdef CONFIG_KVM_S390_UCONTROL
4985 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4986 		 && (kvm_is_ucontrol(vcpu->kvm))) {
4987 		vmf->page = virt_to_page(vcpu->arch.sie_block);
4988 		get_page(vmf->page);
4989 		return 0;
4990 	}
4991 #endif
4992 	return VM_FAULT_SIGBUS;
4993 }
4994 
4995 /* Section: memory related */
4996 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4997 				   struct kvm_memory_slot *memslot,
4998 				   const struct kvm_userspace_memory_region *mem,
4999 				   enum kvm_mr_change change)
5000 {
5001 	/* A few sanity checks. We can have memory slots which have to be
5002 	   located/ended at a segment boundary (1MB). The memory in userland is
5003 	   ok to be fragmented into various different vmas. It is okay to mmap()
5004 	   and munmap() stuff in this slot after doing this call at any time */
5005 
5006 	if (mem->userspace_addr & 0xffffful)
5007 		return -EINVAL;
5008 
5009 	if (mem->memory_size & 0xffffful)
5010 		return -EINVAL;
5011 
5012 	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
5013 		return -EINVAL;
5014 
5015 	/* When we are protected, we should not change the memory slots */
5016 	if (kvm_s390_pv_get_handle(kvm))
5017 		return -EINVAL;
5018 	return 0;
5019 }
5020 
5021 void kvm_arch_commit_memory_region(struct kvm *kvm,
5022 				const struct kvm_userspace_memory_region *mem,
5023 				struct kvm_memory_slot *old,
5024 				const struct kvm_memory_slot *new,
5025 				enum kvm_mr_change change)
5026 {
5027 	int rc = 0;
5028 
5029 	switch (change) {
5030 	case KVM_MR_DELETE:
5031 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5032 					old->npages * PAGE_SIZE);
5033 		break;
5034 	case KVM_MR_MOVE:
5035 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5036 					old->npages * PAGE_SIZE);
5037 		if (rc)
5038 			break;
5039 		fallthrough;
5040 	case KVM_MR_CREATE:
5041 		rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
5042 				      mem->guest_phys_addr, mem->memory_size);
5043 		break;
5044 	case KVM_MR_FLAGS_ONLY:
5045 		break;
5046 	default:
5047 		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5048 	}
5049 	if (rc)
5050 		pr_warn("failed to commit memory region\n");
5051 	return;
5052 }
5053 
5054 static inline unsigned long nonhyp_mask(int i)
5055 {
5056 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5057 
5058 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5059 }
5060 
/* Clear the vcpu's valid_wakeup flag once blocking has finished. */
void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
	vcpu->valid_wakeup = false;
}
5065 
5066 static int __init kvm_s390_init(void)
5067 {
5068 	int i;
5069 
5070 	if (!sclp.has_sief2) {
5071 		pr_info("SIE is not available\n");
5072 		return -ENODEV;
5073 	}
5074 
5075 	if (nested && hpage) {
5076 		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5077 		return -EINVAL;
5078 	}
5079 
5080 	for (i = 0; i < 16; i++)
5081 		kvm_s390_fac_base[i] |=
5082 			stfle_fac_list[i] & nonhyp_mask(i);
5083 
5084 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5085 }
5086 
/* Module exit: hand over to kvm_exit(), the counterpart of the kvm_init() call in kvm_s390_init(). */
static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}
5091 
/* Register kvm_s390_init()/kvm_s390_exit() as the module entry and exit points. */
module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
/* included here, directly in front of the alias macros that follow */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");
5103