xref: /openbmc/linux/arch/s390/kvm/kvm-s390.c (revision e368cd72)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2020
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13 
14 #define KMSG_COMPONENT "kvm-s390"
15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
16 
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34 #include <linux/pgtable.h>
35 
36 #include <asm/asm-offsets.h>
37 #include <asm/lowcore.h>
38 #include <asm/stp.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include <asm/ap.h>
47 #include <asm/uv.h>
48 #include <asm/fpu/api.h>
49 #include "kvm-s390.h"
50 #include "gaccess.h"
51 
52 #define CREATE_TRACE_POINTS
53 #include "trace.h"
54 #include "trace-s390.h"
55 
56 #define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
57 #define LOCAL_IRQS 32
58 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
59 			   (KVM_MAX_VCPUS + LOCAL_IRQS))
60 
61 const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
62 	KVM_GENERIC_VM_STATS(),
63 	STATS_DESC_COUNTER(VM, inject_io),
64 	STATS_DESC_COUNTER(VM, inject_float_mchk),
65 	STATS_DESC_COUNTER(VM, inject_pfault_done),
66 	STATS_DESC_COUNTER(VM, inject_service_signal),
67 	STATS_DESC_COUNTER(VM, inject_virtio)
68 };
69 static_assert(ARRAY_SIZE(kvm_vm_stats_desc) ==
70 		sizeof(struct kvm_vm_stat) / sizeof(u64));
71 
72 const struct kvm_stats_header kvm_vm_stats_header = {
73 	.name_size = KVM_STATS_NAME_SIZE,
74 	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
75 	.id_offset = sizeof(struct kvm_stats_header),
76 	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
77 	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
78 		       sizeof(kvm_vm_stats_desc),
79 };
80 
81 const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
82 	KVM_GENERIC_VCPU_STATS(),
83 	STATS_DESC_COUNTER(VCPU, exit_userspace),
84 	STATS_DESC_COUNTER(VCPU, exit_null),
85 	STATS_DESC_COUNTER(VCPU, exit_external_request),
86 	STATS_DESC_COUNTER(VCPU, exit_io_request),
87 	STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
88 	STATS_DESC_COUNTER(VCPU, exit_stop_request),
89 	STATS_DESC_COUNTER(VCPU, exit_validity),
90 	STATS_DESC_COUNTER(VCPU, exit_instruction),
91 	STATS_DESC_COUNTER(VCPU, exit_pei),
92 	STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
93 	STATS_DESC_COUNTER(VCPU, instruction_lctl),
94 	STATS_DESC_COUNTER(VCPU, instruction_lctlg),
95 	STATS_DESC_COUNTER(VCPU, instruction_stctl),
96 	STATS_DESC_COUNTER(VCPU, instruction_stctg),
97 	STATS_DESC_COUNTER(VCPU, exit_program_interruption),
98 	STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
99 	STATS_DESC_COUNTER(VCPU, exit_operation_exception),
100 	STATS_DESC_COUNTER(VCPU, deliver_ckc),
101 	STATS_DESC_COUNTER(VCPU, deliver_cputm),
102 	STATS_DESC_COUNTER(VCPU, deliver_external_call),
103 	STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
104 	STATS_DESC_COUNTER(VCPU, deliver_service_signal),
105 	STATS_DESC_COUNTER(VCPU, deliver_virtio),
106 	STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
107 	STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
108 	STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
109 	STATS_DESC_COUNTER(VCPU, deliver_program),
110 	STATS_DESC_COUNTER(VCPU, deliver_io),
111 	STATS_DESC_COUNTER(VCPU, deliver_machine_check),
112 	STATS_DESC_COUNTER(VCPU, exit_wait_state),
113 	STATS_DESC_COUNTER(VCPU, inject_ckc),
114 	STATS_DESC_COUNTER(VCPU, inject_cputm),
115 	STATS_DESC_COUNTER(VCPU, inject_external_call),
116 	STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
117 	STATS_DESC_COUNTER(VCPU, inject_mchk),
118 	STATS_DESC_COUNTER(VCPU, inject_pfault_init),
119 	STATS_DESC_COUNTER(VCPU, inject_program),
120 	STATS_DESC_COUNTER(VCPU, inject_restart),
121 	STATS_DESC_COUNTER(VCPU, inject_set_prefix),
122 	STATS_DESC_COUNTER(VCPU, inject_stop_signal),
123 	STATS_DESC_COUNTER(VCPU, instruction_epsw),
124 	STATS_DESC_COUNTER(VCPU, instruction_gs),
125 	STATS_DESC_COUNTER(VCPU, instruction_io_other),
126 	STATS_DESC_COUNTER(VCPU, instruction_lpsw),
127 	STATS_DESC_COUNTER(VCPU, instruction_lpswe),
128 	STATS_DESC_COUNTER(VCPU, instruction_pfmf),
129 	STATS_DESC_COUNTER(VCPU, instruction_ptff),
130 	STATS_DESC_COUNTER(VCPU, instruction_sck),
131 	STATS_DESC_COUNTER(VCPU, instruction_sckpf),
132 	STATS_DESC_COUNTER(VCPU, instruction_stidp),
133 	STATS_DESC_COUNTER(VCPU, instruction_spx),
134 	STATS_DESC_COUNTER(VCPU, instruction_stpx),
135 	STATS_DESC_COUNTER(VCPU, instruction_stap),
136 	STATS_DESC_COUNTER(VCPU, instruction_iske),
137 	STATS_DESC_COUNTER(VCPU, instruction_ri),
138 	STATS_DESC_COUNTER(VCPU, instruction_rrbe),
139 	STATS_DESC_COUNTER(VCPU, instruction_sske),
140 	STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
141 	STATS_DESC_COUNTER(VCPU, instruction_stsi),
142 	STATS_DESC_COUNTER(VCPU, instruction_stfl),
143 	STATS_DESC_COUNTER(VCPU, instruction_tb),
144 	STATS_DESC_COUNTER(VCPU, instruction_tpi),
145 	STATS_DESC_COUNTER(VCPU, instruction_tprot),
146 	STATS_DESC_COUNTER(VCPU, instruction_tsch),
147 	STATS_DESC_COUNTER(VCPU, instruction_sie),
148 	STATS_DESC_COUNTER(VCPU, instruction_essa),
149 	STATS_DESC_COUNTER(VCPU, instruction_sthyi),
150 	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
151 	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
152 	STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
153 	STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
154 	STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
155 	STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
156 	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
157 	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
158 	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
159 	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
160 	STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
161 	STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
162 	STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
163 	STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
164 	STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
165 	STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
166 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
167 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
168 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
169 	STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
170 	STATS_DESC_COUNTER(VCPU, diag_9c_forward),
171 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
172 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
173 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
174 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
175 	STATS_DESC_COUNTER(VCPU, pfault_sync)
176 };
177 static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) ==
178 		sizeof(struct kvm_vcpu_stat) / sizeof(u64));
179 
180 const struct kvm_stats_header kvm_vcpu_stats_header = {
181 	.name_size = KVM_STATS_NAME_SIZE,
182 	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
183 	.id_offset = sizeof(struct kvm_stats_header),
184 	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
185 	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
186 		       sizeof(kvm_vcpu_stats_desc),
187 };
188 
189 /* allow nested virtualization in KVM (if enabled by user space) */
190 static int nested;
191 module_param(nested, int, S_IRUGO);
192 MODULE_PARM_DESC(nested, "Nested virtualization support");
193 
194 /* allow 1m huge page guest backing, if !nested */
195 static int hpage;
196 module_param(hpage, int, 0444);
197 MODULE_PARM_DESC(hpage, "1m huge page backing support");
198 
199 /* maximum percentage of steal time for polling.  >100 is treated like 100 */
200 static u8 halt_poll_max_steal = 10;
201 module_param(halt_poll_max_steal, byte, 0644);
202 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
203 
204 /* if set to true, the GISA will be initialized and used if available */
205 static bool use_gisa = true;
206 module_param(use_gisa, bool, 0644);
207 MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
208 
209 /* maximum diag9c forwarding per second */
210 unsigned int diag9c_forwarding_hz;
211 module_param(diag9c_forwarding_hz, uint, 0644);
212 MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
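
/*
 * Usage sketch (illustrative, host shell): the module parameters above are
 * passed at load time, e.g. "modprobe kvm nested=1" or "modprobe kvm
 * hpage=1" (huge page backing only applies without nested, see the comment
 * above).  The 0644 parameters can also be changed at runtime via sysfs:
 *
 *	echo 30 > /sys/module/kvm/parameters/halt_poll_max_steal
 *	echo 0  > /sys/module/kvm/parameters/use_gisa
 *	echo 10 > /sys/module/kvm/parameters/diag9c_forwarding_hz
 *
 * The numeric values are examples only.
 */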
213 
214 /*
215  * For now we handle at most 16 double words as this is what the s390 base
216  * kernel handles and stores in the prefix page. If we ever need to go beyond
217  * this, it will require code changes, but the external uapi can stay.
218  */
219 #define SIZE_INTERNAL 16
220 
221 /*
222  * Base feature mask that defines default mask for facilities. Consists of the
223  * defines in FACILITIES_KVM and the non-hypervisor managed bits.
224  */
225 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
226 /*
227  * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
228  * and defines the facilities that can be enabled via a cpu model.
229  */
230 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
231 
232 static unsigned long kvm_s390_fac_size(void)
233 {
234 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
235 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
236 	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
237 		sizeof(stfle_fac_list));
238 
239 	return SIZE_INTERNAL;
240 }
241 
242 /* available cpu features supported by kvm */
243 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
244 /* available subfunctions indicated via query / "test bit" */
245 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
246 
247 static struct gmap_notifier gmap_notifier;
248 static struct gmap_notifier vsie_gmap_notifier;
249 debug_info_t *kvm_s390_dbf;
250 debug_info_t *kvm_s390_dbf_uv;
251 
252 /* Section: not file related */
253 int kvm_arch_hardware_enable(void)
254 {
255 	/* every s390 is virtualization enabled ;-) */
256 	return 0;
257 }
258 
259 int kvm_arch_check_processor_compat(void *opaque)
260 {
261 	return 0;
262 }
263 
264 /* forward declarations */
265 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
266 			      unsigned long end);
267 static int sca_switch_to_extended(struct kvm *kvm);
268 
269 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
270 {
271 	u8 delta_idx = 0;
272 
273 	/*
274 	 * The TOD jumps by delta; we have to compensate for this by adding
275 	 * -delta to the epoch.
276 	 */
277 	delta = -delta;
278 
279 	/* sign-extension - we're adding to signed values below */
280 	if ((s64)delta < 0)
281 		delta_idx = -1;
282 
283 	scb->epoch += delta;
284 	if (scb->ecd & ECD_MEF) {
285 		scb->epdx += delta_idx;
286 		if (scb->epoch < delta)
287 			scb->epdx += 1;
288 	}
289 }
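
/*
 * Worked example for the compensation above (illustrative numbers): if the
 * host TOD is stepped forward by delta = 0x10, the guest-visible clock
 * (host TOD + epoch) must not move, so -0x10 is added to the epoch.  With
 * the multiple-epoch facility (ECD_MEF) the epoch is the 128-bit pair
 * <epdx, epoch>: delta_idx holds the sign extension of the negated delta
 * for the high 64 bits, and the "epoch < delta" test after the unsigned
 * add detects a carry out of the low 64 bits, which is then propagated
 * into epdx.
 */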
290 
291 /*
292  * This callback is executed during stop_machine(). All CPUs are therefore
293  * temporarily stopped. In order not to change guest behavior, we have to
294  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
295  * so a CPU won't be stopped while calculating with the epoch.
296  */
297 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
298 			  void *v)
299 {
300 	struct kvm *kvm;
301 	struct kvm_vcpu *vcpu;
302 	int i;
303 	unsigned long long *delta = v;
304 
305 	list_for_each_entry(kvm, &vm_list, vm_list) {
306 		kvm_for_each_vcpu(i, vcpu, kvm) {
307 			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
308 			if (i == 0) {
309 				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
310 				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
311 			}
312 			if (vcpu->arch.cputm_enabled)
313 				vcpu->arch.cputm_start += *delta;
314 			if (vcpu->arch.vsie_block)
315 				kvm_clock_sync_scb(vcpu->arch.vsie_block,
316 						   *delta);
317 		}
318 	}
319 	return NOTIFY_OK;
320 }
321 
322 static struct notifier_block kvm_clock_notifier = {
323 	.notifier_call = kvm_clock_sync,
324 };
325 
326 int kvm_arch_hardware_setup(void *opaque)
327 {
328 	gmap_notifier.notifier_call = kvm_gmap_notifier;
329 	gmap_register_pte_notifier(&gmap_notifier);
330 	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
331 	gmap_register_pte_notifier(&vsie_gmap_notifier);
332 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
333 				       &kvm_clock_notifier);
334 	return 0;
335 }
336 
337 void kvm_arch_hardware_unsetup(void)
338 {
339 	gmap_unregister_pte_notifier(&gmap_notifier);
340 	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
341 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
342 					 &kvm_clock_notifier);
343 }
344 
345 static void allow_cpu_feat(unsigned long nr)
346 {
347 	set_bit_inv(nr, kvm_s390_available_cpu_feat);
348 }
349 
350 static inline int plo_test_bit(unsigned char nr)
351 {
352 	unsigned long function = (unsigned long)nr | 0x100;
353 	int cc;
354 
355 	asm volatile(
356 		"	lgr	0,%[function]\n"
357 		/* Parameter registers are ignored for "test bit" */
358 		"	plo	0,0,0,0(0)\n"
359 		"	ipm	%0\n"
360 		"	srl	%0,28\n"
361 		: "=d" (cc)
362 		: [function] "d" (function)
363 		: "cc", "0");
364 	return cc == 0;
365 }
366 
367 static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
368 {
369 	asm volatile(
370 		"	lghi	0,0\n"
371 		"	lgr	1,%[query]\n"
372 		/* Parameter registers are ignored */
373 		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
374 		:
375 		: [query] "d" ((unsigned long)query), [opc] "i" (opcode)
376 		: "cc", "memory", "0", "1");
377 }
378 
379 #define INSN_SORTL 0xb938
380 #define INSN_DFLTCC 0xb939
381 
382 static void kvm_s390_cpu_feat_init(void)
383 {
384 	int i;
385 
386 	for (i = 0; i < 256; ++i) {
387 		if (plo_test_bit(i))
388 			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
389 	}
390 
391 	if (test_facility(28)) /* TOD-clock steering */
392 		ptff(kvm_s390_available_subfunc.ptff,
393 		     sizeof(kvm_s390_available_subfunc.ptff),
394 		     PTFF_QAF);
395 
396 	if (test_facility(17)) { /* MSA */
397 		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
398 			      kvm_s390_available_subfunc.kmac);
399 		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
400 			      kvm_s390_available_subfunc.kmc);
401 		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
402 			      kvm_s390_available_subfunc.km);
403 		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
404 			      kvm_s390_available_subfunc.kimd);
405 		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
406 			      kvm_s390_available_subfunc.klmd);
407 	}
408 	if (test_facility(76)) /* MSA3 */
409 		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
410 			      kvm_s390_available_subfunc.pckmo);
411 	if (test_facility(77)) { /* MSA4 */
412 		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
413 			      kvm_s390_available_subfunc.kmctr);
414 		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
415 			      kvm_s390_available_subfunc.kmf);
416 		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
417 			      kvm_s390_available_subfunc.kmo);
418 		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
419 			      kvm_s390_available_subfunc.pcc);
420 	}
421 	if (test_facility(57)) /* MSA5 */
422 		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
423 			      kvm_s390_available_subfunc.ppno);
424 
425 	if (test_facility(146)) /* MSA8 */
426 		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
427 			      kvm_s390_available_subfunc.kma);
428 
429 	if (test_facility(155)) /* MSA9 */
430 		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
431 			      kvm_s390_available_subfunc.kdsa);
432 
433 	if (test_facility(150)) /* SORTL */
434 		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
435 
436 	if (test_facility(151)) /* DFLTCC */
437 		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
438 
439 	if (MACHINE_HAS_ESOP)
440 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
441 	/*
442 	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
443 	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
444 	 */
445 	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
446 	    !test_facility(3) || !nested)
447 		return;
448 	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
449 	if (sclp.has_64bscao)
450 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
451 	if (sclp.has_siif)
452 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
453 	if (sclp.has_gpere)
454 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
455 	if (sclp.has_gsls)
456 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
457 	if (sclp.has_ib)
458 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
459 	if (sclp.has_cei)
460 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
461 	if (sclp.has_ibs)
462 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
463 	if (sclp.has_kss)
464 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
465 	/*
466 	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
467 	 * all skey handling functions read/set the skey from the PGSTE
468 	 * instead of the real storage key.
469 	 *
470 	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will cause
471 	 * pages to be detected as preserved even though they are resident.
472 	 *
473 	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
474 	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
475 	 *
476 	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
477 	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
478 	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
479 	 *
480 	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
481 	 * cannot easily shadow the SCA because of the ipte lock.
482 	 */
483 }
484 
485 int kvm_arch_init(void *opaque)
486 {
487 	int rc = -ENOMEM;
488 
489 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
490 	if (!kvm_s390_dbf)
491 		return -ENOMEM;
492 
493 	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
494 	if (!kvm_s390_dbf_uv)
495 		goto out;
496 
497 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
498 	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
499 		goto out;
500 
501 	kvm_s390_cpu_feat_init();
502 
503 	/* Register floating interrupt controller interface. */
504 	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
505 	if (rc) {
506 		pr_err("A FLIC registration call failed with rc=%d\n", rc);
507 		goto out;
508 	}
509 
510 	rc = kvm_s390_gib_init(GAL_ISC);
511 	if (rc)
512 		goto out;
513 
514 	return 0;
515 
516 out:
517 	kvm_arch_exit();
518 	return rc;
519 }
520 
521 void kvm_arch_exit(void)
522 {
523 	kvm_s390_gib_destroy();
524 	debug_unregister(kvm_s390_dbf);
525 	debug_unregister(kvm_s390_dbf_uv);
526 }
527 
528 /* Section: device related */
529 long kvm_arch_dev_ioctl(struct file *filp,
530 			unsigned int ioctl, unsigned long arg)
531 {
532 	if (ioctl == KVM_S390_ENABLE_SIE)
533 		return s390_enable_sie();
534 	return -EINVAL;
535 }
536 
537 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
538 {
539 	int r;
540 
541 	switch (ext) {
542 	case KVM_CAP_S390_PSW:
543 	case KVM_CAP_S390_GMAP:
544 	case KVM_CAP_SYNC_MMU:
545 #ifdef CONFIG_KVM_S390_UCONTROL
546 	case KVM_CAP_S390_UCONTROL:
547 #endif
548 	case KVM_CAP_ASYNC_PF:
549 	case KVM_CAP_SYNC_REGS:
550 	case KVM_CAP_ONE_REG:
551 	case KVM_CAP_ENABLE_CAP:
552 	case KVM_CAP_S390_CSS_SUPPORT:
553 	case KVM_CAP_IOEVENTFD:
554 	case KVM_CAP_DEVICE_CTRL:
555 	case KVM_CAP_S390_IRQCHIP:
556 	case KVM_CAP_VM_ATTRIBUTES:
557 	case KVM_CAP_MP_STATE:
558 	case KVM_CAP_IMMEDIATE_EXIT:
559 	case KVM_CAP_S390_INJECT_IRQ:
560 	case KVM_CAP_S390_USER_SIGP:
561 	case KVM_CAP_S390_USER_STSI:
562 	case KVM_CAP_S390_SKEYS:
563 	case KVM_CAP_S390_IRQ_STATE:
564 	case KVM_CAP_S390_USER_INSTR0:
565 	case KVM_CAP_S390_CMMA_MIGRATION:
566 	case KVM_CAP_S390_AIS:
567 	case KVM_CAP_S390_AIS_MIGRATION:
568 	case KVM_CAP_S390_VCPU_RESETS:
569 	case KVM_CAP_SET_GUEST_DEBUG:
570 	case KVM_CAP_S390_DIAG318:
571 		r = 1;
572 		break;
573 	case KVM_CAP_SET_GUEST_DEBUG2:
574 		r = KVM_GUESTDBG_VALID_MASK;
575 		break;
576 	case KVM_CAP_S390_HPAGE_1M:
577 		r = 0;
578 		if (hpage && !kvm_is_ucontrol(kvm))
579 			r = 1;
580 		break;
581 	case KVM_CAP_S390_MEM_OP:
582 		r = MEM_OP_MAX_SIZE;
583 		break;
584 	case KVM_CAP_NR_VCPUS:
585 	case KVM_CAP_MAX_VCPUS:
586 	case KVM_CAP_MAX_VCPU_ID:
587 		r = KVM_S390_BSCA_CPU_SLOTS;
588 		if (!kvm_s390_use_sca_entries())
589 			r = KVM_MAX_VCPUS;
590 		else if (sclp.has_esca && sclp.has_64bscao)
591 			r = KVM_S390_ESCA_CPU_SLOTS;
592 		break;
593 	case KVM_CAP_S390_COW:
594 		r = MACHINE_HAS_ESOP;
595 		break;
596 	case KVM_CAP_S390_VECTOR_REGISTERS:
597 		r = MACHINE_HAS_VX;
598 		break;
599 	case KVM_CAP_S390_RI:
600 		r = test_facility(64);
601 		break;
602 	case KVM_CAP_S390_GS:
603 		r = test_facility(133);
604 		break;
605 	case KVM_CAP_S390_BPB:
606 		r = test_facility(82);
607 		break;
608 	case KVM_CAP_S390_PROTECTED:
609 		r = is_prot_virt_host();
610 		break;
611 	default:
612 		r = 0;
613 	}
614 	return r;
615 }
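
/*
 * Illustrative userspace sketch (assumes <fcntl.h>, <sys/ioctl.h>,
 * <linux/kvm.h> and a vm_fd obtained via KVM_CREATE_VM): the capabilities
 * reported above are probed with KVM_CHECK_EXTENSION, e.g.:
 *
 *	int max_memop = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *	int max_vcpus = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPUS);
 *
 * max_memop comes back as MEM_OP_MAX_SIZE (or 0 if unsupported) and
 * max_vcpus as one of the SCA slot counts or KVM_MAX_VCPUS, as computed
 * above.
 */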
616 
617 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
618 {
619 	int i;
620 	gfn_t cur_gfn, last_gfn;
621 	unsigned long gaddr, vmaddr;
622 	struct gmap *gmap = kvm->arch.gmap;
623 	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
624 
625 	/* Loop over all guest segments */
626 	cur_gfn = memslot->base_gfn;
627 	last_gfn = memslot->base_gfn + memslot->npages;
628 	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
629 		gaddr = gfn_to_gpa(cur_gfn);
630 		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
631 		if (kvm_is_error_hva(vmaddr))
632 			continue;
633 
634 		bitmap_zero(bitmap, _PAGE_ENTRIES);
635 		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
636 		for (i = 0; i < _PAGE_ENTRIES; i++) {
637 			if (test_bit(i, bitmap))
638 				mark_page_dirty(kvm, cur_gfn + i);
639 		}
640 
641 		if (fatal_signal_pending(current))
642 			return;
643 		cond_resched();
644 	}
645 }
646 
647 /* Section: vm related */
648 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
649 
650 /*
651  * Get (and clear) the dirty memory log for a memory slot.
652  */
653 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
654 			       struct kvm_dirty_log *log)
655 {
656 	int r;
657 	unsigned long n;
658 	struct kvm_memory_slot *memslot;
659 	int is_dirty;
660 
661 	if (kvm_is_ucontrol(kvm))
662 		return -EINVAL;
663 
664 	mutex_lock(&kvm->slots_lock);
665 
666 	r = -EINVAL;
667 	if (log->slot >= KVM_USER_MEM_SLOTS)
668 		goto out;
669 
670 	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
671 	if (r)
672 		goto out;
673 
674 	/* Clear the dirty log */
675 	if (is_dirty) {
676 		n = kvm_dirty_bitmap_bytes(memslot);
677 		memset(memslot->dirty_bitmap, 0, n);
678 	}
679 	r = 0;
680 out:
681 	mutex_unlock(&kvm->slots_lock);
682 	return r;
683 }
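
/*
 * Illustrative userspace sketch: the dirty log collected above is fetched
 * (and cleared) with KVM_GET_DIRTY_LOG on the VM fd.  The buffer must
 * provide one bit per page of the slot, rounded up to a multiple of 64
 * bits (see kvm_dirty_bitmap_bytes()); bitmap below is assumed to be
 * allocated accordingly:
 *
 *	struct kvm_dirty_log log = { .slot = 0, .dirty_bitmap = bitmap };
 *	ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);
 */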
684 
685 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
686 {
687 	unsigned int i;
688 	struct kvm_vcpu *vcpu;
689 
690 	kvm_for_each_vcpu(i, vcpu, kvm) {
691 		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
692 	}
693 }
694 
695 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
696 {
697 	int r;
698 
699 	if (cap->flags)
700 		return -EINVAL;
701 
702 	switch (cap->cap) {
703 	case KVM_CAP_S390_IRQCHIP:
704 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
705 		kvm->arch.use_irqchip = 1;
706 		r = 0;
707 		break;
708 	case KVM_CAP_S390_USER_SIGP:
709 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
710 		kvm->arch.user_sigp = 1;
711 		r = 0;
712 		break;
713 	case KVM_CAP_S390_VECTOR_REGISTERS:
714 		mutex_lock(&kvm->lock);
715 		if (kvm->created_vcpus) {
716 			r = -EBUSY;
717 		} else if (MACHINE_HAS_VX) {
718 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
719 			set_kvm_facility(kvm->arch.model.fac_list, 129);
720 			if (test_facility(134)) {
721 				set_kvm_facility(kvm->arch.model.fac_mask, 134);
722 				set_kvm_facility(kvm->arch.model.fac_list, 134);
723 			}
724 			if (test_facility(135)) {
725 				set_kvm_facility(kvm->arch.model.fac_mask, 135);
726 				set_kvm_facility(kvm->arch.model.fac_list, 135);
727 			}
728 			if (test_facility(148)) {
729 				set_kvm_facility(kvm->arch.model.fac_mask, 148);
730 				set_kvm_facility(kvm->arch.model.fac_list, 148);
731 			}
732 			if (test_facility(152)) {
733 				set_kvm_facility(kvm->arch.model.fac_mask, 152);
734 				set_kvm_facility(kvm->arch.model.fac_list, 152);
735 			}
736 			if (test_facility(192)) {
737 				set_kvm_facility(kvm->arch.model.fac_mask, 192);
738 				set_kvm_facility(kvm->arch.model.fac_list, 192);
739 			}
740 			r = 0;
741 		} else
742 			r = -EINVAL;
743 		mutex_unlock(&kvm->lock);
744 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
745 			 r ? "(not available)" : "(success)");
746 		break;
747 	case KVM_CAP_S390_RI:
748 		r = -EINVAL;
749 		mutex_lock(&kvm->lock);
750 		if (kvm->created_vcpus) {
751 			r = -EBUSY;
752 		} else if (test_facility(64)) {
753 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
754 			set_kvm_facility(kvm->arch.model.fac_list, 64);
755 			r = 0;
756 		}
757 		mutex_unlock(&kvm->lock);
758 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
759 			 r ? "(not available)" : "(success)");
760 		break;
761 	case KVM_CAP_S390_AIS:
762 		mutex_lock(&kvm->lock);
763 		if (kvm->created_vcpus) {
764 			r = -EBUSY;
765 		} else {
766 			set_kvm_facility(kvm->arch.model.fac_mask, 72);
767 			set_kvm_facility(kvm->arch.model.fac_list, 72);
768 			r = 0;
769 		}
770 		mutex_unlock(&kvm->lock);
771 		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
772 			 r ? "(not available)" : "(success)");
773 		break;
774 	case KVM_CAP_S390_GS:
775 		r = -EINVAL;
776 		mutex_lock(&kvm->lock);
777 		if (kvm->created_vcpus) {
778 			r = -EBUSY;
779 		} else if (test_facility(133)) {
780 			set_kvm_facility(kvm->arch.model.fac_mask, 133);
781 			set_kvm_facility(kvm->arch.model.fac_list, 133);
782 			r = 0;
783 		}
784 		mutex_unlock(&kvm->lock);
785 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
786 			 r ? "(not available)" : "(success)");
787 		break;
788 	case KVM_CAP_S390_HPAGE_1M:
789 		mutex_lock(&kvm->lock);
790 		if (kvm->created_vcpus)
791 			r = -EBUSY;
792 		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
793 			r = -EINVAL;
794 		else {
795 			r = 0;
796 			mmap_write_lock(kvm->mm);
797 			kvm->mm->context.allow_gmap_hpage_1m = 1;
798 			mmap_write_unlock(kvm->mm);
799 			/*
800 			 * We might have to create fake 4k page
801 			 * tables. To prevent the hardware from working on
802 			 * stale PGSTEs, we emulate these instructions.
803 			 */
804 			kvm->arch.use_skf = 0;
805 			kvm->arch.use_pfmfi = 0;
806 		}
807 		mutex_unlock(&kvm->lock);
808 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
809 			 r ? "(not available)" : "(success)");
810 		break;
811 	case KVM_CAP_S390_USER_STSI:
812 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
813 		kvm->arch.user_stsi = 1;
814 		r = 0;
815 		break;
816 	case KVM_CAP_S390_USER_INSTR0:
817 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
818 		kvm->arch.user_instr0 = 1;
819 		icpt_operexc_on_all_vcpus(kvm);
820 		r = 0;
821 		break;
822 	default:
823 		r = -EINVAL;
824 		break;
825 	}
826 	return r;
827 }
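
/*
 * Illustrative userspace sketch (vm_fd from KVM_CREATE_VM assumed): the VM
 * capabilities handled above are switched on with KVM_ENABLE_CAP, e.g.:
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *	if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap))
 *		perror("KVM_ENABLE_CAP");
 *
 * Capabilities that change the CPU model (vector registers, RI, GS, ...)
 * must be enabled before the first vCPU is created, as enforced by the
 * -EBUSY checks above.
 */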
828 
829 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
830 {
831 	int ret;
832 
833 	switch (attr->attr) {
834 	case KVM_S390_VM_MEM_LIMIT_SIZE:
835 		ret = 0;
836 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
837 			 kvm->arch.mem_limit);
838 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
839 			ret = -EFAULT;
840 		break;
841 	default:
842 		ret = -ENXIO;
843 		break;
844 	}
845 	return ret;
846 }
847 
848 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
849 {
850 	int ret;
851 	unsigned int idx;
852 	switch (attr->attr) {
853 	case KVM_S390_VM_MEM_ENABLE_CMMA:
854 		ret = -ENXIO;
855 		if (!sclp.has_cmma)
856 			break;
857 
858 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
859 		mutex_lock(&kvm->lock);
860 		if (kvm->created_vcpus)
861 			ret = -EBUSY;
862 		else if (kvm->mm->context.allow_gmap_hpage_1m)
863 			ret = -EINVAL;
864 		else {
865 			kvm->arch.use_cmma = 1;
866 			/* Not compatible with cmma. */
867 			kvm->arch.use_pfmfi = 0;
868 			ret = 0;
869 		}
870 		mutex_unlock(&kvm->lock);
871 		break;
872 	case KVM_S390_VM_MEM_CLR_CMMA:
873 		ret = -ENXIO;
874 		if (!sclp.has_cmma)
875 			break;
876 		ret = -EINVAL;
877 		if (!kvm->arch.use_cmma)
878 			break;
879 
880 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
881 		mutex_lock(&kvm->lock);
882 		idx = srcu_read_lock(&kvm->srcu);
883 		s390_reset_cmma(kvm->arch.gmap->mm);
884 		srcu_read_unlock(&kvm->srcu, idx);
885 		mutex_unlock(&kvm->lock);
886 		ret = 0;
887 		break;
888 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
889 		unsigned long new_limit;
890 
891 		if (kvm_is_ucontrol(kvm))
892 			return -EINVAL;
893 
894 		if (get_user(new_limit, (u64 __user *)attr->addr))
895 			return -EFAULT;
896 
897 		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
898 		    new_limit > kvm->arch.mem_limit)
899 			return -E2BIG;
900 
901 		if (!new_limit)
902 			return -EINVAL;
903 
904 		/* gmap_create takes last usable address */
905 		if (new_limit != KVM_S390_NO_MEM_LIMIT)
906 			new_limit -= 1;
907 
908 		ret = -EBUSY;
909 		mutex_lock(&kvm->lock);
910 		if (!kvm->created_vcpus) {
911 			/* gmap_create will round the limit up */
912 			struct gmap *new = gmap_create(current->mm, new_limit);
913 
914 			if (!new) {
915 				ret = -ENOMEM;
916 			} else {
917 				gmap_remove(kvm->arch.gmap);
918 				new->private = kvm;
919 				kvm->arch.gmap = new;
920 				ret = 0;
921 			}
922 		}
923 		mutex_unlock(&kvm->lock);
924 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
925 		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
926 			 (void *) kvm->arch.gmap->asce);
927 		break;
928 	}
929 	default:
930 		ret = -ENXIO;
931 		break;
932 	}
933 	return ret;
934 }
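
/*
 * Illustrative userspace sketch: the memory control attributes above are
 * driven through KVM_SET_DEVICE_ATTR on the VM fd (vm_fd, from
 * KVM_CREATE_VM; the 16 GiB limit is an arbitrary example value):
 *
 *	__u64 limit = 16ULL << 30;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MEM_CTRL,
 *		.attr  = KVM_S390_VM_MEM_LIMIT_SIZE,
 *		.addr  = (__u64)(unsigned long)&limit,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 *
 * KVM_S390_VM_MEM_ENABLE_CMMA and KVM_S390_VM_MEM_CLR_CMMA take no payload
 * and are invoked the same way with addr left at 0.
 */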
935 
936 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
937 
938 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
939 {
940 	struct kvm_vcpu *vcpu;
941 	int i;
942 
943 	kvm_s390_vcpu_block_all(kvm);
944 
945 	kvm_for_each_vcpu(i, vcpu, kvm) {
946 		kvm_s390_vcpu_crypto_setup(vcpu);
947 		/* recreate the shadow crycb by leaving the VSIE handler */
948 		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
949 	}
950 
951 	kvm_s390_vcpu_unblock_all(kvm);
952 }
953 
954 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
955 {
956 	mutex_lock(&kvm->lock);
957 	switch (attr->attr) {
958 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
959 		if (!test_kvm_facility(kvm, 76)) {
960 			mutex_unlock(&kvm->lock);
961 			return -EINVAL;
962 		}
963 		get_random_bytes(
964 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
965 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
966 		kvm->arch.crypto.aes_kw = 1;
967 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
968 		break;
969 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
970 		if (!test_kvm_facility(kvm, 76)) {
971 			mutex_unlock(&kvm->lock);
972 			return -EINVAL;
973 		}
974 		get_random_bytes(
975 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
976 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
977 		kvm->arch.crypto.dea_kw = 1;
978 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
979 		break;
980 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
981 		if (!test_kvm_facility(kvm, 76)) {
982 			mutex_unlock(&kvm->lock);
983 			return -EINVAL;
984 		}
985 		kvm->arch.crypto.aes_kw = 0;
986 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
987 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
988 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
989 		break;
990 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
991 		if (!test_kvm_facility(kvm, 76)) {
992 			mutex_unlock(&kvm->lock);
993 			return -EINVAL;
994 		}
995 		kvm->arch.crypto.dea_kw = 0;
996 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
997 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
998 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
999 		break;
1000 	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1001 		if (!ap_instructions_available()) {
1002 			mutex_unlock(&kvm->lock);
1003 			return -EOPNOTSUPP;
1004 		}
1005 		kvm->arch.crypto.apie = 1;
1006 		break;
1007 	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1008 		if (!ap_instructions_available()) {
1009 			mutex_unlock(&kvm->lock);
1010 			return -EOPNOTSUPP;
1011 		}
1012 		kvm->arch.crypto.apie = 0;
1013 		break;
1014 	default:
1015 		mutex_unlock(&kvm->lock);
1016 		return -ENXIO;
1017 	}
1018 
1019 	kvm_s390_vcpu_crypto_reset_all(kvm);
1020 	mutex_unlock(&kvm->lock);
1021 	return 0;
1022 }
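
/*
 * Illustrative userspace sketch (vm_fd as in the earlier sketches): the
 * crypto controls above take no payload, e.g. enabling AES key wrapping
 * (requires facility 76 / MSA3):
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CRYPTO,
 *		.attr  = KVM_S390_VM_CRYPTO_ENABLE_AES_KW,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */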
1023 
1024 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
1025 {
1026 	int cx;
1027 	struct kvm_vcpu *vcpu;
1028 
1029 	kvm_for_each_vcpu(cx, vcpu, kvm)
1030 		kvm_s390_sync_request(req, vcpu);
1031 }
1032 
1033 /*
1034  * Must be called with kvm->srcu held to avoid races on memslots, and with
1035  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1036  */
1037 static int kvm_s390_vm_start_migration(struct kvm *kvm)
1038 {
1039 	struct kvm_memory_slot *ms;
1040 	struct kvm_memslots *slots;
1041 	unsigned long ram_pages = 0;
1042 	int slotnr;
1043 
1044 	/* migration mode already enabled */
1045 	if (kvm->arch.migration_mode)
1046 		return 0;
1047 	slots = kvm_memslots(kvm);
1048 	if (!slots || !slots->used_slots)
1049 		return -EINVAL;
1050 
1051 	if (!kvm->arch.use_cmma) {
1052 		kvm->arch.migration_mode = 1;
1053 		return 0;
1054 	}
1055 	/* mark all the pages in active slots as dirty */
1056 	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1057 		ms = slots->memslots + slotnr;
1058 		if (!ms->dirty_bitmap)
1059 			return -EINVAL;
1060 		/*
1061 		 * The second half of the bitmap is only used on x86,
1062 		 * and would be wasted otherwise, so we put it to good
1063 		 * use here to keep track of the state of the storage
1064 		 * attributes.
1065 		 */
1066 		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1067 		ram_pages += ms->npages;
1068 	}
1069 	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1070 	kvm->arch.migration_mode = 1;
1071 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1072 	return 0;
1073 }
1074 
1075 /*
1076  * Must be called with kvm->slots_lock to avoid races with ourselves and
1077  * kvm_s390_vm_start_migration.
1078  */
1079 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1080 {
1081 	/* migration mode already disabled */
1082 	if (!kvm->arch.migration_mode)
1083 		return 0;
1084 	kvm->arch.migration_mode = 0;
1085 	if (kvm->arch.use_cmma)
1086 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1087 	return 0;
1088 }
1089 
1090 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1091 				     struct kvm_device_attr *attr)
1092 {
1093 	int res = -ENXIO;
1094 
1095 	mutex_lock(&kvm->slots_lock);
1096 	switch (attr->attr) {
1097 	case KVM_S390_VM_MIGRATION_START:
1098 		res = kvm_s390_vm_start_migration(kvm);
1099 		break;
1100 	case KVM_S390_VM_MIGRATION_STOP:
1101 		res = kvm_s390_vm_stop_migration(kvm);
1102 		break;
1103 	default:
1104 		break;
1105 	}
1106 	mutex_unlock(&kvm->slots_lock);
1107 
1108 	return res;
1109 }
1110 
1111 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1112 				     struct kvm_device_attr *attr)
1113 {
1114 	u64 mig = kvm->arch.migration_mode;
1115 
1116 	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1117 		return -ENXIO;
1118 
1119 	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1120 		return -EFAULT;
1121 	return 0;
1122 }
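
/*
 * Illustrative userspace sketch (vm_fd as before): migration mode is
 * toggled and queried via the KVM_S390_VM_MIGRATION attribute group:
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MIGRATION,
 *		.attr  = KVM_S390_VM_MIGRATION_START,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 *
 *	__u64 status;
 *	attr.attr = KVM_S390_VM_MIGRATION_STATUS;
 *	attr.addr = (__u64)(unsigned long)&status;
 *	ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);
 *
 * status reads back as 0 or 1, mirroring kvm->arch.migration_mode.
 */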
1123 
1124 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1125 {
1126 	struct kvm_s390_vm_tod_clock gtod;
1127 
1128 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1129 		return -EFAULT;
1130 
1131 	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1132 		return -EINVAL;
1133 	kvm_s390_set_tod_clock(kvm, &gtod);
1134 
1135 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1136 		gtod.epoch_idx, gtod.tod);
1137 
1138 	return 0;
1139 }
1140 
1141 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1142 {
1143 	u8 gtod_high;
1144 
1145 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1146 					   sizeof(gtod_high)))
1147 		return -EFAULT;
1148 
1149 	if (gtod_high != 0)
1150 		return -EINVAL;
1151 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1152 
1153 	return 0;
1154 }
1155 
1156 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1157 {
1158 	struct kvm_s390_vm_tod_clock gtod = { 0 };
1159 
1160 	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1161 			   sizeof(gtod.tod)))
1162 		return -EFAULT;
1163 
1164 	kvm_s390_set_tod_clock(kvm, &gtod);
1165 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1166 	return 0;
1167 }
1168 
1169 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1170 {
1171 	int ret;
1172 
1173 	if (attr->flags)
1174 		return -EINVAL;
1175 
1176 	switch (attr->attr) {
1177 	case KVM_S390_VM_TOD_EXT:
1178 		ret = kvm_s390_set_tod_ext(kvm, attr);
1179 		break;
1180 	case KVM_S390_VM_TOD_HIGH:
1181 		ret = kvm_s390_set_tod_high(kvm, attr);
1182 		break;
1183 	case KVM_S390_VM_TOD_LOW:
1184 		ret = kvm_s390_set_tod_low(kvm, attr);
1185 		break;
1186 	default:
1187 		ret = -ENXIO;
1188 		break;
1189 	}
1190 	return ret;
1191 }
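
/*
 * Illustrative userspace sketch (vm_fd as before): setting the guest TOD
 * (8-bit epoch index plus 64-bit TOD base) via the extension attribute; a
 * non-zero epoch_idx requires facility 139, and tod is a hypothetical
 * variable holding the desired TOD base:
 *
 *	struct kvm_s390_vm_tod_clock gtod = { .epoch_idx = 0, .tod = tod };
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_TOD,
 *		.attr  = KVM_S390_VM_TOD_EXT,
 *		.addr  = (__u64)(unsigned long)&gtod,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */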
1192 
1193 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1194 				   struct kvm_s390_vm_tod_clock *gtod)
1195 {
1196 	union tod_clock clk;
1197 
1198 	preempt_disable();
1199 
1200 	store_tod_clock_ext(&clk);
1201 
1202 	gtod->tod = clk.tod + kvm->arch.epoch;
1203 	gtod->epoch_idx = 0;
1204 	if (test_kvm_facility(kvm, 139)) {
1205 		gtod->epoch_idx = clk.ei + kvm->arch.epdx;
1206 		if (gtod->tod < clk.tod)
1207 			gtod->epoch_idx += 1;
1208 	}
1209 
1210 	preempt_enable();
1211 }
1212 
1213 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1214 {
1215 	struct kvm_s390_vm_tod_clock gtod;
1216 
1217 	memset(&gtod, 0, sizeof(gtod));
1218 	kvm_s390_get_tod_clock(kvm, &gtod);
1219 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1220 		return -EFAULT;
1221 
1222 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1223 		gtod.epoch_idx, gtod.tod);
1224 	return 0;
1225 }
1226 
1227 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1228 {
1229 	u8 gtod_high = 0;
1230 
1231 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
1232 					 sizeof(gtod_high)))
1233 		return -EFAULT;
1234 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1235 
1236 	return 0;
1237 }
1238 
1239 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1240 {
1241 	u64 gtod;
1242 
1243 	gtod = kvm_s390_get_tod_clock_fast(kvm);
1244 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1245 		return -EFAULT;
1246 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1247 
1248 	return 0;
1249 }
1250 
1251 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1252 {
1253 	int ret;
1254 
1255 	if (attr->flags)
1256 		return -EINVAL;
1257 
1258 	switch (attr->attr) {
1259 	case KVM_S390_VM_TOD_EXT:
1260 		ret = kvm_s390_get_tod_ext(kvm, attr);
1261 		break;
1262 	case KVM_S390_VM_TOD_HIGH:
1263 		ret = kvm_s390_get_tod_high(kvm, attr);
1264 		break;
1265 	case KVM_S390_VM_TOD_LOW:
1266 		ret = kvm_s390_get_tod_low(kvm, attr);
1267 		break;
1268 	default:
1269 		ret = -ENXIO;
1270 		break;
1271 	}
1272 	return ret;
1273 }
1274 
1275 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1276 {
1277 	struct kvm_s390_vm_cpu_processor *proc;
1278 	u16 lowest_ibc, unblocked_ibc;
1279 	int ret = 0;
1280 
1281 	mutex_lock(&kvm->lock);
1282 	if (kvm->created_vcpus) {
1283 		ret = -EBUSY;
1284 		goto out;
1285 	}
1286 	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1287 	if (!proc) {
1288 		ret = -ENOMEM;
1289 		goto out;
1290 	}
1291 	if (!copy_from_user(proc, (void __user *)attr->addr,
1292 			    sizeof(*proc))) {
1293 		kvm->arch.model.cpuid = proc->cpuid;
1294 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
1295 		unblocked_ibc = sclp.ibc & 0xfff;
1296 		if (lowest_ibc && proc->ibc) {
1297 			if (proc->ibc > unblocked_ibc)
1298 				kvm->arch.model.ibc = unblocked_ibc;
1299 			else if (proc->ibc < lowest_ibc)
1300 				kvm->arch.model.ibc = lowest_ibc;
1301 			else
1302 				kvm->arch.model.ibc = proc->ibc;
1303 		}
1304 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
1305 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
1306 		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1307 			 kvm->arch.model.ibc,
1308 			 kvm->arch.model.cpuid);
1309 		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1310 			 kvm->arch.model.fac_list[0],
1311 			 kvm->arch.model.fac_list[1],
1312 			 kvm->arch.model.fac_list[2]);
1313 	} else
1314 		ret = -EFAULT;
1315 	kfree(proc);
1316 out:
1317 	mutex_unlock(&kvm->lock);
1318 	return ret;
1319 }
1320 
1321 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1322 				       struct kvm_device_attr *attr)
1323 {
1324 	struct kvm_s390_vm_cpu_feat data;
1325 
1326 	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1327 		return -EFAULT;
1328 	if (!bitmap_subset((unsigned long *) data.feat,
1329 			   kvm_s390_available_cpu_feat,
1330 			   KVM_S390_VM_CPU_FEAT_NR_BITS))
1331 		return -EINVAL;
1332 
1333 	mutex_lock(&kvm->lock);
1334 	if (kvm->created_vcpus) {
1335 		mutex_unlock(&kvm->lock);
1336 		return -EBUSY;
1337 	}
1338 	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1339 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1340 	mutex_unlock(&kvm->lock);
1341 	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1342 			 data.feat[0],
1343 			 data.feat[1],
1344 			 data.feat[2]);
1345 	return 0;
1346 }
1347 
1348 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1349 					  struct kvm_device_attr *attr)
1350 {
1351 	mutex_lock(&kvm->lock);
1352 	if (kvm->created_vcpus) {
1353 		mutex_unlock(&kvm->lock);
1354 		return -EBUSY;
1355 	}
1356 
1357 	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1358 			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1359 		mutex_unlock(&kvm->lock);
1360 		return -EFAULT;
1361 	}
1362 	mutex_unlock(&kvm->lock);
1363 
1364 	VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1365 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1366 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1367 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1368 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1369 	VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1370 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1371 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1372 	VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1373 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1374 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1375 	VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1376 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1377 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1378 	VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1379 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1380 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1381 	VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1382 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1383 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1384 	VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1385 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1386 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1387 	VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1388 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1389 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1390 	VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1391 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1392 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1393 	VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1394 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1395 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1396 	VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1397 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1398 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1399 	VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1400 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1401 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1402 	VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1403 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1404 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1405 	VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1406 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1407 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1408 	VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1409 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1410 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1411 	VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1412 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1413 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1414 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1415 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1416 	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1417 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1418 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1419 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1420 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1421 
1422 	return 0;
1423 }
1424 
1425 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1426 {
1427 	int ret = -ENXIO;
1428 
1429 	switch (attr->attr) {
1430 	case KVM_S390_VM_CPU_PROCESSOR:
1431 		ret = kvm_s390_set_processor(kvm, attr);
1432 		break;
1433 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1434 		ret = kvm_s390_set_processor_feat(kvm, attr);
1435 		break;
1436 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1437 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
1438 		break;
1439 	}
1440 	return ret;
1441 }
1442 
1443 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1444 {
1445 	struct kvm_s390_vm_cpu_processor *proc;
1446 	int ret = 0;
1447 
1448 	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1449 	if (!proc) {
1450 		ret = -ENOMEM;
1451 		goto out;
1452 	}
1453 	proc->cpuid = kvm->arch.model.cpuid;
1454 	proc->ibc = kvm->arch.model.ibc;
1455 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1456 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1457 	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1458 		 kvm->arch.model.ibc,
1459 		 kvm->arch.model.cpuid);
1460 	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1461 		 kvm->arch.model.fac_list[0],
1462 		 kvm->arch.model.fac_list[1],
1463 		 kvm->arch.model.fac_list[2]);
1464 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1465 		ret = -EFAULT;
1466 	kfree(proc);
1467 out:
1468 	return ret;
1469 }
1470 
1471 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1472 {
1473 	struct kvm_s390_vm_cpu_machine *mach;
1474 	int ret = 0;
1475 
1476 	mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1477 	if (!mach) {
1478 		ret = -ENOMEM;
1479 		goto out;
1480 	}
1481 	get_cpu_id((struct cpuid *) &mach->cpuid);
1482 	mach->ibc = sclp.ibc;
1483 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1484 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1485 	memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
1486 	       sizeof(stfle_fac_list));
1487 	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1488 		 kvm->arch.model.ibc,
1489 		 kvm->arch.model.cpuid);
1490 	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1491 		 mach->fac_mask[0],
1492 		 mach->fac_mask[1],
1493 		 mach->fac_mask[2]);
1494 	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1495 		 mach->fac_list[0],
1496 		 mach->fac_list[1],
1497 		 mach->fac_list[2]);
1498 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1499 		ret = -EFAULT;
1500 	kfree(mach);
1501 out:
1502 	return ret;
1503 }
1504 
1505 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1506 				       struct kvm_device_attr *attr)
1507 {
1508 	struct kvm_s390_vm_cpu_feat data;
1509 
1510 	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1511 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1512 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1513 		return -EFAULT;
1514 	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1515 			 data.feat[0],
1516 			 data.feat[1],
1517 			 data.feat[2]);
1518 	return 0;
1519 }
1520 
1521 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1522 				     struct kvm_device_attr *attr)
1523 {
1524 	struct kvm_s390_vm_cpu_feat data;
1525 
1526 	bitmap_copy((unsigned long *) data.feat,
1527 		    kvm_s390_available_cpu_feat,
1528 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1529 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1530 		return -EFAULT;
1531 	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1532 			 data.feat[0],
1533 			 data.feat[1],
1534 			 data.feat[2]);
1535 	return 0;
1536 }
1537 
1538 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1539 					  struct kvm_device_attr *attr)
1540 {
1541 	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1542 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1543 		return -EFAULT;
1544 
1545 	VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1546 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1547 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1548 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1549 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1550 	VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1551 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1552 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1553 	VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1554 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1555 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1556 	VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1557 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1558 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1559 	VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1560 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1561 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1562 	VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1563 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1564 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1565 	VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1566 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1567 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1568 	VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1569 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1570 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1571 	VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1572 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1573 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1574 	VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1575 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1576 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1577 	VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1578 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1579 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1580 	VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1581 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1582 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1583 	VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1584 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1585 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1586 	VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1587 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1588 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1589 	VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1590 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1591 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1592 	VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1593 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1594 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1595 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1596 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1597 	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1598 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1599 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1600 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1601 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1602 
1603 	return 0;
1604 }
1605 
1606 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1607 					struct kvm_device_attr *attr)
1608 {
1609 	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1610 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1611 		return -EFAULT;
1612 
1613 	VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1614 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1615 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1616 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1617 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1618 	VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1619 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1620 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1621 	VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1622 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1623 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1624 	VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1625 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1626 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1627 	VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1628 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1629 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1630 	VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1631 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1632 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1633 	VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1634 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1635 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1636 	VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1637 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1638 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1639 	VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1640 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1641 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1642 	VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1643 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1644 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1645 	VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1646 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1647 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1648 	VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1649 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1650 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1651 	VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1652 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1653 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1654 	VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1655 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1656 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1657 	VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1658 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1659 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1660 	VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1661 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1662 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1663 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1664 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1665 	VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1666 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1667 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1668 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1669 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1670 
1671 	return 0;
1672 }
1673 
1674 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1675 {
1676 	int ret = -ENXIO;
1677 
1678 	switch (attr->attr) {
1679 	case KVM_S390_VM_CPU_PROCESSOR:
1680 		ret = kvm_s390_get_processor(kvm, attr);
1681 		break;
1682 	case KVM_S390_VM_CPU_MACHINE:
1683 		ret = kvm_s390_get_machine(kvm, attr);
1684 		break;
1685 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1686 		ret = kvm_s390_get_processor_feat(kvm, attr);
1687 		break;
1688 	case KVM_S390_VM_CPU_MACHINE_FEAT:
1689 		ret = kvm_s390_get_machine_feat(kvm, attr);
1690 		break;
1691 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1692 		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1693 		break;
1694 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1695 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1696 		break;
1697 	}
1698 	return ret;
1699 }
1700 
1701 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1702 {
1703 	int ret;
1704 
1705 	switch (attr->group) {
1706 	case KVM_S390_VM_MEM_CTRL:
1707 		ret = kvm_s390_set_mem_control(kvm, attr);
1708 		break;
1709 	case KVM_S390_VM_TOD:
1710 		ret = kvm_s390_set_tod(kvm, attr);
1711 		break;
1712 	case KVM_S390_VM_CPU_MODEL:
1713 		ret = kvm_s390_set_cpu_model(kvm, attr);
1714 		break;
1715 	case KVM_S390_VM_CRYPTO:
1716 		ret = kvm_s390_vm_set_crypto(kvm, attr);
1717 		break;
1718 	case KVM_S390_VM_MIGRATION:
1719 		ret = kvm_s390_vm_set_migration(kvm, attr);
1720 		break;
1721 	default:
1722 		ret = -ENXIO;
1723 		break;
1724 	}
1725 
1726 	return ret;
1727 }
1728 
1729 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1730 {
1731 	int ret;
1732 
1733 	switch (attr->group) {
1734 	case KVM_S390_VM_MEM_CTRL:
1735 		ret = kvm_s390_get_mem_control(kvm, attr);
1736 		break;
1737 	case KVM_S390_VM_TOD:
1738 		ret = kvm_s390_get_tod(kvm, attr);
1739 		break;
1740 	case KVM_S390_VM_CPU_MODEL:
1741 		ret = kvm_s390_get_cpu_model(kvm, attr);
1742 		break;
1743 	case KVM_S390_VM_MIGRATION:
1744 		ret = kvm_s390_vm_get_migration(kvm, attr);
1745 		break;
1746 	default:
1747 		ret = -ENXIO;
1748 		break;
1749 	}
1750 
1751 	return ret;
1752 }
1753 
1754 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1755 {
1756 	int ret;
1757 
1758 	switch (attr->group) {
1759 	case KVM_S390_VM_MEM_CTRL:
1760 		switch (attr->attr) {
1761 		case KVM_S390_VM_MEM_ENABLE_CMMA:
1762 		case KVM_S390_VM_MEM_CLR_CMMA:
1763 			ret = sclp.has_cmma ? 0 : -ENXIO;
1764 			break;
1765 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1766 			ret = 0;
1767 			break;
1768 		default:
1769 			ret = -ENXIO;
1770 			break;
1771 		}
1772 		break;
1773 	case KVM_S390_VM_TOD:
1774 		switch (attr->attr) {
1775 		case KVM_S390_VM_TOD_LOW:
1776 		case KVM_S390_VM_TOD_HIGH:
1777 			ret = 0;
1778 			break;
1779 		default:
1780 			ret = -ENXIO;
1781 			break;
1782 		}
1783 		break;
1784 	case KVM_S390_VM_CPU_MODEL:
1785 		switch (attr->attr) {
1786 		case KVM_S390_VM_CPU_PROCESSOR:
1787 		case KVM_S390_VM_CPU_MACHINE:
1788 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1789 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1790 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1791 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1792 			ret = 0;
1793 			break;
1794 		default:
1795 			ret = -ENXIO;
1796 			break;
1797 		}
1798 		break;
1799 	case KVM_S390_VM_CRYPTO:
1800 		switch (attr->attr) {
1801 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1802 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1803 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1804 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1805 			ret = 0;
1806 			break;
1807 		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1808 		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1809 			ret = ap_instructions_available() ? 0 : -ENXIO;
1810 			break;
1811 		default:
1812 			ret = -ENXIO;
1813 			break;
1814 		}
1815 		break;
1816 	case KVM_S390_VM_MIGRATION:
1817 		ret = 0;
1818 		break;
1819 	default:
1820 		ret = -ENXIO;
1821 		break;
1822 	}
1823 
1824 	return ret;
1825 }
1826 
1827 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1828 {
1829 	uint8_t *keys;
1830 	uint64_t hva;
1831 	int srcu_idx, i, r = 0;
1832 
1833 	if (args->flags != 0)
1834 		return -EINVAL;
1835 
1836 	/* Is this guest using storage keys? */
1837 	if (!mm_uses_skeys(current->mm))
1838 		return KVM_S390_GET_SKEYS_NONE;
1839 
1840 	/* Enforce sane limit on memory allocation */
1841 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1842 		return -EINVAL;
1843 
1844 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1845 	if (!keys)
1846 		return -ENOMEM;
1847 
1848 	mmap_read_lock(current->mm);
1849 	srcu_idx = srcu_read_lock(&kvm->srcu);
1850 	for (i = 0; i < args->count; i++) {
1851 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1852 		if (kvm_is_error_hva(hva)) {
1853 			r = -EFAULT;
1854 			break;
1855 		}
1856 
1857 		r = get_guest_storage_key(current->mm, hva, &keys[i]);
1858 		if (r)
1859 			break;
1860 	}
1861 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1862 	mmap_read_unlock(current->mm);
1863 
1864 	if (!r) {
1865 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1866 				 sizeof(uint8_t) * args->count);
1867 		if (r)
1868 			r = -EFAULT;
1869 	}
1870 
1871 	kvfree(keys);
1872 	return r;
1873 }
1874 
1875 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1876 {
1877 	uint8_t *keys;
1878 	uint64_t hva;
1879 	int srcu_idx, i, r = 0;
1880 	bool unlocked;
1881 
1882 	if (args->flags != 0)
1883 		return -EINVAL;
1884 
1885 	/* Enforce sane limit on memory allocation */
1886 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1887 		return -EINVAL;
1888 
1889 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1890 	if (!keys)
1891 		return -ENOMEM;
1892 
1893 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1894 			   sizeof(uint8_t) * args->count);
1895 	if (r) {
1896 		r = -EFAULT;
1897 		goto out;
1898 	}
1899 
1900 	/* Enable storage key handling for the guest */
1901 	r = s390_enable_skey();
1902 	if (r)
1903 		goto out;
1904 
1905 	i = 0;
1906 	mmap_read_lock(current->mm);
1907 	srcu_idx = srcu_read_lock(&kvm->srcu);
1908 	while (i < args->count) {
1909 		unlocked = false;
1910 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1911 		if (kvm_is_error_hva(hva)) {
1912 			r = -EFAULT;
1913 			break;
1914 		}
1915 
1916 		/* Lowest order bit is reserved */
1917 		if (keys[i] & 0x01) {
1918 			r = -EINVAL;
1919 			break;
1920 		}
1921 
1922 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1923 		if (r) {
1924 			r = fixup_user_fault(current->mm, hva,
1925 					     FAULT_FLAG_WRITE, &unlocked);
1926 			if (r)
1927 				break;
1928 		}
1929 		if (!r)
1930 			i++;
1931 	}
1932 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1933 	mmap_read_unlock(current->mm);
1934 out:
1935 	kvfree(keys);
1936 	return r;
1937 }
1938 
1939 /*
1940  * Base address and length must be sent at the start of each block, therefore
1941  * it's cheaper to send some clean data, as long as it's less than the size of
1942  * two longs.
1943  */
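/*
 * On s390x this evaluates to 16: a run of up to 16 clean CMMA values
 * (one byte per guest page) still costs no more than the two longs of
 * base address and length needed to start a new block.
 */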
1944 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1945 /* use the same sane limit as for the storage keys, for consistency */
1946 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1947 
1948 /*
1949  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1950  * address falls in a hole. In that case the index of one of the memslots
1951  * bordering the hole is returned.
1952  */
1953 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1954 {
1955 	int start = 0, end = slots->used_slots;
1956 	int slot = atomic_read(&slots->lru_slot);
1957 	struct kvm_memory_slot *memslots = slots->memslots;
1958 
1959 	if (gfn >= memslots[slot].base_gfn &&
1960 	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
1961 		return slot;
1962 
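	/*
	 * Binary search: the memslot array is sorted by base_gfn in
	 * descending order, so lower indices cover higher guest addresses.
	 */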
1963 	while (start < end) {
1964 		slot = start + (end - start) / 2;
1965 
1966 		if (gfn >= memslots[slot].base_gfn)
1967 			end = slot;
1968 		else
1969 			start = slot + 1;
1970 	}
1971 
1972 	if (start >= slots->used_slots)
1973 		return slots->used_slots - 1;
1974 
1975 	if (gfn >= memslots[start].base_gfn &&
1976 	    gfn < memslots[start].base_gfn + memslots[start].npages) {
1977 		atomic_set(&slots->lru_slot, start);
1978 	}
1979 
1980 	return start;
1981 }
1982 
1983 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1984 			      u8 *res, unsigned long bufsize)
1985 {
1986 	unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1987 
1988 	args->count = 0;
1989 	while (args->count < bufsize) {
1990 		hva = gfn_to_hva(kvm, cur_gfn);
1991 		/*
1992 		 * We return an error if the first value was invalid, but we
1993 		 * return successfully if at least one value was copied.
1994 		 */
1995 		if (kvm_is_error_hva(hva))
1996 			return args->count ? 0 : -EFAULT;
1997 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1998 			pgstev = 0;
1999 		res[args->count++] = (pgstev >> 24) & 0x43;
2000 		cur_gfn++;
2001 	}
2002 
2003 	return 0;
2004 }
2005 
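/*
 * Starting at cur_gfn, find the guest frame number of the next page that
 * is marked in the per-memslot CMMA dirty bitmap, moving upwards through
 * the memslots (wrapping to the lowest memslot if cur_gfn lies beyond the
 * topmost one). If no marked page is found, the returned value equals the
 * end of guest memory.
 */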
2006 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
2007 					      unsigned long cur_gfn)
2008 {
2009 	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
2010 	struct kvm_memory_slot *ms = slots->memslots + slotidx;
2011 	unsigned long ofs = cur_gfn - ms->base_gfn;
2012 
2013 	if (ms->base_gfn + ms->npages <= cur_gfn) {
2014 		slotidx--;
2015 		/* If we are above the highest slot, wrap around */
2016 		if (slotidx < 0)
2017 			slotidx = slots->used_slots - 1;
2018 
2019 		ms = slots->memslots + slotidx;
2020 		ofs = 0;
2021 	}
2022 	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
2023 	while ((slotidx > 0) && (ofs >= ms->npages)) {
2024 		slotidx--;
2025 		ms = slots->memslots + slotidx;
2026 		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
2027 	}
2028 	return ms->base_gfn + ofs;
2029 }
2030 
2031 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2032 			     u8 *res, unsigned long bufsize)
2033 {
2034 	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2035 	struct kvm_memslots *slots = kvm_memslots(kvm);
2036 	struct kvm_memory_slot *ms;
2037 
2038 	if (unlikely(!slots->used_slots))
2039 		return 0;
2040 
2041 	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2042 	ms = gfn_to_memslot(kvm, cur_gfn);
2043 	args->count = 0;
2044 	args->start_gfn = cur_gfn;
2045 	if (!ms)
2046 		return 0;
2047 	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
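	/*
	 * memslots are sorted by descending base_gfn, so memslots[0] covers
	 * the highest guest addresses and marks the end of guest memory.
	 */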
2048 	mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2049 
2050 	while (args->count < bufsize) {
2051 		hva = gfn_to_hva(kvm, cur_gfn);
2052 		if (kvm_is_error_hva(hva))
2053 			return 0;
2054 		/* Decrement only if we actually flipped the bit to 0 */
2055 		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2056 			atomic64_dec(&kvm->arch.cmma_dirty_pages);
2057 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2058 			pgstev = 0;
2059 		/* Save the value */
2060 		res[args->count++] = (pgstev >> 24) & 0x43;
2061 		/* If the next bit is too far away, stop. */
2062 		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2063 			return 0;
2064 		/* If we reached the previous "next", find the next one */
2065 		if (cur_gfn == next_gfn)
2066 			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2067 		/* Reached the end of memory or of the buffer, stop */
2068 		if ((next_gfn >= mem_end) ||
2069 		    (next_gfn - args->start_gfn >= bufsize))
2070 			return 0;
2071 		cur_gfn++;
2072 		/* Reached the end of the current memslot, take the next one. */
2073 		if (cur_gfn - ms->base_gfn >= ms->npages) {
2074 			ms = gfn_to_memslot(kvm, cur_gfn);
2075 			if (!ms)
2076 				return 0;
2077 		}
2078 	}
2079 	return 0;
2080 }
2081 
2082 /*
2083  * This function searches for the next page with dirty CMMA attributes, and
2084  * saves the attributes in the buffer up to either the end of the buffer or
2085  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2086  * no trailing clean bytes are saved.
2087  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2088  * If no dirty bits were found, or if CMMA was not enabled or used, the
2089  * output length (args->count) will be 0.
2090 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2091 				  struct kvm_s390_cmma_log *args)
2092 {
2093 	unsigned long bufsize;
2094 	int srcu_idx, peek, ret;
2095 	u8 *values;
2096 
2097 	if (!kvm->arch.use_cmma)
2098 		return -ENXIO;
2099 	/* Invalid/unsupported flags were specified */
2100 	if (args->flags & ~KVM_S390_CMMA_PEEK)
2101 		return -EINVAL;
2102 	/* Querying migration data (not peeking) requires migration mode */
2103 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2104 	if (!peek && !kvm->arch.migration_mode)
2105 		return -EINVAL;
2106 	/* CMMA is disabled or was not used, or the buffer has length zero */
2107 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2108 	if (!bufsize || !kvm->mm->context.uses_cmm) {
2109 		memset(args, 0, sizeof(*args));
2110 		return 0;
2111 	}
2112 	/* We are not peeking, and there are no dirty pages */
2113 	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2114 		memset(args, 0, sizeof(*args));
2115 		return 0;
2116 	}
2117 
2118 	values = vmalloc(bufsize);
2119 	if (!values)
2120 		return -ENOMEM;
2121 
2122 	mmap_read_lock(kvm->mm);
2123 	srcu_idx = srcu_read_lock(&kvm->srcu);
2124 	if (peek)
2125 		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2126 	else
2127 		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2128 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2129 	mmap_read_unlock(kvm->mm);
2130 
2131 	if (kvm->arch.migration_mode)
2132 		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2133 	else
2134 		args->remaining = 0;
2135 
2136 	if (copy_to_user((void __user *)args->values, values, args->count))
2137 		ret = -EFAULT;
2138 
2139 	vfree(values);
2140 	return ret;
2141 }
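
/*
 * Userspace usage sketch (not part of this file): fill a struct
 * kvm_s390_cmma_log with start_gfn, count and a buffer address in values,
 * optionally set KVM_S390_CMMA_PEEK in flags, and issue the
 * KVM_S390_GET_CMMA_BITS vm ioctl. On return, count is the number of
 * values actually stored, start_gfn the first gfn they describe, and
 * remaining the number of dirty pages left (only meaningful in migration
 * mode).
 */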
2142 
2143 /*
2144  * This function sets the CMMA attributes for the given pages. If the input
2145  * buffer has zero length, no action is taken, otherwise the attributes are
2146  * set and the mm->context.uses_cmm flag is set.
2147  */
2148 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2149 				  const struct kvm_s390_cmma_log *args)
2150 {
2151 	unsigned long hva, mask, pgstev, i;
2152 	uint8_t *bits;
2153 	int srcu_idx, r = 0;
2154 
2155 	mask = args->mask;
2156 
2157 	if (!kvm->arch.use_cmma)
2158 		return -ENXIO;
2159 	/* invalid/unsupported flags */
2160 	if (args->flags != 0)
2161 		return -EINVAL;
2162 	/* Enforce sane limit on memory allocation */
2163 	if (args->count > KVM_S390_CMMA_SIZE_MAX)
2164 		return -EINVAL;
2165 	/* Nothing to do */
2166 	if (args->count == 0)
2167 		return 0;
2168 
2169 	bits = vmalloc(array_size(sizeof(*bits), args->count));
2170 	if (!bits)
2171 		return -ENOMEM;
2172 
2173 	r = copy_from_user(bits, (void __user *)args->values, args->count);
2174 	if (r) {
2175 		r = -EFAULT;
2176 		goto out;
2177 	}
2178 
2179 	mmap_read_lock(kvm->mm);
2180 	srcu_idx = srcu_read_lock(&kvm->srcu);
2181 	for (i = 0; i < args->count; i++) {
2182 		hva = gfn_to_hva(kvm, args->start_gfn + i);
2183 		if (kvm_is_error_hva(hva)) {
2184 			r = -EFAULT;
2185 			break;
2186 		}
2187 
2188 		pgstev = bits[i];
2189 		pgstev = pgstev << 24;
2190 		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2191 		set_pgste_bits(kvm->mm, hva, mask, pgstev);
2192 	}
2193 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2194 	mmap_read_unlock(kvm->mm);
2195 
2196 	if (!kvm->mm->context.uses_cmm) {
2197 		mmap_write_lock(kvm->mm);
2198 		kvm->mm->context.uses_cmm = 1;
2199 		mmap_write_unlock(kvm->mm);
2200 	}
2201 out:
2202 	vfree(bits);
2203 	return r;
2204 }
2205 
2206 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2207 {
2208 	struct kvm_vcpu *vcpu;
2209 	u16 rc, rrc;
2210 	int ret = 0;
2211 	int i;
2212 
2213 	/*
2214 	 * We ignore failures and try to destroy as many CPUs as possible.
2215 	 * At the same time we must not free the assigned resources when
2216 	 * this fails, as the ultravisor still has access to that memory.
2217 	 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2218 	 * behind.
2219 	 * We do want to return the rc and rrc of the first failure, though.
2220 	 */
2221 	kvm_for_each_vcpu(i, vcpu, kvm) {
2222 		mutex_lock(&vcpu->mutex);
2223 		if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2224 			*rcp = rc;
2225 			*rrcp = rrc;
2226 			ret = -EIO;
2227 		}
2228 		mutex_unlock(&vcpu->mutex);
2229 	}
2230 	return ret;
2231 }
2232 
2233 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2234 {
2235 	int i, r = 0;
2236 	u16 dummy;
2237 	struct kvm_vcpu *vcpu;
2238 
2239 
2240 	kvm_for_each_vcpu(i, vcpu, kvm) {
2241 		mutex_lock(&vcpu->mutex);
2242 		r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2243 		mutex_unlock(&vcpu->mutex);
2244 		if (r)
2245 			break;
2246 	}
2247 	if (r)
2248 		kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2249 	return r;
2250 }
2251 
2252 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2253 {
2254 	int r = 0;
2255 	u16 dummy;
2256 	void __user *argp = (void __user *)cmd->data;
2257 
2258 	switch (cmd->cmd) {
2259 	case KVM_PV_ENABLE: {
2260 		r = -EINVAL;
2261 		if (kvm_s390_pv_is_protected(kvm))
2262 			break;
2263 
2264 		/*
2265 		 * FMT 4 SIE needs esca. As we never switch back to bsca from
2266 		 * esca, we need no cleanup in the error cases below.
2267 		 */
2268 		r = sca_switch_to_extended(kvm);
2269 		if (r)
2270 			break;
2271 
2272 		mmap_write_lock(current->mm);
2273 		r = gmap_mark_unmergeable();
2274 		mmap_write_unlock(current->mm);
2275 		if (r)
2276 			break;
2277 
2278 		r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2279 		if (r)
2280 			break;
2281 
2282 		r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2283 		if (r)
2284 			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2285 
2286 		/* we need to block service interrupts from now on */
2287 		set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2288 		break;
2289 	}
2290 	case KVM_PV_DISABLE: {
2291 		r = -EINVAL;
2292 		if (!kvm_s390_pv_is_protected(kvm))
2293 			break;
2294 
2295 		r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2296 		/*
2297 		 * If a CPU could not be destroyed, destroying the VM will also
2298 		 * fail. There is no point in trying; instead return the rc and
2299 		 * rrc from the first CPU whose destruction failed.
2300 		 */
2301 		if (r)
2302 			break;
2303 		r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2304 
2305 		/* no need to block service interrupts any more */
2306 		clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2307 		break;
2308 	}
2309 	case KVM_PV_SET_SEC_PARMS: {
2310 		struct kvm_s390_pv_sec_parm parms = {};
2311 		void *hdr;
2312 
2313 		r = -EINVAL;
2314 		if (!kvm_s390_pv_is_protected(kvm))
2315 			break;
2316 
2317 		r = -EFAULT;
2318 		if (copy_from_user(&parms, argp, sizeof(parms)))
2319 			break;
2320 
2321 		/* Currently restricted to 8KB */
2322 		r = -EINVAL;
2323 		if (parms.length > PAGE_SIZE * 2)
2324 			break;
2325 
2326 		r = -ENOMEM;
2327 		hdr = vmalloc(parms.length);
2328 		if (!hdr)
2329 			break;
2330 
2331 		r = -EFAULT;
2332 		if (!copy_from_user(hdr, (void __user *)parms.origin,
2333 				    parms.length))
2334 			r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2335 						      &cmd->rc, &cmd->rrc);
2336 
2337 		vfree(hdr);
2338 		break;
2339 	}
2340 	case KVM_PV_UNPACK: {
2341 		struct kvm_s390_pv_unp unp = {};
2342 
2343 		r = -EINVAL;
2344 		if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2345 			break;
2346 
2347 		r = -EFAULT;
2348 		if (copy_from_user(&unp, argp, sizeof(unp)))
2349 			break;
2350 
2351 		r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2352 				       &cmd->rc, &cmd->rrc);
2353 		break;
2354 	}
2355 	case KVM_PV_VERIFY: {
2356 		r = -EINVAL;
2357 		if (!kvm_s390_pv_is_protected(kvm))
2358 			break;
2359 
2360 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2361 				  UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2362 		KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2363 			     cmd->rrc);
2364 		break;
2365 	}
2366 	case KVM_PV_PREP_RESET: {
2367 		r = -EINVAL;
2368 		if (!kvm_s390_pv_is_protected(kvm))
2369 			break;
2370 
2371 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2372 				  UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2373 		KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2374 			     cmd->rc, cmd->rrc);
2375 		break;
2376 	}
2377 	case KVM_PV_UNSHARE_ALL: {
2378 		r = -EINVAL;
2379 		if (!kvm_s390_pv_is_protected(kvm))
2380 			break;
2381 
2382 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2383 				  UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2384 		KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2385 			     cmd->rc, cmd->rrc);
2386 		break;
2387 	}
2388 	default:
2389 		r = -ENOTTY;
2390 	}
2391 	return r;
2392 }
2393 
2394 long kvm_arch_vm_ioctl(struct file *filp,
2395 		       unsigned int ioctl, unsigned long arg)
2396 {
2397 	struct kvm *kvm = filp->private_data;
2398 	void __user *argp = (void __user *)arg;
2399 	struct kvm_device_attr attr;
2400 	int r;
2401 
2402 	switch (ioctl) {
2403 	case KVM_S390_INTERRUPT: {
2404 		struct kvm_s390_interrupt s390int;
2405 
2406 		r = -EFAULT;
2407 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
2408 			break;
2409 		r = kvm_s390_inject_vm(kvm, &s390int);
2410 		break;
2411 	}
2412 	case KVM_CREATE_IRQCHIP: {
2413 		struct kvm_irq_routing_entry routing;
2414 
2415 		r = -EINVAL;
2416 		if (kvm->arch.use_irqchip) {
2417 			/* Set up dummy routing. */
2418 			memset(&routing, 0, sizeof(routing));
2419 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2420 		}
2421 		break;
2422 	}
2423 	case KVM_SET_DEVICE_ATTR: {
2424 		r = -EFAULT;
2425 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2426 			break;
2427 		r = kvm_s390_vm_set_attr(kvm, &attr);
2428 		break;
2429 	}
2430 	case KVM_GET_DEVICE_ATTR: {
2431 		r = -EFAULT;
2432 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2433 			break;
2434 		r = kvm_s390_vm_get_attr(kvm, &attr);
2435 		break;
2436 	}
2437 	case KVM_HAS_DEVICE_ATTR: {
2438 		r = -EFAULT;
2439 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2440 			break;
2441 		r = kvm_s390_vm_has_attr(kvm, &attr);
2442 		break;
2443 	}
2444 	case KVM_S390_GET_SKEYS: {
2445 		struct kvm_s390_skeys args;
2446 
2447 		r = -EFAULT;
2448 		if (copy_from_user(&args, argp,
2449 				   sizeof(struct kvm_s390_skeys)))
2450 			break;
2451 		r = kvm_s390_get_skeys(kvm, &args);
2452 		break;
2453 	}
2454 	case KVM_S390_SET_SKEYS: {
2455 		struct kvm_s390_skeys args;
2456 
2457 		r = -EFAULT;
2458 		if (copy_from_user(&args, argp,
2459 				   sizeof(struct kvm_s390_skeys)))
2460 			break;
2461 		r = kvm_s390_set_skeys(kvm, &args);
2462 		break;
2463 	}
2464 	case KVM_S390_GET_CMMA_BITS: {
2465 		struct kvm_s390_cmma_log args;
2466 
2467 		r = -EFAULT;
2468 		if (copy_from_user(&args, argp, sizeof(args)))
2469 			break;
2470 		mutex_lock(&kvm->slots_lock);
2471 		r = kvm_s390_get_cmma_bits(kvm, &args);
2472 		mutex_unlock(&kvm->slots_lock);
2473 		if (!r) {
2474 			r = copy_to_user(argp, &args, sizeof(args));
2475 			if (r)
2476 				r = -EFAULT;
2477 		}
2478 		break;
2479 	}
2480 	case KVM_S390_SET_CMMA_BITS: {
2481 		struct kvm_s390_cmma_log args;
2482 
2483 		r = -EFAULT;
2484 		if (copy_from_user(&args, argp, sizeof(args)))
2485 			break;
2486 		mutex_lock(&kvm->slots_lock);
2487 		r = kvm_s390_set_cmma_bits(kvm, &args);
2488 		mutex_unlock(&kvm->slots_lock);
2489 		break;
2490 	}
2491 	case KVM_S390_PV_COMMAND: {
2492 		struct kvm_pv_cmd args;
2493 
2494 		/* protvirt implies user controlled cpu state (user sigp) */
2495 		kvm->arch.user_cpu_state_ctrl = 1;
2496 		r = 0;
2497 		if (!is_prot_virt_host()) {
2498 			r = -EINVAL;
2499 			break;
2500 		}
2501 		if (copy_from_user(&args, argp, sizeof(args))) {
2502 			r = -EFAULT;
2503 			break;
2504 		}
2505 		if (args.flags) {
2506 			r = -EINVAL;
2507 			break;
2508 		}
2509 		mutex_lock(&kvm->lock);
2510 		r = kvm_s390_handle_pv(kvm, &args);
2511 		mutex_unlock(&kvm->lock);
2512 		if (copy_to_user(argp, &args, sizeof(args))) {
2513 			r = -EFAULT;
2514 			break;
2515 		}
2516 		break;
2517 	}
2518 	default:
2519 		r = -ENOTTY;
2520 	}
2521 
2522 	return r;
2523 }
2524 
2525 static int kvm_s390_apxa_installed(void)
2526 {
2527 	struct ap_config_info info;
2528 
2529 	if (ap_instructions_available()) {
2530 		if (ap_qci(&info) == 0)
2531 			return info.apxa;
2532 	}
2533 
2534 	return 0;
2535 }
2536 
2537 /*
2538  * The format of the crypto control block (CRYCB) is specified in the 3 low
2539  * order bits of the CRYCB designation (CRYCBD) field as follows:
2540  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2541  *	     AP extended addressing (APXA) facility are installed.
2542  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2543  * Format 2: Both the APXA and MSAX3 facilities are installed.
2544  */
2545 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2546 {
2547 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2548 
2549 	/* Clear the CRYCB format bits - i.e., set format 0 by default */
2550 	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2551 
2552 	/* Check whether MSAX3 is installed */
2553 	if (!test_kvm_facility(kvm, 76))
2554 		return;
2555 
2556 	if (kvm_s390_apxa_installed())
2557 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2558 	else
2559 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2560 }
2561 
2562 /*
2563  * kvm_arch_crypto_set_masks
2564  *
2565  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2566  *	 to be set.
2567  * @apm: the mask identifying the accessible AP adapters
2568  * @aqm: the mask identifying the accessible AP domains
2569  * @adm: the mask identifying the accessible AP control domains
2570  *
2571  * Set the masks that identify the adapters, domains and control domains to
2572  * which the KVM guest is granted access.
2573  *
2574  * Note: The kvm->lock mutex must be locked by the caller before invoking this
2575  *	 function.
2576  */
2577 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2578 			       unsigned long *aqm, unsigned long *adm)
2579 {
2580 	struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2581 
2582 	kvm_s390_vcpu_block_all(kvm);
2583 
2584 	switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2585 	case CRYCB_FORMAT2: /* APCB1 uses 256 bits */
2586 		memcpy(crycb->apcb1.apm, apm, 32);
2587 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2588 			 apm[0], apm[1], apm[2], apm[3]);
2589 		memcpy(crycb->apcb1.aqm, aqm, 32);
2590 		VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2591 			 aqm[0], aqm[1], aqm[2], aqm[3]);
2592 		memcpy(crycb->apcb1.adm, adm, 32);
2593 		VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2594 			 adm[0], adm[1], adm[2], adm[3]);
2595 		break;
2596 	case CRYCB_FORMAT1:
2597 	case CRYCB_FORMAT0: /* Fall through - both use APCB0 */
2598 		memcpy(crycb->apcb0.apm, apm, 8);
2599 		memcpy(crycb->apcb0.aqm, aqm, 2);
2600 		memcpy(crycb->apcb0.adm, adm, 2);
2601 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2602 			 apm[0], *((unsigned short *)aqm),
2603 			 *((unsigned short *)adm));
2604 		break;
2605 	default:	/* Cannot happen */
2606 		break;
2607 	}
2608 
2609 	/* recreate the shadow crycb for each vcpu */
2610 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2611 	kvm_s390_vcpu_unblock_all(kvm);
2612 }
2613 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2614 
2615 /*
2616  * kvm_arch_crypto_clear_masks
2617  *
2618  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2619  *	 to be cleared.
2620  *
2621  * Clear the masks that identify the adapters, domains and control domains to
2622  * which the KVM guest is granted access.
2623  *
2624  * Note: The kvm->lock mutex must be locked by the caller before invoking this
2625  *	 function.
2626  */
2627 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2628 {
2629 	kvm_s390_vcpu_block_all(kvm);
2630 
2631 	memset(&kvm->arch.crypto.crycb->apcb0, 0,
2632 	       sizeof(kvm->arch.crypto.crycb->apcb0));
2633 	memset(&kvm->arch.crypto.crycb->apcb1, 0,
2634 	       sizeof(kvm->arch.crypto.crycb->apcb1));
2635 
2636 	VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2637 	/* recreate the shadow crycb for each vcpu */
2638 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2639 	kvm_s390_vcpu_unblock_all(kvm);
2640 }
2641 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2642 
2643 static u64 kvm_s390_get_initial_cpuid(void)
2644 {
2645 	struct cpuid cpuid;
2646 
2647 	get_cpu_id(&cpuid);
2648 	cpuid.version = 0xff;
2649 	return *((u64 *) &cpuid);
2650 }
2651 
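/*
 * Set up the CRYCB for a new guest: select the CRYCB format and, if the
 * MSAX3 facility (76) is available, enable AES and DEA protected key
 * handling with freshly generated wrapping key masks.
 */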
2652 static void kvm_s390_crypto_init(struct kvm *kvm)
2653 {
2654 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2655 	kvm_s390_set_crycb_format(kvm);
2656 	init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);
2657 
2658 	if (!test_kvm_facility(kvm, 76))
2659 		return;
2660 
2661 	/* Enable AES/DEA protected key functions by default */
2662 	kvm->arch.crypto.aes_kw = 1;
2663 	kvm->arch.crypto.dea_kw = 1;
2664 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2665 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2666 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2667 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2668 }
2669 
2670 static void sca_dispose(struct kvm *kvm)
2671 {
2672 	if (kvm->arch.use_esca)
2673 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2674 	else
2675 		free_page((unsigned long)(kvm->arch.sca));
2676 	kvm->arch.sca = NULL;
2677 }
2678 
2679 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2680 {
2681 	gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
2682 	int i, rc;
2683 	char debug_name[16];
2684 	static unsigned long sca_offset;
2685 
2686 	rc = -EINVAL;
2687 #ifdef CONFIG_KVM_S390_UCONTROL
2688 	if (type & ~KVM_VM_S390_UCONTROL)
2689 		goto out_err;
2690 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2691 		goto out_err;
2692 #else
2693 	if (type)
2694 		goto out_err;
2695 #endif
2696 
2697 	rc = s390_enable_sie();
2698 	if (rc)
2699 		goto out_err;
2700 
2701 	rc = -ENOMEM;
2702 
2703 	if (!sclp.has_64bscao)
2704 		alloc_flags |= GFP_DMA;
2705 	rwlock_init(&kvm->arch.sca_lock);
2706 	/* start with basic SCA */
2707 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2708 	if (!kvm->arch.sca)
2709 		goto out_err;
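	/*
	 * The basic SCA does not fill a whole page; stagger it within its
	 * page by a rotating 16 byte offset so that consecutive VMs do not
	 * all place their SCA at the same page offset.
	 */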
2710 	mutex_lock(&kvm_lock);
2711 	sca_offset += 16;
2712 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2713 		sca_offset = 0;
2714 	kvm->arch.sca = (struct bsca_block *)
2715 			((char *) kvm->arch.sca + sca_offset);
2716 	mutex_unlock(&kvm_lock);
2717 
2718 	sprintf(debug_name, "kvm-%u", current->pid);
2719 
2720 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2721 	if (!kvm->arch.dbf)
2722 		goto out_err;
2723 
2724 	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2725 	kvm->arch.sie_page2 =
2726 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
2727 	if (!kvm->arch.sie_page2)
2728 		goto out_err;
2729 
2730 	kvm->arch.sie_page2->kvm = kvm;
2731 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2732 
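	/*
	 * fac_mask holds the facilities that userspace may enable for the
	 * guest (host facilities intersected with the known base and
	 * extension sets), while fac_list starts out as the default set
	 * presented to the guest (host facilities intersected with the
	 * base set).
	 */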
2733 	for (i = 0; i < kvm_s390_fac_size(); i++) {
2734 		kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
2735 					      (kvm_s390_fac_base[i] |
2736 					       kvm_s390_fac_ext[i]);
2737 		kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
2738 					      kvm_s390_fac_base[i];
2739 	}
2740 	kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2741 
2742 	/* we are always in czam mode - even on pre-z14 machines */
2743 	set_kvm_facility(kvm->arch.model.fac_mask, 138);
2744 	set_kvm_facility(kvm->arch.model.fac_list, 138);
2745 	/* we emulate STHYI in kvm */
2746 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
2747 	set_kvm_facility(kvm->arch.model.fac_list, 74);
2748 	if (MACHINE_HAS_TLB_GUEST) {
2749 		set_kvm_facility(kvm->arch.model.fac_mask, 147);
2750 		set_kvm_facility(kvm->arch.model.fac_list, 147);
2751 	}
2752 
2753 	if (css_general_characteristics.aiv && test_facility(65))
2754 		set_kvm_facility(kvm->arch.model.fac_mask, 65);
2755 
2756 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2757 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2758 
2759 	kvm_s390_crypto_init(kvm);
2760 
2761 	mutex_init(&kvm->arch.float_int.ais_lock);
2762 	spin_lock_init(&kvm->arch.float_int.lock);
2763 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
2764 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2765 	init_waitqueue_head(&kvm->arch.ipte_wq);
2766 	mutex_init(&kvm->arch.ipte_mutex);
2767 
2768 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2769 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
2770 
2771 	if (type & KVM_VM_S390_UCONTROL) {
2772 		kvm->arch.gmap = NULL;
2773 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2774 	} else {
2775 		if (sclp.hamax == U64_MAX)
2776 			kvm->arch.mem_limit = TASK_SIZE_MAX;
2777 		else
2778 			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2779 						    sclp.hamax + 1);
2780 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2781 		if (!kvm->arch.gmap)
2782 			goto out_err;
2783 		kvm->arch.gmap->private = kvm;
2784 		kvm->arch.gmap->pfault_enabled = 0;
2785 	}
2786 
2787 	kvm->arch.use_pfmfi = sclp.has_pfmfi;
2788 	kvm->arch.use_skf = sclp.has_skey;
2789 	spin_lock_init(&kvm->arch.start_stop_lock);
2790 	kvm_s390_vsie_init(kvm);
2791 	if (use_gisa)
2792 		kvm_s390_gisa_init(kvm);
2793 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2794 
2795 	return 0;
2796 out_err:
2797 	free_page((unsigned long)kvm->arch.sie_page2);
2798 	debug_unregister(kvm->arch.dbf);
2799 	sca_dispose(kvm);
2800 	KVM_EVENT(3, "creation of vm failed: %d", rc);
2801 	return rc;
2802 }
2803 
2804 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2805 {
2806 	u16 rc, rrc;
2807 
2808 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2809 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2810 	kvm_s390_clear_local_irqs(vcpu);
2811 	kvm_clear_async_pf_completion_queue(vcpu);
2812 	if (!kvm_is_ucontrol(vcpu->kvm))
2813 		sca_del_vcpu(vcpu);
2814 
2815 	if (kvm_is_ucontrol(vcpu->kvm))
2816 		gmap_remove(vcpu->arch.gmap);
2817 
2818 	if (vcpu->kvm->arch.use_cmma)
2819 		kvm_s390_vcpu_unsetup_cmma(vcpu);
2820 	/* We cannot hold the vcpu mutex here, we are already dying */
2821 	if (kvm_s390_pv_cpu_get_handle(vcpu))
2822 		kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2823 	free_page((unsigned long)(vcpu->arch.sie_block));
2824 }
2825 
2826 static void kvm_free_vcpus(struct kvm *kvm)
2827 {
2828 	unsigned int i;
2829 	struct kvm_vcpu *vcpu;
2830 
2831 	kvm_for_each_vcpu(i, vcpu, kvm)
2832 		kvm_vcpu_destroy(vcpu);
2833 
2834 	mutex_lock(&kvm->lock);
2835 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2836 		kvm->vcpus[i] = NULL;
2837 
2838 	atomic_set(&kvm->online_vcpus, 0);
2839 	mutex_unlock(&kvm->lock);
2840 }
2841 
2842 void kvm_arch_destroy_vm(struct kvm *kvm)
2843 {
2844 	u16 rc, rrc;
2845 
2846 	kvm_free_vcpus(kvm);
2847 	sca_dispose(kvm);
2848 	kvm_s390_gisa_destroy(kvm);
2849 	/*
2850 	 * We are already at the end of life and kvm->lock is not taken.
2851 	 * This is ok as the file descriptor is closed by now and nobody
2852 	 * can mess with the pv state. To avoid lockdep_assert_held from
2853 	 * complaining we do not use kvm_s390_pv_is_protected.
2854 	 */
2855 	if (kvm_s390_pv_get_handle(kvm))
2856 		kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2857 	debug_unregister(kvm->arch.dbf);
2858 	free_page((unsigned long)kvm->arch.sie_page2);
2859 	if (!kvm_is_ucontrol(kvm))
2860 		gmap_remove(kvm->arch.gmap);
2861 	kvm_s390_destroy_adapters(kvm);
2862 	kvm_s390_clear_float_irqs(kvm);
2863 	kvm_s390_vsie_destroy(kvm);
2864 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2865 }
2866 
2867 /* Section: vcpu related */
2868 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2869 {
2870 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2871 	if (!vcpu->arch.gmap)
2872 		return -ENOMEM;
2873 	vcpu->arch.gmap->private = vcpu->kvm;
2874 
2875 	return 0;
2876 }
2877 
2878 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2879 {
2880 	if (!kvm_s390_use_sca_entries())
2881 		return;
2882 	read_lock(&vcpu->kvm->arch.sca_lock);
2883 	if (vcpu->kvm->arch.use_esca) {
2884 		struct esca_block *sca = vcpu->kvm->arch.sca;
2885 
2886 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2887 		sca->cpu[vcpu->vcpu_id].sda = 0;
2888 	} else {
2889 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2890 
2891 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2892 		sca->cpu[vcpu->vcpu_id].sda = 0;
2893 	}
2894 	read_unlock(&vcpu->kvm->arch.sca_lock);
2895 }
2896 
2897 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2898 {
2899 	if (!kvm_s390_use_sca_entries()) {
2900 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2901 
2902 		/* we still need the basic sca for the ipte control */
2903 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2904 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2905 		return;
2906 	}
2907 	read_lock(&vcpu->kvm->arch.sca_lock);
2908 	if (vcpu->kvm->arch.use_esca) {
2909 		struct esca_block *sca = vcpu->kvm->arch.sca;
2910 
2911 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2912 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2913 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2914 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2915 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2916 	} else {
2917 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2918 
2919 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2920 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2921 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2922 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2923 	}
2924 	read_unlock(&vcpu->kvm->arch.sca_lock);
2925 }
2926 
2927 /* Basic SCA to Extended SCA data copy routines */
2928 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2929 {
2930 	d->sda = s->sda;
2931 	d->sigp_ctrl.c = s->sigp_ctrl.c;
2932 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2933 }
2934 
2935 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2936 {
2937 	int i;
2938 
2939 	d->ipte_control = s->ipte_control;
2940 	d->mcn[0] = s->mcn;
2941 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2942 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2943 }
2944 
2945 static int sca_switch_to_extended(struct kvm *kvm)
2946 {
2947 	struct bsca_block *old_sca = kvm->arch.sca;
2948 	struct esca_block *new_sca;
2949 	struct kvm_vcpu *vcpu;
2950 	unsigned int vcpu_idx;
2951 	u32 scaol, scaoh;
2952 
2953 	if (kvm->arch.use_esca)
2954 		return 0;
2955 
2956 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
2957 	if (!new_sca)
2958 		return -ENOMEM;
2959 
2960 	scaoh = (u32)((u64)(new_sca) >> 32);
2961 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
2962 
2963 	kvm_s390_vcpu_block_all(kvm);
2964 	write_lock(&kvm->arch.sca_lock);
2965 
2966 	sca_copy_b_to_e(new_sca, old_sca);
2967 
2968 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2969 		vcpu->arch.sie_block->scaoh = scaoh;
2970 		vcpu->arch.sie_block->scaol = scaol;
2971 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2972 	}
2973 	kvm->arch.sca = new_sca;
2974 	kvm->arch.use_esca = 1;
2975 
2976 	write_unlock(&kvm->arch.sca_lock);
2977 	kvm_s390_vcpu_unblock_all(kvm);
2978 
2979 	free_page((unsigned long)old_sca);
2980 
2981 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2982 		 old_sca, kvm->arch.sca);
2983 	return 0;
2984 }
2985 
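/*
 * Check whether a vcpu with the given id can be added: ids beyond the
 * basic SCA capacity require switching to the extended SCA, which in
 * turn needs the esca and 64bscao SCLP facilities.
 */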
2986 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2987 {
2988 	int rc;
2989 
2990 	if (!kvm_s390_use_sca_entries()) {
2991 		if (id < KVM_MAX_VCPUS)
2992 			return true;
2993 		return false;
2994 	}
2995 	if (id < KVM_S390_BSCA_CPU_SLOTS)
2996 		return true;
2997 	if (!sclp.has_esca || !sclp.has_64bscao)
2998 		return false;
2999 
3000 	mutex_lock(&kvm->lock);
3001 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
3002 	mutex_unlock(&kvm->lock);
3003 
3004 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
3005 }
3006 
3007 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3008 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3009 {
3010 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
3011 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3012 	vcpu->arch.cputm_start = get_tod_clock_fast();
3013 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3014 }
3015 
3016 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3017 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3018 {
3019 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
3020 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3021 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3022 	vcpu->arch.cputm_start = 0;
3023 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3024 }
3025 
3026 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3027 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3028 {
3029 	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
3030 	vcpu->arch.cputm_enabled = true;
3031 	__start_cpu_timer_accounting(vcpu);
3032 }
3033 
3034 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3035 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3036 {
3037 	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
3038 	__stop_cpu_timer_accounting(vcpu);
3039 	vcpu->arch.cputm_enabled = false;
3040 }
3041 
3042 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3043 {
3044 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3045 	__enable_cpu_timer_accounting(vcpu);
3046 	preempt_enable();
3047 }
3048 
3049 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3050 {
3051 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3052 	__disable_cpu_timer_accounting(vcpu);
3053 	preempt_enable();
3054 }
3055 
3056 /* set the cpu timer - may only be called from the VCPU thread itself */
3057 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3058 {
3059 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3060 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3061 	if (vcpu->arch.cputm_enabled)
3062 		vcpu->arch.cputm_start = get_tod_clock_fast();
3063 	vcpu->arch.sie_block->cputm = cputm;
3064 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3065 	preempt_enable();
3066 }
3067 
3068 /* update and get the cpu timer - can also be called from other VCPU threads */
3069 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3070 {
3071 	unsigned int seq;
3072 	__u64 value;
3073 
3074 	if (unlikely(!vcpu->arch.cputm_enabled))
3075 		return vcpu->arch.sie_block->cputm;
3076 
3077 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3078 	do {
3079 		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3080 		/*
3081 		 * If the writer would ever execute a read in the critical
3082 		 * section, e.g. in irq context, we have a deadlock.
3083 		 */
3084 		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3085 		value = vcpu->arch.sie_block->cputm;
3086 		/* if cputm_start is 0, accounting is being started/stopped */
3087 		if (likely(vcpu->arch.cputm_start))
3088 			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3089 	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3090 	preempt_enable();
3091 	return value;
3092 }
3093 
3094 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3095 {
3096 
3097 	gmap_enable(vcpu->arch.enabled_gmap);
3098 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3099 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3100 		__start_cpu_timer_accounting(vcpu);
3101 	vcpu->cpu = cpu;
3102 }
3103 
3104 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3105 {
3106 	vcpu->cpu = -1;
3107 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3108 		__stop_cpu_timer_accounting(vcpu);
3109 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3110 	vcpu->arch.enabled_gmap = gmap_get_enabled();
3111 	gmap_disable(vcpu->arch.enabled_gmap);
3112 
3113 }
3114 
3115 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3116 {
3117 	mutex_lock(&vcpu->kvm->lock);
3118 	preempt_disable();
3119 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3120 	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3121 	preempt_enable();
3122 	mutex_unlock(&vcpu->kvm->lock);
3123 	if (!kvm_is_ucontrol(vcpu->kvm)) {
3124 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3125 		sca_add_vcpu(vcpu);
3126 	}
3127 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3128 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3129 	/* make vcpu_load load the right gmap on the first trigger */
3130 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3131 }
3132 
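/*
 * Test whether a PCKMO subfunction is available to the guest, i.e. the
 * corresponding bit is set both in the configured CPU model and in the
 * subfunctions offered by the host.
 */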
3133 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3134 {
3135 	if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3136 	    test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3137 		return true;
3138 	return false;
3139 }
3140 
3141 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3142 {
3143 	/* At least one ECC subfunction must be present */
3144 	return kvm_has_pckmo_subfunc(kvm, 32) ||
3145 	       kvm_has_pckmo_subfunc(kvm, 33) ||
3146 	       kvm_has_pckmo_subfunc(kvm, 34) ||
3147 	       kvm_has_pckmo_subfunc(kvm, 40) ||
3148 	       kvm_has_pckmo_subfunc(kvm, 41);
3149 
3150 }
3151 
3152 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3153 {
3154 	/*
3155 	 * If the AP instructions are not being interpreted and the MSAX3
3156 	 * facility is not configured for the guest, there is nothing to set up.
3157 	 */
3158 	if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3159 		return;
3160 
3161 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3162 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3163 	vcpu->arch.sie_block->eca &= ~ECA_APIE;
3164 	vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3165 
3166 	if (vcpu->kvm->arch.crypto.apie)
3167 		vcpu->arch.sie_block->eca |= ECA_APIE;
3168 
3169 	/* Set up protected key support */
3170 	if (vcpu->kvm->arch.crypto.aes_kw) {
3171 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3172 		/* ecc is also wrapped with AES key */
3173 		if (kvm_has_pckmo_ecc(vcpu->kvm))
3174 			vcpu->arch.sie_block->ecd |= ECD_ECC;
3175 	}
3176 
3177 	if (vcpu->kvm->arch.crypto.dea_kw)
3178 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3179 }
3180 
3181 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3182 {
3183 	free_page(vcpu->arch.sie_block->cbrlo);
3184 	vcpu->arch.sie_block->cbrlo = 0;
3185 }
3186 
3187 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3188 {
3189 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
3190 	if (!vcpu->arch.sie_block->cbrlo)
3191 		return -ENOMEM;
3192 	return 0;
3193 }
3194 
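/*
 * Propagate the configured CPU model (IBC and, with facility 7, the
 * facility list address) into the vcpu's SIE control block.
 */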
3195 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3196 {
3197 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3198 
3199 	vcpu->arch.sie_block->ibc = model->ibc;
3200 	if (test_kvm_facility(vcpu->kvm, 7))
3201 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3202 }
3203 
3204 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3205 {
3206 	int rc = 0;
3207 	u16 uvrc, uvrrc;
3208 
3209 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3210 						    CPUSTAT_SM |
3211 						    CPUSTAT_STOPPED);
3212 
3213 	if (test_kvm_facility(vcpu->kvm, 78))
3214 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3215 	else if (test_kvm_facility(vcpu->kvm, 8))
3216 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3217 
3218 	kvm_s390_vcpu_setup_model(vcpu);
3219 
3220 	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3221 	if (MACHINE_HAS_ESOP)
3222 		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3223 	if (test_kvm_facility(vcpu->kvm, 9))
3224 		vcpu->arch.sie_block->ecb |= ECB_SRSI;
3225 	if (test_kvm_facility(vcpu->kvm, 73))
3226 		vcpu->arch.sie_block->ecb |= ECB_TE;
3227 
3228 	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3229 		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3230 	if (test_kvm_facility(vcpu->kvm, 130))
3231 		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3232 	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3233 	if (sclp.has_cei)
3234 		vcpu->arch.sie_block->eca |= ECA_CEI;
3235 	if (sclp.has_ib)
3236 		vcpu->arch.sie_block->eca |= ECA_IB;
3237 	if (sclp.has_siif)
3238 		vcpu->arch.sie_block->eca |= ECA_SII;
3239 	if (sclp.has_sigpif)
3240 		vcpu->arch.sie_block->eca |= ECA_SIGPI;
3241 	if (test_kvm_facility(vcpu->kvm, 129)) {
3242 		vcpu->arch.sie_block->eca |= ECA_VX;
3243 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3244 	}
3245 	if (test_kvm_facility(vcpu->kvm, 139))
3246 		vcpu->arch.sie_block->ecd |= ECD_MEF;
3247 	if (test_kvm_facility(vcpu->kvm, 156))
3248 		vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3249 	if (vcpu->arch.sie_block->gd) {
3250 		vcpu->arch.sie_block->eca |= ECA_AIV;
3251 		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3252 			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3253 	}
3254 	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3255 					| SDNXC;
3256 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3257 
3258 	if (sclp.has_kss)
3259 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3260 	else
3261 		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3262 
3263 	if (vcpu->kvm->arch.use_cmma) {
3264 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
3265 		if (rc)
3266 			return rc;
3267 	}
3268 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3269 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3270 
3271 	vcpu->arch.sie_block->hpid = HPID_KVM;
3272 
3273 	kvm_s390_vcpu_crypto_setup(vcpu);
3274 
3275 	mutex_lock(&vcpu->kvm->lock);
3276 	if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3277 		rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3278 		if (rc)
3279 			kvm_s390_vcpu_unsetup_cmma(vcpu);
3280 	}
3281 	mutex_unlock(&vcpu->kvm->lock);
3282 
3283 	return rc;
3284 }
3285 
3286 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3287 {
3288 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3289 		return -EINVAL;
3290 	return 0;
3291 }
3292 
3293 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3294 {
3295 	struct sie_page *sie_page;
3296 	int rc;
3297 
3298 	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3299 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3300 	if (!sie_page)
3301 		return -ENOMEM;
3302 
3303 	vcpu->arch.sie_block = &sie_page->sie_block;
3304 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3305 
3306 	/* the real guest size will always be smaller than msl */
3307 	vcpu->arch.sie_block->mso = 0;
3308 	vcpu->arch.sie_block->msl = sclp.hamax;
3309 
3310 	vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3311 	spin_lock_init(&vcpu->arch.local_int.lock);
3312 	vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3313 	if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3314 		vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3315 	seqcount_init(&vcpu->arch.cputm_seqcount);
3316 
3317 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3318 	kvm_clear_async_pf_completion_queue(vcpu);
3319 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3320 				    KVM_SYNC_GPRS |
3321 				    KVM_SYNC_ACRS |
3322 				    KVM_SYNC_CRS |
3323 				    KVM_SYNC_ARCH0 |
3324 				    KVM_SYNC_PFAULT |
3325 				    KVM_SYNC_DIAG318;
3326 	kvm_s390_set_prefix(vcpu, 0);
3327 	if (test_kvm_facility(vcpu->kvm, 64))
3328 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3329 	if (test_kvm_facility(vcpu->kvm, 82))
3330 		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3331 	if (test_kvm_facility(vcpu->kvm, 133))
3332 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3333 	if (test_kvm_facility(vcpu->kvm, 156))
3334 		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3335 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
3336 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3337 	 */
3338 	if (MACHINE_HAS_VX)
3339 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3340 	else
3341 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3342 
3343 	if (kvm_is_ucontrol(vcpu->kvm)) {
3344 		rc = __kvm_ucontrol_vcpu_init(vcpu);
3345 		if (rc)
3346 			goto out_free_sie_block;
3347 	}
3348 
3349 	VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3350 		 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3351 	trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3352 
3353 	rc = kvm_s390_vcpu_setup(vcpu);
3354 	if (rc)
3355 		goto out_ucontrol_uninit;
3356 	return 0;
3357 
3358 out_ucontrol_uninit:
3359 	if (kvm_is_ucontrol(vcpu->kvm))
3360 		gmap_remove(vcpu->arch.gmap);
3361 out_free_sie_block:
3362 	free_page((unsigned long)(vcpu->arch.sie_block));
3363 	return rc;
3364 }
3365 
3366 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3367 {
3368 	return kvm_s390_vcpu_has_irq(vcpu, 0);
3369 }
3370 
3371 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3372 {
3373 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3374 }
3375 
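/*
 * Keep the vcpu from (re-)entering SIE: flag the SIE control block and
 * kick the vcpu out of SIE if it is currently running. Undone by
 * kvm_s390_vcpu_unblock().
 */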
3376 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3377 {
3378 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3379 	exit_sie(vcpu);
3380 }
3381 
3382 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3383 {
3384 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3385 }
3386 
3387 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3388 {
3389 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3390 	exit_sie(vcpu);
3391 }
3392 
3393 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3394 {
3395 	return atomic_read(&vcpu->arch.sie_block->prog20) &
3396 	       (PROG_BLOCK_SIE | PROG_REQUEST);
3397 }
3398 
3399 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3400 {
3401 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3402 }
3403 
3404 /*
3405  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3406  * If the CPU is not running (e.g. waiting as idle) the function will
3407  * return immediately. */
3408 void exit_sie(struct kvm_vcpu *vcpu)
3409 {
3410 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3411 	kvm_s390_vsie_kick(vcpu);
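	/*
	 * The SIE entry path sets PROG_IN_SIE in prog0c; busy wait until
	 * the cpu has dropped out of SIE.
	 */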
3412 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3413 		cpu_relax();
3414 }
3415 
3416 /* Kick a guest cpu out of SIE to process a request synchronously */
3417 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3418 {
3419 	kvm_make_request(req, vcpu);
3420 	kvm_s390_vcpu_request(vcpu);
3421 }
3422 
3423 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3424 			      unsigned long end)
3425 {
3426 	struct kvm *kvm = gmap->private;
3427 	struct kvm_vcpu *vcpu;
3428 	unsigned long prefix;
3429 	int i;
3430 
3431 	if (gmap_is_shadow(gmap))
3432 		return;
3433 	if (start >= 1UL << 31)
3434 		/* We are only interested in prefix pages */
3435 		return;
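	/*
	 * The prefix area (guest lowcore) consists of two consecutive 4K
	 * pages and always lies below 2 GB, hence the check above and the
	 * two-page match below.
	 */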
3436 	kvm_for_each_vcpu(i, vcpu, kvm) {
3437 		/* match against both prefix pages */
3438 		prefix = kvm_s390_get_prefix(vcpu);
3439 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3440 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3441 				   start, end);
3442 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3443 		}
3444 	}
3445 }
3446 
3447 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3448 {
3449 	/* do not poll if more than halt_poll_max_steal percent of the time was stolen */
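	/*
	 * avg_steal_timer is in CPU-timer units (4096 units per microsecond),
	 * so TICK_USEC << 12 is the length of one timer tick.
	 */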
3450 	if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3451 	    halt_poll_max_steal) {
3452 		vcpu->stat.halt_no_poll_steal++;
3453 		return true;
3454 	}
3455 	return false;
3456 }
3457 
3458 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3459 {
3460 	/* kvm common code refers to this, but never calls it */
3461 	BUG();
3462 	return 0;
3463 }
3464 
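/*
 * Handlers for the generic KVM_GET_ONE_REG/KVM_SET_ONE_REG vcpu ioctls.
 * From userspace, reading e.g. the CPU timer looks roughly like this
 * (vcpu_fd being the vcpu file descriptor):
 *
 *	__u64 val;
 *	struct kvm_one_reg reg = {
 *		.id   = KVM_REG_S390_CPU_TIMER,
 *		.addr = (__u64)&val,
 *	};
 *	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
 */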
3465 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3466 					   struct kvm_one_reg *reg)
3467 {
3468 	int r = -EINVAL;
3469 
3470 	switch (reg->id) {
3471 	case KVM_REG_S390_TODPR:
3472 		r = put_user(vcpu->arch.sie_block->todpr,
3473 			     (u32 __user *)reg->addr);
3474 		break;
3475 	case KVM_REG_S390_EPOCHDIFF:
3476 		r = put_user(vcpu->arch.sie_block->epoch,
3477 			     (u64 __user *)reg->addr);
3478 		break;
3479 	case KVM_REG_S390_CPU_TIMER:
3480 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
3481 			     (u64 __user *)reg->addr);
3482 		break;
3483 	case KVM_REG_S390_CLOCK_COMP:
3484 		r = put_user(vcpu->arch.sie_block->ckc,
3485 			     (u64 __user *)reg->addr);
3486 		break;
3487 	case KVM_REG_S390_PFTOKEN:
3488 		r = put_user(vcpu->arch.pfault_token,
3489 			     (u64 __user *)reg->addr);
3490 		break;
3491 	case KVM_REG_S390_PFCOMPARE:
3492 		r = put_user(vcpu->arch.pfault_compare,
3493 			     (u64 __user *)reg->addr);
3494 		break;
3495 	case KVM_REG_S390_PFSELECT:
3496 		r = put_user(vcpu->arch.pfault_select,
3497 			     (u64 __user *)reg->addr);
3498 		break;
3499 	case KVM_REG_S390_PP:
3500 		r = put_user(vcpu->arch.sie_block->pp,
3501 			     (u64 __user *)reg->addr);
3502 		break;
3503 	case KVM_REG_S390_GBEA:
3504 		r = put_user(vcpu->arch.sie_block->gbea,
3505 			     (u64 __user *)reg->addr);
3506 		break;
3507 	default:
3508 		break;
3509 	}
3510 
3511 	return r;
3512 }
3513 
3514 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3515 					   struct kvm_one_reg *reg)
3516 {
3517 	int r = -EINVAL;
3518 	__u64 val;
3519 
3520 	switch (reg->id) {
3521 	case KVM_REG_S390_TODPR:
3522 		r = get_user(vcpu->arch.sie_block->todpr,
3523 			     (u32 __user *)reg->addr);
3524 		break;
3525 	case KVM_REG_S390_EPOCHDIFF:
3526 		r = get_user(vcpu->arch.sie_block->epoch,
3527 			     (u64 __user *)reg->addr);
3528 		break;
3529 	case KVM_REG_S390_CPU_TIMER:
3530 		r = get_user(val, (u64 __user *)reg->addr);
3531 		if (!r)
3532 			kvm_s390_set_cpu_timer(vcpu, val);
3533 		break;
3534 	case KVM_REG_S390_CLOCK_COMP:
3535 		r = get_user(vcpu->arch.sie_block->ckc,
3536 			     (u64 __user *)reg->addr);
3537 		break;
3538 	case KVM_REG_S390_PFTOKEN:
3539 		r = get_user(vcpu->arch.pfault_token,
3540 			     (u64 __user *)reg->addr);
3541 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3542 			kvm_clear_async_pf_completion_queue(vcpu);
3543 		break;
3544 	case KVM_REG_S390_PFCOMPARE:
3545 		r = get_user(vcpu->arch.pfault_compare,
3546 			     (u64 __user *)reg->addr);
3547 		break;
3548 	case KVM_REG_S390_PFSELECT:
3549 		r = get_user(vcpu->arch.pfault_select,
3550 			     (u64 __user *)reg->addr);
3551 		break;
3552 	case KVM_REG_S390_PP:
3553 		r = get_user(vcpu->arch.sie_block->pp,
3554 			     (u64 __user *)reg->addr);
3555 		break;
3556 	case KVM_REG_S390_GBEA:
3557 		r = get_user(vcpu->arch.sie_block->gbea,
3558 			     (u64 __user *)reg->addr);
3559 		break;
3560 	default:
3561 		break;
3562 	}
3563 
3564 	return r;
3565 }
3566 
3567 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3568 {
3569 	vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3570 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3571 	memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3572 
3573 	kvm_clear_async_pf_completion_queue(vcpu);
3574 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3575 		kvm_s390_vcpu_stop(vcpu);
3576 	kvm_s390_clear_local_irqs(vcpu);
3577 }
3578 
3579 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3580 {
3581 	/* Initial reset is a superset of the normal reset */
3582 	kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3583 
3584 	/*
3585 	 * This equals the initial cpu reset in the POP, but we don't switch to ESA.
3586 	 * We not only reset the internal data, but also ...
3587 	 */
3588 	vcpu->arch.sie_block->gpsw.mask = 0;
3589 	vcpu->arch.sie_block->gpsw.addr = 0;
3590 	kvm_s390_set_prefix(vcpu, 0);
3591 	kvm_s390_set_cpu_timer(vcpu, 0);
3592 	vcpu->arch.sie_block->ckc = 0;
3593 	memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3594 	vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3595 	vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3596 
3597 	/* ... the data in sync regs */
3598 	memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3599 	vcpu->run->s.regs.ckc = 0;
3600 	vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3601 	vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3602 	vcpu->run->psw_addr = 0;
3603 	vcpu->run->psw_mask = 0;
3604 	vcpu->run->s.regs.todpr = 0;
3605 	vcpu->run->s.regs.cputm = 0;
3606 	vcpu->run->s.regs.ckc = 0;
3607 	vcpu->run->s.regs.pp = 0;
3608 	vcpu->run->s.regs.gbea = 1;
3609 	vcpu->run->s.regs.fpc = 0;
3610 	/*
3611 	 * Do not reset these registers in the protected case, as some of
3612 	 * them are overlaid and they are not accessible in this case
3613 	 * anyway.
3614 	 */
3615 	if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3616 		vcpu->arch.sie_block->gbea = 1;
3617 		vcpu->arch.sie_block->pp = 0;
3618 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3619 		vcpu->arch.sie_block->todpr = 0;
3620 	}
3621 }
3622 
3623 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3624 {
3625 	struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3626 
3627 	/* Clear reset is a superset of the initial reset */
3628 	kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3629 
3630 	memset(&regs->gprs, 0, sizeof(regs->gprs));
3631 	memset(&regs->vrs, 0, sizeof(regs->vrs));
3632 	memset(&regs->acrs, 0, sizeof(regs->acrs));
3633 	memset(&regs->gscb, 0, sizeof(regs->gscb));
3634 
3635 	regs->etoken = 0;
3636 	regs->etoken_extension = 0;
3637 }
3638 
3639 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3640 {
3641 	vcpu_load(vcpu);
3642 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3643 	vcpu_put(vcpu);
3644 	return 0;
3645 }
3646 
3647 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3648 {
3649 	vcpu_load(vcpu);
3650 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3651 	vcpu_put(vcpu);
3652 	return 0;
3653 }
3654 
3655 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3656 				  struct kvm_sregs *sregs)
3657 {
3658 	vcpu_load(vcpu);
3659 
3660 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3661 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3662 
3663 	vcpu_put(vcpu);
3664 	return 0;
3665 }
3666 
3667 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3668 				  struct kvm_sregs *sregs)
3669 {
3670 	vcpu_load(vcpu);
3671 
3672 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3673 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3674 
3675 	vcpu_put(vcpu);
3676 	return 0;
3677 }
3678 
3679 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3680 {
3681 	int ret = 0;
3682 
3683 	vcpu_load(vcpu);
3684 
3685 	if (test_fp_ctl(fpu->fpc)) {
3686 		ret = -EINVAL;
3687 		goto out;
3688 	}
3689 	vcpu->run->s.regs.fpc = fpu->fpc;
3690 	if (MACHINE_HAS_VX)
3691 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3692 				 (freg_t *) fpu->fprs);
3693 	else
3694 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3695 
3696 out:
3697 	vcpu_put(vcpu);
3698 	return ret;
3699 }
3700 
3701 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3702 {
3703 	vcpu_load(vcpu);
3704 
3705 	/* make sure we have the latest values */
3706 	save_fpu_regs();
3707 	if (MACHINE_HAS_VX)
3708 		convert_vx_to_fp((freg_t *) fpu->fprs,
3709 				 (__vector128 *) vcpu->run->s.regs.vrs);
3710 	else
3711 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3712 	fpu->fpc = vcpu->run->s.regs.fpc;
3713 
3714 	vcpu_put(vcpu);
3715 	return 0;
3716 }
3717 
3718 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3719 {
3720 	int rc = 0;
3721 
3722 	if (!is_vcpu_stopped(vcpu))
3723 		rc = -EBUSY;
3724 	else {
3725 		vcpu->run->psw_mask = psw.mask;
3726 		vcpu->run->psw_addr = psw.addr;
3727 	}
3728 	return rc;
3729 }
3730 
3731 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3732 				  struct kvm_translation *tr)
3733 {
3734 	return -EINVAL; /* not implemented yet */
3735 }
3736 
3737 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3738 			      KVM_GUESTDBG_USE_HW_BP | \
3739 			      KVM_GUESTDBG_ENABLE)
3740 
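/*
 * A userspace caller enables debugging roughly like this (vcpu_fd being
 * the vcpu file descriptor):
 *
 *	struct kvm_guest_debug dbg = {
 *		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
 *	};
 *	ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
 */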
3741 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3742 					struct kvm_guest_debug *dbg)
3743 {
3744 	int rc = 0;
3745 
3746 	vcpu_load(vcpu);
3747 
3748 	vcpu->guest_debug = 0;
3749 	kvm_s390_clear_bp_data(vcpu);
3750 
3751 	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3752 		rc = -EINVAL;
3753 		goto out;
3754 	}
3755 	if (!sclp.has_gpere) {
3756 		rc = -EINVAL;
3757 		goto out;
3758 	}
3759 
3760 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
3761 		vcpu->guest_debug = dbg->control;
3762 		/* enforce guest PER */
3763 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3764 
3765 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3766 			rc = kvm_s390_import_bp_data(vcpu, dbg);
3767 	} else {
3768 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3769 		vcpu->arch.guestdbg.last_bp = 0;
3770 	}
3771 
3772 	if (rc) {
3773 		vcpu->guest_debug = 0;
3774 		kvm_s390_clear_bp_data(vcpu);
3775 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3776 	}
3777 
3778 out:
3779 	vcpu_put(vcpu);
3780 	return rc;
3781 }
3782 
3783 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3784 				    struct kvm_mp_state *mp_state)
3785 {
3786 	int ret;
3787 
3788 	vcpu_load(vcpu);
3789 
3790 	/* CHECK_STOP and LOAD are not supported yet */
3791 	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3792 				      KVM_MP_STATE_OPERATING;
3793 
3794 	vcpu_put(vcpu);
3795 	return ret;
3796 }
3797 
3798 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3799 				    struct kvm_mp_state *mp_state)
3800 {
3801 	int rc = 0;
3802 
3803 	vcpu_load(vcpu);
3804 
3805 	/* user space knows about this interface - let it control the state */
3806 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3807 
3808 	switch (mp_state->mp_state) {
3809 	case KVM_MP_STATE_STOPPED:
3810 		rc = kvm_s390_vcpu_stop(vcpu);
3811 		break;
3812 	case KVM_MP_STATE_OPERATING:
3813 		rc = kvm_s390_vcpu_start(vcpu);
3814 		break;
3815 	case KVM_MP_STATE_LOAD:
3816 		if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3817 			rc = -ENXIO;
3818 			break;
3819 		}
3820 		rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3821 		break;
3822 	case KVM_MP_STATE_CHECK_STOP:
3823 		fallthrough;	/* CHECK_STOP and LOAD are not supported yet */
3824 	default:
3825 		rc = -ENXIO;
3826 	}
3827 
3828 	vcpu_put(vcpu);
3829 	return rc;
3830 }
3831 
3832 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3833 {
3834 	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3835 }
3836 
3837 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3838 {
3839 retry:
3840 	kvm_s390_vcpu_request_handled(vcpu);
3841 	if (!kvm_request_pending(vcpu))
3842 		return 0;
3843 	/*
3844 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3845 	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3846 	 * This ensures that the ipte instruction for this request has
3847 	 * already finished. We might race against a second unmapper that
3848 	 * wants to set the blocking bit. Lets just retry the request loop.
3849 	 * wants to set the blocking bit. Let's just retry the request loop.
3850 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3851 		int rc;
3852 		rc = gmap_mprotect_notify(vcpu->arch.gmap,
3853 					  kvm_s390_get_prefix(vcpu),
3854 					  PAGE_SIZE * 2, PROT_WRITE);
3855 		if (rc) {
3856 			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3857 			return rc;
3858 		}
3859 		goto retry;
3860 	}
3861 
3862 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3863 		vcpu->arch.sie_block->ihcpu = 0xffff;
3864 		goto retry;
3865 	}
3866 
3867 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3868 		if (!ibs_enabled(vcpu)) {
3869 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3870 			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3871 		}
3872 		goto retry;
3873 	}
3874 
3875 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3876 		if (ibs_enabled(vcpu)) {
3877 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3878 			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3879 		}
3880 		goto retry;
3881 	}
3882 
3883 	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3884 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3885 		goto retry;
3886 	}
3887 
3888 	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3889 		/*
3890 		 * Disable CMM virtualization; we will emulate the ESSA
3891 		 * instruction manually, in order to provide the additional
3892 		 * functionality needed for live migration.
3893 		 */
3894 		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3895 		goto retry;
3896 	}
3897 
3898 	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3899 		/*
3900 		 * Re-enable CMM virtualization if CMMA is available and
3901 		 * CMM has been used.
3902 		 */
3903 		if ((vcpu->kvm->arch.use_cmma) &&
3904 		    (vcpu->kvm->mm->context.uses_cmm))
3905 			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3906 		goto retry;
3907 	}
3908 
3909 	/* nothing to do, just clear the request */
3910 	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3911 	/* we left the vsie handler, nothing to do, just clear the request */
3912 	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3913 
3914 	return 0;
3915 }
3916 
3917 void kvm_s390_set_tod_clock(struct kvm *kvm,
3918 			    const struct kvm_s390_vm_tod_clock *gtod)
3919 {
3920 	struct kvm_vcpu *vcpu;
3921 	union tod_clock clk;
3922 	int i;
3923 
3924 	mutex_lock(&kvm->lock);
3925 	preempt_disable();
3926 
3927 	store_tod_clock_ext(&clk);
3928 
3929 	kvm->arch.epoch = gtod->tod - clk.tod;
3930 	kvm->arch.epdx = 0;
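	/*
	 * With the multiple-epoch facility (139) the guest TOD is the
	 * concatenation of epoch index and TOD, so borrow from the epoch
	 * index when the 64-bit subtraction above wrapped.
	 */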
3931 	if (test_kvm_facility(kvm, 139)) {
3932 		kvm->arch.epdx = gtod->epoch_idx - clk.ei;
3933 		if (kvm->arch.epoch > gtod->tod)
3934 			kvm->arch.epdx -= 1;
3935 	}
3936 
3937 	kvm_s390_vcpu_block_all(kvm);
3938 	kvm_for_each_vcpu(i, vcpu, kvm) {
3939 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3940 		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3941 	}
3942 
3943 	kvm_s390_vcpu_unblock_all(kvm);
3944 	preempt_enable();
3945 	mutex_unlock(&kvm->lock);
3946 }
3947 
3948 /**
3949  * kvm_arch_fault_in_page - fault-in guest page if necessary
3950  * @vcpu: The corresponding virtual cpu
3951  * @gpa: Guest physical address
3952  * @writable: Whether the page should be writable or not
3953  *
3954  * Make sure that a guest page has been faulted-in on the host.
3955  *
3956  * Return: Zero on success, negative error code otherwise.
3957  */
3958 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3959 {
3960 	return gmap_fault(vcpu->arch.gmap, gpa,
3961 			  writable ? FAULT_FLAG_WRITE : 0);
3962 }
3963 
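/*
 * Inject one half of the pfault handshake: a PFAULT_INIT external
 * interrupt on the vcpu when the host starts resolving a page
 * (start_token), or a floating PFAULT_DONE interrupt once the page
 * has been made available.
 */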
3964 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3965 				      unsigned long token)
3966 {
3967 	struct kvm_s390_interrupt inti;
3968 	struct kvm_s390_irq irq;
3969 
3970 	if (start_token) {
3971 		irq.u.ext.ext_params2 = token;
3972 		irq.type = KVM_S390_INT_PFAULT_INIT;
3973 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3974 	} else {
3975 		inti.type = KVM_S390_INT_PFAULT_DONE;
3976 		inti.parm64 = token;
3977 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3978 	}
3979 }
3980 
3981 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3982 				     struct kvm_async_pf *work)
3983 {
3984 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3985 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3986 
3987 	return true;
3988 }
3989 
3990 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3991 				 struct kvm_async_pf *work)
3992 {
3993 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3994 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3995 }
3996 
3997 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3998 			       struct kvm_async_pf *work)
3999 {
4000 	/* s390 will always inject the page directly */
4001 }
4002 
4003 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
4004 {
4005 	/*
4006 	 * s390 will always inject the page directly,
4007 	 * but we still want check_async_completion to clean up
4008 	 */
4009 	return true;
4010 }
4011 
4012 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
4013 {
4014 	hva_t hva;
4015 	struct kvm_arch_async_pf arch;
4016 
4017 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4018 		return false;
4019 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
4020 	    vcpu->arch.pfault_compare)
4021 		return false;
4022 	if (psw_extint_disabled(vcpu))
4023 		return false;
4024 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
4025 		return false;
4026 	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
4027 		return false;
4028 	if (!vcpu->arch.gmap->pfault_enabled)
4029 		return false;
4030 
4031 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
4032 	hva += current->thread.gmap_addr & ~PAGE_MASK;
4033 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
4034 		return false;
4035 
4036 	return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
4037 }
4038 
4039 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
4040 {
4041 	int rc, cpuflags;
4042 
4043 	/*
4044 	 * On s390 notifications for arriving pages will be delivered directly
4045 	 * to the guest, but the housekeeping for completed pfaults is
4046 	 * handled outside the worker.
4047 	 */
4048 	kvm_check_async_pf_completion(vcpu);
4049 
4050 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4051 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4052 
4053 	if (need_resched())
4054 		schedule();
4055 
4056 	if (!kvm_is_ucontrol(vcpu->kvm)) {
4057 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
4058 		if (rc)
4059 			return rc;
4060 	}
4061 
4062 	rc = kvm_s390_handle_requests(vcpu);
4063 	if (rc)
4064 		return rc;
4065 
4066 	if (guestdbg_enabled(vcpu)) {
4067 		kvm_s390_backup_guest_per_regs(vcpu);
4068 		kvm_s390_patch_guest_per_regs(vcpu);
4069 	}
4070 
4071 	clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
4072 
4073 	vcpu->arch.sie_block->icptcode = 0;
4074 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4075 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4076 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
4077 
4078 	return 0;
4079 }
4080 
4081 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4082 {
4083 	struct kvm_s390_pgm_info pgm_info = {
4084 		.code = PGM_ADDRESSING,
4085 	};
4086 	u8 opcode, ilen;
4087 	int rc;
4088 
4089 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4090 	trace_kvm_s390_sie_fault(vcpu);
4091 
4092 	/*
4093 	 * We want to inject an addressing exception, which is defined as a
4094 	 * suppressing or terminating exception. However, since we came here
4095 	 * by a DAT access exception, the PSW still points to the faulting
4096 	 * instruction since DAT exceptions are nullifying. So we've got
4097 	 * to look up the current opcode to get the length of the instruction
4098 	 * to be able to forward the PSW.
4099 	 */
4100 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4101 	ilen = insn_length(opcode);
4102 	if (rc < 0) {
4103 		return rc;
4104 	} else if (rc) {
4105 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
4106 		 * Forward by arbitrary ilc, injection will take care of
4107 		 * nullification if necessary.
4108 		 */
4109 		pgm_info = vcpu->arch.pgm;
4110 		ilen = 4;
4111 	}
4112 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4113 	kvm_s390_forward_psw(vcpu, ilen);
4114 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4115 }
4116 
4117 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4118 {
4119 	struct mcck_volatile_info *mcck_info;
4120 	struct sie_page *sie_page;
4121 
4122 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4123 		   vcpu->arch.sie_block->icptcode);
4124 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4125 
4126 	if (guestdbg_enabled(vcpu))
4127 		kvm_s390_restore_guest_per_regs(vcpu);
4128 
4129 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4130 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4131 
4132 	if (exit_reason == -EINTR) {
4133 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
4134 		sie_page = container_of(vcpu->arch.sie_block,
4135 					struct sie_page, sie_block);
4136 		mcck_info = &sie_page->mcck_info;
4137 		kvm_s390_reinject_machine_check(vcpu, mcck_info);
4138 		return 0;
4139 	}
4140 
4141 	if (vcpu->arch.sie_block->icptcode > 0) {
4142 		int rc = kvm_handle_sie_intercept(vcpu);
4143 
4144 		if (rc != -EOPNOTSUPP)
4145 			return rc;
4146 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4147 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4148 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4149 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4150 		return -EREMOTE;
4151 	} else if (exit_reason != -EFAULT) {
4152 		vcpu->stat.exit_null++;
4153 		return 0;
4154 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
4155 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4156 		vcpu->run->s390_ucontrol.trans_exc_code =
4157 						current->thread.gmap_addr;
4158 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
4159 		return -EREMOTE;
4160 	} else if (current->thread.gmap_pfault) {
4161 		trace_kvm_s390_major_guest_pfault(vcpu);
4162 		current->thread.gmap_pfault = 0;
4163 		if (kvm_arch_setup_async_pf(vcpu))
4164 			return 0;
4165 		vcpu->stat.pfault_sync++;
4166 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4167 	}
4168 	return vcpu_post_run_fault_in_sie(vcpu);
4169 }
4170 
4171 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4172 static int __vcpu_run(struct kvm_vcpu *vcpu)
4173 {
4174 	int rc, exit_reason;
4175 	struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4176 
4177 	/*
4178 	 * We try to hold kvm->srcu during most of vcpu_run (except when
4179 	 * running the guest), so that memslots (and other stuff) are protected.
4180 	 */
4181 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4182 
4183 	do {
4184 		rc = vcpu_pre_run(vcpu);
4185 		if (rc)
4186 			break;
4187 
4188 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4189 		/*
4190 		 * As PF_VCPU will be used in the fault handler, there must be
4191 		 * no uaccess between guest_enter and guest_exit.
4192 		 */
4193 		local_irq_disable();
4194 		guest_enter_irqoff();
4195 		__disable_cpu_timer_accounting(vcpu);
4196 		local_irq_enable();
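		/*
		 * For protected guests the GPR contents are exchanged with
		 * SIE through the pv_grregs area of the SIE page.
		 */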
4197 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4198 			memcpy(sie_page->pv_grregs,
4199 			       vcpu->run->s.regs.gprs,
4200 			       sizeof(sie_page->pv_grregs));
4201 		}
4202 		if (test_cpu_flag(CIF_FPU))
4203 			load_fpu_regs();
4204 		exit_reason = sie64a(vcpu->arch.sie_block,
4205 				     vcpu->run->s.regs.gprs);
4206 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4207 			memcpy(vcpu->run->s.regs.gprs,
4208 			       sie_page->pv_grregs,
4209 			       sizeof(sie_page->pv_grregs));
4210 			/*
4211 			 * We're not allowed to inject interrupts on intercepts
4212 			 * that leave the guest state in an "in-between" state
4213 			 * where the next SIE entry will do a continuation.
4214 			 * Fence interrupts in our "internal" PSW.
4215 			 */
4216 			if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4217 			    vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4218 				vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4219 			}
4220 		}
4221 		local_irq_disable();
4222 		__enable_cpu_timer_accounting(vcpu);
4223 		guest_exit_irqoff();
4224 		local_irq_enable();
4225 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4226 
4227 		rc = vcpu_post_run(vcpu, exit_reason);
4228 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4229 
4230 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4231 	return rc;
4232 }
4233 
4234 static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4235 {
4236 	struct kvm_run *kvm_run = vcpu->run;
4237 	struct runtime_instr_cb *riccb;
4238 	struct gs_cb *gscb;
4239 
4240 	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4241 	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4242 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4243 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4244 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4245 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4246 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4247 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4248 	}
4249 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4250 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4251 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4252 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4253 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4254 			kvm_clear_async_pf_completion_queue(vcpu);
4255 	}
4256 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4257 		vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4258 		vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4259 	}
4260 	/*
4261 	 * If userspace sets the riccb (e.g. after migration) to a valid state,
4262 	 * we should enable RI here instead of doing the lazy enablement.
4263 	 */
4264 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4265 	    test_kvm_facility(vcpu->kvm, 64) &&
4266 	    riccb->v &&
4267 	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4268 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4269 		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4270 	}
4271 	/*
4272 	 * If userspace sets the gscb (e.g. after migration) to non-zero,
4273 	 * we should enable GS here instead of doing the lazy enablement.
4274 	 */
4275 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4276 	    test_kvm_facility(vcpu->kvm, 133) &&
4277 	    gscb->gssm &&
4278 	    !vcpu->arch.gs_enabled) {
4279 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4280 		vcpu->arch.sie_block->ecb |= ECB_GS;
4281 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4282 		vcpu->arch.gs_enabled = 1;
4283 	}
4284 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4285 	    test_kvm_facility(vcpu->kvm, 82)) {
4286 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4287 		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4288 	}
4289 	if (MACHINE_HAS_GS) {
4290 		preempt_disable();
4291 		__ctl_set_bit(2, 4);
4292 		if (current->thread.gs_cb) {
4293 			vcpu->arch.host_gscb = current->thread.gs_cb;
4294 			save_gs_cb(vcpu->arch.host_gscb);
4295 		}
4296 		if (vcpu->arch.gs_enabled) {
4297 			current->thread.gs_cb = (struct gs_cb *)
4298 						&vcpu->run->s.regs.gscb;
4299 			restore_gs_cb(current->thread.gs_cb);
4300 		}
4301 		preempt_enable();
4302 	}
4303 	/* SIE will load etoken directly from SDNX and therefore kvm_run */
4304 }
4305 
4306 static void sync_regs(struct kvm_vcpu *vcpu)
4307 {
4308 	struct kvm_run *kvm_run = vcpu->run;
4309 
4310 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4311 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4312 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4313 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4314 		/* some control register changes require a tlb flush */
4315 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4316 	}
4317 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4318 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4319 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4320 	}
4321 	save_access_regs(vcpu->arch.host_acrs);
4322 	restore_access_regs(vcpu->run->s.regs.acrs);
4323 	/* save host (userspace) fprs/vrs */
4324 	save_fpu_regs();
4325 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4326 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4327 	if (MACHINE_HAS_VX)
4328 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4329 	else
4330 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4331 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4332 	if (test_fp_ctl(current->thread.fpu.fpc))
4333 		/* User space provided an invalid FPC, let's clear it */
4334 		current->thread.fpu.fpc = 0;
4335 
4336 	/* Sync fmt2 only data */
4337 	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4338 		sync_regs_fmt2(vcpu);
4339 	} else {
4340 		/*
4341 		 * In several places we have to modify our internal view to
4342 		 * not do things that are disallowed by the ultravisor. For
4343 		 * example we must not inject interrupts after specific exits
4344 		 * (e.g. 112 prefix page not secure). We do this by turning
4345 		 * off the machine check, external and I/O interrupt bits
4346 		 * of our PSW copy. To avoid getting validity intercepts, we
4347 		 * only accept the condition code from userspace.
4348 		 */
4349 		vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4350 		vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4351 						   PSW_MASK_CC;
4352 	}
4353 
4354 	kvm_run->kvm_dirty_regs = 0;
4355 }
4356 
4357 static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4358 {
4359 	struct kvm_run *kvm_run = vcpu->run;
4360 
4361 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4362 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4363 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4364 	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4365 	kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4366 	if (MACHINE_HAS_GS) {
4367 		preempt_disable();
4368 		__ctl_set_bit(2, 4);
4369 		if (vcpu->arch.gs_enabled)
4370 			save_gs_cb(current->thread.gs_cb);
4371 		current->thread.gs_cb = vcpu->arch.host_gscb;
4372 		restore_gs_cb(vcpu->arch.host_gscb);
4373 		if (!vcpu->arch.host_gscb)
4374 			__ctl_clear_bit(2, 4);
4375 		vcpu->arch.host_gscb = NULL;
4376 		preempt_enable();
4377 	}
4378 	/* SIE will save etoken directly into SDNX and therefore kvm_run */
4379 }
4380 
4381 static void store_regs(struct kvm_vcpu *vcpu)
4382 {
4383 	struct kvm_run *kvm_run = vcpu->run;
4384 
4385 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4386 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4387 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4388 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4389 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4390 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4391 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4392 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4393 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4394 	save_access_regs(vcpu->run->s.regs.acrs);
4395 	restore_access_regs(vcpu->arch.host_acrs);
4396 	/* Save guest register state */
4397 	save_fpu_regs();
4398 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4399 	/* Restore will be done lazily at return */
4400 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4401 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4402 	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4403 		store_regs_fmt2(vcpu);
4404 }
4405 
4406 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4407 {
4408 	struct kvm_run *kvm_run = vcpu->run;
4409 	int rc;
4410 
4411 	if (kvm_run->immediate_exit)
4412 		return -EINTR;
4413 
4414 	if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4415 	    kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4416 		return -EINVAL;
4417 
4418 	vcpu_load(vcpu);
4419 
4420 	if (guestdbg_exit_pending(vcpu)) {
4421 		kvm_s390_prepare_debug_exit(vcpu);
4422 		rc = 0;
4423 		goto out;
4424 	}
4425 
4426 	kvm_sigset_activate(vcpu);
4427 
4428 	/*
4429 	 * no need to check the return value of vcpu_start as it can only fail
4430 	 * for protvirt, and protvirt implies user controlled cpu state
4431 	 */
4432 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4433 		kvm_s390_vcpu_start(vcpu);
4434 	} else if (is_vcpu_stopped(vcpu)) {
4435 		pr_err_ratelimited("can't run stopped vcpu %d\n",
4436 				   vcpu->vcpu_id);
4437 		rc = -EINVAL;
4438 		goto out;
4439 	}
4440 
4441 	sync_regs(vcpu);
4442 	enable_cpu_timer_accounting(vcpu);
4443 
4444 	might_fault();
4445 	rc = __vcpu_run(vcpu);
4446 
4447 	if (signal_pending(current) && !rc) {
4448 		kvm_run->exit_reason = KVM_EXIT_INTR;
4449 		rc = -EINTR;
4450 	}
4451 
4452 	if (guestdbg_exit_pending(vcpu) && !rc)  {
4453 		kvm_s390_prepare_debug_exit(vcpu);
4454 		rc = 0;
4455 	}
4456 
4457 	if (rc == -EREMOTE) {
4458 		/* userspace support is needed, kvm_run has been prepared */
4459 		rc = 0;
4460 	}
4461 
4462 	disable_cpu_timer_accounting(vcpu);
4463 	store_regs(vcpu);
4464 
4465 	kvm_sigset_deactivate(vcpu);
4466 
4467 	vcpu->stat.exit_userspace++;
4468 out:
4469 	vcpu_put(vcpu);
4470 	return rc;
4471 }
4472 
4473 /*
4474  * store status at address
4475  * we have two special cases:
4476  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4477  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4478  */
4479 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4480 {
4481 	unsigned char archmode = 1;
4482 	freg_t fprs[NUM_FPRS];
4483 	unsigned int px;
4484 	u64 clkcomp, cputm;
4485 	int rc;
4486 
4487 	px = kvm_s390_get_prefix(vcpu);
4488 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4489 		if (write_guest_abs(vcpu, 163, &archmode, 1))
4490 			return -EFAULT;
4491 		gpa = 0;
4492 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4493 		if (write_guest_real(vcpu, 163, &archmode, 1))
4494 			return -EFAULT;
4495 		gpa = px;
4496 	} else
4497 		gpa -= __LC_FPREGS_SAVE_AREA;
4498 
4499 	/* manually convert vector registers if necessary */
4500 	if (MACHINE_HAS_VX) {
4501 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4502 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4503 				     fprs, 128);
4504 	} else {
4505 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4506 				     vcpu->run->s.regs.fprs, 128);
4507 	}
4508 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4509 			      vcpu->run->s.regs.gprs, 128);
4510 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4511 			      &vcpu->arch.sie_block->gpsw, 16);
4512 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4513 			      &px, 4);
4514 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4515 			      &vcpu->run->s.regs.fpc, 4);
4516 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4517 			      &vcpu->arch.sie_block->todpr, 4);
4518 	cputm = kvm_s390_get_cpu_timer(vcpu);
4519 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4520 			      &cputm, 8);
4521 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
4522 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4523 			      &clkcomp, 8);
4524 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4525 			      &vcpu->run->s.regs.acrs, 64);
4526 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4527 			      &vcpu->arch.sie_block->gcr, 128);
4528 	return rc ? -EFAULT : 0;
4529 }
4530 
4531 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4532 {
4533 	/*
4534 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4535 	 * switch in the run ioctl. Let's update our copies before we save
4536 	 * them into the save area.
4537 	 */
4538 	save_fpu_regs();
4539 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4540 	save_access_regs(vcpu->run->s.regs.acrs);
4541 
4542 	return kvm_s390_store_status_unloaded(vcpu, addr);
4543 }
4544 
4545 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4546 {
4547 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4548 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4549 }
4550 
4551 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4552 {
4553 	unsigned int i;
4554 	struct kvm_vcpu *vcpu;
4555 
4556 	kvm_for_each_vcpu(i, vcpu, kvm) {
4557 		__disable_ibs_on_vcpu(vcpu);
4558 	}
4559 }
4560 
4561 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4562 {
4563 	if (!sclp.has_ibs)
4564 		return;
4565 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4566 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4567 }
4568 
4569 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4570 {
4571 	int i, online_vcpus, r = 0, started_vcpus = 0;
4572 
4573 	if (!is_vcpu_stopped(vcpu))
4574 		return 0;
4575 
4576 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4577 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4578 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4579 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4580 
4581 	/* Let's tell the UV that we want to change into the operating state */
4582 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4583 		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4584 		if (r) {
4585 			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4586 			return r;
4587 		}
4588 	}
4589 
4590 	for (i = 0; i < online_vcpus; i++) {
4591 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4592 			started_vcpus++;
4593 	}
4594 
4595 	if (started_vcpus == 0) {
4596 		/* we're the only active VCPU -> speed it up */
4597 		__enable_ibs_on_vcpu(vcpu);
4598 	} else if (started_vcpus == 1) {
4599 		/*
4600 		 * As we are starting a second VCPU, we have to disable
4601 		 * the IBS facility on all VCPUs to remove potentially
4602 		 * outstanding ENABLE requests.
4603 		 */
4604 		__disable_ibs_on_all_vcpus(vcpu->kvm);
4605 	}
4606 
4607 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4608 	/*
4609 	 * The real PSW might have changed due to a RESTART interpreted by the
4610 	 * ultravisor. We block all interrupts and let the next sie exit
4611 	 * refresh our view.
4612 	 */
4613 	if (kvm_s390_pv_cpu_is_protected(vcpu))
4614 		vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4615 	/*
4616 	 * Another VCPU might have used IBS while we were offline.
4617 	 * Let's play safe and flush the VCPU at startup.
4618 	 */
4619 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4620 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4621 	return 0;
4622 }
4623 
4624 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4625 {
4626 	int i, online_vcpus, r = 0, started_vcpus = 0;
4627 	struct kvm_vcpu *started_vcpu = NULL;
4628 
4629 	if (is_vcpu_stopped(vcpu))
4630 		return 0;
4631 
4632 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4633 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4634 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4635 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4636 
4637 	/* Let's tell the UV that we want to change into the stopped state */
4638 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4639 		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4640 		if (r) {
4641 			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4642 			return r;
4643 		}
4644 	}
4645 
4646 	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
4647 	kvm_s390_clear_stop_irq(vcpu);
4648 
4649 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4650 	__disable_ibs_on_vcpu(vcpu);
4651 
4652 	for (i = 0; i < online_vcpus; i++) {
4653 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4654 			started_vcpus++;
4655 			started_vcpu = vcpu->kvm->vcpus[i];
4656 		}
4657 	}
4658 
4659 	if (started_vcpus == 1) {
4660 		/*
4661 		 * As we only have one VCPU left, we want to enable the
4662 		 * IBS facility for that VCPU to speed it up.
4663 		 */
4664 		__enable_ibs_on_vcpu(started_vcpu);
4665 	}
4666 
4667 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4668 	return 0;
4669 }
4670 
4671 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4672 				     struct kvm_enable_cap *cap)
4673 {
4674 	int r;
4675 
4676 	if (cap->flags)
4677 		return -EINVAL;
4678 
4679 	switch (cap->cap) {
4680 	case KVM_CAP_S390_CSS_SUPPORT:
4681 		if (!vcpu->kvm->arch.css_support) {
4682 			vcpu->kvm->arch.css_support = 1;
4683 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4684 			trace_kvm_s390_enable_css(vcpu->kvm);
4685 		}
4686 		r = 0;
4687 		break;
4688 	default:
4689 		r = -EINVAL;
4690 		break;
4691 	}
4692 	return r;
4693 }
4694 
4695 static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
4696 				   struct kvm_s390_mem_op *mop)
4697 {
4698 	void __user *uaddr = (void __user *)mop->buf;
4699 	int r = 0;
4700 
4701 	if (mop->flags || !mop->size)
4702 		return -EINVAL;
4703 	if (mop->size + mop->sida_offset < mop->size)
4704 		return -EINVAL;
4705 	if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4706 		return -E2BIG;
4707 
4708 	switch (mop->op) {
4709 	case KVM_S390_MEMOP_SIDA_READ:
4710 		if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4711 				 mop->sida_offset), mop->size))
4712 			r = -EFAULT;
4713 
4714 		break;
4715 	case KVM_S390_MEMOP_SIDA_WRITE:
4716 		if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4717 				   mop->sida_offset), uaddr, mop->size))
4718 			r = -EFAULT;
4719 		break;
4720 	}
4721 	return r;
4722 }
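
/*
 * Access guest memory by logical address on behalf of userspace.
 * A KVM_S390_MEM_OP caller might do something like this (vcpu_fd,
 * guest_addr, len and buffer being caller-provided):
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = guest_addr,
 *		.size  = len,
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)buffer,
 *		.ar    = 0,
 *	};
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 */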
4723 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4724 				  struct kvm_s390_mem_op *mop)
4725 {
4726 	void __user *uaddr = (void __user *)mop->buf;
4727 	void *tmpbuf = NULL;
4728 	int r = 0;
4729 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4730 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
4731 
4732 	if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4733 		return -EINVAL;
4734 
4735 	if (mop->size > MEM_OP_MAX_SIZE)
4736 		return -E2BIG;
4737 
4738 	if (kvm_s390_pv_cpu_is_protected(vcpu))
4739 		return -EINVAL;
4740 
4741 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4742 		tmpbuf = vmalloc(mop->size);
4743 		if (!tmpbuf)
4744 			return -ENOMEM;
4745 	}
4746 
4747 	switch (mop->op) {
4748 	case KVM_S390_MEMOP_LOGICAL_READ:
4749 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4750 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4751 					    mop->size, GACC_FETCH);
4752 			break;
4753 		}
4754 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4755 		if (r == 0) {
4756 			if (copy_to_user(uaddr, tmpbuf, mop->size))
4757 				r = -EFAULT;
4758 		}
4759 		break;
4760 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4761 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4762 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4763 					    mop->size, GACC_STORE);
4764 			break;
4765 		}
4766 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4767 			r = -EFAULT;
4768 			break;
4769 		}
4770 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4771 		break;
4772 	}
4773 
4774 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4775 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4776 
4777 	vfree(tmpbuf);
4778 	return r;
4779 }
4780 
4781 static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
4782 				      struct kvm_s390_mem_op *mop)
4783 {
4784 	int r, srcu_idx;
4785 
4786 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4787 
4788 	switch (mop->op) {
4789 	case KVM_S390_MEMOP_LOGICAL_READ:
4790 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4791 		r = kvm_s390_guest_mem_op(vcpu, mop);
4792 		break;
4793 	case KVM_S390_MEMOP_SIDA_READ:
4794 	case KVM_S390_MEMOP_SIDA_WRITE:
4795 		/* we are locked against sida going away by the vcpu->mutex */
4796 		r = kvm_s390_guest_sida_op(vcpu, mop);
4797 		break;
4798 	default:
4799 		r = -EINVAL;
4800 	}
4801 
4802 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4803 	return r;
4804 }
4805 
4806 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4807 			       unsigned int ioctl, unsigned long arg)
4808 {
4809 	struct kvm_vcpu *vcpu = filp->private_data;
4810 	void __user *argp = (void __user *)arg;
4811 
4812 	switch (ioctl) {
4813 	case KVM_S390_IRQ: {
4814 		struct kvm_s390_irq s390irq;
4815 
4816 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4817 			return -EFAULT;
4818 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4819 	}
4820 	case KVM_S390_INTERRUPT: {
4821 		struct kvm_s390_interrupt s390int;
4822 		struct kvm_s390_irq s390irq = {};
4823 
4824 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
4825 			return -EFAULT;
4826 		if (s390int_to_s390irq(&s390int, &s390irq))
4827 			return -EINVAL;
4828 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4829 	}
4830 	}
4831 	return -ENOIOCTLCMD;
4832 }
4833 
4834 long kvm_arch_vcpu_ioctl(struct file *filp,
4835 			 unsigned int ioctl, unsigned long arg)
4836 {
4837 	struct kvm_vcpu *vcpu = filp->private_data;
4838 	void __user *argp = (void __user *)arg;
4839 	int idx;
4840 	long r;
4841 	u16 rc, rrc;
4842 
4843 	vcpu_load(vcpu);
4844 
4845 	switch (ioctl) {
4846 	case KVM_S390_STORE_STATUS:
4847 		idx = srcu_read_lock(&vcpu->kvm->srcu);
4848 		r = kvm_s390_store_status_unloaded(vcpu, arg);
4849 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
4850 		break;
4851 	case KVM_S390_SET_INITIAL_PSW: {
4852 		psw_t psw;
4853 
4854 		r = -EFAULT;
4855 		if (copy_from_user(&psw, argp, sizeof(psw)))
4856 			break;
4857 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4858 		break;
4859 	}
4860 	case KVM_S390_CLEAR_RESET:
4861 		r = 0;
4862 		kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4863 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4864 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4865 					  UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4866 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4867 				   rc, rrc);
4868 		}
4869 		break;
4870 	case KVM_S390_INITIAL_RESET:
4871 		r = 0;
4872 		kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4873 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4874 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4875 					  UVC_CMD_CPU_RESET_INITIAL,
4876 					  &rc, &rrc);
4877 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4878 				   rc, rrc);
4879 		}
4880 		break;
4881 	case KVM_S390_NORMAL_RESET:
4882 		r = 0;
4883 		kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4884 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4885 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4886 					  UVC_CMD_CPU_RESET, &rc, &rrc);
4887 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4888 				   rc, rrc);
4889 		}
4890 		break;
4891 	case KVM_SET_ONE_REG:
4892 	case KVM_GET_ONE_REG: {
4893 		struct kvm_one_reg reg;
4894 		r = -EINVAL;
4895 		if (kvm_s390_pv_cpu_is_protected(vcpu))
4896 			break;
4897 		r = -EFAULT;
4898 		if (copy_from_user(&reg, argp, sizeof(reg)))
4899 			break;
4900 		if (ioctl == KVM_SET_ONE_REG)
4901 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4902 		else
4903 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4904 		break;
4905 	}
4906 #ifdef CONFIG_KVM_S390_UCONTROL
4907 	case KVM_S390_UCAS_MAP: {
4908 		struct kvm_s390_ucas_mapping ucasmap;
4909 
4910 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4911 			r = -EFAULT;
4912 			break;
4913 		}
4914 
4915 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4916 			r = -EINVAL;
4917 			break;
4918 		}
4919 
4920 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4921 				     ucasmap.vcpu_addr, ucasmap.length);
4922 		break;
4923 	}
4924 	case KVM_S390_UCAS_UNMAP: {
4925 		struct kvm_s390_ucas_mapping ucasmap;
4926 
4927 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4928 			r = -EFAULT;
4929 			break;
4930 		}
4931 
4932 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4933 			r = -EINVAL;
4934 			break;
4935 		}
4936 
4937 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4938 			ucasmap.length);
4939 		break;
4940 	}
4941 #endif
4942 	case KVM_S390_VCPU_FAULT: {
4943 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
4944 		break;
4945 	}
4946 	case KVM_ENABLE_CAP:
4947 	{
4948 		struct kvm_enable_cap cap;
4949 		r = -EFAULT;
4950 		if (copy_from_user(&cap, argp, sizeof(cap)))
4951 			break;
4952 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4953 		break;
4954 	}
4955 	case KVM_S390_MEM_OP: {
4956 		struct kvm_s390_mem_op mem_op;
4957 
4958 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4959 			r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
4960 		else
4961 			r = -EFAULT;
4962 		break;
4963 	}
4964 	case KVM_S390_SET_IRQ_STATE: {
4965 		struct kvm_s390_irq_state irq_state;
4966 
4967 		r = -EFAULT;
4968 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4969 			break;
4970 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4971 		    irq_state.len == 0 ||
4972 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4973 			r = -EINVAL;
4974 			break;
4975 		}
4976 		/* do not use irq_state.flags, it will break old QEMUs */
4977 		r = kvm_s390_set_irq_state(vcpu,
4978 					   (void __user *) irq_state.buf,
4979 					   irq_state.len);
4980 		break;
4981 	}
4982 	case KVM_S390_GET_IRQ_STATE: {
4983 		struct kvm_s390_irq_state irq_state;
4984 
4985 		r = -EFAULT;
4986 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4987 			break;
4988 		if (irq_state.len == 0) {
4989 			r = -EINVAL;
4990 			break;
4991 		}
4992 		/* do not use irq_state.flags, it will break old QEMUs */
4993 		r = kvm_s390_get_irq_state(vcpu,
4994 					   (__u8 __user *)  irq_state.buf,
4995 					   irq_state.len);
4996 		break;
4997 	}
4998 	default:
4999 		r = -ENOTTY;
5000 	}
5001 
5002 	vcpu_put(vcpu);
5003 	return r;
5004 }
5005 
5006 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
5007 {
5008 #ifdef CONFIG_KVM_S390_UCONTROL
5009 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
5010 		 && (kvm_is_ucontrol(vcpu->kvm))) {
5011 		vmf->page = virt_to_page(vcpu->arch.sie_block);
5012 		get_page(vmf->page);
5013 		return 0;
5014 	}
5015 #endif
5016 	return VM_FAULT_SIGBUS;
5017 }
5018 
5019 /* Section: memory related */
5020 int kvm_arch_prepare_memory_region(struct kvm *kvm,
5021 				   struct kvm_memory_slot *memslot,
5022 				   const struct kvm_userspace_memory_region *mem,
5023 				   enum kvm_mr_change change)
5024 {
5025 	/* A few sanity checks. Memory slots have to start and end at a segment
5026 	   boundary (1 MB). The memory in userland may be fragmented across
5027 	   various vmas. It is okay to mmap() and munmap() parts of this slot
5028 	   at any time after this call. */
5029 
5030 	if (mem->userspace_addr & 0xffffful)
5031 		return -EINVAL;
5032 
5033 	if (mem->memory_size & 0xffffful)
5034 		return -EINVAL;
5035 
5036 	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
5037 		return -EINVAL;
5038 
5039 	/* When we are protected, we should not change the memory slots */
5040 	if (kvm_s390_pv_get_handle(kvm))
5041 		return -EINVAL;
5042 	return 0;
5043 }
5044 
5045 void kvm_arch_commit_memory_region(struct kvm *kvm,
5046 				const struct kvm_userspace_memory_region *mem,
5047 				struct kvm_memory_slot *old,
5048 				const struct kvm_memory_slot *new,
5049 				enum kvm_mr_change change)
5050 {
5051 	int rc = 0;
5052 
5053 	switch (change) {
5054 	case KVM_MR_DELETE:
5055 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5056 					old->npages * PAGE_SIZE);
5057 		break;
5058 	case KVM_MR_MOVE:
5059 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5060 					old->npages * PAGE_SIZE);
5061 		if (rc)
5062 			break;
5063 		fallthrough;
5064 	case KVM_MR_CREATE:
5065 		rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
5066 				      mem->guest_phys_addr, mem->memory_size);
5067 		break;
5068 	case KVM_MR_FLAGS_ONLY:
5069 		break;
5070 	default:
5071 		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5072 	}
5073 	if (rc)
5074 		pr_warn("failed to commit memory region\n");
5075 	return;
5076 }
5077 
5078 static inline unsigned long nonhyp_mask(int i)
5079 {
5080 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5081 
5082 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5083 }
5084 
5085 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
5086 {
5087 	vcpu->valid_wakeup = false;
5088 }
5089 
5090 static int __init kvm_s390_init(void)
5091 {
5092 	int i;
5093 
5094 	if (!sclp.has_sief2) {
5095 		pr_info("SIE is not available\n");
5096 		return -ENODEV;
5097 	}
5098 
5099 	if (nested && hpage) {
5100 		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5101 		return -EINVAL;
5102 	}
5103 
5104 	for (i = 0; i < 16; i++)
5105 		kvm_s390_fac_base[i] |=
5106 			stfle_fac_list[i] & nonhyp_mask(i);
5107 
5108 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5109 }
5110 
5111 static void __exit kvm_s390_exit(void)
5112 {
5113 	kvm_exit();
5114 }
5115 
5116 module_init(kvm_s390_init);
5117 module_exit(kvm_s390_exit);
5118 
5119 /*
5120  * Enable autoloading of the kvm module.
5121  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5122  * since x86 takes a different approach.
5123  */
5124 #include <linux/miscdevice.h>
5125 MODULE_ALIAS_MISCDEV(KVM_MINOR);
5126 MODULE_ALIAS("devname:kvm");
5127