xref: /openbmc/linux/arch/s390/kvm/kvm-s390.c (revision c6fbbf1e)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2020
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
10  *               Jason J. Herne <jjherne@us.ibm.com>
11  */
12 
13 #define KMSG_COMPONENT "kvm-s390"
14 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
15 
16 #include <linux/compiler.h>
17 #include <linux/err.h>
18 #include <linux/fs.h>
19 #include <linux/hrtimer.h>
20 #include <linux/init.h>
21 #include <linux/kvm.h>
22 #include <linux/kvm_host.h>
23 #include <linux/mman.h>
24 #include <linux/module.h>
25 #include <linux/moduleparam.h>
26 #include <linux/random.h>
27 #include <linux/slab.h>
28 #include <linux/timer.h>
29 #include <linux/vmalloc.h>
30 #include <linux/bitmap.h>
31 #include <linux/sched/signal.h>
32 #include <linux/string.h>
33 #include <linux/pgtable.h>
34 
35 #include <asm/asm-offsets.h>
36 #include <asm/lowcore.h>
37 #include <asm/stp.h>
38 #include <asm/gmap.h>
39 #include <asm/nmi.h>
40 #include <asm/switch_to.h>
41 #include <asm/isc.h>
42 #include <asm/sclp.h>
43 #include <asm/cpacf.h>
44 #include <asm/timex.h>
45 #include <asm/ap.h>
46 #include <asm/uv.h>
47 #include <asm/fpu/api.h>
48 #include "kvm-s390.h"
49 #include "gaccess.h"
50 
51 #define CREATE_TRACE_POINTS
52 #include "trace.h"
53 #include "trace-s390.h"
54 
#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
/* NOTE(review): presumably the number of vcpu-local interrupt slots - confirm */
#define LOCAL_IRQS 32
/* Byte size of a buffer holding KVM_MAX_VCPUS + LOCAL_IRQS kvm_s390_irq entries */
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))
59 
/*
 * Descriptor table for the per-VM statistics: the generic KVM VM entries
 * followed by the VM-scoped inject_* counters listed below.
 */
const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
	KVM_GENERIC_VM_STATS(),
	STATS_DESC_COUNTER(VM, inject_io),
	STATS_DESC_COUNTER(VM, inject_float_mchk),
	STATS_DESC_COUNTER(VM, inject_pfault_done),
	STATS_DESC_COUNTER(VM, inject_service_signal),
	STATS_DESC_COUNTER(VM, inject_virtio)
};
68 
/*
 * Layout header for the per-VM statistics: the id string starts right after
 * the header, the descriptors start KVM_STATS_NAME_SIZE bytes after that, and
 * the data starts behind the descriptor table (kvm_vm_stats_desc).
 */
const struct kvm_stats_header kvm_vm_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vm_stats_desc),
};
77 
/*
 * Descriptor table for the per-VCPU statistics: the generic KVM VCPU entries
 * followed by the VCPU-scoped counters listed below (exit_*, deliver_*,
 * inject_*, instruction_* and a few diag 9c / pfault counters).
 */
const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
	KVM_GENERIC_VCPU_STATS(),
	STATS_DESC_COUNTER(VCPU, exit_userspace),
	STATS_DESC_COUNTER(VCPU, exit_null),
	STATS_DESC_COUNTER(VCPU, exit_external_request),
	STATS_DESC_COUNTER(VCPU, exit_io_request),
	STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
	STATS_DESC_COUNTER(VCPU, exit_stop_request),
	STATS_DESC_COUNTER(VCPU, exit_validity),
	STATS_DESC_COUNTER(VCPU, exit_instruction),
	STATS_DESC_COUNTER(VCPU, exit_pei),
	STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
	STATS_DESC_COUNTER(VCPU, instruction_lctl),
	STATS_DESC_COUNTER(VCPU, instruction_lctlg),
	STATS_DESC_COUNTER(VCPU, instruction_stctl),
	STATS_DESC_COUNTER(VCPU, instruction_stctg),
	STATS_DESC_COUNTER(VCPU, exit_program_interruption),
	STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
	STATS_DESC_COUNTER(VCPU, exit_operation_exception),
	STATS_DESC_COUNTER(VCPU, deliver_ckc),
	STATS_DESC_COUNTER(VCPU, deliver_cputm),
	STATS_DESC_COUNTER(VCPU, deliver_external_call),
	STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
	STATS_DESC_COUNTER(VCPU, deliver_service_signal),
	STATS_DESC_COUNTER(VCPU, deliver_virtio),
	STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
	STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
	STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
	STATS_DESC_COUNTER(VCPU, deliver_program),
	STATS_DESC_COUNTER(VCPU, deliver_io),
	STATS_DESC_COUNTER(VCPU, deliver_machine_check),
	STATS_DESC_COUNTER(VCPU, exit_wait_state),
	STATS_DESC_COUNTER(VCPU, inject_ckc),
	STATS_DESC_COUNTER(VCPU, inject_cputm),
	STATS_DESC_COUNTER(VCPU, inject_external_call),
	STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
	STATS_DESC_COUNTER(VCPU, inject_mchk),
	STATS_DESC_COUNTER(VCPU, inject_pfault_init),
	STATS_DESC_COUNTER(VCPU, inject_program),
	STATS_DESC_COUNTER(VCPU, inject_restart),
	STATS_DESC_COUNTER(VCPU, inject_set_prefix),
	STATS_DESC_COUNTER(VCPU, inject_stop_signal),
	STATS_DESC_COUNTER(VCPU, instruction_epsw),
	STATS_DESC_COUNTER(VCPU, instruction_gs),
	STATS_DESC_COUNTER(VCPU, instruction_io_other),
	STATS_DESC_COUNTER(VCPU, instruction_lpsw),
	STATS_DESC_COUNTER(VCPU, instruction_lpswe),
	STATS_DESC_COUNTER(VCPU, instruction_pfmf),
	STATS_DESC_COUNTER(VCPU, instruction_ptff),
	STATS_DESC_COUNTER(VCPU, instruction_sck),
	STATS_DESC_COUNTER(VCPU, instruction_sckpf),
	STATS_DESC_COUNTER(VCPU, instruction_stidp),
	STATS_DESC_COUNTER(VCPU, instruction_spx),
	STATS_DESC_COUNTER(VCPU, instruction_stpx),
	STATS_DESC_COUNTER(VCPU, instruction_stap),
	STATS_DESC_COUNTER(VCPU, instruction_iske),
	STATS_DESC_COUNTER(VCPU, instruction_ri),
	STATS_DESC_COUNTER(VCPU, instruction_rrbe),
	STATS_DESC_COUNTER(VCPU, instruction_sske),
	STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
	STATS_DESC_COUNTER(VCPU, instruction_stsi),
	STATS_DESC_COUNTER(VCPU, instruction_stfl),
	STATS_DESC_COUNTER(VCPU, instruction_tb),
	STATS_DESC_COUNTER(VCPU, instruction_tpi),
	STATS_DESC_COUNTER(VCPU, instruction_tprot),
	STATS_DESC_COUNTER(VCPU, instruction_tsch),
	STATS_DESC_COUNTER(VCPU, instruction_sie),
	STATS_DESC_COUNTER(VCPU, instruction_essa),
	STATS_DESC_COUNTER(VCPU, instruction_sthyi),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
	STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
	STATS_DESC_COUNTER(VCPU, diag_9c_forward),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
	STATS_DESC_COUNTER(VCPU, pfault_sync)
};
174 
/*
 * Layout header for the per-VCPU statistics: the id string starts right after
 * the header, the descriptors start KVM_STATS_NAME_SIZE bytes after that, and
 * the data starts behind the descriptor table (kvm_vcpu_stats_desc).
 */
const struct kvm_stats_header kvm_vcpu_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vcpu_stats_desc),
};
183 
184 /* allow nested virtualization in KVM (if enabled by user space) */
185 static int nested;
186 module_param(nested, int, S_IRUGO);
187 MODULE_PARM_DESC(nested, "Nested virtualization support");
188 
/* allow 1m huge page guest backing, if !nested (mode 0444: read-only) */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");

/*
 * maximum percentage of steal time for polling.  >100 is treated like 100
 * (mode 0644: the owner may change it at runtime)
 */
static u8 halt_poll_max_steal = 10;
module_param(halt_poll_max_steal, byte, 0644);
MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");

/* if set to true, the GISA will be initialized and used if available */
static bool use_gisa  = true;
module_param(use_gisa, bool, 0644);
MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");

/*
 * maximum diag9c forwarding per second; 0 turns forwarding off.
 * Not static, so it is visible outside this file.
 */
unsigned int diag9c_forwarding_hz;
module_param(diag9c_forwarding_hz, uint, 0644);
MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
208 
/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, this requires changes to code, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 * Array elements not covered by the initializer start out as zero.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 * Array elements not covered by the initializer start out as zero.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
226 
227 static unsigned long kvm_s390_fac_size(void)
228 {
229 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
230 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
231 	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
232 		sizeof(stfle_fac_list));
233 
234 	return SIZE_INTERNAL;
235 }
236 
/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

/* pte notifiers registered in kvm_arch_hardware_setup() */
static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
/* debug feature areas, registered as "kvm-trace" and "kvm-uv" in kvm_arch_init() */
debug_info_t *kvm_s390_dbf;
debug_info_t *kvm_s390_dbf_uv;
246 
247 /* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* nothing to switch on: every s390 is virtualization enabled ;-) */
	return 0;
}
253 
/* Processor compatibility check: always reports success, @opaque is unused. */
int kvm_arch_check_processor_compat(void *opaque)
{
	return 0;
}
258 
/* forward declarations */
/* installed as gmap_notifier.notifier_call in kvm_arch_hardware_setup() */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
static int sca_switch_to_extended(struct kvm *kvm);
263 
264 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
265 {
266 	u8 delta_idx = 0;
267 
268 	/*
269 	 * The TOD jumps by delta, we have to compensate this by adding
270 	 * -delta to the epoch.
271 	 */
272 	delta = -delta;
273 
274 	/* sign-extension - we're adding to signed values below */
275 	if ((s64)delta < 0)
276 		delta_idx = -1;
277 
278 	scb->epoch += delta;
279 	if (scb->ecd & ECD_MEF) {
280 		scb->epdx += delta_idx;
281 		if (scb->epoch < delta)
282 			scb->epdx += 1;
283 	}
284 }
285 
286 /*
287  * This callback is executed during stop_machine(). All CPUs are therefore
288  * temporarily stopped. In order not to change guest behavior, we have to
289  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
290  * so a CPU won't be stopped while calculating with the epoch.
291  */
292 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
293 			  void *v)
294 {
295 	struct kvm *kvm;
296 	struct kvm_vcpu *vcpu;
297 	unsigned long i;
298 	unsigned long long *delta = v;
299 
300 	list_for_each_entry(kvm, &vm_list, vm_list) {
301 		kvm_for_each_vcpu(i, vcpu, kvm) {
302 			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
303 			if (i == 0) {
304 				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
305 				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
306 			}
307 			if (vcpu->arch.cputm_enabled)
308 				vcpu->arch.cputm_start += *delta;
309 			if (vcpu->arch.vsie_block)
310 				kvm_clock_sync_scb(vcpu->arch.vsie_block,
311 						   *delta);
312 		}
313 	}
314 	return NOTIFY_OK;
315 }
316 
/* Registered on s390_epoch_delta_notifier in kvm_arch_hardware_setup(). */
static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};
320 
321 int kvm_arch_hardware_setup(void *opaque)
322 {
323 	gmap_notifier.notifier_call = kvm_gmap_notifier;
324 	gmap_register_pte_notifier(&gmap_notifier);
325 	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
326 	gmap_register_pte_notifier(&vsie_gmap_notifier);
327 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
328 				       &kvm_clock_notifier);
329 	return 0;
330 }
331 
332 void kvm_arch_hardware_unsetup(void)
333 {
334 	gmap_unregister_pte_notifier(&gmap_notifier);
335 	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
336 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
337 					 &kvm_clock_notifier);
338 }
339 
340 static void allow_cpu_feat(unsigned long nr)
341 {
342 	set_bit_inv(nr, kvm_s390_available_cpu_feat);
343 }
344 
/*
 * Execute PLO with function code (@nr | 0x100) in register 0 and report the
 * outcome.
 *
 * NOTE(review): 0x100 presumably selects the "test bit" mode mentioned in the
 * inline comment below - confirm against the architecture documentation.
 *
 * Returns 1 if the resulting condition code is 0, otherwise 0.
 */
static inline int plo_test_bit(unsigned char nr)
{
	unsigned long function = (unsigned long)nr | 0x100;
	int cc;

	asm volatile(
		"	lgr	0,%[function]\n"
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		/* extract the condition code into the low bits of cc */
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: [function] "d" (function)
		: "cc", "0");
	return cc == 0;
}
361 
/*
 * Issue the 32-bit instruction @opcode (emitted in RRF format) with register 0
 * set to 0 and register 1 set to the address of @query.
 *
 * NOTE(review): presumably function code 0 is the instruction's query function
 * and the result is stored into @query (hence the "memory" clobber) - confirm.
 */
static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
{
	asm volatile(
		"	lghi	0,0\n"
		"	lgr	1,%[query]\n"
		/* Parameter registers are ignored */
		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
		:
		: [query] "d" ((unsigned long)query), [opc] "i" (opcode)
		: "cc", "memory", "0", "1");
}
373 
/* opcodes handed to __insn32_query() in kvm_s390_cpu_feat_init() */
#define INSN_SORTL 0xb938
#define INSN_DFLTCC 0xb939
376 
377 static void kvm_s390_cpu_feat_init(void)
378 {
379 	int i;
380 
381 	for (i = 0; i < 256; ++i) {
382 		if (plo_test_bit(i))
383 			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
384 	}
385 
386 	if (test_facility(28)) /* TOD-clock steering */
387 		ptff(kvm_s390_available_subfunc.ptff,
388 		     sizeof(kvm_s390_available_subfunc.ptff),
389 		     PTFF_QAF);
390 
391 	if (test_facility(17)) { /* MSA */
392 		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
393 			      kvm_s390_available_subfunc.kmac);
394 		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
395 			      kvm_s390_available_subfunc.kmc);
396 		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
397 			      kvm_s390_available_subfunc.km);
398 		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
399 			      kvm_s390_available_subfunc.kimd);
400 		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
401 			      kvm_s390_available_subfunc.klmd);
402 	}
403 	if (test_facility(76)) /* MSA3 */
404 		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
405 			      kvm_s390_available_subfunc.pckmo);
406 	if (test_facility(77)) { /* MSA4 */
407 		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
408 			      kvm_s390_available_subfunc.kmctr);
409 		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
410 			      kvm_s390_available_subfunc.kmf);
411 		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
412 			      kvm_s390_available_subfunc.kmo);
413 		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
414 			      kvm_s390_available_subfunc.pcc);
415 	}
416 	if (test_facility(57)) /* MSA5 */
417 		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
418 			      kvm_s390_available_subfunc.ppno);
419 
420 	if (test_facility(146)) /* MSA8 */
421 		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
422 			      kvm_s390_available_subfunc.kma);
423 
424 	if (test_facility(155)) /* MSA9 */
425 		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
426 			      kvm_s390_available_subfunc.kdsa);
427 
428 	if (test_facility(150)) /* SORTL */
429 		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
430 
431 	if (test_facility(151)) /* DFLTCC */
432 		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
433 
434 	if (MACHINE_HAS_ESOP)
435 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
436 	/*
437 	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
438 	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
439 	 */
440 	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
441 	    !test_facility(3) || !nested)
442 		return;
443 	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
444 	if (sclp.has_64bscao)
445 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
446 	if (sclp.has_siif)
447 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
448 	if (sclp.has_gpere)
449 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
450 	if (sclp.has_gsls)
451 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
452 	if (sclp.has_ib)
453 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
454 	if (sclp.has_cei)
455 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
456 	if (sclp.has_ibs)
457 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
458 	if (sclp.has_kss)
459 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
460 	/*
461 	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
462 	 * all skey handling functions read/set the skey from the PGSTE
463 	 * instead of the real storage key.
464 	 *
465 	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
466 	 * pages being detected as preserved although they are resident.
467 	 *
468 	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
469 	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
470 	 *
471 	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
472 	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
473 	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
474 	 *
475 	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
476 	 * cannot easily shadow the SCA because of the ipte lock.
477 	 */
478 }
479 
480 int kvm_arch_init(void *opaque)
481 {
482 	int rc = -ENOMEM;
483 
484 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
485 	if (!kvm_s390_dbf)
486 		return -ENOMEM;
487 
488 	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
489 	if (!kvm_s390_dbf_uv)
490 		goto out;
491 
492 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
493 	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
494 		goto out;
495 
496 	kvm_s390_cpu_feat_init();
497 
498 	/* Register floating interrupt controller interface. */
499 	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
500 	if (rc) {
501 		pr_err("A FLIC registration call failed with rc=%d\n", rc);
502 		goto out;
503 	}
504 
505 	rc = kvm_s390_gib_init(GAL_ISC);
506 	if (rc)
507 		goto out;
508 
509 	return 0;
510 
511 out:
512 	kvm_arch_exit();
513 	return rc;
514 }
515 
516 void kvm_arch_exit(void)
517 {
518 	kvm_s390_gib_destroy();
519 	debug_unregister(kvm_s390_dbf);
520 	debug_unregister(kvm_s390_dbf_uv);
521 }
522 
523 /* Section: device related */
524 long kvm_arch_dev_ioctl(struct file *filp,
525 			unsigned int ioctl, unsigned long arg)
526 {
527 	if (ioctl == KVM_S390_ENABLE_SIE)
528 		return s390_enable_sie();
529 	return -EINVAL;
530 }
531 
532 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
533 {
534 	int r;
535 
536 	switch (ext) {
537 	case KVM_CAP_S390_PSW:
538 	case KVM_CAP_S390_GMAP:
539 	case KVM_CAP_SYNC_MMU:
540 #ifdef CONFIG_KVM_S390_UCONTROL
541 	case KVM_CAP_S390_UCONTROL:
542 #endif
543 	case KVM_CAP_ASYNC_PF:
544 	case KVM_CAP_SYNC_REGS:
545 	case KVM_CAP_ONE_REG:
546 	case KVM_CAP_ENABLE_CAP:
547 	case KVM_CAP_S390_CSS_SUPPORT:
548 	case KVM_CAP_IOEVENTFD:
549 	case KVM_CAP_DEVICE_CTRL:
550 	case KVM_CAP_S390_IRQCHIP:
551 	case KVM_CAP_VM_ATTRIBUTES:
552 	case KVM_CAP_MP_STATE:
553 	case KVM_CAP_IMMEDIATE_EXIT:
554 	case KVM_CAP_S390_INJECT_IRQ:
555 	case KVM_CAP_S390_USER_SIGP:
556 	case KVM_CAP_S390_USER_STSI:
557 	case KVM_CAP_S390_SKEYS:
558 	case KVM_CAP_S390_IRQ_STATE:
559 	case KVM_CAP_S390_USER_INSTR0:
560 	case KVM_CAP_S390_CMMA_MIGRATION:
561 	case KVM_CAP_S390_AIS:
562 	case KVM_CAP_S390_AIS_MIGRATION:
563 	case KVM_CAP_S390_VCPU_RESETS:
564 	case KVM_CAP_SET_GUEST_DEBUG:
565 	case KVM_CAP_S390_DIAG318:
566 	case KVM_CAP_S390_MEM_OP_EXTENSION:
567 		r = 1;
568 		break;
569 	case KVM_CAP_SET_GUEST_DEBUG2:
570 		r = KVM_GUESTDBG_VALID_MASK;
571 		break;
572 	case KVM_CAP_S390_HPAGE_1M:
573 		r = 0;
574 		if (hpage && !kvm_is_ucontrol(kvm))
575 			r = 1;
576 		break;
577 	case KVM_CAP_S390_MEM_OP:
578 		r = MEM_OP_MAX_SIZE;
579 		break;
580 	case KVM_CAP_NR_VCPUS:
581 	case KVM_CAP_MAX_VCPUS:
582 	case KVM_CAP_MAX_VCPU_ID:
583 		r = KVM_S390_BSCA_CPU_SLOTS;
584 		if (!kvm_s390_use_sca_entries())
585 			r = KVM_MAX_VCPUS;
586 		else if (sclp.has_esca && sclp.has_64bscao)
587 			r = KVM_S390_ESCA_CPU_SLOTS;
588 		if (ext == KVM_CAP_NR_VCPUS)
589 			r = min_t(unsigned int, num_online_cpus(), r);
590 		break;
591 	case KVM_CAP_S390_COW:
592 		r = MACHINE_HAS_ESOP;
593 		break;
594 	case KVM_CAP_S390_VECTOR_REGISTERS:
595 		r = MACHINE_HAS_VX;
596 		break;
597 	case KVM_CAP_S390_RI:
598 		r = test_facility(64);
599 		break;
600 	case KVM_CAP_S390_GS:
601 		r = test_facility(133);
602 		break;
603 	case KVM_CAP_S390_BPB:
604 		r = test_facility(82);
605 		break;
606 	case KVM_CAP_S390_PROTECTED:
607 		r = is_prot_virt_host();
608 		break;
609 	default:
610 		r = 0;
611 	}
612 	return r;
613 }
614 
615 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
616 {
617 	int i;
618 	gfn_t cur_gfn, last_gfn;
619 	unsigned long gaddr, vmaddr;
620 	struct gmap *gmap = kvm->arch.gmap;
621 	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
622 
623 	/* Loop over all guest segments */
624 	cur_gfn = memslot->base_gfn;
625 	last_gfn = memslot->base_gfn + memslot->npages;
626 	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
627 		gaddr = gfn_to_gpa(cur_gfn);
628 		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
629 		if (kvm_is_error_hva(vmaddr))
630 			continue;
631 
632 		bitmap_zero(bitmap, _PAGE_ENTRIES);
633 		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
634 		for (i = 0; i < _PAGE_ENTRIES; i++) {
635 			if (test_bit(i, bitmap))
636 				mark_page_dirty(kvm, cur_gfn + i);
637 		}
638 
639 		if (fatal_signal_pending(current))
640 			return;
641 		cond_resched();
642 	}
643 }
644 
/* Section: vm related */
/* forward declaration; the definition is not in the part of the file shown here */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);
647 
648 /*
649  * Get (and clear) the dirty memory log for a memory slot.
650  */
651 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
652 			       struct kvm_dirty_log *log)
653 {
654 	int r;
655 	unsigned long n;
656 	struct kvm_memory_slot *memslot;
657 	int is_dirty;
658 
659 	if (kvm_is_ucontrol(kvm))
660 		return -EINVAL;
661 
662 	mutex_lock(&kvm->slots_lock);
663 
664 	r = -EINVAL;
665 	if (log->slot >= KVM_USER_MEM_SLOTS)
666 		goto out;
667 
668 	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
669 	if (r)
670 		goto out;
671 
672 	/* Clear the dirty log */
673 	if (is_dirty) {
674 		n = kvm_dirty_bitmap_bytes(memslot);
675 		memset(memslot->dirty_bitmap, 0, n);
676 	}
677 	r = 0;
678 out:
679 	mutex_unlock(&kvm->slots_lock);
680 	return r;
681 }
682 
683 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
684 {
685 	unsigned long i;
686 	struct kvm_vcpu *vcpu;
687 
688 	kvm_for_each_vcpu(i, vcpu, kvm) {
689 		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
690 	}
691 }
692 
693 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
694 {
695 	int r;
696 
697 	if (cap->flags)
698 		return -EINVAL;
699 
700 	switch (cap->cap) {
701 	case KVM_CAP_S390_IRQCHIP:
702 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
703 		kvm->arch.use_irqchip = 1;
704 		r = 0;
705 		break;
706 	case KVM_CAP_S390_USER_SIGP:
707 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
708 		kvm->arch.user_sigp = 1;
709 		r = 0;
710 		break;
711 	case KVM_CAP_S390_VECTOR_REGISTERS:
712 		mutex_lock(&kvm->lock);
713 		if (kvm->created_vcpus) {
714 			r = -EBUSY;
715 		} else if (MACHINE_HAS_VX) {
716 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
717 			set_kvm_facility(kvm->arch.model.fac_list, 129);
718 			if (test_facility(134)) {
719 				set_kvm_facility(kvm->arch.model.fac_mask, 134);
720 				set_kvm_facility(kvm->arch.model.fac_list, 134);
721 			}
722 			if (test_facility(135)) {
723 				set_kvm_facility(kvm->arch.model.fac_mask, 135);
724 				set_kvm_facility(kvm->arch.model.fac_list, 135);
725 			}
726 			if (test_facility(148)) {
727 				set_kvm_facility(kvm->arch.model.fac_mask, 148);
728 				set_kvm_facility(kvm->arch.model.fac_list, 148);
729 			}
730 			if (test_facility(152)) {
731 				set_kvm_facility(kvm->arch.model.fac_mask, 152);
732 				set_kvm_facility(kvm->arch.model.fac_list, 152);
733 			}
734 			if (test_facility(192)) {
735 				set_kvm_facility(kvm->arch.model.fac_mask, 192);
736 				set_kvm_facility(kvm->arch.model.fac_list, 192);
737 			}
738 			r = 0;
739 		} else
740 			r = -EINVAL;
741 		mutex_unlock(&kvm->lock);
742 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
743 			 r ? "(not available)" : "(success)");
744 		break;
745 	case KVM_CAP_S390_RI:
746 		r = -EINVAL;
747 		mutex_lock(&kvm->lock);
748 		if (kvm->created_vcpus) {
749 			r = -EBUSY;
750 		} else if (test_facility(64)) {
751 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
752 			set_kvm_facility(kvm->arch.model.fac_list, 64);
753 			r = 0;
754 		}
755 		mutex_unlock(&kvm->lock);
756 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
757 			 r ? "(not available)" : "(success)");
758 		break;
759 	case KVM_CAP_S390_AIS:
760 		mutex_lock(&kvm->lock);
761 		if (kvm->created_vcpus) {
762 			r = -EBUSY;
763 		} else {
764 			set_kvm_facility(kvm->arch.model.fac_mask, 72);
765 			set_kvm_facility(kvm->arch.model.fac_list, 72);
766 			r = 0;
767 		}
768 		mutex_unlock(&kvm->lock);
769 		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
770 			 r ? "(not available)" : "(success)");
771 		break;
772 	case KVM_CAP_S390_GS:
773 		r = -EINVAL;
774 		mutex_lock(&kvm->lock);
775 		if (kvm->created_vcpus) {
776 			r = -EBUSY;
777 		} else if (test_facility(133)) {
778 			set_kvm_facility(kvm->arch.model.fac_mask, 133);
779 			set_kvm_facility(kvm->arch.model.fac_list, 133);
780 			r = 0;
781 		}
782 		mutex_unlock(&kvm->lock);
783 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
784 			 r ? "(not available)" : "(success)");
785 		break;
786 	case KVM_CAP_S390_HPAGE_1M:
787 		mutex_lock(&kvm->lock);
788 		if (kvm->created_vcpus)
789 			r = -EBUSY;
790 		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
791 			r = -EINVAL;
792 		else {
793 			r = 0;
794 			mmap_write_lock(kvm->mm);
795 			kvm->mm->context.allow_gmap_hpage_1m = 1;
796 			mmap_write_unlock(kvm->mm);
797 			/*
798 			 * We might have to create fake 4k page
799 			 * tables. To avoid that the hardware works on
800 			 * stale PGSTEs, we emulate these instructions.
801 			 */
802 			kvm->arch.use_skf = 0;
803 			kvm->arch.use_pfmfi = 0;
804 		}
805 		mutex_unlock(&kvm->lock);
806 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
807 			 r ? "(not available)" : "(success)");
808 		break;
809 	case KVM_CAP_S390_USER_STSI:
810 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
811 		kvm->arch.user_stsi = 1;
812 		r = 0;
813 		break;
814 	case KVM_CAP_S390_USER_INSTR0:
815 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
816 		kvm->arch.user_instr0 = 1;
817 		icpt_operexc_on_all_vcpus(kvm);
818 		r = 0;
819 		break;
820 	default:
821 		r = -EINVAL;
822 		break;
823 	}
824 	return r;
825 }
826 
827 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
828 {
829 	int ret;
830 
831 	switch (attr->attr) {
832 	case KVM_S390_VM_MEM_LIMIT_SIZE:
833 		ret = 0;
834 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
835 			 kvm->arch.mem_limit);
836 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
837 			ret = -EFAULT;
838 		break;
839 	default:
840 		ret = -ENXIO;
841 		break;
842 	}
843 	return ret;
844 }
845 
846 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
847 {
848 	int ret;
849 	unsigned int idx;
850 	switch (attr->attr) {
851 	case KVM_S390_VM_MEM_ENABLE_CMMA:
852 		ret = -ENXIO;
853 		if (!sclp.has_cmma)
854 			break;
855 
856 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
857 		mutex_lock(&kvm->lock);
858 		if (kvm->created_vcpus)
859 			ret = -EBUSY;
860 		else if (kvm->mm->context.allow_gmap_hpage_1m)
861 			ret = -EINVAL;
862 		else {
863 			kvm->arch.use_cmma = 1;
864 			/* Not compatible with cmma. */
865 			kvm->arch.use_pfmfi = 0;
866 			ret = 0;
867 		}
868 		mutex_unlock(&kvm->lock);
869 		break;
870 	case KVM_S390_VM_MEM_CLR_CMMA:
871 		ret = -ENXIO;
872 		if (!sclp.has_cmma)
873 			break;
874 		ret = -EINVAL;
875 		if (!kvm->arch.use_cmma)
876 			break;
877 
878 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
879 		mutex_lock(&kvm->lock);
880 		idx = srcu_read_lock(&kvm->srcu);
881 		s390_reset_cmma(kvm->arch.gmap->mm);
882 		srcu_read_unlock(&kvm->srcu, idx);
883 		mutex_unlock(&kvm->lock);
884 		ret = 0;
885 		break;
886 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
887 		unsigned long new_limit;
888 
889 		if (kvm_is_ucontrol(kvm))
890 			return -EINVAL;
891 
892 		if (get_user(new_limit, (u64 __user *)attr->addr))
893 			return -EFAULT;
894 
895 		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
896 		    new_limit > kvm->arch.mem_limit)
897 			return -E2BIG;
898 
899 		if (!new_limit)
900 			return -EINVAL;
901 
902 		/* gmap_create takes last usable address */
903 		if (new_limit != KVM_S390_NO_MEM_LIMIT)
904 			new_limit -= 1;
905 
906 		ret = -EBUSY;
907 		mutex_lock(&kvm->lock);
908 		if (!kvm->created_vcpus) {
909 			/* gmap_create will round the limit up */
910 			struct gmap *new = gmap_create(current->mm, new_limit);
911 
912 			if (!new) {
913 				ret = -ENOMEM;
914 			} else {
915 				gmap_remove(kvm->arch.gmap);
916 				new->private = kvm;
917 				kvm->arch.gmap = new;
918 				ret = 0;
919 			}
920 		}
921 		mutex_unlock(&kvm->lock);
922 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
923 		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
924 			 (void *) kvm->arch.gmap->asce);
925 		break;
926 	}
927 	default:
928 		ret = -ENXIO;
929 		break;
930 	}
931 	return ret;
932 }
933 
/* forward declaration; called from kvm_s390_vcpu_crypto_reset_all() below */
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
935 
936 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
937 {
938 	struct kvm_vcpu *vcpu;
939 	unsigned long i;
940 
941 	kvm_s390_vcpu_block_all(kvm);
942 
943 	kvm_for_each_vcpu(i, vcpu, kvm) {
944 		kvm_s390_vcpu_crypto_setup(vcpu);
945 		/* recreate the shadow crycb by leaving the VSIE handler */
946 		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
947 	}
948 
949 	kvm_s390_vcpu_unblock_all(kvm);
950 }
951 
952 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
953 {
954 	mutex_lock(&kvm->lock);
955 	switch (attr->attr) {
956 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
957 		if (!test_kvm_facility(kvm, 76)) {
958 			mutex_unlock(&kvm->lock);
959 			return -EINVAL;
960 		}
961 		get_random_bytes(
962 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
963 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
964 		kvm->arch.crypto.aes_kw = 1;
965 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
966 		break;
967 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
968 		if (!test_kvm_facility(kvm, 76)) {
969 			mutex_unlock(&kvm->lock);
970 			return -EINVAL;
971 		}
972 		get_random_bytes(
973 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
974 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
975 		kvm->arch.crypto.dea_kw = 1;
976 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
977 		break;
978 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
979 		if (!test_kvm_facility(kvm, 76)) {
980 			mutex_unlock(&kvm->lock);
981 			return -EINVAL;
982 		}
983 		kvm->arch.crypto.aes_kw = 0;
984 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
985 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
986 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
987 		break;
988 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
989 		if (!test_kvm_facility(kvm, 76)) {
990 			mutex_unlock(&kvm->lock);
991 			return -EINVAL;
992 		}
993 		kvm->arch.crypto.dea_kw = 0;
994 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
995 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
996 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
997 		break;
998 	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
999 		if (!ap_instructions_available()) {
1000 			mutex_unlock(&kvm->lock);
1001 			return -EOPNOTSUPP;
1002 		}
1003 		kvm->arch.crypto.apie = 1;
1004 		break;
1005 	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1006 		if (!ap_instructions_available()) {
1007 			mutex_unlock(&kvm->lock);
1008 			return -EOPNOTSUPP;
1009 		}
1010 		kvm->arch.crypto.apie = 0;
1011 		break;
1012 	default:
1013 		mutex_unlock(&kvm->lock);
1014 		return -ENXIO;
1015 	}
1016 
1017 	kvm_s390_vcpu_crypto_reset_all(kvm);
1018 	mutex_unlock(&kvm->lock);
1019 	return 0;
1020 }
1021 
1022 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
1023 {
1024 	unsigned long cx;
1025 	struct kvm_vcpu *vcpu;
1026 
1027 	kvm_for_each_vcpu(cx, vcpu, kvm)
1028 		kvm_s390_sync_request(req, vcpu);
1029 }
1030 
1031 /*
1032  * Must be called with kvm->srcu held to avoid races on memslots, and with
1033  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1034  */
1035 static int kvm_s390_vm_start_migration(struct kvm *kvm)
1036 {
1037 	struct kvm_memory_slot *ms;
1038 	struct kvm_memslots *slots;
1039 	unsigned long ram_pages = 0;
1040 	int bkt;
1041 
1042 	/* migration mode already enabled */
1043 	if (kvm->arch.migration_mode)
1044 		return 0;
1045 	slots = kvm_memslots(kvm);
1046 	if (!slots || kvm_memslots_empty(slots))
1047 		return -EINVAL;
1048 
1049 	if (!kvm->arch.use_cmma) {
1050 		kvm->arch.migration_mode = 1;
1051 		return 0;
1052 	}
1053 	/* mark all the pages in active slots as dirty */
1054 	kvm_for_each_memslot(ms, bkt, slots) {
1055 		if (!ms->dirty_bitmap)
1056 			return -EINVAL;
1057 		/*
1058 		 * The second half of the bitmap is only used on x86,
1059 		 * and would be wasted otherwise, so we put it to good
1060 		 * use here to keep track of the state of the storage
1061 		 * attributes.
1062 		 */
1063 		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1064 		ram_pages += ms->npages;
1065 	}
1066 	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1067 	kvm->arch.migration_mode = 1;
1068 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1069 	return 0;
1070 }
1071 
1072 /*
1073  * Must be called with kvm->slots_lock to avoid races with ourselves and
1074  * kvm_s390_vm_start_migration.
1075  */
1076 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1077 {
1078 	/* migration mode already disabled */
1079 	if (!kvm->arch.migration_mode)
1080 		return 0;
1081 	kvm->arch.migration_mode = 0;
1082 	if (kvm->arch.use_cmma)
1083 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1084 	return 0;
1085 }
1086 
1087 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1088 				     struct kvm_device_attr *attr)
1089 {
1090 	int res = -ENXIO;
1091 
1092 	mutex_lock(&kvm->slots_lock);
1093 	switch (attr->attr) {
1094 	case KVM_S390_VM_MIGRATION_START:
1095 		res = kvm_s390_vm_start_migration(kvm);
1096 		break;
1097 	case KVM_S390_VM_MIGRATION_STOP:
1098 		res = kvm_s390_vm_stop_migration(kvm);
1099 		break;
1100 	default:
1101 		break;
1102 	}
1103 	mutex_unlock(&kvm->slots_lock);
1104 
1105 	return res;
1106 }
1107 
1108 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1109 				     struct kvm_device_attr *attr)
1110 {
1111 	u64 mig = kvm->arch.migration_mode;
1112 
1113 	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1114 		return -ENXIO;
1115 
1116 	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1117 		return -EFAULT;
1118 	return 0;
1119 }
1120 
1121 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1122 {
1123 	struct kvm_s390_vm_tod_clock gtod;
1124 
1125 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1126 		return -EFAULT;
1127 
1128 	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1129 		return -EINVAL;
1130 	kvm_s390_set_tod_clock(kvm, &gtod);
1131 
1132 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1133 		gtod.epoch_idx, gtod.tod);
1134 
1135 	return 0;
1136 }
1137 
1138 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1139 {
1140 	u8 gtod_high;
1141 
1142 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1143 					   sizeof(gtod_high)))
1144 		return -EFAULT;
1145 
1146 	if (gtod_high != 0)
1147 		return -EINVAL;
1148 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1149 
1150 	return 0;
1151 }
1152 
1153 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1154 {
1155 	struct kvm_s390_vm_tod_clock gtod = { 0 };
1156 
1157 	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1158 			   sizeof(gtod.tod)))
1159 		return -EFAULT;
1160 
1161 	kvm_s390_set_tod_clock(kvm, &gtod);
1162 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1163 	return 0;
1164 }
1165 
1166 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1167 {
1168 	int ret;
1169 
1170 	if (attr->flags)
1171 		return -EINVAL;
1172 
1173 	switch (attr->attr) {
1174 	case KVM_S390_VM_TOD_EXT:
1175 		ret = kvm_s390_set_tod_ext(kvm, attr);
1176 		break;
1177 	case KVM_S390_VM_TOD_HIGH:
1178 		ret = kvm_s390_set_tod_high(kvm, attr);
1179 		break;
1180 	case KVM_S390_VM_TOD_LOW:
1181 		ret = kvm_s390_set_tod_low(kvm, attr);
1182 		break;
1183 	default:
1184 		ret = -ENXIO;
1185 		break;
1186 	}
1187 	return ret;
1188 }
1189 
1190 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1191 				   struct kvm_s390_vm_tod_clock *gtod)
1192 {
1193 	union tod_clock clk;
1194 
1195 	preempt_disable();
1196 
1197 	store_tod_clock_ext(&clk);
1198 
1199 	gtod->tod = clk.tod + kvm->arch.epoch;
1200 	gtod->epoch_idx = 0;
1201 	if (test_kvm_facility(kvm, 139)) {
1202 		gtod->epoch_idx = clk.ei + kvm->arch.epdx;
1203 		if (gtod->tod < clk.tod)
1204 			gtod->epoch_idx += 1;
1205 	}
1206 
1207 	preempt_enable();
1208 }
1209 
1210 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1211 {
1212 	struct kvm_s390_vm_tod_clock gtod;
1213 
1214 	memset(&gtod, 0, sizeof(gtod));
1215 	kvm_s390_get_tod_clock(kvm, &gtod);
1216 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1217 		return -EFAULT;
1218 
1219 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1220 		gtod.epoch_idx, gtod.tod);
1221 	return 0;
1222 }
1223 
1224 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1225 {
1226 	u8 gtod_high = 0;
1227 
1228 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
1229 					 sizeof(gtod_high)))
1230 		return -EFAULT;
1231 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1232 
1233 	return 0;
1234 }
1235 
1236 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1237 {
1238 	u64 gtod;
1239 
1240 	gtod = kvm_s390_get_tod_clock_fast(kvm);
1241 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1242 		return -EFAULT;
1243 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1244 
1245 	return 0;
1246 }
1247 
1248 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1249 {
1250 	int ret;
1251 
1252 	if (attr->flags)
1253 		return -EINVAL;
1254 
1255 	switch (attr->attr) {
1256 	case KVM_S390_VM_TOD_EXT:
1257 		ret = kvm_s390_get_tod_ext(kvm, attr);
1258 		break;
1259 	case KVM_S390_VM_TOD_HIGH:
1260 		ret = kvm_s390_get_tod_high(kvm, attr);
1261 		break;
1262 	case KVM_S390_VM_TOD_LOW:
1263 		ret = kvm_s390_get_tod_low(kvm, attr);
1264 		break;
1265 	default:
1266 		ret = -ENXIO;
1267 		break;
1268 	}
1269 	return ret;
1270 }
1271 
1272 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1273 {
1274 	struct kvm_s390_vm_cpu_processor *proc;
1275 	u16 lowest_ibc, unblocked_ibc;
1276 	int ret = 0;
1277 
1278 	mutex_lock(&kvm->lock);
1279 	if (kvm->created_vcpus) {
1280 		ret = -EBUSY;
1281 		goto out;
1282 	}
1283 	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1284 	if (!proc) {
1285 		ret = -ENOMEM;
1286 		goto out;
1287 	}
1288 	if (!copy_from_user(proc, (void __user *)attr->addr,
1289 			    sizeof(*proc))) {
1290 		kvm->arch.model.cpuid = proc->cpuid;
1291 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
1292 		unblocked_ibc = sclp.ibc & 0xfff;
1293 		if (lowest_ibc && proc->ibc) {
1294 			if (proc->ibc > unblocked_ibc)
1295 				kvm->arch.model.ibc = unblocked_ibc;
1296 			else if (proc->ibc < lowest_ibc)
1297 				kvm->arch.model.ibc = lowest_ibc;
1298 			else
1299 				kvm->arch.model.ibc = proc->ibc;
1300 		}
1301 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
1302 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
1303 		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1304 			 kvm->arch.model.ibc,
1305 			 kvm->arch.model.cpuid);
1306 		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1307 			 kvm->arch.model.fac_list[0],
1308 			 kvm->arch.model.fac_list[1],
1309 			 kvm->arch.model.fac_list[2]);
1310 	} else
1311 		ret = -EFAULT;
1312 	kfree(proc);
1313 out:
1314 	mutex_unlock(&kvm->lock);
1315 	return ret;
1316 }
1317 
1318 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1319 				       struct kvm_device_attr *attr)
1320 {
1321 	struct kvm_s390_vm_cpu_feat data;
1322 
1323 	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1324 		return -EFAULT;
1325 	if (!bitmap_subset((unsigned long *) data.feat,
1326 			   kvm_s390_available_cpu_feat,
1327 			   KVM_S390_VM_CPU_FEAT_NR_BITS))
1328 		return -EINVAL;
1329 
1330 	mutex_lock(&kvm->lock);
1331 	if (kvm->created_vcpus) {
1332 		mutex_unlock(&kvm->lock);
1333 		return -EBUSY;
1334 	}
1335 	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1336 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1337 	mutex_unlock(&kvm->lock);
1338 	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1339 			 data.feat[0],
1340 			 data.feat[1],
1341 			 data.feat[2]);
1342 	return 0;
1343 }
1344 
1345 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1346 					  struct kvm_device_attr *attr)
1347 {
1348 	mutex_lock(&kvm->lock);
1349 	if (kvm->created_vcpus) {
1350 		mutex_unlock(&kvm->lock);
1351 		return -EBUSY;
1352 	}
1353 
1354 	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1355 			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1356 		mutex_unlock(&kvm->lock);
1357 		return -EFAULT;
1358 	}
1359 	mutex_unlock(&kvm->lock);
1360 
1361 	VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1362 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1363 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1364 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1365 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1366 	VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1367 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1368 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1369 	VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1370 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1371 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1372 	VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1373 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1374 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1375 	VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1376 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1377 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1378 	VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1379 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1380 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1381 	VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1382 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1383 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1384 	VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1385 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1386 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1387 	VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1388 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1389 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1390 	VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1391 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1392 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1393 	VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1394 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1395 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1396 	VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1397 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1398 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1399 	VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1400 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1401 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1402 	VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1403 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1404 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1405 	VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1406 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1407 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1408 	VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1409 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1410 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1411 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1412 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1413 	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1414 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1415 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1416 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1417 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1418 
1419 	return 0;
1420 }
1421 
1422 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1423 {
1424 	int ret = -ENXIO;
1425 
1426 	switch (attr->attr) {
1427 	case KVM_S390_VM_CPU_PROCESSOR:
1428 		ret = kvm_s390_set_processor(kvm, attr);
1429 		break;
1430 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1431 		ret = kvm_s390_set_processor_feat(kvm, attr);
1432 		break;
1433 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1434 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
1435 		break;
1436 	}
1437 	return ret;
1438 }
1439 
1440 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1441 {
1442 	struct kvm_s390_vm_cpu_processor *proc;
1443 	int ret = 0;
1444 
1445 	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1446 	if (!proc) {
1447 		ret = -ENOMEM;
1448 		goto out;
1449 	}
1450 	proc->cpuid = kvm->arch.model.cpuid;
1451 	proc->ibc = kvm->arch.model.ibc;
1452 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1453 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1454 	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1455 		 kvm->arch.model.ibc,
1456 		 kvm->arch.model.cpuid);
1457 	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1458 		 kvm->arch.model.fac_list[0],
1459 		 kvm->arch.model.fac_list[1],
1460 		 kvm->arch.model.fac_list[2]);
1461 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1462 		ret = -EFAULT;
1463 	kfree(proc);
1464 out:
1465 	return ret;
1466 }
1467 
1468 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1469 {
1470 	struct kvm_s390_vm_cpu_machine *mach;
1471 	int ret = 0;
1472 
1473 	mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1474 	if (!mach) {
1475 		ret = -ENOMEM;
1476 		goto out;
1477 	}
1478 	get_cpu_id((struct cpuid *) &mach->cpuid);
1479 	mach->ibc = sclp.ibc;
1480 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1481 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1482 	memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
1483 	       sizeof(stfle_fac_list));
1484 	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1485 		 kvm->arch.model.ibc,
1486 		 kvm->arch.model.cpuid);
1487 	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1488 		 mach->fac_mask[0],
1489 		 mach->fac_mask[1],
1490 		 mach->fac_mask[2]);
1491 	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1492 		 mach->fac_list[0],
1493 		 mach->fac_list[1],
1494 		 mach->fac_list[2]);
1495 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1496 		ret = -EFAULT;
1497 	kfree(mach);
1498 out:
1499 	return ret;
1500 }
1501 
1502 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1503 				       struct kvm_device_attr *attr)
1504 {
1505 	struct kvm_s390_vm_cpu_feat data;
1506 
1507 	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1508 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1509 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1510 		return -EFAULT;
1511 	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1512 			 data.feat[0],
1513 			 data.feat[1],
1514 			 data.feat[2]);
1515 	return 0;
1516 }
1517 
1518 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1519 				     struct kvm_device_attr *attr)
1520 {
1521 	struct kvm_s390_vm_cpu_feat data;
1522 
1523 	bitmap_copy((unsigned long *) data.feat,
1524 		    kvm_s390_available_cpu_feat,
1525 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1526 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1527 		return -EFAULT;
1528 	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1529 			 data.feat[0],
1530 			 data.feat[1],
1531 			 data.feat[2]);
1532 	return 0;
1533 }
1534 
1535 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1536 					  struct kvm_device_attr *attr)
1537 {
1538 	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1539 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1540 		return -EFAULT;
1541 
1542 	VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1543 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1544 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1545 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1546 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1547 	VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1548 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1549 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1550 	VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1551 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1552 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1553 	VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1554 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1555 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1556 	VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1557 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1558 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1559 	VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1560 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1561 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1562 	VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1563 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1564 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1565 	VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1566 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1567 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1568 	VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1569 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1570 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1571 	VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1572 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1573 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1574 	VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1575 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1576 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1577 	VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1578 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1579 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1580 	VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1581 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1582 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1583 	VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1584 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1585 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1586 	VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1587 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1588 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1589 	VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1590 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1591 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1592 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1593 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1594 	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1595 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1596 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1597 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1598 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1599 
1600 	return 0;
1601 }
1602 
1603 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1604 					struct kvm_device_attr *attr)
1605 {
1606 	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1607 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1608 		return -EFAULT;
1609 
1610 	VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1611 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1612 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1613 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1614 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1615 	VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1616 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1617 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1618 	VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1619 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1620 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1621 	VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1622 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1623 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1624 	VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1625 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1626 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1627 	VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1628 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1629 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1630 	VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1631 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1632 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1633 	VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1634 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1635 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1636 	VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1637 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1638 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1639 	VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1640 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1641 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1642 	VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1643 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1644 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1645 	VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1646 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1647 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1648 	VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1649 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1650 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1651 	VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1652 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1653 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1654 	VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1655 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1656 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1657 	VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1658 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1659 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1660 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1661 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1662 	VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1663 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1664 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1665 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1666 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1667 
1668 	return 0;
1669 }
1670 
1671 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1672 {
1673 	int ret = -ENXIO;
1674 
1675 	switch (attr->attr) {
1676 	case KVM_S390_VM_CPU_PROCESSOR:
1677 		ret = kvm_s390_get_processor(kvm, attr);
1678 		break;
1679 	case KVM_S390_VM_CPU_MACHINE:
1680 		ret = kvm_s390_get_machine(kvm, attr);
1681 		break;
1682 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1683 		ret = kvm_s390_get_processor_feat(kvm, attr);
1684 		break;
1685 	case KVM_S390_VM_CPU_MACHINE_FEAT:
1686 		ret = kvm_s390_get_machine_feat(kvm, attr);
1687 		break;
1688 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1689 		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1690 		break;
1691 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1692 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1693 		break;
1694 	}
1695 	return ret;
1696 }
1697 
1698 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1699 {
1700 	int ret;
1701 
1702 	switch (attr->group) {
1703 	case KVM_S390_VM_MEM_CTRL:
1704 		ret = kvm_s390_set_mem_control(kvm, attr);
1705 		break;
1706 	case KVM_S390_VM_TOD:
1707 		ret = kvm_s390_set_tod(kvm, attr);
1708 		break;
1709 	case KVM_S390_VM_CPU_MODEL:
1710 		ret = kvm_s390_set_cpu_model(kvm, attr);
1711 		break;
1712 	case KVM_S390_VM_CRYPTO:
1713 		ret = kvm_s390_vm_set_crypto(kvm, attr);
1714 		break;
1715 	case KVM_S390_VM_MIGRATION:
1716 		ret = kvm_s390_vm_set_migration(kvm, attr);
1717 		break;
1718 	default:
1719 		ret = -ENXIO;
1720 		break;
1721 	}
1722 
1723 	return ret;
1724 }
1725 
1726 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1727 {
1728 	int ret;
1729 
1730 	switch (attr->group) {
1731 	case KVM_S390_VM_MEM_CTRL:
1732 		ret = kvm_s390_get_mem_control(kvm, attr);
1733 		break;
1734 	case KVM_S390_VM_TOD:
1735 		ret = kvm_s390_get_tod(kvm, attr);
1736 		break;
1737 	case KVM_S390_VM_CPU_MODEL:
1738 		ret = kvm_s390_get_cpu_model(kvm, attr);
1739 		break;
1740 	case KVM_S390_VM_MIGRATION:
1741 		ret = kvm_s390_vm_get_migration(kvm, attr);
1742 		break;
1743 	default:
1744 		ret = -ENXIO;
1745 		break;
1746 	}
1747 
1748 	return ret;
1749 }
1750 
1751 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1752 {
1753 	int ret;
1754 
1755 	switch (attr->group) {
1756 	case KVM_S390_VM_MEM_CTRL:
1757 		switch (attr->attr) {
1758 		case KVM_S390_VM_MEM_ENABLE_CMMA:
1759 		case KVM_S390_VM_MEM_CLR_CMMA:
1760 			ret = sclp.has_cmma ? 0 : -ENXIO;
1761 			break;
1762 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1763 			ret = 0;
1764 			break;
1765 		default:
1766 			ret = -ENXIO;
1767 			break;
1768 		}
1769 		break;
1770 	case KVM_S390_VM_TOD:
1771 		switch (attr->attr) {
1772 		case KVM_S390_VM_TOD_LOW:
1773 		case KVM_S390_VM_TOD_HIGH:
1774 			ret = 0;
1775 			break;
1776 		default:
1777 			ret = -ENXIO;
1778 			break;
1779 		}
1780 		break;
1781 	case KVM_S390_VM_CPU_MODEL:
1782 		switch (attr->attr) {
1783 		case KVM_S390_VM_CPU_PROCESSOR:
1784 		case KVM_S390_VM_CPU_MACHINE:
1785 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1786 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1787 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1788 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1789 			ret = 0;
1790 			break;
1791 		default:
1792 			ret = -ENXIO;
1793 			break;
1794 		}
1795 		break;
1796 	case KVM_S390_VM_CRYPTO:
1797 		switch (attr->attr) {
1798 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1799 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1800 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1801 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1802 			ret = 0;
1803 			break;
1804 		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1805 		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1806 			ret = ap_instructions_available() ? 0 : -ENXIO;
1807 			break;
1808 		default:
1809 			ret = -ENXIO;
1810 			break;
1811 		}
1812 		break;
1813 	case KVM_S390_VM_MIGRATION:
1814 		ret = 0;
1815 		break;
1816 	default:
1817 		ret = -ENXIO;
1818 		break;
1819 	}
1820 
1821 	return ret;
1822 }
1823 
1824 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1825 {
1826 	uint8_t *keys;
1827 	uint64_t hva;
1828 	int srcu_idx, i, r = 0;
1829 
1830 	if (args->flags != 0)
1831 		return -EINVAL;
1832 
1833 	/* Is this guest using storage keys? */
1834 	if (!mm_uses_skeys(current->mm))
1835 		return KVM_S390_GET_SKEYS_NONE;
1836 
1837 	/* Enforce sane limit on memory allocation */
1838 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1839 		return -EINVAL;
1840 
1841 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1842 	if (!keys)
1843 		return -ENOMEM;
1844 
1845 	mmap_read_lock(current->mm);
1846 	srcu_idx = srcu_read_lock(&kvm->srcu);
1847 	for (i = 0; i < args->count; i++) {
1848 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1849 		if (kvm_is_error_hva(hva)) {
1850 			r = -EFAULT;
1851 			break;
1852 		}
1853 
1854 		r = get_guest_storage_key(current->mm, hva, &keys[i]);
1855 		if (r)
1856 			break;
1857 	}
1858 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1859 	mmap_read_unlock(current->mm);
1860 
1861 	if (!r) {
1862 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1863 				 sizeof(uint8_t) * args->count);
1864 		if (r)
1865 			r = -EFAULT;
1866 	}
1867 
1868 	kvfree(keys);
1869 	return r;
1870 }
1871 
1872 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1873 {
1874 	uint8_t *keys;
1875 	uint64_t hva;
1876 	int srcu_idx, i, r = 0;
1877 	bool unlocked;
1878 
1879 	if (args->flags != 0)
1880 		return -EINVAL;
1881 
1882 	/* Enforce sane limit on memory allocation */
1883 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1884 		return -EINVAL;
1885 
1886 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1887 	if (!keys)
1888 		return -ENOMEM;
1889 
1890 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1891 			   sizeof(uint8_t) * args->count);
1892 	if (r) {
1893 		r = -EFAULT;
1894 		goto out;
1895 	}
1896 
1897 	/* Enable storage key handling for the guest */
1898 	r = s390_enable_skey();
1899 	if (r)
1900 		goto out;
1901 
1902 	i = 0;
1903 	mmap_read_lock(current->mm);
1904 	srcu_idx = srcu_read_lock(&kvm->srcu);
1905         while (i < args->count) {
1906 		unlocked = false;
1907 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1908 		if (kvm_is_error_hva(hva)) {
1909 			r = -EFAULT;
1910 			break;
1911 		}
1912 
1913 		/* Lowest order bit is reserved */
1914 		if (keys[i] & 0x01) {
1915 			r = -EINVAL;
1916 			break;
1917 		}
1918 
1919 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1920 		if (r) {
1921 			r = fixup_user_fault(current->mm, hva,
1922 					     FAULT_FLAG_WRITE, &unlocked);
1923 			if (r)
1924 				break;
1925 		}
1926 		if (!r)
1927 			i++;
1928 	}
1929 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1930 	mmap_read_unlock(current->mm);
1931 out:
1932 	kvfree(keys);
1933 	return r;
1934 }
1935 
/*
 * The base address and length must be sent at the start of each block, so it
 * is cheaper to send a short run of clean data than to start a new block, as
 * long as that run is shorter than the size of two longs.
 */
1941 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/* Same upper bound as for storage keys, for consistency */
1943 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1944 
1945 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1946 			      u8 *res, unsigned long bufsize)
1947 {
1948 	unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1949 
1950 	args->count = 0;
1951 	while (args->count < bufsize) {
1952 		hva = gfn_to_hva(kvm, cur_gfn);
1953 		/*
1954 		 * We return an error if the first value was invalid, but we
1955 		 * return successfully if at least one value was copied.
1956 		 */
1957 		if (kvm_is_error_hva(hva))
1958 			return args->count ? 0 : -EFAULT;
1959 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1960 			pgstev = 0;
1961 		res[args->count++] = (pgstev >> 24) & 0x43;
1962 		cur_gfn++;
1963 	}
1964 
1965 	return 0;
1966 }
1967 
1968 static struct kvm_memory_slot *gfn_to_memslot_approx(struct kvm_memslots *slots,
1969 						     gfn_t gfn)
1970 {
1971 	return ____gfn_to_memslot(slots, gfn, true);
1972 }
1973 
1974 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1975 					      unsigned long cur_gfn)
1976 {
1977 	struct kvm_memory_slot *ms = gfn_to_memslot_approx(slots, cur_gfn);
1978 	unsigned long ofs = cur_gfn - ms->base_gfn;
1979 	struct rb_node *mnode = &ms->gfn_node[slots->node_idx];
1980 
1981 	if (ms->base_gfn + ms->npages <= cur_gfn) {
1982 		mnode = rb_next(mnode);
1983 		/* If we are above the highest slot, wrap around */
1984 		if (!mnode)
1985 			mnode = rb_first(&slots->gfn_tree);
1986 
1987 		ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
1988 		ofs = 0;
1989 	}
1990 	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1991 	while (ofs >= ms->npages && (mnode = rb_next(mnode))) {
1992 		ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
1993 		ofs = find_first_bit(kvm_second_dirty_bitmap(ms), ms->npages);
1994 	}
1995 	return ms->base_gfn + ofs;
1996 }
1997 
1998 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1999 			     u8 *res, unsigned long bufsize)
2000 {
2001 	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2002 	struct kvm_memslots *slots = kvm_memslots(kvm);
2003 	struct kvm_memory_slot *ms;
2004 
2005 	if (unlikely(kvm_memslots_empty(slots)))
2006 		return 0;
2007 
2008 	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2009 	ms = gfn_to_memslot(kvm, cur_gfn);
2010 	args->count = 0;
2011 	args->start_gfn = cur_gfn;
2012 	if (!ms)
2013 		return 0;
2014 	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2015 	mem_end = kvm_s390_get_gfn_end(slots);
2016 
2017 	while (args->count < bufsize) {
2018 		hva = gfn_to_hva(kvm, cur_gfn);
2019 		if (kvm_is_error_hva(hva))
2020 			return 0;
2021 		/* Decrement only if we actually flipped the bit to 0 */
2022 		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2023 			atomic64_dec(&kvm->arch.cmma_dirty_pages);
2024 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2025 			pgstev = 0;
2026 		/* Save the value */
2027 		res[args->count++] = (pgstev >> 24) & 0x43;
2028 		/* If the next bit is too far away, stop. */
2029 		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2030 			return 0;
2031 		/* If we reached the previous "next", find the next one */
2032 		if (cur_gfn == next_gfn)
2033 			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2034 		/* Reached the end of memory or of the buffer, stop */
2035 		if ((next_gfn >= mem_end) ||
2036 		    (next_gfn - args->start_gfn >= bufsize))
2037 			return 0;
2038 		cur_gfn++;
2039 		/* Reached the end of the current memslot, take the next one. */
2040 		if (cur_gfn - ms->base_gfn >= ms->npages) {
2041 			ms = gfn_to_memslot(kvm, cur_gfn);
2042 			if (!ms)
2043 				return 0;
2044 		}
2045 	}
2046 	return 0;
2047 }
2048 
2049 /*
2050  * This function searches for the next page with dirty CMMA attributes, and
2051  * saves the attributes in the buffer up to either the end of the buffer or
2052  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2053  * no trailing clean bytes are saved.
2054  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2055  * output buffer will indicate 0 as length.
2056  */
2057 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2058 				  struct kvm_s390_cmma_log *args)
2059 {
2060 	unsigned long bufsize;
2061 	int srcu_idx, peek, ret;
2062 	u8 *values;
2063 
2064 	if (!kvm->arch.use_cmma)
2065 		return -ENXIO;
2066 	/* Invalid/unsupported flags were specified */
2067 	if (args->flags & ~KVM_S390_CMMA_PEEK)
2068 		return -EINVAL;
2069 	/* Migration mode query, and we are not doing a migration */
2070 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2071 	if (!peek && !kvm->arch.migration_mode)
2072 		return -EINVAL;
2073 	/* CMMA is disabled or was not used, or the buffer has length zero */
2074 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2075 	if (!bufsize || !kvm->mm->context.uses_cmm) {
2076 		memset(args, 0, sizeof(*args));
2077 		return 0;
2078 	}
2079 	/* We are not peeking, and there are no dirty pages */
2080 	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2081 		memset(args, 0, sizeof(*args));
2082 		return 0;
2083 	}
2084 
2085 	values = vmalloc(bufsize);
2086 	if (!values)
2087 		return -ENOMEM;
2088 
2089 	mmap_read_lock(kvm->mm);
2090 	srcu_idx = srcu_read_lock(&kvm->srcu);
2091 	if (peek)
2092 		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2093 	else
2094 		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2095 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2096 	mmap_read_unlock(kvm->mm);
2097 
2098 	if (kvm->arch.migration_mode)
2099 		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2100 	else
2101 		args->remaining = 0;
2102 
2103 	if (copy_to_user((void __user *)args->values, values, args->count))
2104 		ret = -EFAULT;
2105 
2106 	vfree(values);
2107 	return ret;
2108 }
2109 
2110 /*
2111  * This function sets the CMMA attributes for the given pages. If the input
2112  * buffer has zero length, no action is taken, otherwise the attributes are
2113  * set and the mm->context.uses_cmm flag is set.
2114  */
2115 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2116 				  const struct kvm_s390_cmma_log *args)
2117 {
2118 	unsigned long hva, mask, pgstev, i;
2119 	uint8_t *bits;
2120 	int srcu_idx, r = 0;
2121 
2122 	mask = args->mask;
2123 
2124 	if (!kvm->arch.use_cmma)
2125 		return -ENXIO;
2126 	/* invalid/unsupported flags */
2127 	if (args->flags != 0)
2128 		return -EINVAL;
2129 	/* Enforce sane limit on memory allocation */
2130 	if (args->count > KVM_S390_CMMA_SIZE_MAX)
2131 		return -EINVAL;
2132 	/* Nothing to do */
2133 	if (args->count == 0)
2134 		return 0;
2135 
2136 	bits = vmalloc(array_size(sizeof(*bits), args->count));
2137 	if (!bits)
2138 		return -ENOMEM;
2139 
2140 	r = copy_from_user(bits, (void __user *)args->values, args->count);
2141 	if (r) {
2142 		r = -EFAULT;
2143 		goto out;
2144 	}
2145 
2146 	mmap_read_lock(kvm->mm);
2147 	srcu_idx = srcu_read_lock(&kvm->srcu);
2148 	for (i = 0; i < args->count; i++) {
2149 		hva = gfn_to_hva(kvm, args->start_gfn + i);
2150 		if (kvm_is_error_hva(hva)) {
2151 			r = -EFAULT;
2152 			break;
2153 		}
2154 
2155 		pgstev = bits[i];
2156 		pgstev = pgstev << 24;
2157 		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2158 		set_pgste_bits(kvm->mm, hva, mask, pgstev);
2159 	}
2160 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2161 	mmap_read_unlock(kvm->mm);
2162 
2163 	if (!kvm->mm->context.uses_cmm) {
2164 		mmap_write_lock(kvm->mm);
2165 		kvm->mm->context.uses_cmm = 1;
2166 		mmap_write_unlock(kvm->mm);
2167 	}
2168 out:
2169 	vfree(bits);
2170 	return r;
2171 }
2172 
2173 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2174 {
2175 	struct kvm_vcpu *vcpu;
2176 	u16 rc, rrc;
2177 	int ret = 0;
2178 	unsigned long i;
2179 
2180 	/*
2181 	 * We ignore failures and try to destroy as many CPUs as possible.
2182 	 * At the same time we must not free the assigned resources when
2183 	 * this fails, as the ultravisor has still access to that memory.
2184 	 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2185 	 * behind.
2186 	 * We want to return the first failure rc and rrc, though.
2187 	 */
2188 	kvm_for_each_vcpu(i, vcpu, kvm) {
2189 		mutex_lock(&vcpu->mutex);
2190 		if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2191 			*rcp = rc;
2192 			*rrcp = rrc;
2193 			ret = -EIO;
2194 		}
2195 		mutex_unlock(&vcpu->mutex);
2196 	}
2197 	/* Ensure that we re-enable gisa if the non-PV guest used it but the PV guest did not. */
2198 	if (use_gisa)
2199 		kvm_s390_gisa_enable(kvm);
2200 	return ret;
2201 }
2202 
2203 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2204 {
2205 	unsigned long i;
2206 	int r = 0;
2207 	u16 dummy;
2208 
2209 	struct kvm_vcpu *vcpu;
2210 
2211 	/* Disable the GISA if the ultravisor does not support AIV. */
2212 	if (!test_bit_inv(BIT_UV_FEAT_AIV, &uv_info.uv_feature_indications))
2213 		kvm_s390_gisa_disable(kvm);
2214 
2215 	kvm_for_each_vcpu(i, vcpu, kvm) {
2216 		mutex_lock(&vcpu->mutex);
2217 		r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2218 		mutex_unlock(&vcpu->mutex);
2219 		if (r)
2220 			break;
2221 	}
2222 	if (r)
2223 		kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2224 	return r;
2225 }
2226 
2227 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2228 {
2229 	int r = 0;
2230 	u16 dummy;
2231 	void __user *argp = (void __user *)cmd->data;
2232 
2233 	switch (cmd->cmd) {
2234 	case KVM_PV_ENABLE: {
2235 		r = -EINVAL;
2236 		if (kvm_s390_pv_is_protected(kvm))
2237 			break;
2238 
2239 		/*
2240 		 *  FMT 4 SIE needs esca. As we never switch back to bsca from
2241 		 *  esca, we need no cleanup in the error cases below
2242 		 */
2243 		r = sca_switch_to_extended(kvm);
2244 		if (r)
2245 			break;
2246 
2247 		mmap_write_lock(current->mm);
2248 		r = gmap_mark_unmergeable();
2249 		mmap_write_unlock(current->mm);
2250 		if (r)
2251 			break;
2252 
2253 		r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2254 		if (r)
2255 			break;
2256 
2257 		r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2258 		if (r)
2259 			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2260 
2261 		/* we need to block service interrupts from now on */
2262 		set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2263 		break;
2264 	}
2265 	case KVM_PV_DISABLE: {
2266 		r = -EINVAL;
2267 		if (!kvm_s390_pv_is_protected(kvm))
2268 			break;
2269 
2270 		r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2271 		/*
2272 		 * If a CPU could not be destroyed, destroy VM will also fail.
2273 		 * There is no point in trying to destroy it. Instead return
2274 		 * the rc and rrc from the first CPU that failed destroying.
2275 		 */
2276 		if (r)
2277 			break;
2278 		r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2279 
2280 		/* no need to block service interrupts any more */
2281 		clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2282 		break;
2283 	}
2284 	case KVM_PV_SET_SEC_PARMS: {
2285 		struct kvm_s390_pv_sec_parm parms = {};
2286 		void *hdr;
2287 
2288 		r = -EINVAL;
2289 		if (!kvm_s390_pv_is_protected(kvm))
2290 			break;
2291 
2292 		r = -EFAULT;
2293 		if (copy_from_user(&parms, argp, sizeof(parms)))
2294 			break;
2295 
2296 		/* Currently restricted to 8KB */
2297 		r = -EINVAL;
2298 		if (parms.length > PAGE_SIZE * 2)
2299 			break;
2300 
2301 		r = -ENOMEM;
2302 		hdr = vmalloc(parms.length);
2303 		if (!hdr)
2304 			break;
2305 
2306 		r = -EFAULT;
2307 		if (!copy_from_user(hdr, (void __user *)parms.origin,
2308 				    parms.length))
2309 			r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2310 						      &cmd->rc, &cmd->rrc);
2311 
2312 		vfree(hdr);
2313 		break;
2314 	}
2315 	case KVM_PV_UNPACK: {
2316 		struct kvm_s390_pv_unp unp = {};
2317 
2318 		r = -EINVAL;
2319 		if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2320 			break;
2321 
2322 		r = -EFAULT;
2323 		if (copy_from_user(&unp, argp, sizeof(unp)))
2324 			break;
2325 
2326 		r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2327 				       &cmd->rc, &cmd->rrc);
2328 		break;
2329 	}
2330 	case KVM_PV_VERIFY: {
2331 		r = -EINVAL;
2332 		if (!kvm_s390_pv_is_protected(kvm))
2333 			break;
2334 
2335 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2336 				  UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2337 		KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2338 			     cmd->rrc);
2339 		break;
2340 	}
2341 	case KVM_PV_PREP_RESET: {
2342 		r = -EINVAL;
2343 		if (!kvm_s390_pv_is_protected(kvm))
2344 			break;
2345 
2346 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2347 				  UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2348 		KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2349 			     cmd->rc, cmd->rrc);
2350 		break;
2351 	}
2352 	case KVM_PV_UNSHARE_ALL: {
2353 		r = -EINVAL;
2354 		if (!kvm_s390_pv_is_protected(kvm))
2355 			break;
2356 
2357 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2358 				  UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2359 		KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2360 			     cmd->rc, cmd->rrc);
2361 		break;
2362 	}
2363 	default:
2364 		r = -ENOTTY;
2365 	}
2366 	return r;
2367 }
2368 
2369 static bool access_key_invalid(u8 access_key)
2370 {
2371 	return access_key > 0xf;
2372 }
2373 
2374 static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
2375 {
2376 	void __user *uaddr = (void __user *)mop->buf;
2377 	u64 supported_flags;
2378 	void *tmpbuf = NULL;
2379 	int r, srcu_idx;
2380 
2381 	supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION
2382 			  | KVM_S390_MEMOP_F_CHECK_ONLY;
2383 	if (mop->flags & ~supported_flags || !mop->size)
2384 		return -EINVAL;
2385 	if (mop->size > MEM_OP_MAX_SIZE)
2386 		return -E2BIG;
2387 	/*
2388 	 * This is technically a heuristic only, if the kvm->lock is not
2389 	 * taken, it is not guaranteed that the vm is/remains non-protected.
2390 	 * This is ok from a kernel perspective, wrongdoing is detected
2391 	 * on the access, -EFAULT is returned and the vm may crash the
2392 	 * next time it accesses the memory in question.
2393 	 * There is no sane usecase to do switching and a memop on two
2394 	 * different CPUs at the same time.
2395 	 */
2396 	if (kvm_s390_pv_get_handle(kvm))
2397 		return -EINVAL;
2398 	if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
2399 		if (access_key_invalid(mop->key))
2400 			return -EINVAL;
2401 	} else {
2402 		mop->key = 0;
2403 	}
2404 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2405 		tmpbuf = vmalloc(mop->size);
2406 		if (!tmpbuf)
2407 			return -ENOMEM;
2408 	}
2409 
2410 	srcu_idx = srcu_read_lock(&kvm->srcu);
2411 
2412 	if (kvm_is_error_gpa(kvm, mop->gaddr)) {
2413 		r = PGM_ADDRESSING;
2414 		goto out_unlock;
2415 	}
2416 
2417 	switch (mop->op) {
2418 	case KVM_S390_MEMOP_ABSOLUTE_READ: {
2419 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2420 			r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_FETCH, mop->key);
2421 		} else {
2422 			r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
2423 						      mop->size, GACC_FETCH, mop->key);
2424 			if (r == 0) {
2425 				if (copy_to_user(uaddr, tmpbuf, mop->size))
2426 					r = -EFAULT;
2427 			}
2428 		}
2429 		break;
2430 	}
2431 	case KVM_S390_MEMOP_ABSOLUTE_WRITE: {
2432 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2433 			r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_STORE, mop->key);
2434 		} else {
2435 			if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2436 				r = -EFAULT;
2437 				break;
2438 			}
2439 			r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
2440 						      mop->size, GACC_STORE, mop->key);
2441 		}
2442 		break;
2443 	}
2444 	default:
2445 		r = -EINVAL;
2446 	}
2447 
2448 out_unlock:
2449 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2450 
2451 	vfree(tmpbuf);
2452 	return r;
2453 }
2454 
2455 long kvm_arch_vm_ioctl(struct file *filp,
2456 		       unsigned int ioctl, unsigned long arg)
2457 {
2458 	struct kvm *kvm = filp->private_data;
2459 	void __user *argp = (void __user *)arg;
2460 	struct kvm_device_attr attr;
2461 	int r;
2462 
2463 	switch (ioctl) {
2464 	case KVM_S390_INTERRUPT: {
2465 		struct kvm_s390_interrupt s390int;
2466 
2467 		r = -EFAULT;
2468 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
2469 			break;
2470 		r = kvm_s390_inject_vm(kvm, &s390int);
2471 		break;
2472 	}
2473 	case KVM_CREATE_IRQCHIP: {
2474 		struct kvm_irq_routing_entry routing;
2475 
2476 		r = -EINVAL;
2477 		if (kvm->arch.use_irqchip) {
2478 			/* Set up dummy routing. */
2479 			memset(&routing, 0, sizeof(routing));
2480 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2481 		}
2482 		break;
2483 	}
2484 	case KVM_SET_DEVICE_ATTR: {
2485 		r = -EFAULT;
2486 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2487 			break;
2488 		r = kvm_s390_vm_set_attr(kvm, &attr);
2489 		break;
2490 	}
2491 	case KVM_GET_DEVICE_ATTR: {
2492 		r = -EFAULT;
2493 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2494 			break;
2495 		r = kvm_s390_vm_get_attr(kvm, &attr);
2496 		break;
2497 	}
2498 	case KVM_HAS_DEVICE_ATTR: {
2499 		r = -EFAULT;
2500 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2501 			break;
2502 		r = kvm_s390_vm_has_attr(kvm, &attr);
2503 		break;
2504 	}
2505 	case KVM_S390_GET_SKEYS: {
2506 		struct kvm_s390_skeys args;
2507 
2508 		r = -EFAULT;
2509 		if (copy_from_user(&args, argp,
2510 				   sizeof(struct kvm_s390_skeys)))
2511 			break;
2512 		r = kvm_s390_get_skeys(kvm, &args);
2513 		break;
2514 	}
2515 	case KVM_S390_SET_SKEYS: {
2516 		struct kvm_s390_skeys args;
2517 
2518 		r = -EFAULT;
2519 		if (copy_from_user(&args, argp,
2520 				   sizeof(struct kvm_s390_skeys)))
2521 			break;
2522 		r = kvm_s390_set_skeys(kvm, &args);
2523 		break;
2524 	}
2525 	case KVM_S390_GET_CMMA_BITS: {
2526 		struct kvm_s390_cmma_log args;
2527 
2528 		r = -EFAULT;
2529 		if (copy_from_user(&args, argp, sizeof(args)))
2530 			break;
2531 		mutex_lock(&kvm->slots_lock);
2532 		r = kvm_s390_get_cmma_bits(kvm, &args);
2533 		mutex_unlock(&kvm->slots_lock);
2534 		if (!r) {
2535 			r = copy_to_user(argp, &args, sizeof(args));
2536 			if (r)
2537 				r = -EFAULT;
2538 		}
2539 		break;
2540 	}
2541 	case KVM_S390_SET_CMMA_BITS: {
2542 		struct kvm_s390_cmma_log args;
2543 
2544 		r = -EFAULT;
2545 		if (copy_from_user(&args, argp, sizeof(args)))
2546 			break;
2547 		mutex_lock(&kvm->slots_lock);
2548 		r = kvm_s390_set_cmma_bits(kvm, &args);
2549 		mutex_unlock(&kvm->slots_lock);
2550 		break;
2551 	}
2552 	case KVM_S390_PV_COMMAND: {
2553 		struct kvm_pv_cmd args;
2554 
2555 		/* protvirt means user cpu state */
2556 		kvm_s390_set_user_cpu_state_ctrl(kvm);
2557 		r = 0;
2558 		if (!is_prot_virt_host()) {
2559 			r = -EINVAL;
2560 			break;
2561 		}
2562 		if (copy_from_user(&args, argp, sizeof(args))) {
2563 			r = -EFAULT;
2564 			break;
2565 		}
2566 		if (args.flags) {
2567 			r = -EINVAL;
2568 			break;
2569 		}
2570 		mutex_lock(&kvm->lock);
2571 		r = kvm_s390_handle_pv(kvm, &args);
2572 		mutex_unlock(&kvm->lock);
2573 		if (copy_to_user(argp, &args, sizeof(args))) {
2574 			r = -EFAULT;
2575 			break;
2576 		}
2577 		break;
2578 	}
2579 	case KVM_S390_MEM_OP: {
2580 		struct kvm_s390_mem_op mem_op;
2581 
2582 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
2583 			r = kvm_s390_vm_mem_op(kvm, &mem_op);
2584 		else
2585 			r = -EFAULT;
2586 		break;
2587 	}
2588 	default:
2589 		r = -ENOTTY;
2590 	}
2591 
2592 	return r;
2593 }
2594 
2595 static int kvm_s390_apxa_installed(void)
2596 {
2597 	struct ap_config_info info;
2598 
2599 	if (ap_instructions_available()) {
2600 		if (ap_qci(&info) == 0)
2601 			return info.apxa;
2602 	}
2603 
2604 	return 0;
2605 }
2606 
2607 /*
2608  * The format of the crypto control block (CRYCB) is specified in the 3 low
2609  * order bits of the CRYCB designation (CRYCBD) field as follows:
2610  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2611  *	     AP extended addressing (APXA) facility are installed.
2612  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2613  * Format 2: Both the APXA and MSAX3 facilities are installed
2614  */
2615 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2616 {
2617 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2618 
2619 	/* Clear the CRYCB format bits - i.e., set format 0 by default */
2620 	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2621 
2622 	/* Check whether MSAX3 is installed */
2623 	if (!test_kvm_facility(kvm, 76))
2624 		return;
2625 
2626 	if (kvm_s390_apxa_installed())
2627 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2628 	else
2629 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2630 }
2631 
2632 /*
2633  * kvm_arch_crypto_set_masks
2634  *
2635  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2636  *	 to be set.
2637  * @apm: the mask identifying the accessible AP adapters
2638  * @aqm: the mask identifying the accessible AP domains
2639  * @adm: the mask identifying the accessible AP control domains
2640  *
2641  * Set the masks that identify the adapters, domains and control domains to
2642  * which the KVM guest is granted access.
2643  *
2644  * Note: The kvm->lock mutex must be locked by the caller before invoking this
2645  *	 function.
2646  */
2647 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2648 			       unsigned long *aqm, unsigned long *adm)
2649 {
2650 	struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2651 
2652 	kvm_s390_vcpu_block_all(kvm);
2653 
2654 	switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2655 	case CRYCB_FORMAT2: /* APCB1 use 256 bits */
2656 		memcpy(crycb->apcb1.apm, apm, 32);
2657 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2658 			 apm[0], apm[1], apm[2], apm[3]);
2659 		memcpy(crycb->apcb1.aqm, aqm, 32);
2660 		VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2661 			 aqm[0], aqm[1], aqm[2], aqm[3]);
2662 		memcpy(crycb->apcb1.adm, adm, 32);
2663 		VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2664 			 adm[0], adm[1], adm[2], adm[3]);
2665 		break;
2666 	case CRYCB_FORMAT1:
2667 	case CRYCB_FORMAT0: /* Fall through both use APCB0 */
2668 		memcpy(crycb->apcb0.apm, apm, 8);
2669 		memcpy(crycb->apcb0.aqm, aqm, 2);
2670 		memcpy(crycb->apcb0.adm, adm, 2);
2671 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2672 			 apm[0], *((unsigned short *)aqm),
2673 			 *((unsigned short *)adm));
2674 		break;
2675 	default:	/* Can not happen */
2676 		break;
2677 	}
2678 
2679 	/* recreate the shadow crycb for each vcpu */
2680 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2681 	kvm_s390_vcpu_unblock_all(kvm);
2682 }
2683 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2684 
2685 /*
2686  * kvm_arch_crypto_clear_masks
2687  *
2688  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2689  *	 to be cleared.
2690  *
2691  * Clear the masks that identify the adapters, domains and control domains to
2692  * which the KVM guest is granted access.
2693  *
2694  * Note: The kvm->lock mutex must be locked by the caller before invoking this
2695  *	 function.
2696  */
2697 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2698 {
2699 	kvm_s390_vcpu_block_all(kvm);
2700 
2701 	memset(&kvm->arch.crypto.crycb->apcb0, 0,
2702 	       sizeof(kvm->arch.crypto.crycb->apcb0));
2703 	memset(&kvm->arch.crypto.crycb->apcb1, 0,
2704 	       sizeof(kvm->arch.crypto.crycb->apcb1));
2705 
2706 	VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2707 	/* recreate the shadow crycb for each vcpu */
2708 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2709 	kvm_s390_vcpu_unblock_all(kvm);
2710 }
2711 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2712 
2713 static u64 kvm_s390_get_initial_cpuid(void)
2714 {
2715 	struct cpuid cpuid;
2716 
2717 	get_cpu_id(&cpuid);
2718 	cpuid.version = 0xff;
2719 	return *((u64 *) &cpuid);
2720 }
2721 
2722 static void kvm_s390_crypto_init(struct kvm *kvm)
2723 {
2724 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2725 	kvm_s390_set_crycb_format(kvm);
2726 	init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);
2727 
2728 	if (!test_kvm_facility(kvm, 76))
2729 		return;
2730 
2731 	/* Enable AES/DEA protected key functions by default */
2732 	kvm->arch.crypto.aes_kw = 1;
2733 	kvm->arch.crypto.dea_kw = 1;
2734 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2735 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2736 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2737 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2738 }
2739 
2740 static void sca_dispose(struct kvm *kvm)
2741 {
2742 	if (kvm->arch.use_esca)
2743 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2744 	else
2745 		free_page((unsigned long)(kvm->arch.sca));
2746 	kvm->arch.sca = NULL;
2747 }
2748 
2749 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2750 {
2751 	gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
2752 	int i, rc;
2753 	char debug_name[16];
2754 	static unsigned long sca_offset;
2755 
2756 	rc = -EINVAL;
2757 #ifdef CONFIG_KVM_S390_UCONTROL
2758 	if (type & ~KVM_VM_S390_UCONTROL)
2759 		goto out_err;
2760 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2761 		goto out_err;
2762 #else
2763 	if (type)
2764 		goto out_err;
2765 #endif
2766 
2767 	rc = s390_enable_sie();
2768 	if (rc)
2769 		goto out_err;
2770 
2771 	rc = -ENOMEM;
2772 
2773 	if (!sclp.has_64bscao)
2774 		alloc_flags |= GFP_DMA;
2775 	rwlock_init(&kvm->arch.sca_lock);
2776 	/* start with basic SCA */
2777 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2778 	if (!kvm->arch.sca)
2779 		goto out_err;
2780 	mutex_lock(&kvm_lock);
2781 	sca_offset += 16;
2782 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2783 		sca_offset = 0;
2784 	kvm->arch.sca = (struct bsca_block *)
2785 			((char *) kvm->arch.sca + sca_offset);
2786 	mutex_unlock(&kvm_lock);
2787 
2788 	sprintf(debug_name, "kvm-%u", current->pid);
2789 
2790 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2791 	if (!kvm->arch.dbf)
2792 		goto out_err;
2793 
2794 	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2795 	kvm->arch.sie_page2 =
2796 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
2797 	if (!kvm->arch.sie_page2)
2798 		goto out_err;
2799 
2800 	kvm->arch.sie_page2->kvm = kvm;
2801 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2802 
2803 	for (i = 0; i < kvm_s390_fac_size(); i++) {
2804 		kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
2805 					      (kvm_s390_fac_base[i] |
2806 					       kvm_s390_fac_ext[i]);
2807 		kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
2808 					      kvm_s390_fac_base[i];
2809 	}
2810 	kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2811 
2812 	/* we are always in czam mode - even on pre z14 machines */
2813 	set_kvm_facility(kvm->arch.model.fac_mask, 138);
2814 	set_kvm_facility(kvm->arch.model.fac_list, 138);
2815 	/* we emulate STHYI in kvm */
2816 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
2817 	set_kvm_facility(kvm->arch.model.fac_list, 74);
2818 	if (MACHINE_HAS_TLB_GUEST) {
2819 		set_kvm_facility(kvm->arch.model.fac_mask, 147);
2820 		set_kvm_facility(kvm->arch.model.fac_list, 147);
2821 	}
2822 
2823 	if (css_general_characteristics.aiv && test_facility(65))
2824 		set_kvm_facility(kvm->arch.model.fac_mask, 65);
2825 
2826 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2827 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2828 
2829 	kvm_s390_crypto_init(kvm);
2830 
2831 	mutex_init(&kvm->arch.float_int.ais_lock);
2832 	spin_lock_init(&kvm->arch.float_int.lock);
2833 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
2834 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2835 	init_waitqueue_head(&kvm->arch.ipte_wq);
2836 	mutex_init(&kvm->arch.ipte_mutex);
2837 
2838 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2839 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
2840 
2841 	if (type & KVM_VM_S390_UCONTROL) {
2842 		kvm->arch.gmap = NULL;
2843 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2844 	} else {
2845 		if (sclp.hamax == U64_MAX)
2846 			kvm->arch.mem_limit = TASK_SIZE_MAX;
2847 		else
2848 			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2849 						    sclp.hamax + 1);
2850 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2851 		if (!kvm->arch.gmap)
2852 			goto out_err;
2853 		kvm->arch.gmap->private = kvm;
2854 		kvm->arch.gmap->pfault_enabled = 0;
2855 	}
2856 
2857 	kvm->arch.use_pfmfi = sclp.has_pfmfi;
2858 	kvm->arch.use_skf = sclp.has_skey;
2859 	spin_lock_init(&kvm->arch.start_stop_lock);
2860 	kvm_s390_vsie_init(kvm);
2861 	if (use_gisa)
2862 		kvm_s390_gisa_init(kvm);
2863 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2864 
2865 	return 0;
2866 out_err:
2867 	free_page((unsigned long)kvm->arch.sie_page2);
2868 	debug_unregister(kvm->arch.dbf);
2869 	sca_dispose(kvm);
2870 	KVM_EVENT(3, "creation of vm failed: %d", rc);
2871 	return rc;
2872 }
2873 
2874 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2875 {
2876 	u16 rc, rrc;
2877 
2878 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2879 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2880 	kvm_s390_clear_local_irqs(vcpu);
2881 	kvm_clear_async_pf_completion_queue(vcpu);
2882 	if (!kvm_is_ucontrol(vcpu->kvm))
2883 		sca_del_vcpu(vcpu);
2884 
2885 	if (kvm_is_ucontrol(vcpu->kvm))
2886 		gmap_remove(vcpu->arch.gmap);
2887 
2888 	if (vcpu->kvm->arch.use_cmma)
2889 		kvm_s390_vcpu_unsetup_cmma(vcpu);
2890 	/* We can not hold the vcpu mutex here, we are already dying */
2891 	if (kvm_s390_pv_cpu_get_handle(vcpu))
2892 		kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2893 	free_page((unsigned long)(vcpu->arch.sie_block));
2894 }
2895 
2896 void kvm_arch_destroy_vm(struct kvm *kvm)
2897 {
2898 	u16 rc, rrc;
2899 
2900 	kvm_destroy_vcpus(kvm);
2901 	sca_dispose(kvm);
2902 	kvm_s390_gisa_destroy(kvm);
2903 	/*
2904 	 * We are already at the end of life and kvm->lock is not taken.
2905 	 * This is ok as the file descriptor is closed by now and nobody
2906 	 * can mess with the pv state. To avoid lockdep_assert_held from
2907 	 * complaining we do not use kvm_s390_pv_is_protected.
2908 	 */
2909 	if (kvm_s390_pv_get_handle(kvm))
2910 		kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2911 	debug_unregister(kvm->arch.dbf);
2912 	free_page((unsigned long)kvm->arch.sie_page2);
2913 	if (!kvm_is_ucontrol(kvm))
2914 		gmap_remove(kvm->arch.gmap);
2915 	kvm_s390_destroy_adapters(kvm);
2916 	kvm_s390_clear_float_irqs(kvm);
2917 	kvm_s390_vsie_destroy(kvm);
2918 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2919 }
2920 
2921 /* Section: vcpu related */
2922 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2923 {
2924 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2925 	if (!vcpu->arch.gmap)
2926 		return -ENOMEM;
2927 	vcpu->arch.gmap->private = vcpu->kvm;
2928 
2929 	return 0;
2930 }
2931 
2932 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2933 {
2934 	if (!kvm_s390_use_sca_entries())
2935 		return;
2936 	read_lock(&vcpu->kvm->arch.sca_lock);
2937 	if (vcpu->kvm->arch.use_esca) {
2938 		struct esca_block *sca = vcpu->kvm->arch.sca;
2939 
2940 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2941 		sca->cpu[vcpu->vcpu_id].sda = 0;
2942 	} else {
2943 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2944 
2945 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2946 		sca->cpu[vcpu->vcpu_id].sda = 0;
2947 	}
2948 	read_unlock(&vcpu->kvm->arch.sca_lock);
2949 }
2950 
2951 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2952 {
2953 	if (!kvm_s390_use_sca_entries()) {
2954 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2955 
2956 		/* we still need the basic sca for the ipte control */
2957 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2958 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2959 		return;
2960 	}
2961 	read_lock(&vcpu->kvm->arch.sca_lock);
2962 	if (vcpu->kvm->arch.use_esca) {
2963 		struct esca_block *sca = vcpu->kvm->arch.sca;
2964 
2965 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2966 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2967 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2968 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2969 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2970 	} else {
2971 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2972 
2973 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2974 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2975 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2976 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2977 	}
2978 	read_unlock(&vcpu->kvm->arch.sca_lock);
2979 }
2980 
2981 /* Basic SCA to Extended SCA data copy routines */
2982 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2983 {
2984 	d->sda = s->sda;
2985 	d->sigp_ctrl.c = s->sigp_ctrl.c;
2986 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2987 }
2988 
2989 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2990 {
2991 	int i;
2992 
2993 	d->ipte_control = s->ipte_control;
2994 	d->mcn[0] = s->mcn;
2995 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2996 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2997 }
2998 
/*
 * Replace the VM's basic SCA by a freshly allocated extended SCA.
 *
 * Returns 0 if the VM already uses (or now uses) the extended SCA and
 * -ENOMEM if the new block could not be allocated.
 */
static int sca_switch_to_extended(struct kvm *kvm)
{
	struct bsca_block *old_sca = kvm->arch.sca;
	struct esca_block *new_sca;
	struct kvm_vcpu *vcpu;
	unsigned long vcpu_idx;
	u32 scaol, scaoh;

	/* nothing to do if the switch already happened */
	if (kvm->arch.use_esca)
		return 0;

	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
	if (!new_sca)
		return -ENOMEM;

	/* high and low half of the new SCA origin as stored in the SIE blocks */
	scaoh = (u32)((u64)(new_sca) >> 32);
	scaol = (u32)(u64)(new_sca) & ~0x3fU;

	/* block all vcpus and take the SCA lock for writing while switching */
	kvm_s390_vcpu_block_all(kvm);
	write_lock(&kvm->arch.sca_lock);

	sca_copy_b_to_e(new_sca, old_sca);

	/* point every existing vcpu at the new block and flag it as ESCA */
	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
		vcpu->arch.sie_block->scaoh = scaoh;
		vcpu->arch.sie_block->scaol = scaol;
		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
	}
	kvm->arch.sca = new_sca;
	kvm->arch.use_esca = 1;

	write_unlock(&kvm->arch.sca_lock);
	kvm_s390_vcpu_unblock_all(kvm);

	/* the basic SCA is no longer referenced and is released as one page */
	free_page((unsigned long)old_sca);

	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
		 old_sca, kvm->arch.sca);
	return 0;
}
3039 
3040 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
3041 {
3042 	int rc;
3043 
3044 	if (!kvm_s390_use_sca_entries()) {
3045 		if (id < KVM_MAX_VCPUS)
3046 			return true;
3047 		return false;
3048 	}
3049 	if (id < KVM_S390_BSCA_CPU_SLOTS)
3050 		return true;
3051 	if (!sclp.has_esca || !sclp.has_64bscao)
3052 		return false;
3053 
3054 	mutex_lock(&kvm->lock);
3055 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
3056 	mutex_unlock(&kvm->lock);
3057 
3058 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
3059 }
3060 
3061 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3062 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3063 {
3064 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
3065 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3066 	vcpu->arch.cputm_start = get_tod_clock_fast();
3067 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3068 }
3069 
3070 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3071 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3072 {
3073 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
3074 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3075 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3076 	vcpu->arch.cputm_start = 0;
3077 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3078 }
3079 
3080 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3081 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3082 {
3083 	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
3084 	vcpu->arch.cputm_enabled = true;
3085 	__start_cpu_timer_accounting(vcpu);
3086 }
3087 
3088 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3089 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3090 {
3091 	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
3092 	__stop_cpu_timer_accounting(vcpu);
3093 	vcpu->arch.cputm_enabled = false;
3094 }
3095 
/* Preemption-safe wrapper around __enable_cpu_timer_accounting(). */
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
3102 
/* Preemption-safe wrapper around __disable_cpu_timer_accounting(). */
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
3109 
3110 /* set the cpu timer - may only be called from the VCPU thread itself */
3111 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3112 {
3113 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3114 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3115 	if (vcpu->arch.cputm_enabled)
3116 		vcpu->arch.cputm_start = get_tod_clock_fast();
3117 	vcpu->arch.sie_block->cputm = cputm;
3118 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3119 	preempt_enable();
3120 }
3121 
/*
 * Get the current cpu timer value of a vcpu - can also be called from other
 * VCPU threads.
 *
 * With accounting disabled the value stored in the SIE block is returned
 * as is. Otherwise the stored value is reduced by the TOD time elapsed
 * since the open accounting interval started; the read is repeated until
 * the seqcount shows that no writer interfered.
 */
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
{
	unsigned int seq;
	__u64 value;

	if (unlikely(!vcpu->arch.cputm_enabled))
		return vcpu->arch.sie_block->cputm;

	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	do {
		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
		/*
		 * If the writer would ever execute a read in the critical
		 * section, e.g. in irq context, we have a deadlock.
		 */
		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
		value = vcpu->arch.sie_block->cputm;
		/* if cputm_start is 0, accounting is being started/stopped */
		if (likely(vcpu->arch.cputm_start))
			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
		/*
		 * The retry check uses the sequence with the low bit cleared, so
		 * a snapshot taken while a write was in progress (odd sequence)
		 * never matches and the loop runs again.
		 */
	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
	preempt_enable();
	return value;
}
3147 
3148 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3149 {
3150 
3151 	gmap_enable(vcpu->arch.enabled_gmap);
3152 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3153 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3154 		__start_cpu_timer_accounting(vcpu);
3155 	vcpu->cpu = cpu;
3156 }
3157 
3158 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3159 {
3160 	vcpu->cpu = -1;
3161 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3162 		__stop_cpu_timer_accounting(vcpu);
3163 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3164 	vcpu->arch.enabled_gmap = gmap_get_enabled();
3165 	gmap_disable(vcpu->arch.enabled_gmap);
3166 
3167 }
3168 
3169 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3170 {
3171 	mutex_lock(&vcpu->kvm->lock);
3172 	preempt_disable();
3173 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3174 	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3175 	preempt_enable();
3176 	mutex_unlock(&vcpu->kvm->lock);
3177 	if (!kvm_is_ucontrol(vcpu->kvm)) {
3178 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3179 		sca_add_vcpu(vcpu);
3180 	}
3181 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3182 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3183 	/* make vcpu_load load the right gmap on the first trigger */
3184 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3185 }
3186 
3187 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3188 {
3189 	if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3190 	    test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3191 		return true;
3192 	return false;
3193 }
3194 
3195 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3196 {
3197 	/* At least one ECC subfunction must be present */
3198 	return kvm_has_pckmo_subfunc(kvm, 32) ||
3199 	       kvm_has_pckmo_subfunc(kvm, 33) ||
3200 	       kvm_has_pckmo_subfunc(kvm, 34) ||
3201 	       kvm_has_pckmo_subfunc(kvm, 40) ||
3202 	       kvm_has_pckmo_subfunc(kvm, 41);
3203 
3204 }
3205 
3206 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3207 {
3208 	/*
3209 	 * If the AP instructions are not being interpreted and the MSAX3
3210 	 * facility is not configured for the guest, there is nothing to set up.
3211 	 */
3212 	if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3213 		return;
3214 
3215 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3216 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3217 	vcpu->arch.sie_block->eca &= ~ECA_APIE;
3218 	vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3219 
3220 	if (vcpu->kvm->arch.crypto.apie)
3221 		vcpu->arch.sie_block->eca |= ECA_APIE;
3222 
3223 	/* Set up protected key support */
3224 	if (vcpu->kvm->arch.crypto.aes_kw) {
3225 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3226 		/* ecc is also wrapped with AES key */
3227 		if (kvm_has_pckmo_ecc(vcpu->kvm))
3228 			vcpu->arch.sie_block->ecd |= ECD_ECC;
3229 	}
3230 
3231 	if (vcpu->kvm->arch.crypto.dea_kw)
3232 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3233 }
3234 
3235 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3236 {
3237 	free_page(vcpu->arch.sie_block->cbrlo);
3238 	vcpu->arch.sie_block->cbrlo = 0;
3239 }
3240 
3241 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3242 {
3243 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
3244 	if (!vcpu->arch.sie_block->cbrlo)
3245 		return -ENOMEM;
3246 	return 0;
3247 }
3248 
3249 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3250 {
3251 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3252 
3253 	vcpu->arch.sie_block->ibc = model->ibc;
3254 	if (test_kvm_facility(vcpu->kvm, 7))
3255 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3256 }
3257 
3258 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3259 {
3260 	int rc = 0;
3261 	u16 uvrc, uvrrc;
3262 
3263 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3264 						    CPUSTAT_SM |
3265 						    CPUSTAT_STOPPED);
3266 
3267 	if (test_kvm_facility(vcpu->kvm, 78))
3268 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3269 	else if (test_kvm_facility(vcpu->kvm, 8))
3270 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3271 
3272 	kvm_s390_vcpu_setup_model(vcpu);
3273 
3274 	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3275 	if (MACHINE_HAS_ESOP)
3276 		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3277 	if (test_kvm_facility(vcpu->kvm, 9))
3278 		vcpu->arch.sie_block->ecb |= ECB_SRSI;
3279 	if (test_kvm_facility(vcpu->kvm, 73))
3280 		vcpu->arch.sie_block->ecb |= ECB_TE;
3281 	if (!kvm_is_ucontrol(vcpu->kvm))
3282 		vcpu->arch.sie_block->ecb |= ECB_SPECI;
3283 
3284 	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3285 		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3286 	if (test_kvm_facility(vcpu->kvm, 130))
3287 		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3288 	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3289 	if (sclp.has_cei)
3290 		vcpu->arch.sie_block->eca |= ECA_CEI;
3291 	if (sclp.has_ib)
3292 		vcpu->arch.sie_block->eca |= ECA_IB;
3293 	if (sclp.has_siif)
3294 		vcpu->arch.sie_block->eca |= ECA_SII;
3295 	if (sclp.has_sigpif)
3296 		vcpu->arch.sie_block->eca |= ECA_SIGPI;
3297 	if (test_kvm_facility(vcpu->kvm, 129)) {
3298 		vcpu->arch.sie_block->eca |= ECA_VX;
3299 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3300 	}
3301 	if (test_kvm_facility(vcpu->kvm, 139))
3302 		vcpu->arch.sie_block->ecd |= ECD_MEF;
3303 	if (test_kvm_facility(vcpu->kvm, 156))
3304 		vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3305 	if (vcpu->arch.sie_block->gd) {
3306 		vcpu->arch.sie_block->eca |= ECA_AIV;
3307 		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3308 			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3309 	}
3310 	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3311 					| SDNXC;
3312 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3313 
3314 	if (sclp.has_kss)
3315 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3316 	else
3317 		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3318 
3319 	if (vcpu->kvm->arch.use_cmma) {
3320 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
3321 		if (rc)
3322 			return rc;
3323 	}
3324 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3325 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3326 
3327 	vcpu->arch.sie_block->hpid = HPID_KVM;
3328 
3329 	kvm_s390_vcpu_crypto_setup(vcpu);
3330 
3331 	mutex_lock(&vcpu->kvm->lock);
3332 	if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3333 		rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3334 		if (rc)
3335 			kvm_s390_vcpu_unsetup_cmma(vcpu);
3336 	}
3337 	mutex_unlock(&vcpu->kvm->lock);
3338 
3339 	return rc;
3340 }
3341 
3342 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3343 {
3344 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3345 		return -EINVAL;
3346 	return 0;
3347 }
3348 
3349 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3350 {
3351 	struct sie_page *sie_page;
3352 	int rc;
3353 
3354 	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3355 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3356 	if (!sie_page)
3357 		return -ENOMEM;
3358 
3359 	vcpu->arch.sie_block = &sie_page->sie_block;
3360 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3361 
3362 	/* the real guest size will always be smaller than msl */
3363 	vcpu->arch.sie_block->mso = 0;
3364 	vcpu->arch.sie_block->msl = sclp.hamax;
3365 
3366 	vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3367 	spin_lock_init(&vcpu->arch.local_int.lock);
3368 	vcpu->arch.sie_block->gd = kvm_s390_get_gisa_desc(vcpu->kvm);
3369 	seqcount_init(&vcpu->arch.cputm_seqcount);
3370 
3371 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3372 	kvm_clear_async_pf_completion_queue(vcpu);
3373 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3374 				    KVM_SYNC_GPRS |
3375 				    KVM_SYNC_ACRS |
3376 				    KVM_SYNC_CRS |
3377 				    KVM_SYNC_ARCH0 |
3378 				    KVM_SYNC_PFAULT |
3379 				    KVM_SYNC_DIAG318;
3380 	kvm_s390_set_prefix(vcpu, 0);
3381 	if (test_kvm_facility(vcpu->kvm, 64))
3382 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3383 	if (test_kvm_facility(vcpu->kvm, 82))
3384 		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3385 	if (test_kvm_facility(vcpu->kvm, 133))
3386 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3387 	if (test_kvm_facility(vcpu->kvm, 156))
3388 		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3389 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
3390 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3391 	 */
3392 	if (MACHINE_HAS_VX)
3393 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3394 	else
3395 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3396 
3397 	if (kvm_is_ucontrol(vcpu->kvm)) {
3398 		rc = __kvm_ucontrol_vcpu_init(vcpu);
3399 		if (rc)
3400 			goto out_free_sie_block;
3401 	}
3402 
3403 	VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3404 		 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3405 	trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3406 
3407 	rc = kvm_s390_vcpu_setup(vcpu);
3408 	if (rc)
3409 		goto out_ucontrol_uninit;
3410 	return 0;
3411 
3412 out_ucontrol_uninit:
3413 	if (kvm_is_ucontrol(vcpu->kvm))
3414 		gmap_remove(vcpu->arch.gmap);
3415 out_free_sie_block:
3416 	free_page((unsigned long)(vcpu->arch.sie_block));
3417 	return rc;
3418 }
3419 
3420 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3421 {
3422 	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
3423 	return kvm_s390_vcpu_has_irq(vcpu, 0);
3424 }
3425 
3426 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3427 {
3428 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3429 }
3430 
3431 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3432 {
3433 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3434 	exit_sie(vcpu);
3435 }
3436 
3437 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3438 {
3439 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3440 }
3441 
3442 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3443 {
3444 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3445 	exit_sie(vcpu);
3446 }
3447 
3448 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3449 {
3450 	return atomic_read(&vcpu->arch.sie_block->prog20) &
3451 	       (PROG_BLOCK_SIE | PROG_REQUEST);
3452 }
3453 
3454 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3455 {
3456 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3457 }
3458 
/*
 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 */
void exit_sie(struct kvm_vcpu *vcpu)
{
	/* request a stop interrupt and kick a possibly running vsie */
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
	kvm_s390_vsie_kick(vcpu);
	/* busy-wait until PROG_IN_SIE is no longer set in prog0c */
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}
3470 
/*
 * Post request @req for the vcpu and kick it out of SIE so that the
 * request is processed synchronously.
 */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	/* the request must be visible before the vcpu is kicked */
	__kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
3477 
3478 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3479 			      unsigned long end)
3480 {
3481 	struct kvm *kvm = gmap->private;
3482 	struct kvm_vcpu *vcpu;
3483 	unsigned long prefix;
3484 	unsigned long i;
3485 
3486 	if (gmap_is_shadow(gmap))
3487 		return;
3488 	if (start >= 1UL << 31)
3489 		/* We are only interested in prefix pages */
3490 		return;
3491 	kvm_for_each_vcpu(i, vcpu, kvm) {
3492 		/* match against both prefix pages */
3493 		prefix = kvm_s390_get_prefix(vcpu);
3494 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3495 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3496 				   start, end);
3497 			kvm_s390_sync_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
3498 		}
3499 	}
3500 }
3501 
3502 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3503 {
3504 	/* do not poll with more than halt_poll_max_steal percent of steal time */
3505 	if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3506 	    READ_ONCE(halt_poll_max_steal)) {
3507 		vcpu->stat.halt_no_poll_steal++;
3508 		return true;
3509 	}
3510 	return false;
3511 }
3512 
/*
 * Only exists because kvm common code refers to it; it is never called,
 * so ending up here is a bug.
 */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	BUG();
	return 0;
}
3519 
3520 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3521 					   struct kvm_one_reg *reg)
3522 {
3523 	int r = -EINVAL;
3524 
3525 	switch (reg->id) {
3526 	case KVM_REG_S390_TODPR:
3527 		r = put_user(vcpu->arch.sie_block->todpr,
3528 			     (u32 __user *)reg->addr);
3529 		break;
3530 	case KVM_REG_S390_EPOCHDIFF:
3531 		r = put_user(vcpu->arch.sie_block->epoch,
3532 			     (u64 __user *)reg->addr);
3533 		break;
3534 	case KVM_REG_S390_CPU_TIMER:
3535 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
3536 			     (u64 __user *)reg->addr);
3537 		break;
3538 	case KVM_REG_S390_CLOCK_COMP:
3539 		r = put_user(vcpu->arch.sie_block->ckc,
3540 			     (u64 __user *)reg->addr);
3541 		break;
3542 	case KVM_REG_S390_PFTOKEN:
3543 		r = put_user(vcpu->arch.pfault_token,
3544 			     (u64 __user *)reg->addr);
3545 		break;
3546 	case KVM_REG_S390_PFCOMPARE:
3547 		r = put_user(vcpu->arch.pfault_compare,
3548 			     (u64 __user *)reg->addr);
3549 		break;
3550 	case KVM_REG_S390_PFSELECT:
3551 		r = put_user(vcpu->arch.pfault_select,
3552 			     (u64 __user *)reg->addr);
3553 		break;
3554 	case KVM_REG_S390_PP:
3555 		r = put_user(vcpu->arch.sie_block->pp,
3556 			     (u64 __user *)reg->addr);
3557 		break;
3558 	case KVM_REG_S390_GBEA:
3559 		r = put_user(vcpu->arch.sie_block->gbea,
3560 			     (u64 __user *)reg->addr);
3561 		break;
3562 	default:
3563 		break;
3564 	}
3565 
3566 	return r;
3567 }
3568 
3569 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3570 					   struct kvm_one_reg *reg)
3571 {
3572 	int r = -EINVAL;
3573 	__u64 val;
3574 
3575 	switch (reg->id) {
3576 	case KVM_REG_S390_TODPR:
3577 		r = get_user(vcpu->arch.sie_block->todpr,
3578 			     (u32 __user *)reg->addr);
3579 		break;
3580 	case KVM_REG_S390_EPOCHDIFF:
3581 		r = get_user(vcpu->arch.sie_block->epoch,
3582 			     (u64 __user *)reg->addr);
3583 		break;
3584 	case KVM_REG_S390_CPU_TIMER:
3585 		r = get_user(val, (u64 __user *)reg->addr);
3586 		if (!r)
3587 			kvm_s390_set_cpu_timer(vcpu, val);
3588 		break;
3589 	case KVM_REG_S390_CLOCK_COMP:
3590 		r = get_user(vcpu->arch.sie_block->ckc,
3591 			     (u64 __user *)reg->addr);
3592 		break;
3593 	case KVM_REG_S390_PFTOKEN:
3594 		r = get_user(vcpu->arch.pfault_token,
3595 			     (u64 __user *)reg->addr);
3596 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3597 			kvm_clear_async_pf_completion_queue(vcpu);
3598 		break;
3599 	case KVM_REG_S390_PFCOMPARE:
3600 		r = get_user(vcpu->arch.pfault_compare,
3601 			     (u64 __user *)reg->addr);
3602 		break;
3603 	case KVM_REG_S390_PFSELECT:
3604 		r = get_user(vcpu->arch.pfault_select,
3605 			     (u64 __user *)reg->addr);
3606 		break;
3607 	case KVM_REG_S390_PP:
3608 		r = get_user(vcpu->arch.sie_block->pp,
3609 			     (u64 __user *)reg->addr);
3610 		break;
3611 	case KVM_REG_S390_GBEA:
3612 		r = get_user(vcpu->arch.sie_block->gbea,
3613 			     (u64 __user *)reg->addr);
3614 		break;
3615 	default:
3616 		break;
3617 	}
3618 
3619 	return r;
3620 }
3621 
3622 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3623 {
3624 	vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3625 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3626 	memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3627 
3628 	kvm_clear_async_pf_completion_queue(vcpu);
3629 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3630 		kvm_s390_vcpu_stop(vcpu);
3631 	kvm_s390_clear_local_irqs(vcpu);
3632 }
3633 
3634 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3635 {
3636 	/* Initial reset is a superset of the normal reset */
3637 	kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3638 
3639 	/*
3640 	 * This equals initial cpu reset in pop, but we don't switch to ESA.
3641 	 * We do not only reset the internal data, but also ...
3642 	 */
3643 	vcpu->arch.sie_block->gpsw.mask = 0;
3644 	vcpu->arch.sie_block->gpsw.addr = 0;
3645 	kvm_s390_set_prefix(vcpu, 0);
3646 	kvm_s390_set_cpu_timer(vcpu, 0);
3647 	vcpu->arch.sie_block->ckc = 0;
3648 	memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3649 	vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3650 	vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3651 
3652 	/* ... the data in sync regs */
3653 	memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3654 	vcpu->run->s.regs.ckc = 0;
3655 	vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3656 	vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3657 	vcpu->run->psw_addr = 0;
3658 	vcpu->run->psw_mask = 0;
3659 	vcpu->run->s.regs.todpr = 0;
3660 	vcpu->run->s.regs.cputm = 0;
3661 	vcpu->run->s.regs.ckc = 0;
3662 	vcpu->run->s.regs.pp = 0;
3663 	vcpu->run->s.regs.gbea = 1;
3664 	vcpu->run->s.regs.fpc = 0;
3665 	/*
3666 	 * Do not reset these registers in the protected case, as some of
3667 	 * them are overlayed and they are not accessible in this case
3668 	 * anyway.
3669 	 */
3670 	if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3671 		vcpu->arch.sie_block->gbea = 1;
3672 		vcpu->arch.sie_block->pp = 0;
3673 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3674 		vcpu->arch.sie_block->todpr = 0;
3675 	}
3676 }
3677 
3678 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3679 {
3680 	struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3681 
3682 	/* Clear reset is a superset of the initial reset */
3683 	kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3684 
3685 	memset(&regs->gprs, 0, sizeof(regs->gprs));
3686 	memset(&regs->vrs, 0, sizeof(regs->vrs));
3687 	memset(&regs->acrs, 0, sizeof(regs->acrs));
3688 	memset(&regs->gscb, 0, sizeof(regs->gscb));
3689 
3690 	regs->etoken = 0;
3691 	regs->etoken_extension = 0;
3692 }
3693 
3694 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3695 {
3696 	vcpu_load(vcpu);
3697 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3698 	vcpu_put(vcpu);
3699 	return 0;
3700 }
3701 
3702 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3703 {
3704 	vcpu_load(vcpu);
3705 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3706 	vcpu_put(vcpu);
3707 	return 0;
3708 }
3709 
3710 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3711 				  struct kvm_sregs *sregs)
3712 {
3713 	vcpu_load(vcpu);
3714 
3715 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3716 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3717 
3718 	vcpu_put(vcpu);
3719 	return 0;
3720 }
3721 
3722 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3723 				  struct kvm_sregs *sregs)
3724 {
3725 	vcpu_load(vcpu);
3726 
3727 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3728 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3729 
3730 	vcpu_put(vcpu);
3731 	return 0;
3732 }
3733 
3734 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3735 {
3736 	int ret = 0;
3737 
3738 	vcpu_load(vcpu);
3739 
3740 	if (test_fp_ctl(fpu->fpc)) {
3741 		ret = -EINVAL;
3742 		goto out;
3743 	}
3744 	vcpu->run->s.regs.fpc = fpu->fpc;
3745 	if (MACHINE_HAS_VX)
3746 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3747 				 (freg_t *) fpu->fprs);
3748 	else
3749 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3750 
3751 out:
3752 	vcpu_put(vcpu);
3753 	return ret;
3754 }
3755 
3756 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3757 {
3758 	vcpu_load(vcpu);
3759 
3760 	/* make sure we have the latest values */
3761 	save_fpu_regs();
3762 	if (MACHINE_HAS_VX)
3763 		convert_vx_to_fp((freg_t *) fpu->fprs,
3764 				 (__vector128 *) vcpu->run->s.regs.vrs);
3765 	else
3766 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3767 	fpu->fpc = vcpu->run->s.regs.fpc;
3768 
3769 	vcpu_put(vcpu);
3770 	return 0;
3771 }
3772 
3773 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3774 {
3775 	int rc = 0;
3776 
3777 	if (!is_vcpu_stopped(vcpu))
3778 		rc = -EBUSY;
3779 	else {
3780 		vcpu->run->psw_mask = psw.mask;
3781 		vcpu->run->psw_addr = psw.addr;
3782 	}
3783 	return rc;
3784 }
3785 
/* Address translation is not implemented yet; always returns -EINVAL. */
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL;
}
3791 
3792 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3793 			      KVM_GUESTDBG_USE_HW_BP | \
3794 			      KVM_GUESTDBG_ENABLE)
3795 
3796 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3797 					struct kvm_guest_debug *dbg)
3798 {
3799 	int rc = 0;
3800 
3801 	vcpu_load(vcpu);
3802 
3803 	vcpu->guest_debug = 0;
3804 	kvm_s390_clear_bp_data(vcpu);
3805 
3806 	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3807 		rc = -EINVAL;
3808 		goto out;
3809 	}
3810 	if (!sclp.has_gpere) {
3811 		rc = -EINVAL;
3812 		goto out;
3813 	}
3814 
3815 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
3816 		vcpu->guest_debug = dbg->control;
3817 		/* enforce guest PER */
3818 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3819 
3820 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3821 			rc = kvm_s390_import_bp_data(vcpu, dbg);
3822 	} else {
3823 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3824 		vcpu->arch.guestdbg.last_bp = 0;
3825 	}
3826 
3827 	if (rc) {
3828 		vcpu->guest_debug = 0;
3829 		kvm_s390_clear_bp_data(vcpu);
3830 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3831 	}
3832 
3833 out:
3834 	vcpu_put(vcpu);
3835 	return rc;
3836 }
3837 
3838 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3839 				    struct kvm_mp_state *mp_state)
3840 {
3841 	int ret;
3842 
3843 	vcpu_load(vcpu);
3844 
3845 	/* CHECK_STOP and LOAD are not supported yet */
3846 	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3847 				      KVM_MP_STATE_OPERATING;
3848 
3849 	vcpu_put(vcpu);
3850 	return ret;
3851 }
3852 
3853 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3854 				    struct kvm_mp_state *mp_state)
3855 {
3856 	int rc = 0;
3857 
3858 	vcpu_load(vcpu);
3859 
3860 	/* user space knows about this interface - let it control the state */
3861 	kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm);
3862 
3863 	switch (mp_state->mp_state) {
3864 	case KVM_MP_STATE_STOPPED:
3865 		rc = kvm_s390_vcpu_stop(vcpu);
3866 		break;
3867 	case KVM_MP_STATE_OPERATING:
3868 		rc = kvm_s390_vcpu_start(vcpu);
3869 		break;
3870 	case KVM_MP_STATE_LOAD:
3871 		if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3872 			rc = -ENXIO;
3873 			break;
3874 		}
3875 		rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3876 		break;
3877 	case KVM_MP_STATE_CHECK_STOP:
3878 		fallthrough;	/* CHECK_STOP and LOAD are not supported yet */
3879 	default:
3880 		rc = -ENXIO;
3881 	}
3882 
3883 	vcpu_put(vcpu);
3884 	return rc;
3885 }
3886 
3887 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3888 {
3889 	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3890 }
3891 
3892 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3893 {
3894 retry:
3895 	kvm_s390_vcpu_request_handled(vcpu);
3896 	if (!kvm_request_pending(vcpu))
3897 		return 0;
3898 	/*
3899 	 * If the guest prefix changed, re-arm the ipte notifier for the
3900 	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3901 	 * This ensures that the ipte instruction for this request has
3902 	 * already finished. We might race against a second unmapper that
3903 	 * wants to set the blocking bit. Lets just retry the request loop.
3904 	 */
3905 	if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) {
3906 		int rc;
3907 		rc = gmap_mprotect_notify(vcpu->arch.gmap,
3908 					  kvm_s390_get_prefix(vcpu),
3909 					  PAGE_SIZE * 2, PROT_WRITE);
3910 		if (rc) {
3911 			kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
3912 			return rc;
3913 		}
3914 		goto retry;
3915 	}
3916 
3917 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3918 		vcpu->arch.sie_block->ihcpu = 0xffff;
3919 		goto retry;
3920 	}
3921 
3922 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3923 		if (!ibs_enabled(vcpu)) {
3924 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3925 			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3926 		}
3927 		goto retry;
3928 	}
3929 
3930 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3931 		if (ibs_enabled(vcpu)) {
3932 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3933 			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3934 		}
3935 		goto retry;
3936 	}
3937 
3938 	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3939 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3940 		goto retry;
3941 	}
3942 
3943 	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3944 		/*
3945 		 * Disable CMM virtualization; we will emulate the ESSA
3946 		 * instruction manually, in order to provide additional
3947 		 * functionalities needed for live migration.
3948 		 */
3949 		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3950 		goto retry;
3951 	}
3952 
3953 	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3954 		/*
3955 		 * Re-enable CMM virtualization if CMMA is available and
3956 		 * CMM has been used.
3957 		 */
3958 		if ((vcpu->kvm->arch.use_cmma) &&
3959 		    (vcpu->kvm->mm->context.uses_cmm))
3960 			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3961 		goto retry;
3962 	}
3963 
3964 	/* nothing to do, just clear the request */
3965 	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3966 	/* we left the vsie handler, nothing to do, just clear the request */
3967 	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3968 
3969 	return 0;
3970 }
3971 
3972 static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
3973 {
3974 	struct kvm_vcpu *vcpu;
3975 	union tod_clock clk;
3976 	unsigned long i;
3977 
3978 	preempt_disable();
3979 
3980 	store_tod_clock_ext(&clk);
3981 
3982 	kvm->arch.epoch = gtod->tod - clk.tod;
3983 	kvm->arch.epdx = 0;
3984 	if (test_kvm_facility(kvm, 139)) {
3985 		kvm->arch.epdx = gtod->epoch_idx - clk.ei;
3986 		if (kvm->arch.epoch > gtod->tod)
3987 			kvm->arch.epdx -= 1;
3988 	}
3989 
3990 	kvm_s390_vcpu_block_all(kvm);
3991 	kvm_for_each_vcpu(i, vcpu, kvm) {
3992 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3993 		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3994 	}
3995 
3996 	kvm_s390_vcpu_unblock_all(kvm);
3997 	preempt_enable();
3998 }
3999 
4000 void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
4001 {
4002 	mutex_lock(&kvm->lock);
4003 	__kvm_s390_set_tod_clock(kvm, gtod);
4004 	mutex_unlock(&kvm->lock);
4005 }
4006 
4007 int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
4008 {
4009 	if (!mutex_trylock(&kvm->lock))
4010 		return 0;
4011 	__kvm_s390_set_tod_clock(kvm, gtod);
4012 	mutex_unlock(&kvm->lock);
4013 	return 1;
4014 }
4015 
4016 /**
4017  * kvm_arch_fault_in_page - fault-in guest page if necessary
4018  * @vcpu: The corresponding virtual cpu
4019  * @gpa: Guest physical address
4020  * @writable: Whether the page should be writable or not
4021  *
4022  * Make sure that a guest page has been faulted-in on the host.
4023  *
4024  * Return: Zero on success, negative error code otherwise.
4025  */
4026 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
4027 {
4028 	return gmap_fault(vcpu->arch.gmap, gpa,
4029 			  writable ? FAULT_FLAG_WRITE : 0);
4030 }
4031 
4032 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
4033 				      unsigned long token)
4034 {
4035 	struct kvm_s390_interrupt inti;
4036 	struct kvm_s390_irq irq;
4037 
4038 	if (start_token) {
4039 		irq.u.ext.ext_params2 = token;
4040 		irq.type = KVM_S390_INT_PFAULT_INIT;
4041 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
4042 	} else {
4043 		inti.type = KVM_S390_INT_PFAULT_DONE;
4044 		inti.parm64 = token;
4045 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
4046 	}
4047 }
4048 
4049 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
4050 				     struct kvm_async_pf *work)
4051 {
4052 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
4053 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
4054 
4055 	return true;
4056 }
4057 
4058 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
4059 				 struct kvm_async_pf *work)
4060 {
4061 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
4062 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
4063 }
4064 
/*
 * Async-pf "page ready" hook. Intentionally empty: s390 always injects
 * the page directly, so there is nothing left to do here.
 */
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
}
4070 
4071 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
4072 {
4073 	/*
4074 	 * s390 will always inject the page directly,
4075 	 * but we still want check_async_completion to cleanup
4076 	 */
4077 	return true;
4078 }
4079 
4080 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
4081 {
4082 	hva_t hva;
4083 	struct kvm_arch_async_pf arch;
4084 
4085 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4086 		return false;
4087 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
4088 	    vcpu->arch.pfault_compare)
4089 		return false;
4090 	if (psw_extint_disabled(vcpu))
4091 		return false;
4092 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
4093 		return false;
4094 	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
4095 		return false;
4096 	if (!vcpu->arch.gmap->pfault_enabled)
4097 		return false;
4098 
4099 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
4100 	hva += current->thread.gmap_addr & ~PAGE_MASK;
4101 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
4102 		return false;
4103 
4104 	return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
4105 }
4106 
4107 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
4108 {
4109 	int rc, cpuflags;
4110 
4111 	/*
4112 	 * On s390 notifications for arriving pages will be delivered directly
4113 	 * to the guest but the house keeping for completed pfaults is
4114 	 * handled outside the worker.
4115 	 */
4116 	kvm_check_async_pf_completion(vcpu);
4117 
4118 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4119 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4120 
4121 	if (need_resched())
4122 		schedule();
4123 
4124 	if (!kvm_is_ucontrol(vcpu->kvm)) {
4125 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
4126 		if (rc)
4127 			return rc;
4128 	}
4129 
4130 	rc = kvm_s390_handle_requests(vcpu);
4131 	if (rc)
4132 		return rc;
4133 
4134 	if (guestdbg_enabled(vcpu)) {
4135 		kvm_s390_backup_guest_per_regs(vcpu);
4136 		kvm_s390_patch_guest_per_regs(vcpu);
4137 	}
4138 
4139 	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
4140 
4141 	vcpu->arch.sie_block->icptcode = 0;
4142 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4143 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4144 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
4145 
4146 	return 0;
4147 }
4148 
4149 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4150 {
4151 	struct kvm_s390_pgm_info pgm_info = {
4152 		.code = PGM_ADDRESSING,
4153 	};
4154 	u8 opcode, ilen;
4155 	int rc;
4156 
4157 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4158 	trace_kvm_s390_sie_fault(vcpu);
4159 
4160 	/*
4161 	 * We want to inject an addressing exception, which is defined as a
4162 	 * suppressing or terminating exception. However, since we came here
4163 	 * by a DAT access exception, the PSW still points to the faulting
4164 	 * instruction since DAT exceptions are nullifying. So we've got
4165 	 * to look up the current opcode to get the length of the instruction
4166 	 * to be able to forward the PSW.
4167 	 */
4168 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4169 	ilen = insn_length(opcode);
4170 	if (rc < 0) {
4171 		return rc;
4172 	} else if (rc) {
4173 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
4174 		 * Forward by arbitrary ilc, injection will take care of
4175 		 * nullification if necessary.
4176 		 */
4177 		pgm_info = vcpu->arch.pgm;
4178 		ilen = 4;
4179 	}
4180 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4181 	kvm_s390_forward_psw(vcpu, ilen);
4182 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4183 }
4184 
4185 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4186 {
4187 	struct mcck_volatile_info *mcck_info;
4188 	struct sie_page *sie_page;
4189 
4190 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4191 		   vcpu->arch.sie_block->icptcode);
4192 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4193 
4194 	if (guestdbg_enabled(vcpu))
4195 		kvm_s390_restore_guest_per_regs(vcpu);
4196 
4197 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4198 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4199 
4200 	if (exit_reason == -EINTR) {
4201 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
4202 		sie_page = container_of(vcpu->arch.sie_block,
4203 					struct sie_page, sie_block);
4204 		mcck_info = &sie_page->mcck_info;
4205 		kvm_s390_reinject_machine_check(vcpu, mcck_info);
4206 		return 0;
4207 	}
4208 
4209 	if (vcpu->arch.sie_block->icptcode > 0) {
4210 		int rc = kvm_handle_sie_intercept(vcpu);
4211 
4212 		if (rc != -EOPNOTSUPP)
4213 			return rc;
4214 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4215 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4216 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4217 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4218 		return -EREMOTE;
4219 	} else if (exit_reason != -EFAULT) {
4220 		vcpu->stat.exit_null++;
4221 		return 0;
4222 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
4223 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4224 		vcpu->run->s390_ucontrol.trans_exc_code =
4225 						current->thread.gmap_addr;
4226 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
4227 		return -EREMOTE;
4228 	} else if (current->thread.gmap_pfault) {
4229 		trace_kvm_s390_major_guest_pfault(vcpu);
4230 		current->thread.gmap_pfault = 0;
4231 		if (kvm_arch_setup_async_pf(vcpu))
4232 			return 0;
4233 		vcpu->stat.pfault_sync++;
4234 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4235 	}
4236 	return vcpu_post_run_fault_in_sie(vcpu);
4237 }
4238 
/* PSW mask bits for external, I/O and machine check interrupts */
#define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
/*
 * Main run loop of a vcpu: prepare, enter SIE, evaluate the exit, repeat.
 *
 * The loop ends when vcpu_pre_run() or vcpu_post_run() return non-zero,
 * when a signal is pending for the current task, or when a guest debug
 * exit is pending. Returns the last rc of vcpu_pre_run()/vcpu_post_run().
 */
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;
	/* NOTE(review): the cast assumes sie_block is the first member of struct sie_page - confirm */
	struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
	 * ning the guest), so that memslots (and other stuff) are protected
	 */
	kvm_vcpu_srcu_read_lock(vcpu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		kvm_vcpu_srcu_read_unlock(vcpu);
		/*
		 * As PF_VCPU will be used in fault handler, between
		 * guest_enter and guest_exit should be no uaccess.
		 */
		local_irq_disable();
		guest_enter_irqoff();
		__disable_cpu_timer_accounting(vcpu);
		local_irq_enable();
		/* protected vcpus: the gprs travel through sie_page->pv_grregs */
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			memcpy(sie_page->pv_grregs,
			       vcpu->run->s.regs.gprs,
			       sizeof(sie_page->pv_grregs));
		}
		if (test_cpu_flag(CIF_FPU))
			load_fpu_regs();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			/* copy the gprs back from the PV register area */
			memcpy(vcpu->run->s.regs.gprs,
			       sie_page->pv_grregs,
			       sizeof(sie_page->pv_grregs));
			/*
			 * We're not allowed to inject interrupts on intercepts
			 * that leave the guest state in an "in-between" state
			 * where the next SIE entry will do a continuation.
			 * Fence interrupts in our "internal" PSW.
			 */
			if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
			    vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
				vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
			}
		}
		local_irq_disable();
		__enable_cpu_timer_accounting(vcpu);
		guest_exit_irqoff();
		local_irq_enable();
		kvm_vcpu_srcu_read_lock(vcpu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	kvm_vcpu_srcu_read_unlock(vcpu);
	return rc;
}
4301 
/*
 * Take over the register state from vcpu->run that is only synced for
 * vcpus that are not protected (see the caller, sync_regs()).
 *
 * The PSW is always copied; the remaining fields are only taken over
 * when user space flagged them in kvm_run->kvm_dirty_regs.
 */
static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;
	struct runtime_instr_cb *riccb;
	struct gs_cb *gscb;

	/* typed views of the control blocks embedded in kvm_run */
	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		/* an invalid token also clears the async pf completion queue */
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
		vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
		vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
		VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc);
	}
	/*
	 * If userspace sets the riccb (e.g. after migration) to a valid state,
	 * we should enable RI here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
	    test_kvm_facility(vcpu->kvm, 64) &&
	    riccb->v &&
	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
	}
	/*
	 * If userspace sets the gscb (e.g. after migration) to non-zero,
	 * we should enable GS here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
	    test_kvm_facility(vcpu->kvm, 133) &&
	    gscb->gssm &&
	    !vcpu->arch.gs_enabled) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
		vcpu->arch.sie_block->ecb |= ECB_GS;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
		vcpu->arch.gs_enabled = 1;
	}
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
	    test_kvm_facility(vcpu->kvm, 82)) {
		/* make FPF_BPBC mirror the boolean bpbc value from kvm_run */
		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
	}
	if (MACHINE_HAS_GS) {
		preempt_disable();
		__ctl_set_bit(2, 4);
		/* remember and save the host control block, if there is one */
		if (current->thread.gs_cb) {
			vcpu->arch.host_gscb = current->thread.gs_cb;
			save_gs_cb(vcpu->arch.host_gscb);
		}
		/* switch the task over to the guest control block in kvm_run */
		if (vcpu->arch.gs_enabled) {
			current->thread.gs_cb = (struct gs_cb *)
						&vcpu->run->s.regs.gscb;
			restore_gs_cb(current->thread.gs_cb);
		}
		preempt_enable();
	}
	/* SIE will load etoken directly from SDNX and therefore kvm_run */
}
4374 
4375 static void sync_regs(struct kvm_vcpu *vcpu)
4376 {
4377 	struct kvm_run *kvm_run = vcpu->run;
4378 
4379 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4380 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4381 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4382 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4383 		/* some control register changes require a tlb flush */
4384 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4385 	}
4386 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4387 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4388 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4389 	}
4390 	save_access_regs(vcpu->arch.host_acrs);
4391 	restore_access_regs(vcpu->run->s.regs.acrs);
4392 	/* save host (userspace) fprs/vrs */
4393 	save_fpu_regs();
4394 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4395 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4396 	if (MACHINE_HAS_VX)
4397 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4398 	else
4399 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4400 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4401 	if (test_fp_ctl(current->thread.fpu.fpc))
4402 		/* User space provided an invalid FPC, let's clear it */
4403 		current->thread.fpu.fpc = 0;
4404 
4405 	/* Sync fmt2 only data */
4406 	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4407 		sync_regs_fmt2(vcpu);
4408 	} else {
4409 		/*
4410 		 * In several places we have to modify our internal view to
4411 		 * not do things that are disallowed by the ultravisor. For
4412 		 * example we must not inject interrupts after specific exits
4413 		 * (e.g. 112 prefix page not secure). We do this by turning
4414 		 * off the machine check, external and I/O interrupt bits
4415 		 * of our PSW copy. To avoid getting validity intercepts, we
4416 		 * do only accept the condition code from userspace.
4417 		 */
4418 		vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4419 		vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4420 						   PSW_MASK_CC;
4421 	}
4422 
4423 	kvm_run->kvm_dirty_regs = 0;
4424 }
4425 
4426 static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4427 {
4428 	struct kvm_run *kvm_run = vcpu->run;
4429 
4430 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4431 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4432 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4433 	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4434 	kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4435 	if (MACHINE_HAS_GS) {
4436 		preempt_disable();
4437 		__ctl_set_bit(2, 4);
4438 		if (vcpu->arch.gs_enabled)
4439 			save_gs_cb(current->thread.gs_cb);
4440 		current->thread.gs_cb = vcpu->arch.host_gscb;
4441 		restore_gs_cb(vcpu->arch.host_gscb);
4442 		if (!vcpu->arch.host_gscb)
4443 			__ctl_clear_bit(2, 4);
4444 		vcpu->arch.host_gscb = NULL;
4445 		preempt_enable();
4446 	}
4447 	/* SIE will save etoken directly into SDNX and therefore kvm_run */
4448 }
4449 
4450 static void store_regs(struct kvm_vcpu *vcpu)
4451 {
4452 	struct kvm_run *kvm_run = vcpu->run;
4453 
4454 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4455 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4456 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4457 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4458 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4459 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4460 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4461 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4462 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4463 	save_access_regs(vcpu->run->s.regs.acrs);
4464 	restore_access_regs(vcpu->arch.host_acrs);
4465 	/* Save guest register state */
4466 	save_fpu_regs();
4467 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4468 	/* Restore will be done lazily at return */
4469 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4470 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4471 	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4472 		store_regs_fmt2(vcpu);
4473 }
4474 
/*
 * Run @vcpu until it has to return to user space.
 *
 * Returns -EINTR if kvm_run->immediate_exit is set or a signal is pending
 * after a run that returned 0, -EINVAL for unknown bits in
 * kvm_valid_regs/kvm_dirty_regs or when user space controls the cpu state
 * and the vcpu is stopped, 0 when vcpu->run has been prepared for user
 * space (including debug exits), or the error returned by __vcpu_run().
 */
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;
	int rc;

	if (kvm_run->immediate_exit)
		return -EINTR;

	/* reject sync register bits this implementation does not know */
	if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
	    kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
		return -EINVAL;

	vcpu_load(vcpu);

	/* a debug exit left over from an earlier run is reported first */
	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
		goto out;
	}

	kvm_sigset_activate(vcpu);

	/*
	 * no need to check the return value of vcpu_start as it can only have
	 * an error for protvirt, but protvirt means user cpu state
	 */
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		rc = -EINVAL;
		/*
		 * NOTE(review): this jumps to "out" after
		 * kvm_sigset_activate() without the matching
		 * kvm_sigset_deactivate() - confirm this is intended.
		 */
		goto out;
	}

	sync_regs(vcpu);
	enable_cpu_timer_accounting(vcpu);

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc)  {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}

	disable_cpu_timer_accounting(vcpu);
	store_regs(vcpu);

	kvm_sigset_deactivate(vcpu);

	vcpu->stat.exit_userspace++;
out:
	vcpu_put(vcpu);
	return rc;
}
4541 
4542 /*
4543  * store status at address
4544  * we use have two special cases:
4545  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4546  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4547  */
4548 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4549 {
4550 	unsigned char archmode = 1;
4551 	freg_t fprs[NUM_FPRS];
4552 	unsigned int px;
4553 	u64 clkcomp, cputm;
4554 	int rc;
4555 
4556 	px = kvm_s390_get_prefix(vcpu);
4557 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4558 		if (write_guest_abs(vcpu, 163, &archmode, 1))
4559 			return -EFAULT;
4560 		gpa = 0;
4561 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4562 		if (write_guest_real(vcpu, 163, &archmode, 1))
4563 			return -EFAULT;
4564 		gpa = px;
4565 	} else
4566 		gpa -= __LC_FPREGS_SAVE_AREA;
4567 
4568 	/* manually convert vector registers if necessary */
4569 	if (MACHINE_HAS_VX) {
4570 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4571 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4572 				     fprs, 128);
4573 	} else {
4574 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4575 				     vcpu->run->s.regs.fprs, 128);
4576 	}
4577 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4578 			      vcpu->run->s.regs.gprs, 128);
4579 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4580 			      &vcpu->arch.sie_block->gpsw, 16);
4581 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4582 			      &px, 4);
4583 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4584 			      &vcpu->run->s.regs.fpc, 4);
4585 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4586 			      &vcpu->arch.sie_block->todpr, 4);
4587 	cputm = kvm_s390_get_cpu_timer(vcpu);
4588 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4589 			      &cputm, 8);
4590 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
4591 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4592 			      &clkcomp, 8);
4593 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4594 			      &vcpu->run->s.regs.acrs, 64);
4595 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4596 			      &vcpu->arch.sie_block->gcr, 128);
4597 	return rc ? -EFAULT : 0;
4598 }
4599 
4600 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4601 {
4602 	/*
4603 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4604 	 * switch in the run ioctl. Let's update our copies before we save
4605 	 * it into the save area
4606 	 */
4607 	save_fpu_regs();
4608 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4609 	save_access_regs(vcpu->run->s.regs.acrs);
4610 
4611 	return kvm_s390_store_status_unloaded(vcpu, addr);
4612 }
4613 
4614 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4615 {
4616 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4617 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4618 }
4619 
4620 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4621 {
4622 	unsigned long i;
4623 	struct kvm_vcpu *vcpu;
4624 
4625 	kvm_for_each_vcpu(i, vcpu, kvm) {
4626 		__disable_ibs_on_vcpu(vcpu);
4627 	}
4628 }
4629 
4630 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4631 {
4632 	if (!sclp.has_ibs)
4633 		return;
4634 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4635 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4636 }
4637 
4638 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4639 {
4640 	int i, online_vcpus, r = 0, started_vcpus = 0;
4641 
4642 	if (!is_vcpu_stopped(vcpu))
4643 		return 0;
4644 
4645 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4646 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4647 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4648 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4649 
4650 	/* Let's tell the UV that we want to change into the operating state */
4651 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4652 		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4653 		if (r) {
4654 			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4655 			return r;
4656 		}
4657 	}
4658 
4659 	for (i = 0; i < online_vcpus; i++) {
4660 		if (!is_vcpu_stopped(kvm_get_vcpu(vcpu->kvm, i)))
4661 			started_vcpus++;
4662 	}
4663 
4664 	if (started_vcpus == 0) {
4665 		/* we're the only active VCPU -> speed it up */
4666 		__enable_ibs_on_vcpu(vcpu);
4667 	} else if (started_vcpus == 1) {
4668 		/*
4669 		 * As we are starting a second VCPU, we have to disable
4670 		 * the IBS facility on all VCPUs to remove potentially
4671 		 * outstanding ENABLE requests.
4672 		 */
4673 		__disable_ibs_on_all_vcpus(vcpu->kvm);
4674 	}
4675 
4676 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4677 	/*
4678 	 * The real PSW might have changed due to a RESTART interpreted by the
4679 	 * ultravisor. We block all interrupts and let the next sie exit
4680 	 * refresh our view.
4681 	 */
4682 	if (kvm_s390_pv_cpu_is_protected(vcpu))
4683 		vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4684 	/*
4685 	 * Another VCPU might have used IBS while we were offline.
4686 	 * Let's play safe and flush the VCPU at startup.
4687 	 */
4688 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4689 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4690 	return 0;
4691 }
4692 
4693 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4694 {
4695 	int i, online_vcpus, r = 0, started_vcpus = 0;
4696 	struct kvm_vcpu *started_vcpu = NULL;
4697 
4698 	if (is_vcpu_stopped(vcpu))
4699 		return 0;
4700 
4701 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4702 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4703 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4704 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4705 
4706 	/* Let's tell the UV that we want to change into the stopped state */
4707 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4708 		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4709 		if (r) {
4710 			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4711 			return r;
4712 		}
4713 	}
4714 
4715 	/*
4716 	 * Set the VCPU to STOPPED and THEN clear the interrupt flag,
4717 	 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
4718 	 * have been fully processed. This will ensure that the VCPU
4719 	 * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
4720 	 */
4721 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4722 	kvm_s390_clear_stop_irq(vcpu);
4723 
4724 	__disable_ibs_on_vcpu(vcpu);
4725 
4726 	for (i = 0; i < online_vcpus; i++) {
4727 		struct kvm_vcpu *tmp = kvm_get_vcpu(vcpu->kvm, i);
4728 
4729 		if (!is_vcpu_stopped(tmp)) {
4730 			started_vcpus++;
4731 			started_vcpu = tmp;
4732 		}
4733 	}
4734 
4735 	if (started_vcpus == 1) {
4736 		/*
4737 		 * As we only have one VCPU left, we want to enable the
4738 		 * IBS facility for that VCPU to speed it up.
4739 		 */
4740 		__enable_ibs_on_vcpu(started_vcpu);
4741 	}
4742 
4743 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4744 	return 0;
4745 }
4746 
4747 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4748 				     struct kvm_enable_cap *cap)
4749 {
4750 	int r;
4751 
4752 	if (cap->flags)
4753 		return -EINVAL;
4754 
4755 	switch (cap->cap) {
4756 	case KVM_CAP_S390_CSS_SUPPORT:
4757 		if (!vcpu->kvm->arch.css_support) {
4758 			vcpu->kvm->arch.css_support = 1;
4759 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4760 			trace_kvm_s390_enable_css(vcpu->kvm);
4761 		}
4762 		r = 0;
4763 		break;
4764 	default:
4765 		r = -EINVAL;
4766 		break;
4767 	}
4768 	return r;
4769 }
4770 
4771 static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu,
4772 				  struct kvm_s390_mem_op *mop)
4773 {
4774 	void __user *uaddr = (void __user *)mop->buf;
4775 	int r = 0;
4776 
4777 	if (mop->flags || !mop->size)
4778 		return -EINVAL;
4779 	if (mop->size + mop->sida_offset < mop->size)
4780 		return -EINVAL;
4781 	if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4782 		return -E2BIG;
4783 	if (!kvm_s390_pv_cpu_is_protected(vcpu))
4784 		return -EINVAL;
4785 
4786 	switch (mop->op) {
4787 	case KVM_S390_MEMOP_SIDA_READ:
4788 		if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4789 				 mop->sida_offset), mop->size))
4790 			r = -EFAULT;
4791 
4792 		break;
4793 	case KVM_S390_MEMOP_SIDA_WRITE:
4794 		if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4795 				   mop->sida_offset), uaddr, mop->size))
4796 			r = -EFAULT;
4797 		break;
4798 	}
4799 	return r;
4800 }
4801 
4802 static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu,
4803 				 struct kvm_s390_mem_op *mop)
4804 {
4805 	void __user *uaddr = (void __user *)mop->buf;
4806 	void *tmpbuf = NULL;
4807 	int r = 0;
4808 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4809 				    | KVM_S390_MEMOP_F_CHECK_ONLY
4810 				    | KVM_S390_MEMOP_F_SKEY_PROTECTION;
4811 
4812 	if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4813 		return -EINVAL;
4814 	if (mop->size > MEM_OP_MAX_SIZE)
4815 		return -E2BIG;
4816 	if (kvm_s390_pv_cpu_is_protected(vcpu))
4817 		return -EINVAL;
4818 	if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
4819 		if (access_key_invalid(mop->key))
4820 			return -EINVAL;
4821 	} else {
4822 		mop->key = 0;
4823 	}
4824 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4825 		tmpbuf = vmalloc(mop->size);
4826 		if (!tmpbuf)
4827 			return -ENOMEM;
4828 	}
4829 
4830 	switch (mop->op) {
4831 	case KVM_S390_MEMOP_LOGICAL_READ:
4832 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4833 			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
4834 					    GACC_FETCH, mop->key);
4835 			break;
4836 		}
4837 		r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
4838 					mop->size, mop->key);
4839 		if (r == 0) {
4840 			if (copy_to_user(uaddr, tmpbuf, mop->size))
4841 				r = -EFAULT;
4842 		}
4843 		break;
4844 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4845 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4846 			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
4847 					    GACC_STORE, mop->key);
4848 			break;
4849 		}
4850 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4851 			r = -EFAULT;
4852 			break;
4853 		}
4854 		r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
4855 					 mop->size, mop->key);
4856 		break;
4857 	}
4858 
4859 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4860 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4861 
4862 	vfree(tmpbuf);
4863 	return r;
4864 }
4865 
4866 static long kvm_s390_vcpu_memsida_op(struct kvm_vcpu *vcpu,
4867 				     struct kvm_s390_mem_op *mop)
4868 {
4869 	int r, srcu_idx;
4870 
4871 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4872 
4873 	switch (mop->op) {
4874 	case KVM_S390_MEMOP_LOGICAL_READ:
4875 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4876 		r = kvm_s390_vcpu_mem_op(vcpu, mop);
4877 		break;
4878 	case KVM_S390_MEMOP_SIDA_READ:
4879 	case KVM_S390_MEMOP_SIDA_WRITE:
4880 		/* we are locked against sida going away by the vcpu->mutex */
4881 		r = kvm_s390_vcpu_sida_op(vcpu, mop);
4882 		break;
4883 	default:
4884 		r = -EINVAL;
4885 	}
4886 
4887 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4888 	return r;
4889 }
4890 
4891 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4892 			       unsigned int ioctl, unsigned long arg)
4893 {
4894 	struct kvm_vcpu *vcpu = filp->private_data;
4895 	void __user *argp = (void __user *)arg;
4896 
4897 	switch (ioctl) {
4898 	case KVM_S390_IRQ: {
4899 		struct kvm_s390_irq s390irq;
4900 
4901 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4902 			return -EFAULT;
4903 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4904 	}
4905 	case KVM_S390_INTERRUPT: {
4906 		struct kvm_s390_interrupt s390int;
4907 		struct kvm_s390_irq s390irq = {};
4908 
4909 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
4910 			return -EFAULT;
4911 		if (s390int_to_s390irq(&s390int, &s390irq))
4912 			return -EINVAL;
4913 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4914 	}
4915 	}
4916 	return -ENOIOCTLCMD;
4917 }
4918 
4919 long kvm_arch_vcpu_ioctl(struct file *filp,
4920 			 unsigned int ioctl, unsigned long arg)
4921 {
4922 	struct kvm_vcpu *vcpu = filp->private_data;
4923 	void __user *argp = (void __user *)arg;
4924 	int idx;
4925 	long r;
4926 	u16 rc, rrc;
4927 
4928 	vcpu_load(vcpu);
4929 
4930 	switch (ioctl) {
4931 	case KVM_S390_STORE_STATUS:
4932 		idx = srcu_read_lock(&vcpu->kvm->srcu);
4933 		r = kvm_s390_store_status_unloaded(vcpu, arg);
4934 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
4935 		break;
4936 	case KVM_S390_SET_INITIAL_PSW: {
4937 		psw_t psw;
4938 
4939 		r = -EFAULT;
4940 		if (copy_from_user(&psw, argp, sizeof(psw)))
4941 			break;
4942 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4943 		break;
4944 	}
4945 	case KVM_S390_CLEAR_RESET:
4946 		r = 0;
4947 		kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4948 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4949 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4950 					  UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4951 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4952 				   rc, rrc);
4953 		}
4954 		break;
4955 	case KVM_S390_INITIAL_RESET:
4956 		r = 0;
4957 		kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4958 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4959 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4960 					  UVC_CMD_CPU_RESET_INITIAL,
4961 					  &rc, &rrc);
4962 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4963 				   rc, rrc);
4964 		}
4965 		break;
4966 	case KVM_S390_NORMAL_RESET:
4967 		r = 0;
4968 		kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4969 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4970 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4971 					  UVC_CMD_CPU_RESET, &rc, &rrc);
4972 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4973 				   rc, rrc);
4974 		}
4975 		break;
4976 	case KVM_SET_ONE_REG:
4977 	case KVM_GET_ONE_REG: {
4978 		struct kvm_one_reg reg;
4979 		r = -EINVAL;
4980 		if (kvm_s390_pv_cpu_is_protected(vcpu))
4981 			break;
4982 		r = -EFAULT;
4983 		if (copy_from_user(&reg, argp, sizeof(reg)))
4984 			break;
4985 		if (ioctl == KVM_SET_ONE_REG)
4986 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4987 		else
4988 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4989 		break;
4990 	}
4991 #ifdef CONFIG_KVM_S390_UCONTROL
4992 	case KVM_S390_UCAS_MAP: {
4993 		struct kvm_s390_ucas_mapping ucasmap;
4994 
4995 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4996 			r = -EFAULT;
4997 			break;
4998 		}
4999 
5000 		if (!kvm_is_ucontrol(vcpu->kvm)) {
5001 			r = -EINVAL;
5002 			break;
5003 		}
5004 
5005 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
5006 				     ucasmap.vcpu_addr, ucasmap.length);
5007 		break;
5008 	}
5009 	case KVM_S390_UCAS_UNMAP: {
5010 		struct kvm_s390_ucas_mapping ucasmap;
5011 
5012 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
5013 			r = -EFAULT;
5014 			break;
5015 		}
5016 
5017 		if (!kvm_is_ucontrol(vcpu->kvm)) {
5018 			r = -EINVAL;
5019 			break;
5020 		}
5021 
5022 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
5023 			ucasmap.length);
5024 		break;
5025 	}
5026 #endif
5027 	case KVM_S390_VCPU_FAULT: {
5028 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
5029 		break;
5030 	}
5031 	case KVM_ENABLE_CAP:
5032 	{
5033 		struct kvm_enable_cap cap;
5034 		r = -EFAULT;
5035 		if (copy_from_user(&cap, argp, sizeof(cap)))
5036 			break;
5037 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
5038 		break;
5039 	}
5040 	case KVM_S390_MEM_OP: {
5041 		struct kvm_s390_mem_op mem_op;
5042 
5043 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
5044 			r = kvm_s390_vcpu_memsida_op(vcpu, &mem_op);
5045 		else
5046 			r = -EFAULT;
5047 		break;
5048 	}
5049 	case KVM_S390_SET_IRQ_STATE: {
5050 		struct kvm_s390_irq_state irq_state;
5051 
5052 		r = -EFAULT;
5053 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
5054 			break;
5055 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
5056 		    irq_state.len == 0 ||
5057 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
5058 			r = -EINVAL;
5059 			break;
5060 		}
5061 		/* do not use irq_state.flags, it will break old QEMUs */
5062 		r = kvm_s390_set_irq_state(vcpu,
5063 					   (void __user *) irq_state.buf,
5064 					   irq_state.len);
5065 		break;
5066 	}
5067 	case KVM_S390_GET_IRQ_STATE: {
5068 		struct kvm_s390_irq_state irq_state;
5069 
5070 		r = -EFAULT;
5071 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
5072 			break;
5073 		if (irq_state.len == 0) {
5074 			r = -EINVAL;
5075 			break;
5076 		}
5077 		/* do not use irq_state.flags, it will break old QEMUs */
5078 		r = kvm_s390_get_irq_state(vcpu,
5079 					   (__u8 __user *)  irq_state.buf,
5080 					   irq_state.len);
5081 		break;
5082 	}
5083 	default:
5084 		r = -ENOTTY;
5085 	}
5086 
5087 	vcpu_put(vcpu);
5088 	return r;
5089 }
5090 
5091 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
5092 {
5093 #ifdef CONFIG_KVM_S390_UCONTROL
5094 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
5095 		 && (kvm_is_ucontrol(vcpu->kvm))) {
5096 		vmf->page = virt_to_page(vcpu->arch.sie_block);
5097 		get_page(vmf->page);
5098 		return 0;
5099 	}
5100 #endif
5101 	return VM_FAULT_SIGBUS;
5102 }
5103 
5104 /* Section: memory related */
5105 int kvm_arch_prepare_memory_region(struct kvm *kvm,
5106 				   const struct kvm_memory_slot *old,
5107 				   struct kvm_memory_slot *new,
5108 				   enum kvm_mr_change change)
5109 {
5110 	gpa_t size;
5111 
5112 	/* When we are protected, we should not change the memory slots */
5113 	if (kvm_s390_pv_get_handle(kvm))
5114 		return -EINVAL;
5115 
5116 	if (change == KVM_MR_DELETE || change == KVM_MR_FLAGS_ONLY)
5117 		return 0;
5118 
5119 	/* A few sanity checks. We can have memory slots which have to be
5120 	   located/ended at a segment boundary (1MB). The memory in userland is
5121 	   ok to be fragmented into various different vmas. It is okay to mmap()
5122 	   and munmap() stuff in this slot after doing this call at any time */
5123 
5124 	if (new->userspace_addr & 0xffffful)
5125 		return -EINVAL;
5126 
5127 	size = new->npages * PAGE_SIZE;
5128 	if (size & 0xffffful)
5129 		return -EINVAL;
5130 
5131 	if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit)
5132 		return -EINVAL;
5133 
5134 	return 0;
5135 }
5136 
5137 void kvm_arch_commit_memory_region(struct kvm *kvm,
5138 				struct kvm_memory_slot *old,
5139 				const struct kvm_memory_slot *new,
5140 				enum kvm_mr_change change)
5141 {
5142 	int rc = 0;
5143 
5144 	switch (change) {
5145 	case KVM_MR_DELETE:
5146 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5147 					old->npages * PAGE_SIZE);
5148 		break;
5149 	case KVM_MR_MOVE:
5150 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5151 					old->npages * PAGE_SIZE);
5152 		if (rc)
5153 			break;
5154 		fallthrough;
5155 	case KVM_MR_CREATE:
5156 		rc = gmap_map_segment(kvm->arch.gmap, new->userspace_addr,
5157 				      new->base_gfn * PAGE_SIZE,
5158 				      new->npages * PAGE_SIZE);
5159 		break;
5160 	case KVM_MR_FLAGS_ONLY:
5161 		break;
5162 	default:
5163 		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5164 	}
5165 	if (rc)
5166 		pr_warn("failed to commit memory region\n");
5167 	return;
5168 }
5169 
5170 static inline unsigned long nonhyp_mask(int i)
5171 {
5172 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5173 
5174 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5175 }
5176 
5177 static int __init kvm_s390_init(void)
5178 {
5179 	int i;
5180 
5181 	if (!sclp.has_sief2) {
5182 		pr_info("SIE is not available\n");
5183 		return -ENODEV;
5184 	}
5185 
5186 	if (nested && hpage) {
5187 		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5188 		return -EINVAL;
5189 	}
5190 
5191 	for (i = 0; i < 16; i++)
5192 		kvm_s390_fac_base[i] |=
5193 			stfle_fac_list[i] & nonhyp_mask(i);
5194 
5195 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5196 }
5197 
/* Module exit: the only teardown done here is the call to kvm_exit(). */
static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}
5202 
/* Register the module entry and exit points defined above. */
module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");
5214