xref: /openbmc/linux/arch/s390/kvm/kvm-s390.c (revision a48acad7)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2020
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
10  *               Jason J. Herne <jjherne@us.ibm.com>
11  */
12 
13 #define KMSG_COMPONENT "kvm-s390"
14 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
15 
16 #include <linux/compiler.h>
17 #include <linux/err.h>
18 #include <linux/fs.h>
19 #include <linux/hrtimer.h>
20 #include <linux/init.h>
21 #include <linux/kvm.h>
22 #include <linux/kvm_host.h>
23 #include <linux/mman.h>
24 #include <linux/module.h>
25 #include <linux/moduleparam.h>
26 #include <linux/random.h>
27 #include <linux/slab.h>
28 #include <linux/timer.h>
29 #include <linux/vmalloc.h>
30 #include <linux/bitmap.h>
31 #include <linux/sched/signal.h>
32 #include <linux/string.h>
33 #include <linux/pgtable.h>
34 #include <linux/mmu_notifier.h>
35 
36 #include <asm/asm-offsets.h>
37 #include <asm/lowcore.h>
38 #include <asm/stp.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include <asm/ap.h>
47 #include <asm/uv.h>
48 #include <asm/fpu/api.h>
49 #include "kvm-s390.h"
50 #include "gaccess.h"
51 #include "pci.h"
52 
53 #define CREATE_TRACE_POINTS
54 #include "trace.h"
55 #include "trace-s390.h"
56 
57 #define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
58 #define LOCAL_IRQS 32
59 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
60 			   (KVM_MAX_VCPUS + LOCAL_IRQS))
61 
62 const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
63 	KVM_GENERIC_VM_STATS(),
64 	STATS_DESC_COUNTER(VM, inject_io),
65 	STATS_DESC_COUNTER(VM, inject_float_mchk),
66 	STATS_DESC_COUNTER(VM, inject_pfault_done),
67 	STATS_DESC_COUNTER(VM, inject_service_signal),
68 	STATS_DESC_COUNTER(VM, inject_virtio),
69 	STATS_DESC_COUNTER(VM, aen_forward)
70 };
71 
72 const struct kvm_stats_header kvm_vm_stats_header = {
73 	.name_size = KVM_STATS_NAME_SIZE,
74 	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
75 	.id_offset = sizeof(struct kvm_stats_header),
76 	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
77 	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
78 		       sizeof(kvm_vm_stats_desc),
79 };
80 
81 const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
82 	KVM_GENERIC_VCPU_STATS(),
83 	STATS_DESC_COUNTER(VCPU, exit_userspace),
84 	STATS_DESC_COUNTER(VCPU, exit_null),
85 	STATS_DESC_COUNTER(VCPU, exit_external_request),
86 	STATS_DESC_COUNTER(VCPU, exit_io_request),
87 	STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
88 	STATS_DESC_COUNTER(VCPU, exit_stop_request),
89 	STATS_DESC_COUNTER(VCPU, exit_validity),
90 	STATS_DESC_COUNTER(VCPU, exit_instruction),
91 	STATS_DESC_COUNTER(VCPU, exit_pei),
92 	STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
93 	STATS_DESC_COUNTER(VCPU, instruction_lctl),
94 	STATS_DESC_COUNTER(VCPU, instruction_lctlg),
95 	STATS_DESC_COUNTER(VCPU, instruction_stctl),
96 	STATS_DESC_COUNTER(VCPU, instruction_stctg),
97 	STATS_DESC_COUNTER(VCPU, exit_program_interruption),
98 	STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
99 	STATS_DESC_COUNTER(VCPU, exit_operation_exception),
100 	STATS_DESC_COUNTER(VCPU, deliver_ckc),
101 	STATS_DESC_COUNTER(VCPU, deliver_cputm),
102 	STATS_DESC_COUNTER(VCPU, deliver_external_call),
103 	STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
104 	STATS_DESC_COUNTER(VCPU, deliver_service_signal),
105 	STATS_DESC_COUNTER(VCPU, deliver_virtio),
106 	STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
107 	STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
108 	STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
109 	STATS_DESC_COUNTER(VCPU, deliver_program),
110 	STATS_DESC_COUNTER(VCPU, deliver_io),
111 	STATS_DESC_COUNTER(VCPU, deliver_machine_check),
112 	STATS_DESC_COUNTER(VCPU, exit_wait_state),
113 	STATS_DESC_COUNTER(VCPU, inject_ckc),
114 	STATS_DESC_COUNTER(VCPU, inject_cputm),
115 	STATS_DESC_COUNTER(VCPU, inject_external_call),
116 	STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
117 	STATS_DESC_COUNTER(VCPU, inject_mchk),
118 	STATS_DESC_COUNTER(VCPU, inject_pfault_init),
119 	STATS_DESC_COUNTER(VCPU, inject_program),
120 	STATS_DESC_COUNTER(VCPU, inject_restart),
121 	STATS_DESC_COUNTER(VCPU, inject_set_prefix),
122 	STATS_DESC_COUNTER(VCPU, inject_stop_signal),
123 	STATS_DESC_COUNTER(VCPU, instruction_epsw),
124 	STATS_DESC_COUNTER(VCPU, instruction_gs),
125 	STATS_DESC_COUNTER(VCPU, instruction_io_other),
126 	STATS_DESC_COUNTER(VCPU, instruction_lpsw),
127 	STATS_DESC_COUNTER(VCPU, instruction_lpswe),
128 	STATS_DESC_COUNTER(VCPU, instruction_pfmf),
129 	STATS_DESC_COUNTER(VCPU, instruction_ptff),
130 	STATS_DESC_COUNTER(VCPU, instruction_sck),
131 	STATS_DESC_COUNTER(VCPU, instruction_sckpf),
132 	STATS_DESC_COUNTER(VCPU, instruction_stidp),
133 	STATS_DESC_COUNTER(VCPU, instruction_spx),
134 	STATS_DESC_COUNTER(VCPU, instruction_stpx),
135 	STATS_DESC_COUNTER(VCPU, instruction_stap),
136 	STATS_DESC_COUNTER(VCPU, instruction_iske),
137 	STATS_DESC_COUNTER(VCPU, instruction_ri),
138 	STATS_DESC_COUNTER(VCPU, instruction_rrbe),
139 	STATS_DESC_COUNTER(VCPU, instruction_sske),
140 	STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
141 	STATS_DESC_COUNTER(VCPU, instruction_stsi),
142 	STATS_DESC_COUNTER(VCPU, instruction_stfl),
143 	STATS_DESC_COUNTER(VCPU, instruction_tb),
144 	STATS_DESC_COUNTER(VCPU, instruction_tpi),
145 	STATS_DESC_COUNTER(VCPU, instruction_tprot),
146 	STATS_DESC_COUNTER(VCPU, instruction_tsch),
147 	STATS_DESC_COUNTER(VCPU, instruction_sie),
148 	STATS_DESC_COUNTER(VCPU, instruction_essa),
149 	STATS_DESC_COUNTER(VCPU, instruction_sthyi),
150 	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
151 	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
152 	STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
153 	STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
154 	STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
155 	STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
156 	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
157 	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
158 	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
159 	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
160 	STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
161 	STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
162 	STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
163 	STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
164 	STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
165 	STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
166 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
167 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
168 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
169 	STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
170 	STATS_DESC_COUNTER(VCPU, diag_9c_forward),
171 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
172 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
173 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
174 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
175 	STATS_DESC_COUNTER(VCPU, pfault_sync)
176 };
177 
178 const struct kvm_stats_header kvm_vcpu_stats_header = {
179 	.name_size = KVM_STATS_NAME_SIZE,
180 	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
181 	.id_offset = sizeof(struct kvm_stats_header),
182 	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
183 	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
184 		       sizeof(kvm_vcpu_stats_desc),
185 };
186 
187 /* allow nested virtualization in KVM (if enabled by user space) */
188 static int nested;
189 module_param(nested, int, S_IRUGO);
190 MODULE_PARM_DESC(nested, "Nested virtualization support");
191 
192 /* allow 1m huge page guest backing, if !nested */
193 static int hpage;
194 module_param(hpage, int, 0444);
195 MODULE_PARM_DESC(hpage, "1m huge page backing support");
196 
197 /* maximum percentage of steal time for polling.  >100 is treated like 100 */
198 static u8 halt_poll_max_steal = 10;
199 module_param(halt_poll_max_steal, byte, 0644);
200 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
201 
202 /* if set to true, the GISA will be initialized and used if available */
203 static bool use_gisa  = true;
204 module_param(use_gisa, bool, 0644);
205 MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
206 
207 /* maximum diag9c forwarding per second */
208 unsigned int diag9c_forwarding_hz;
209 module_param(diag9c_forwarding_hz, uint, 0644);
210 MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
211 
212 /*
213  * For now we handle at most 16 double words as this is what the s390 base
214  * kernel handles and stores in the prefix page. If we ever need to go beyond
215  * this, this requires changes to code, but the external uapi can stay.
216  */
217 #define SIZE_INTERNAL 16
218 
219 /*
220  * Base feature mask that defines default mask for facilities. Consists of the
221  * defines in FACILITIES_KVM and the non-hypervisor managed bits.
222  */
223 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
224 /*
225  * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
226  * and defines the facilities that can be enabled via a cpu model.
227  */
228 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
229 
230 static unsigned long kvm_s390_fac_size(void)
231 {
232 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
233 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
234 	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
235 		sizeof(stfle_fac_list));
236 
237 	return SIZE_INTERNAL;
238 }
239 
240 /* available cpu features supported by kvm */
241 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
242 /* available subfunctions indicated via query / "test bit" */
243 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
244 
245 static struct gmap_notifier gmap_notifier;
246 static struct gmap_notifier vsie_gmap_notifier;
247 debug_info_t *kvm_s390_dbf;
248 debug_info_t *kvm_s390_dbf_uv;
249 
250 /* Section: not file related */
251 int kvm_arch_hardware_enable(void)
252 {
253 	/* every s390 is virtualization enabled ;-) */
254 	return 0;
255 }
256 
257 int kvm_arch_check_processor_compat(void *opaque)
258 {
259 	return 0;
260 }
261 
262 /* forward declarations */
263 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
264 			      unsigned long end);
265 static int sca_switch_to_extended(struct kvm *kvm);
266 
267 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
268 {
269 	u8 delta_idx = 0;
270 
271 	/*
272 	 * The TOD jumps by delta, we have to compensate this by adding
273 	 * -delta to the epoch.
274 	 */
275 	delta = -delta;
276 
277 	/* sign-extension - we're adding to signed values below */
278 	if ((s64)delta < 0)
279 		delta_idx = -1;
280 
281 	scb->epoch += delta;
282 	if (scb->ecd & ECD_MEF) {
283 		scb->epdx += delta_idx;
284 		if (scb->epoch < delta)
285 			scb->epdx += 1;
286 	}
287 }
288 
289 /*
290  * This callback is executed during stop_machine(). All CPUs are therefore
291  * temporarily stopped. In order not to change guest behavior, we have to
292  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
293  * so a CPU won't be stopped while calculating with the epoch.
294  */
295 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
296 			  void *v)
297 {
298 	struct kvm *kvm;
299 	struct kvm_vcpu *vcpu;
300 	unsigned long i;
301 	unsigned long long *delta = v;
302 
303 	list_for_each_entry(kvm, &vm_list, vm_list) {
304 		kvm_for_each_vcpu(i, vcpu, kvm) {
305 			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
306 			if (i == 0) {
307 				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
308 				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
309 			}
310 			if (vcpu->arch.cputm_enabled)
311 				vcpu->arch.cputm_start += *delta;
312 			if (vcpu->arch.vsie_block)
313 				kvm_clock_sync_scb(vcpu->arch.vsie_block,
314 						   *delta);
315 		}
316 	}
317 	return NOTIFY_OK;
318 }
319 
320 static struct notifier_block kvm_clock_notifier = {
321 	.notifier_call = kvm_clock_sync,
322 };
323 
324 int kvm_arch_hardware_setup(void *opaque)
325 {
326 	gmap_notifier.notifier_call = kvm_gmap_notifier;
327 	gmap_register_pte_notifier(&gmap_notifier);
328 	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
329 	gmap_register_pte_notifier(&vsie_gmap_notifier);
330 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
331 				       &kvm_clock_notifier);
332 	return 0;
333 }
334 
335 void kvm_arch_hardware_unsetup(void)
336 {
337 	gmap_unregister_pte_notifier(&gmap_notifier);
338 	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
339 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
340 					 &kvm_clock_notifier);
341 }
342 
343 static void allow_cpu_feat(unsigned long nr)
344 {
345 	set_bit_inv(nr, kvm_s390_available_cpu_feat);
346 }
347 
348 static inline int plo_test_bit(unsigned char nr)
349 {
350 	unsigned long function = (unsigned long)nr | 0x100;
351 	int cc;
352 
353 	asm volatile(
354 		"	lgr	0,%[function]\n"
355 		/* Parameter registers are ignored for "test bit" */
356 		"	plo	0,0,0,0(0)\n"
357 		"	ipm	%0\n"
358 		"	srl	%0,28\n"
359 		: "=d" (cc)
360 		: [function] "d" (function)
361 		: "cc", "0");
362 	return cc == 0;
363 }
364 
365 static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
366 {
367 	asm volatile(
368 		"	lghi	0,0\n"
369 		"	lgr	1,%[query]\n"
370 		/* Parameter registers are ignored */
371 		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
372 		:
373 		: [query] "d" ((unsigned long)query), [opc] "i" (opcode)
374 		: "cc", "memory", "0", "1");
375 }
376 
377 #define INSN_SORTL 0xb938
378 #define INSN_DFLTCC 0xb939
379 
380 static void kvm_s390_cpu_feat_init(void)
381 {
382 	int i;
383 
384 	for (i = 0; i < 256; ++i) {
385 		if (plo_test_bit(i))
386 			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
387 	}
388 
389 	if (test_facility(28)) /* TOD-clock steering */
390 		ptff(kvm_s390_available_subfunc.ptff,
391 		     sizeof(kvm_s390_available_subfunc.ptff),
392 		     PTFF_QAF);
393 
394 	if (test_facility(17)) { /* MSA */
395 		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
396 			      kvm_s390_available_subfunc.kmac);
397 		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
398 			      kvm_s390_available_subfunc.kmc);
399 		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
400 			      kvm_s390_available_subfunc.km);
401 		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
402 			      kvm_s390_available_subfunc.kimd);
403 		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
404 			      kvm_s390_available_subfunc.klmd);
405 	}
406 	if (test_facility(76)) /* MSA3 */
407 		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
408 			      kvm_s390_available_subfunc.pckmo);
409 	if (test_facility(77)) { /* MSA4 */
410 		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
411 			      kvm_s390_available_subfunc.kmctr);
412 		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
413 			      kvm_s390_available_subfunc.kmf);
414 		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
415 			      kvm_s390_available_subfunc.kmo);
416 		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
417 			      kvm_s390_available_subfunc.pcc);
418 	}
419 	if (test_facility(57)) /* MSA5 */
420 		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
421 			      kvm_s390_available_subfunc.ppno);
422 
423 	if (test_facility(146)) /* MSA8 */
424 		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
425 			      kvm_s390_available_subfunc.kma);
426 
427 	if (test_facility(155)) /* MSA9 */
428 		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
429 			      kvm_s390_available_subfunc.kdsa);
430 
431 	if (test_facility(150)) /* SORTL */
432 		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
433 
434 	if (test_facility(151)) /* DFLTCC */
435 		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
436 
437 	if (MACHINE_HAS_ESOP)
438 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
439 	/*
440 	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
441 	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
442 	 */
443 	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
444 	    !test_facility(3) || !nested)
445 		return;
446 	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
447 	if (sclp.has_64bscao)
448 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
449 	if (sclp.has_siif)
450 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
451 	if (sclp.has_gpere)
452 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
453 	if (sclp.has_gsls)
454 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
455 	if (sclp.has_ib)
456 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
457 	if (sclp.has_cei)
458 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
459 	if (sclp.has_ibs)
460 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
461 	if (sclp.has_kss)
462 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
463 	/*
464 	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
465 	 * all skey handling functions read/set the skey from the PGSTE
466 	 * instead of the real storage key.
467 	 *
468 	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
469 	 * pages being detected as preserved although they are resident.
470 	 *
471 	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
472 	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
473 	 *
474 	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
475 	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
476 	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
477 	 *
478 	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
479 	 * cannot easily shadow the SCA because of the ipte lock.
480 	 */
481 }
482 
483 int kvm_arch_init(void *opaque)
484 {
485 	int rc = -ENOMEM;
486 
487 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
488 	if (!kvm_s390_dbf)
489 		return -ENOMEM;
490 
491 	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
492 	if (!kvm_s390_dbf_uv)
493 		goto out;
494 
495 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
496 	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
497 		goto out;
498 
499 	kvm_s390_cpu_feat_init();
500 
501 	/* Register floating interrupt controller interface. */
502 	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
503 	if (rc) {
504 		pr_err("A FLIC registration call failed with rc=%d\n", rc);
505 		goto out;
506 	}
507 
508 	if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
509 		rc = kvm_s390_pci_init();
510 		if (rc) {
511 			pr_err("Unable to allocate AIFT for PCI\n");
512 			goto out;
513 		}
514 	}
515 
516 	rc = kvm_s390_gib_init(GAL_ISC);
517 	if (rc)
518 		goto out;
519 
520 	return 0;
521 
522 out:
523 	kvm_arch_exit();
524 	return rc;
525 }
526 
527 void kvm_arch_exit(void)
528 {
529 	kvm_s390_gib_destroy();
530 	if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
531 		kvm_s390_pci_exit();
532 	debug_unregister(kvm_s390_dbf);
533 	debug_unregister(kvm_s390_dbf_uv);
534 }
535 
536 /* Section: device related */
537 long kvm_arch_dev_ioctl(struct file *filp,
538 			unsigned int ioctl, unsigned long arg)
539 {
540 	if (ioctl == KVM_S390_ENABLE_SIE)
541 		return s390_enable_sie();
542 	return -EINVAL;
543 }
544 
545 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
546 {
547 	int r;
548 
549 	switch (ext) {
550 	case KVM_CAP_S390_PSW:
551 	case KVM_CAP_S390_GMAP:
552 	case KVM_CAP_SYNC_MMU:
553 #ifdef CONFIG_KVM_S390_UCONTROL
554 	case KVM_CAP_S390_UCONTROL:
555 #endif
556 	case KVM_CAP_ASYNC_PF:
557 	case KVM_CAP_SYNC_REGS:
558 	case KVM_CAP_ONE_REG:
559 	case KVM_CAP_ENABLE_CAP:
560 	case KVM_CAP_S390_CSS_SUPPORT:
561 	case KVM_CAP_IOEVENTFD:
562 	case KVM_CAP_DEVICE_CTRL:
563 	case KVM_CAP_S390_IRQCHIP:
564 	case KVM_CAP_VM_ATTRIBUTES:
565 	case KVM_CAP_MP_STATE:
566 	case KVM_CAP_IMMEDIATE_EXIT:
567 	case KVM_CAP_S390_INJECT_IRQ:
568 	case KVM_CAP_S390_USER_SIGP:
569 	case KVM_CAP_S390_USER_STSI:
570 	case KVM_CAP_S390_SKEYS:
571 	case KVM_CAP_S390_IRQ_STATE:
572 	case KVM_CAP_S390_USER_INSTR0:
573 	case KVM_CAP_S390_CMMA_MIGRATION:
574 	case KVM_CAP_S390_AIS:
575 	case KVM_CAP_S390_AIS_MIGRATION:
576 	case KVM_CAP_S390_VCPU_RESETS:
577 	case KVM_CAP_SET_GUEST_DEBUG:
578 	case KVM_CAP_S390_DIAG318:
579 	case KVM_CAP_S390_MEM_OP_EXTENSION:
580 		r = 1;
581 		break;
582 	case KVM_CAP_SET_GUEST_DEBUG2:
583 		r = KVM_GUESTDBG_VALID_MASK;
584 		break;
585 	case KVM_CAP_S390_HPAGE_1M:
586 		r = 0;
587 		if (hpage && !kvm_is_ucontrol(kvm))
588 			r = 1;
589 		break;
590 	case KVM_CAP_S390_MEM_OP:
591 		r = MEM_OP_MAX_SIZE;
592 		break;
593 	case KVM_CAP_NR_VCPUS:
594 	case KVM_CAP_MAX_VCPUS:
595 	case KVM_CAP_MAX_VCPU_ID:
596 		r = KVM_S390_BSCA_CPU_SLOTS;
597 		if (!kvm_s390_use_sca_entries())
598 			r = KVM_MAX_VCPUS;
599 		else if (sclp.has_esca && sclp.has_64bscao)
600 			r = KVM_S390_ESCA_CPU_SLOTS;
601 		if (ext == KVM_CAP_NR_VCPUS)
602 			r = min_t(unsigned int, num_online_cpus(), r);
603 		break;
604 	case KVM_CAP_S390_COW:
605 		r = MACHINE_HAS_ESOP;
606 		break;
607 	case KVM_CAP_S390_VECTOR_REGISTERS:
608 		r = MACHINE_HAS_VX;
609 		break;
610 	case KVM_CAP_S390_RI:
611 		r = test_facility(64);
612 		break;
613 	case KVM_CAP_S390_GS:
614 		r = test_facility(133);
615 		break;
616 	case KVM_CAP_S390_BPB:
617 		r = test_facility(82);
618 		break;
619 	case KVM_CAP_S390_PROTECTED:
620 		r = is_prot_virt_host();
621 		break;
622 	case KVM_CAP_S390_PROTECTED_DUMP: {
623 		u64 pv_cmds_dump[] = {
624 			BIT_UVC_CMD_DUMP_INIT,
625 			BIT_UVC_CMD_DUMP_CONFIG_STOR_STATE,
626 			BIT_UVC_CMD_DUMP_CPU,
627 			BIT_UVC_CMD_DUMP_COMPLETE,
628 		};
629 		int i;
630 
631 		r = is_prot_virt_host();
632 
633 		for (i = 0; i < ARRAY_SIZE(pv_cmds_dump); i++) {
634 			if (!test_bit_inv(pv_cmds_dump[i],
635 					  (unsigned long *)&uv_info.inst_calls_list)) {
636 				r = 0;
637 				break;
638 			}
639 		}
640 		break;
641 	}
642 	case KVM_CAP_S390_ZPCI_OP:
643 		r = kvm_s390_pci_interp_allowed();
644 		break;
645 	case KVM_CAP_S390_CPU_TOPOLOGY:
646 		r = test_facility(11);
647 		break;
648 	default:
649 		r = 0;
650 	}
651 	return r;
652 }
653 
654 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
655 {
656 	int i;
657 	gfn_t cur_gfn, last_gfn;
658 	unsigned long gaddr, vmaddr;
659 	struct gmap *gmap = kvm->arch.gmap;
660 	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
661 
662 	/* Loop over all guest segments */
663 	cur_gfn = memslot->base_gfn;
664 	last_gfn = memslot->base_gfn + memslot->npages;
665 	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
666 		gaddr = gfn_to_gpa(cur_gfn);
667 		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
668 		if (kvm_is_error_hva(vmaddr))
669 			continue;
670 
671 		bitmap_zero(bitmap, _PAGE_ENTRIES);
672 		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
673 		for (i = 0; i < _PAGE_ENTRIES; i++) {
674 			if (test_bit(i, bitmap))
675 				mark_page_dirty(kvm, cur_gfn + i);
676 		}
677 
678 		if (fatal_signal_pending(current))
679 			return;
680 		cond_resched();
681 	}
682 }
683 
684 /* Section: vm related */
685 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
686 
687 /*
688  * Get (and clear) the dirty memory log for a memory slot.
689  */
690 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
691 			       struct kvm_dirty_log *log)
692 {
693 	int r;
694 	unsigned long n;
695 	struct kvm_memory_slot *memslot;
696 	int is_dirty;
697 
698 	if (kvm_is_ucontrol(kvm))
699 		return -EINVAL;
700 
701 	mutex_lock(&kvm->slots_lock);
702 
703 	r = -EINVAL;
704 	if (log->slot >= KVM_USER_MEM_SLOTS)
705 		goto out;
706 
707 	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
708 	if (r)
709 		goto out;
710 
711 	/* Clear the dirty log */
712 	if (is_dirty) {
713 		n = kvm_dirty_bitmap_bytes(memslot);
714 		memset(memslot->dirty_bitmap, 0, n);
715 	}
716 	r = 0;
717 out:
718 	mutex_unlock(&kvm->slots_lock);
719 	return r;
720 }
721 
722 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
723 {
724 	unsigned long i;
725 	struct kvm_vcpu *vcpu;
726 
727 	kvm_for_each_vcpu(i, vcpu, kvm) {
728 		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
729 	}
730 }
731 
732 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
733 {
734 	int r;
735 
736 	if (cap->flags)
737 		return -EINVAL;
738 
739 	switch (cap->cap) {
740 	case KVM_CAP_S390_IRQCHIP:
741 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
742 		kvm->arch.use_irqchip = 1;
743 		r = 0;
744 		break;
745 	case KVM_CAP_S390_USER_SIGP:
746 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
747 		kvm->arch.user_sigp = 1;
748 		r = 0;
749 		break;
750 	case KVM_CAP_S390_VECTOR_REGISTERS:
751 		mutex_lock(&kvm->lock);
752 		if (kvm->created_vcpus) {
753 			r = -EBUSY;
754 		} else if (MACHINE_HAS_VX) {
755 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
756 			set_kvm_facility(kvm->arch.model.fac_list, 129);
757 			if (test_facility(134)) {
758 				set_kvm_facility(kvm->arch.model.fac_mask, 134);
759 				set_kvm_facility(kvm->arch.model.fac_list, 134);
760 			}
761 			if (test_facility(135)) {
762 				set_kvm_facility(kvm->arch.model.fac_mask, 135);
763 				set_kvm_facility(kvm->arch.model.fac_list, 135);
764 			}
765 			if (test_facility(148)) {
766 				set_kvm_facility(kvm->arch.model.fac_mask, 148);
767 				set_kvm_facility(kvm->arch.model.fac_list, 148);
768 			}
769 			if (test_facility(152)) {
770 				set_kvm_facility(kvm->arch.model.fac_mask, 152);
771 				set_kvm_facility(kvm->arch.model.fac_list, 152);
772 			}
773 			if (test_facility(192)) {
774 				set_kvm_facility(kvm->arch.model.fac_mask, 192);
775 				set_kvm_facility(kvm->arch.model.fac_list, 192);
776 			}
777 			r = 0;
778 		} else
779 			r = -EINVAL;
780 		mutex_unlock(&kvm->lock);
781 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
782 			 r ? "(not available)" : "(success)");
783 		break;
784 	case KVM_CAP_S390_RI:
785 		r = -EINVAL;
786 		mutex_lock(&kvm->lock);
787 		if (kvm->created_vcpus) {
788 			r = -EBUSY;
789 		} else if (test_facility(64)) {
790 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
791 			set_kvm_facility(kvm->arch.model.fac_list, 64);
792 			r = 0;
793 		}
794 		mutex_unlock(&kvm->lock);
795 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
796 			 r ? "(not available)" : "(success)");
797 		break;
798 	case KVM_CAP_S390_AIS:
799 		mutex_lock(&kvm->lock);
800 		if (kvm->created_vcpus) {
801 			r = -EBUSY;
802 		} else {
803 			set_kvm_facility(kvm->arch.model.fac_mask, 72);
804 			set_kvm_facility(kvm->arch.model.fac_list, 72);
805 			r = 0;
806 		}
807 		mutex_unlock(&kvm->lock);
808 		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
809 			 r ? "(not available)" : "(success)");
810 		break;
811 	case KVM_CAP_S390_GS:
812 		r = -EINVAL;
813 		mutex_lock(&kvm->lock);
814 		if (kvm->created_vcpus) {
815 			r = -EBUSY;
816 		} else if (test_facility(133)) {
817 			set_kvm_facility(kvm->arch.model.fac_mask, 133);
818 			set_kvm_facility(kvm->arch.model.fac_list, 133);
819 			r = 0;
820 		}
821 		mutex_unlock(&kvm->lock);
822 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
823 			 r ? "(not available)" : "(success)");
824 		break;
825 	case KVM_CAP_S390_HPAGE_1M:
826 		mutex_lock(&kvm->lock);
827 		if (kvm->created_vcpus)
828 			r = -EBUSY;
829 		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
830 			r = -EINVAL;
831 		else {
832 			r = 0;
833 			mmap_write_lock(kvm->mm);
834 			kvm->mm->context.allow_gmap_hpage_1m = 1;
835 			mmap_write_unlock(kvm->mm);
836 			/*
837 			 * We might have to create fake 4k page
838 			 * tables. To avoid that the hardware works on
839 			 * stale PGSTEs, we emulate these instructions.
840 			 */
841 			kvm->arch.use_skf = 0;
842 			kvm->arch.use_pfmfi = 0;
843 		}
844 		mutex_unlock(&kvm->lock);
845 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
846 			 r ? "(not available)" : "(success)");
847 		break;
848 	case KVM_CAP_S390_USER_STSI:
849 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
850 		kvm->arch.user_stsi = 1;
851 		r = 0;
852 		break;
853 	case KVM_CAP_S390_USER_INSTR0:
854 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
855 		kvm->arch.user_instr0 = 1;
856 		icpt_operexc_on_all_vcpus(kvm);
857 		r = 0;
858 		break;
859 	case KVM_CAP_S390_CPU_TOPOLOGY:
860 		r = -EINVAL;
861 		mutex_lock(&kvm->lock);
862 		if (kvm->created_vcpus) {
863 			r = -EBUSY;
864 		} else if (test_facility(11)) {
865 			set_kvm_facility(kvm->arch.model.fac_mask, 11);
866 			set_kvm_facility(kvm->arch.model.fac_list, 11);
867 			r = 0;
868 		}
869 		mutex_unlock(&kvm->lock);
870 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_CPU_TOPOLOGY %s",
871 			 r ? "(not available)" : "(success)");
872 		break;
873 	default:
874 		r = -EINVAL;
875 		break;
876 	}
877 	return r;
878 }
879 
880 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
881 {
882 	int ret;
883 
884 	switch (attr->attr) {
885 	case KVM_S390_VM_MEM_LIMIT_SIZE:
886 		ret = 0;
887 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
888 			 kvm->arch.mem_limit);
889 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
890 			ret = -EFAULT;
891 		break;
892 	default:
893 		ret = -ENXIO;
894 		break;
895 	}
896 	return ret;
897 }
898 
899 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
900 {
901 	int ret;
902 	unsigned int idx;
903 	switch (attr->attr) {
904 	case KVM_S390_VM_MEM_ENABLE_CMMA:
905 		ret = -ENXIO;
906 		if (!sclp.has_cmma)
907 			break;
908 
909 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
910 		mutex_lock(&kvm->lock);
911 		if (kvm->created_vcpus)
912 			ret = -EBUSY;
913 		else if (kvm->mm->context.allow_gmap_hpage_1m)
914 			ret = -EINVAL;
915 		else {
916 			kvm->arch.use_cmma = 1;
917 			/* Not compatible with cmma. */
918 			kvm->arch.use_pfmfi = 0;
919 			ret = 0;
920 		}
921 		mutex_unlock(&kvm->lock);
922 		break;
923 	case KVM_S390_VM_MEM_CLR_CMMA:
924 		ret = -ENXIO;
925 		if (!sclp.has_cmma)
926 			break;
927 		ret = -EINVAL;
928 		if (!kvm->arch.use_cmma)
929 			break;
930 
931 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
932 		mutex_lock(&kvm->lock);
933 		idx = srcu_read_lock(&kvm->srcu);
934 		s390_reset_cmma(kvm->arch.gmap->mm);
935 		srcu_read_unlock(&kvm->srcu, idx);
936 		mutex_unlock(&kvm->lock);
937 		ret = 0;
938 		break;
939 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
940 		unsigned long new_limit;
941 
942 		if (kvm_is_ucontrol(kvm))
943 			return -EINVAL;
944 
945 		if (get_user(new_limit, (u64 __user *)attr->addr))
946 			return -EFAULT;
947 
948 		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
949 		    new_limit > kvm->arch.mem_limit)
950 			return -E2BIG;
951 
952 		if (!new_limit)
953 			return -EINVAL;
954 
955 		/* gmap_create takes last usable address */
956 		if (new_limit != KVM_S390_NO_MEM_LIMIT)
957 			new_limit -= 1;
958 
959 		ret = -EBUSY;
960 		mutex_lock(&kvm->lock);
961 		if (!kvm->created_vcpus) {
962 			/* gmap_create will round the limit up */
963 			struct gmap *new = gmap_create(current->mm, new_limit);
964 
965 			if (!new) {
966 				ret = -ENOMEM;
967 			} else {
968 				gmap_remove(kvm->arch.gmap);
969 				new->private = kvm;
970 				kvm->arch.gmap = new;
971 				ret = 0;
972 			}
973 		}
974 		mutex_unlock(&kvm->lock);
975 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
976 		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
977 			 (void *) kvm->arch.gmap->asce);
978 		break;
979 	}
980 	default:
981 		ret = -ENXIO;
982 		break;
983 	}
984 	return ret;
985 }
986 
987 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
988 
989 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
990 {
991 	struct kvm_vcpu *vcpu;
992 	unsigned long i;
993 
994 	kvm_s390_vcpu_block_all(kvm);
995 
996 	kvm_for_each_vcpu(i, vcpu, kvm) {
997 		kvm_s390_vcpu_crypto_setup(vcpu);
998 		/* recreate the shadow crycb by leaving the VSIE handler */
999 		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
1000 	}
1001 
1002 	kvm_s390_vcpu_unblock_all(kvm);
1003 }
1004 
1005 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
1006 {
1007 	mutex_lock(&kvm->lock);
1008 	switch (attr->attr) {
1009 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1010 		if (!test_kvm_facility(kvm, 76)) {
1011 			mutex_unlock(&kvm->lock);
1012 			return -EINVAL;
1013 		}
1014 		get_random_bytes(
1015 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1016 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1017 		kvm->arch.crypto.aes_kw = 1;
1018 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
1019 		break;
1020 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1021 		if (!test_kvm_facility(kvm, 76)) {
1022 			mutex_unlock(&kvm->lock);
1023 			return -EINVAL;
1024 		}
1025 		get_random_bytes(
1026 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1027 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1028 		kvm->arch.crypto.dea_kw = 1;
1029 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
1030 		break;
1031 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1032 		if (!test_kvm_facility(kvm, 76)) {
1033 			mutex_unlock(&kvm->lock);
1034 			return -EINVAL;
1035 		}
1036 		kvm->arch.crypto.aes_kw = 0;
1037 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
1038 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1039 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
1040 		break;
1041 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1042 		if (!test_kvm_facility(kvm, 76)) {
1043 			mutex_unlock(&kvm->lock);
1044 			return -EINVAL;
1045 		}
1046 		kvm->arch.crypto.dea_kw = 0;
1047 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
1048 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1049 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
1050 		break;
1051 	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1052 		if (!ap_instructions_available()) {
1053 			mutex_unlock(&kvm->lock);
1054 			return -EOPNOTSUPP;
1055 		}
1056 		kvm->arch.crypto.apie = 1;
1057 		break;
1058 	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1059 		if (!ap_instructions_available()) {
1060 			mutex_unlock(&kvm->lock);
1061 			return -EOPNOTSUPP;
1062 		}
1063 		kvm->arch.crypto.apie = 0;
1064 		break;
1065 	default:
1066 		mutex_unlock(&kvm->lock);
1067 		return -ENXIO;
1068 	}
1069 
1070 	kvm_s390_vcpu_crypto_reset_all(kvm);
1071 	mutex_unlock(&kvm->lock);
1072 	return 0;
1073 }
1074 
1075 static void kvm_s390_vcpu_pci_setup(struct kvm_vcpu *vcpu)
1076 {
1077 	/* Only set the ECB bits after guest requests zPCI interpretation */
1078 	if (!vcpu->kvm->arch.use_zpci_interp)
1079 		return;
1080 
1081 	vcpu->arch.sie_block->ecb2 |= ECB2_ZPCI_LSI;
1082 	vcpu->arch.sie_block->ecb3 |= ECB3_AISII + ECB3_AISI;
1083 }
1084 
1085 void kvm_s390_vcpu_pci_enable_interp(struct kvm *kvm)
1086 {
1087 	struct kvm_vcpu *vcpu;
1088 	unsigned long i;
1089 
1090 	lockdep_assert_held(&kvm->lock);
1091 
1092 	if (!kvm_s390_pci_interp_allowed())
1093 		return;
1094 
1095 	/*
1096 	 * If host is configured for PCI and the necessary facilities are
1097 	 * available, turn on interpretation for the life of this guest
1098 	 */
1099 	kvm->arch.use_zpci_interp = 1;
1100 
1101 	kvm_s390_vcpu_block_all(kvm);
1102 
1103 	kvm_for_each_vcpu(i, vcpu, kvm) {
1104 		kvm_s390_vcpu_pci_setup(vcpu);
1105 		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
1106 	}
1107 
1108 	kvm_s390_vcpu_unblock_all(kvm);
1109 }
1110 
1111 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
1112 {
1113 	unsigned long cx;
1114 	struct kvm_vcpu *vcpu;
1115 
1116 	kvm_for_each_vcpu(cx, vcpu, kvm)
1117 		kvm_s390_sync_request(req, vcpu);
1118 }
1119 
1120 /*
1121  * Must be called with kvm->srcu held to avoid races on memslots, and with
1122  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1123  */
1124 static int kvm_s390_vm_start_migration(struct kvm *kvm)
1125 {
1126 	struct kvm_memory_slot *ms;
1127 	struct kvm_memslots *slots;
1128 	unsigned long ram_pages = 0;
1129 	int bkt;
1130 
1131 	/* migration mode already enabled */
1132 	if (kvm->arch.migration_mode)
1133 		return 0;
1134 	slots = kvm_memslots(kvm);
1135 	if (!slots || kvm_memslots_empty(slots))
1136 		return -EINVAL;
1137 
1138 	if (!kvm->arch.use_cmma) {
1139 		kvm->arch.migration_mode = 1;
1140 		return 0;
1141 	}
1142 	/* mark all the pages in active slots as dirty */
1143 	kvm_for_each_memslot(ms, bkt, slots) {
1144 		if (!ms->dirty_bitmap)
1145 			return -EINVAL;
1146 		/*
1147 		 * The second half of the bitmap is only used on x86,
1148 		 * and would be wasted otherwise, so we put it to good
1149 		 * use here to keep track of the state of the storage
1150 		 * attributes.
1151 		 */
1152 		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1153 		ram_pages += ms->npages;
1154 	}
1155 	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1156 	kvm->arch.migration_mode = 1;
1157 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1158 	return 0;
1159 }
1160 
1161 /*
1162  * Must be called with kvm->slots_lock to avoid races with ourselves and
1163  * kvm_s390_vm_start_migration.
1164  */
1165 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1166 {
1167 	/* migration mode already disabled */
1168 	if (!kvm->arch.migration_mode)
1169 		return 0;
1170 	kvm->arch.migration_mode = 0;
1171 	if (kvm->arch.use_cmma)
1172 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1173 	return 0;
1174 }
1175 
1176 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1177 				     struct kvm_device_attr *attr)
1178 {
1179 	int res = -ENXIO;
1180 
1181 	mutex_lock(&kvm->slots_lock);
1182 	switch (attr->attr) {
1183 	case KVM_S390_VM_MIGRATION_START:
1184 		res = kvm_s390_vm_start_migration(kvm);
1185 		break;
1186 	case KVM_S390_VM_MIGRATION_STOP:
1187 		res = kvm_s390_vm_stop_migration(kvm);
1188 		break;
1189 	default:
1190 		break;
1191 	}
1192 	mutex_unlock(&kvm->slots_lock);
1193 
1194 	return res;
1195 }
1196 
1197 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1198 				     struct kvm_device_attr *attr)
1199 {
1200 	u64 mig = kvm->arch.migration_mode;
1201 
1202 	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1203 		return -ENXIO;
1204 
1205 	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1206 		return -EFAULT;
1207 	return 0;
1208 }
1209 
1210 static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);
1211 
1212 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1213 {
1214 	struct kvm_s390_vm_tod_clock gtod;
1215 
1216 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1217 		return -EFAULT;
1218 
1219 	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1220 		return -EINVAL;
1221 	__kvm_s390_set_tod_clock(kvm, &gtod);
1222 
1223 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1224 		gtod.epoch_idx, gtod.tod);
1225 
1226 	return 0;
1227 }
1228 
1229 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1230 {
1231 	u8 gtod_high;
1232 
1233 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1234 					   sizeof(gtod_high)))
1235 		return -EFAULT;
1236 
1237 	if (gtod_high != 0)
1238 		return -EINVAL;
1239 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1240 
1241 	return 0;
1242 }
1243 
1244 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1245 {
1246 	struct kvm_s390_vm_tod_clock gtod = { 0 };
1247 
1248 	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1249 			   sizeof(gtod.tod)))
1250 		return -EFAULT;
1251 
1252 	__kvm_s390_set_tod_clock(kvm, &gtod);
1253 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1254 	return 0;
1255 }
1256 
1257 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1258 {
1259 	int ret;
1260 
1261 	if (attr->flags)
1262 		return -EINVAL;
1263 
1264 	mutex_lock(&kvm->lock);
1265 	/*
1266 	 * For protected guests, the TOD is managed by the ultravisor, so trying
1267 	 * to change it will never bring the expected results.
1268 	 */
1269 	if (kvm_s390_pv_is_protected(kvm)) {
1270 		ret = -EOPNOTSUPP;
1271 		goto out_unlock;
1272 	}
1273 
1274 	switch (attr->attr) {
1275 	case KVM_S390_VM_TOD_EXT:
1276 		ret = kvm_s390_set_tod_ext(kvm, attr);
1277 		break;
1278 	case KVM_S390_VM_TOD_HIGH:
1279 		ret = kvm_s390_set_tod_high(kvm, attr);
1280 		break;
1281 	case KVM_S390_VM_TOD_LOW:
1282 		ret = kvm_s390_set_tod_low(kvm, attr);
1283 		break;
1284 	default:
1285 		ret = -ENXIO;
1286 		break;
1287 	}
1288 
1289 out_unlock:
1290 	mutex_unlock(&kvm->lock);
1291 	return ret;
1292 }
1293 
1294 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1295 				   struct kvm_s390_vm_tod_clock *gtod)
1296 {
1297 	union tod_clock clk;
1298 
1299 	preempt_disable();
1300 
1301 	store_tod_clock_ext(&clk);
1302 
1303 	gtod->tod = clk.tod + kvm->arch.epoch;
1304 	gtod->epoch_idx = 0;
1305 	if (test_kvm_facility(kvm, 139)) {
1306 		gtod->epoch_idx = clk.ei + kvm->arch.epdx;
1307 		if (gtod->tod < clk.tod)
1308 			gtod->epoch_idx += 1;
1309 	}
1310 
1311 	preempt_enable();
1312 }
1313 
1314 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1315 {
1316 	struct kvm_s390_vm_tod_clock gtod;
1317 
1318 	memset(&gtod, 0, sizeof(gtod));
1319 	kvm_s390_get_tod_clock(kvm, &gtod);
1320 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1321 		return -EFAULT;
1322 
1323 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1324 		gtod.epoch_idx, gtod.tod);
1325 	return 0;
1326 }
1327 
1328 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1329 {
1330 	u8 gtod_high = 0;
1331 
1332 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
1333 					 sizeof(gtod_high)))
1334 		return -EFAULT;
1335 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1336 
1337 	return 0;
1338 }
1339 
1340 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1341 {
1342 	u64 gtod;
1343 
1344 	gtod = kvm_s390_get_tod_clock_fast(kvm);
1345 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1346 		return -EFAULT;
1347 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1348 
1349 	return 0;
1350 }
1351 
1352 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1353 {
1354 	int ret;
1355 
1356 	if (attr->flags)
1357 		return -EINVAL;
1358 
1359 	switch (attr->attr) {
1360 	case KVM_S390_VM_TOD_EXT:
1361 		ret = kvm_s390_get_tod_ext(kvm, attr);
1362 		break;
1363 	case KVM_S390_VM_TOD_HIGH:
1364 		ret = kvm_s390_get_tod_high(kvm, attr);
1365 		break;
1366 	case KVM_S390_VM_TOD_LOW:
1367 		ret = kvm_s390_get_tod_low(kvm, attr);
1368 		break;
1369 	default:
1370 		ret = -ENXIO;
1371 		break;
1372 	}
1373 	return ret;
1374 }
1375 
1376 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1377 {
1378 	struct kvm_s390_vm_cpu_processor *proc;
1379 	u16 lowest_ibc, unblocked_ibc;
1380 	int ret = 0;
1381 
1382 	mutex_lock(&kvm->lock);
1383 	if (kvm->created_vcpus) {
1384 		ret = -EBUSY;
1385 		goto out;
1386 	}
1387 	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1388 	if (!proc) {
1389 		ret = -ENOMEM;
1390 		goto out;
1391 	}
1392 	if (!copy_from_user(proc, (void __user *)attr->addr,
1393 			    sizeof(*proc))) {
1394 		kvm->arch.model.cpuid = proc->cpuid;
1395 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
1396 		unblocked_ibc = sclp.ibc & 0xfff;
1397 		if (lowest_ibc && proc->ibc) {
1398 			if (proc->ibc > unblocked_ibc)
1399 				kvm->arch.model.ibc = unblocked_ibc;
1400 			else if (proc->ibc < lowest_ibc)
1401 				kvm->arch.model.ibc = lowest_ibc;
1402 			else
1403 				kvm->arch.model.ibc = proc->ibc;
1404 		}
1405 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
1406 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
1407 		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1408 			 kvm->arch.model.ibc,
1409 			 kvm->arch.model.cpuid);
1410 		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1411 			 kvm->arch.model.fac_list[0],
1412 			 kvm->arch.model.fac_list[1],
1413 			 kvm->arch.model.fac_list[2]);
1414 	} else
1415 		ret = -EFAULT;
1416 	kfree(proc);
1417 out:
1418 	mutex_unlock(&kvm->lock);
1419 	return ret;
1420 }
1421 
1422 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1423 				       struct kvm_device_attr *attr)
1424 {
1425 	struct kvm_s390_vm_cpu_feat data;
1426 
1427 	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1428 		return -EFAULT;
1429 	if (!bitmap_subset((unsigned long *) data.feat,
1430 			   kvm_s390_available_cpu_feat,
1431 			   KVM_S390_VM_CPU_FEAT_NR_BITS))
1432 		return -EINVAL;
1433 
1434 	mutex_lock(&kvm->lock);
1435 	if (kvm->created_vcpus) {
1436 		mutex_unlock(&kvm->lock);
1437 		return -EBUSY;
1438 	}
1439 	bitmap_from_arr64(kvm->arch.cpu_feat, data.feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1440 	mutex_unlock(&kvm->lock);
1441 	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1442 			 data.feat[0],
1443 			 data.feat[1],
1444 			 data.feat[2]);
1445 	return 0;
1446 }
1447 
1448 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1449 					  struct kvm_device_attr *attr)
1450 {
1451 	mutex_lock(&kvm->lock);
1452 	if (kvm->created_vcpus) {
1453 		mutex_unlock(&kvm->lock);
1454 		return -EBUSY;
1455 	}
1456 
1457 	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1458 			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1459 		mutex_unlock(&kvm->lock);
1460 		return -EFAULT;
1461 	}
1462 	mutex_unlock(&kvm->lock);
1463 
1464 	VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1465 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1466 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1467 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1468 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1469 	VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1470 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1471 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1472 	VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1473 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1474 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1475 	VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1476 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1477 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1478 	VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1479 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1480 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1481 	VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1482 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1483 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1484 	VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1485 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1486 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1487 	VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1488 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1489 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1490 	VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1491 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1492 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1493 	VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1494 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1495 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1496 	VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1497 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1498 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1499 	VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1500 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1501 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1502 	VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1503 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1504 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1505 	VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1506 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1507 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1508 	VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1509 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1510 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1511 	VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1512 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1513 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1514 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1515 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1516 	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1517 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1518 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1519 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1520 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1521 
1522 	return 0;
1523 }
1524 
1525 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1526 {
1527 	int ret = -ENXIO;
1528 
1529 	switch (attr->attr) {
1530 	case KVM_S390_VM_CPU_PROCESSOR:
1531 		ret = kvm_s390_set_processor(kvm, attr);
1532 		break;
1533 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1534 		ret = kvm_s390_set_processor_feat(kvm, attr);
1535 		break;
1536 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1537 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
1538 		break;
1539 	}
1540 	return ret;
1541 }
1542 
1543 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1544 {
1545 	struct kvm_s390_vm_cpu_processor *proc;
1546 	int ret = 0;
1547 
1548 	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1549 	if (!proc) {
1550 		ret = -ENOMEM;
1551 		goto out;
1552 	}
1553 	proc->cpuid = kvm->arch.model.cpuid;
1554 	proc->ibc = kvm->arch.model.ibc;
1555 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1556 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1557 	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1558 		 kvm->arch.model.ibc,
1559 		 kvm->arch.model.cpuid);
1560 	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1561 		 kvm->arch.model.fac_list[0],
1562 		 kvm->arch.model.fac_list[1],
1563 		 kvm->arch.model.fac_list[2]);
1564 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1565 		ret = -EFAULT;
1566 	kfree(proc);
1567 out:
1568 	return ret;
1569 }
1570 
1571 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1572 {
1573 	struct kvm_s390_vm_cpu_machine *mach;
1574 	int ret = 0;
1575 
1576 	mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1577 	if (!mach) {
1578 		ret = -ENOMEM;
1579 		goto out;
1580 	}
1581 	get_cpu_id((struct cpuid *) &mach->cpuid);
1582 	mach->ibc = sclp.ibc;
1583 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1584 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1585 	memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
1586 	       sizeof(stfle_fac_list));
1587 	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1588 		 kvm->arch.model.ibc,
1589 		 kvm->arch.model.cpuid);
1590 	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1591 		 mach->fac_mask[0],
1592 		 mach->fac_mask[1],
1593 		 mach->fac_mask[2]);
1594 	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1595 		 mach->fac_list[0],
1596 		 mach->fac_list[1],
1597 		 mach->fac_list[2]);
1598 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1599 		ret = -EFAULT;
1600 	kfree(mach);
1601 out:
1602 	return ret;
1603 }
1604 
1605 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1606 				       struct kvm_device_attr *attr)
1607 {
1608 	struct kvm_s390_vm_cpu_feat data;
1609 
1610 	bitmap_to_arr64(data.feat, kvm->arch.cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1611 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1612 		return -EFAULT;
1613 	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1614 			 data.feat[0],
1615 			 data.feat[1],
1616 			 data.feat[2]);
1617 	return 0;
1618 }
1619 
1620 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1621 				     struct kvm_device_attr *attr)
1622 {
1623 	struct kvm_s390_vm_cpu_feat data;
1624 
1625 	bitmap_to_arr64(data.feat, kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1626 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1627 		return -EFAULT;
1628 	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1629 			 data.feat[0],
1630 			 data.feat[1],
1631 			 data.feat[2]);
1632 	return 0;
1633 }
1634 
1635 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1636 					  struct kvm_device_attr *attr)
1637 {
1638 	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1639 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1640 		return -EFAULT;
1641 
1642 	VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1643 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1644 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1645 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1646 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1647 	VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1648 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1649 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1650 	VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1651 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1652 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1653 	VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1654 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1655 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1656 	VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1657 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1658 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1659 	VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1660 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1661 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1662 	VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1663 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1664 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1665 	VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1666 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1667 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1668 	VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1669 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1670 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1671 	VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1672 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1673 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1674 	VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1675 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1676 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1677 	VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1678 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1679 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1680 	VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1681 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1682 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1683 	VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1684 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1685 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1686 	VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1687 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1688 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1689 	VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1690 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1691 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1692 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1693 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1694 	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1695 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1696 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1697 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1698 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1699 
1700 	return 0;
1701 }
1702 
1703 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1704 					struct kvm_device_attr *attr)
1705 {
1706 	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1707 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1708 		return -EFAULT;
1709 
1710 	VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1711 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1712 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1713 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1714 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1715 	VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1716 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1717 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1718 	VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1719 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1720 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1721 	VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1722 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1723 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1724 	VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1725 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1726 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1727 	VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1728 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1729 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1730 	VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1731 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1732 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1733 	VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1734 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1735 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1736 	VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1737 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1738 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1739 	VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1740 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1741 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1742 	VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1743 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1744 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1745 	VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1746 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1747 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1748 	VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1749 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1750 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1751 	VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1752 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1753 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1754 	VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1755 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1756 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1757 	VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1758 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1759 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1760 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1761 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1762 	VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1763 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1764 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1765 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1766 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1767 
1768 	return 0;
1769 }
1770 
1771 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1772 {
1773 	int ret = -ENXIO;
1774 
1775 	switch (attr->attr) {
1776 	case KVM_S390_VM_CPU_PROCESSOR:
1777 		ret = kvm_s390_get_processor(kvm, attr);
1778 		break;
1779 	case KVM_S390_VM_CPU_MACHINE:
1780 		ret = kvm_s390_get_machine(kvm, attr);
1781 		break;
1782 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1783 		ret = kvm_s390_get_processor_feat(kvm, attr);
1784 		break;
1785 	case KVM_S390_VM_CPU_MACHINE_FEAT:
1786 		ret = kvm_s390_get_machine_feat(kvm, attr);
1787 		break;
1788 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1789 		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1790 		break;
1791 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1792 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1793 		break;
1794 	}
1795 	return ret;
1796 }
1797 
1798 /**
1799  * kvm_s390_update_topology_change_report - update CPU topology change report
1800  * @kvm: guest KVM description
1801  * @val: set or clear the MTCR bit
1802  *
1803  * Updates the Multiprocessor Topology-Change-Report bit to signal
1804  * the guest with a topology change.
1805  * This is only relevant if the topology facility is present.
1806  *
1807  * The SCA version, bsca or esca, doesn't matter as offset is the same.
1808  */
1809 static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val)
1810 {
1811 	union sca_utility new, old;
1812 	struct bsca_block *sca;
1813 
1814 	read_lock(&kvm->arch.sca_lock);
1815 	sca = kvm->arch.sca;
1816 	do {
1817 		old = READ_ONCE(sca->utility);
1818 		new = old;
1819 		new.mtcr = val;
1820 	} while (cmpxchg(&sca->utility.val, old.val, new.val) != old.val);
1821 	read_unlock(&kvm->arch.sca_lock);
1822 }
1823 
1824 static int kvm_s390_set_topo_change_indication(struct kvm *kvm,
1825 					       struct kvm_device_attr *attr)
1826 {
1827 	if (!test_kvm_facility(kvm, 11))
1828 		return -ENXIO;
1829 
1830 	kvm_s390_update_topology_change_report(kvm, !!attr->attr);
1831 	return 0;
1832 }
1833 
1834 static int kvm_s390_get_topo_change_indication(struct kvm *kvm,
1835 					       struct kvm_device_attr *attr)
1836 {
1837 	u8 topo;
1838 
1839 	if (!test_kvm_facility(kvm, 11))
1840 		return -ENXIO;
1841 
1842 	read_lock(&kvm->arch.sca_lock);
1843 	topo = ((struct bsca_block *)kvm->arch.sca)->utility.mtcr;
1844 	read_unlock(&kvm->arch.sca_lock);
1845 
1846 	return put_user(topo, (u8 __user *)attr->addr);
1847 }
1848 
1849 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1850 {
1851 	int ret;
1852 
1853 	switch (attr->group) {
1854 	case KVM_S390_VM_MEM_CTRL:
1855 		ret = kvm_s390_set_mem_control(kvm, attr);
1856 		break;
1857 	case KVM_S390_VM_TOD:
1858 		ret = kvm_s390_set_tod(kvm, attr);
1859 		break;
1860 	case KVM_S390_VM_CPU_MODEL:
1861 		ret = kvm_s390_set_cpu_model(kvm, attr);
1862 		break;
1863 	case KVM_S390_VM_CRYPTO:
1864 		ret = kvm_s390_vm_set_crypto(kvm, attr);
1865 		break;
1866 	case KVM_S390_VM_MIGRATION:
1867 		ret = kvm_s390_vm_set_migration(kvm, attr);
1868 		break;
1869 	case KVM_S390_VM_CPU_TOPOLOGY:
1870 		ret = kvm_s390_set_topo_change_indication(kvm, attr);
1871 		break;
1872 	default:
1873 		ret = -ENXIO;
1874 		break;
1875 	}
1876 
1877 	return ret;
1878 }
1879 
1880 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1881 {
1882 	int ret;
1883 
1884 	switch (attr->group) {
1885 	case KVM_S390_VM_MEM_CTRL:
1886 		ret = kvm_s390_get_mem_control(kvm, attr);
1887 		break;
1888 	case KVM_S390_VM_TOD:
1889 		ret = kvm_s390_get_tod(kvm, attr);
1890 		break;
1891 	case KVM_S390_VM_CPU_MODEL:
1892 		ret = kvm_s390_get_cpu_model(kvm, attr);
1893 		break;
1894 	case KVM_S390_VM_MIGRATION:
1895 		ret = kvm_s390_vm_get_migration(kvm, attr);
1896 		break;
1897 	case KVM_S390_VM_CPU_TOPOLOGY:
1898 		ret = kvm_s390_get_topo_change_indication(kvm, attr);
1899 		break;
1900 	default:
1901 		ret = -ENXIO;
1902 		break;
1903 	}
1904 
1905 	return ret;
1906 }
1907 
1908 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1909 {
1910 	int ret;
1911 
1912 	switch (attr->group) {
1913 	case KVM_S390_VM_MEM_CTRL:
1914 		switch (attr->attr) {
1915 		case KVM_S390_VM_MEM_ENABLE_CMMA:
1916 		case KVM_S390_VM_MEM_CLR_CMMA:
1917 			ret = sclp.has_cmma ? 0 : -ENXIO;
1918 			break;
1919 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1920 			ret = 0;
1921 			break;
1922 		default:
1923 			ret = -ENXIO;
1924 			break;
1925 		}
1926 		break;
1927 	case KVM_S390_VM_TOD:
1928 		switch (attr->attr) {
1929 		case KVM_S390_VM_TOD_LOW:
1930 		case KVM_S390_VM_TOD_HIGH:
1931 			ret = 0;
1932 			break;
1933 		default:
1934 			ret = -ENXIO;
1935 			break;
1936 		}
1937 		break;
1938 	case KVM_S390_VM_CPU_MODEL:
1939 		switch (attr->attr) {
1940 		case KVM_S390_VM_CPU_PROCESSOR:
1941 		case KVM_S390_VM_CPU_MACHINE:
1942 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1943 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1944 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1945 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1946 			ret = 0;
1947 			break;
1948 		default:
1949 			ret = -ENXIO;
1950 			break;
1951 		}
1952 		break;
1953 	case KVM_S390_VM_CRYPTO:
1954 		switch (attr->attr) {
1955 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1956 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1957 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1958 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1959 			ret = 0;
1960 			break;
1961 		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1962 		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1963 			ret = ap_instructions_available() ? 0 : -ENXIO;
1964 			break;
1965 		default:
1966 			ret = -ENXIO;
1967 			break;
1968 		}
1969 		break;
1970 	case KVM_S390_VM_MIGRATION:
1971 		ret = 0;
1972 		break;
1973 	case KVM_S390_VM_CPU_TOPOLOGY:
1974 		ret = test_kvm_facility(kvm, 11) ? 0 : -ENXIO;
1975 		break;
1976 	default:
1977 		ret = -ENXIO;
1978 		break;
1979 	}
1980 
1981 	return ret;
1982 }
1983 
1984 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1985 {
1986 	uint8_t *keys;
1987 	uint64_t hva;
1988 	int srcu_idx, i, r = 0;
1989 
1990 	if (args->flags != 0)
1991 		return -EINVAL;
1992 
1993 	/* Is this guest using storage keys? */
1994 	if (!mm_uses_skeys(current->mm))
1995 		return KVM_S390_GET_SKEYS_NONE;
1996 
1997 	/* Enforce sane limit on memory allocation */
1998 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1999 		return -EINVAL;
2000 
2001 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
2002 	if (!keys)
2003 		return -ENOMEM;
2004 
2005 	mmap_read_lock(current->mm);
2006 	srcu_idx = srcu_read_lock(&kvm->srcu);
2007 	for (i = 0; i < args->count; i++) {
2008 		hva = gfn_to_hva(kvm, args->start_gfn + i);
2009 		if (kvm_is_error_hva(hva)) {
2010 			r = -EFAULT;
2011 			break;
2012 		}
2013 
2014 		r = get_guest_storage_key(current->mm, hva, &keys[i]);
2015 		if (r)
2016 			break;
2017 	}
2018 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2019 	mmap_read_unlock(current->mm);
2020 
2021 	if (!r) {
2022 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
2023 				 sizeof(uint8_t) * args->count);
2024 		if (r)
2025 			r = -EFAULT;
2026 	}
2027 
2028 	kvfree(keys);
2029 	return r;
2030 }
2031 
2032 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
2033 {
2034 	uint8_t *keys;
2035 	uint64_t hva;
2036 	int srcu_idx, i, r = 0;
2037 	bool unlocked;
2038 
2039 	if (args->flags != 0)
2040 		return -EINVAL;
2041 
2042 	/* Enforce sane limit on memory allocation */
2043 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
2044 		return -EINVAL;
2045 
2046 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
2047 	if (!keys)
2048 		return -ENOMEM;
2049 
2050 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
2051 			   sizeof(uint8_t) * args->count);
2052 	if (r) {
2053 		r = -EFAULT;
2054 		goto out;
2055 	}
2056 
2057 	/* Enable storage key handling for the guest */
2058 	r = s390_enable_skey();
2059 	if (r)
2060 		goto out;
2061 
2062 	i = 0;
2063 	mmap_read_lock(current->mm);
2064 	srcu_idx = srcu_read_lock(&kvm->srcu);
2065         while (i < args->count) {
2066 		unlocked = false;
2067 		hva = gfn_to_hva(kvm, args->start_gfn + i);
2068 		if (kvm_is_error_hva(hva)) {
2069 			r = -EFAULT;
2070 			break;
2071 		}
2072 
2073 		/* Lowest order bit is reserved */
2074 		if (keys[i] & 0x01) {
2075 			r = -EINVAL;
2076 			break;
2077 		}
2078 
2079 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
2080 		if (r) {
2081 			r = fixup_user_fault(current->mm, hva,
2082 					     FAULT_FLAG_WRITE, &unlocked);
2083 			if (r)
2084 				break;
2085 		}
2086 		if (!r)
2087 			i++;
2088 	}
2089 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2090 	mmap_read_unlock(current->mm);
2091 out:
2092 	kvfree(keys);
2093 	return r;
2094 }
2095 
2096 /*
2097  * Base address and length must be sent at the start of each block, therefore
2098  * it's cheaper to send some clean data, as long as it's less than the size of
2099  * two longs.
2100  */
2101 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
2102 /* for consistency */
2103 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
2104 
2105 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2106 			      u8 *res, unsigned long bufsize)
2107 {
2108 	unsigned long pgstev, hva, cur_gfn = args->start_gfn;
2109 
2110 	args->count = 0;
2111 	while (args->count < bufsize) {
2112 		hva = gfn_to_hva(kvm, cur_gfn);
2113 		/*
2114 		 * We return an error if the first value was invalid, but we
2115 		 * return successfully if at least one value was copied.
2116 		 */
2117 		if (kvm_is_error_hva(hva))
2118 			return args->count ? 0 : -EFAULT;
2119 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2120 			pgstev = 0;
2121 		res[args->count++] = (pgstev >> 24) & 0x43;
2122 		cur_gfn++;
2123 	}
2124 
2125 	return 0;
2126 }
2127 
2128 static struct kvm_memory_slot *gfn_to_memslot_approx(struct kvm_memslots *slots,
2129 						     gfn_t gfn)
2130 {
2131 	return ____gfn_to_memslot(slots, gfn, true);
2132 }
2133 
2134 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
2135 					      unsigned long cur_gfn)
2136 {
2137 	struct kvm_memory_slot *ms = gfn_to_memslot_approx(slots, cur_gfn);
2138 	unsigned long ofs = cur_gfn - ms->base_gfn;
2139 	struct rb_node *mnode = &ms->gfn_node[slots->node_idx];
2140 
2141 	if (ms->base_gfn + ms->npages <= cur_gfn) {
2142 		mnode = rb_next(mnode);
2143 		/* If we are above the highest slot, wrap around */
2144 		if (!mnode)
2145 			mnode = rb_first(&slots->gfn_tree);
2146 
2147 		ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
2148 		ofs = 0;
2149 	}
2150 	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
2151 	while (ofs >= ms->npages && (mnode = rb_next(mnode))) {
2152 		ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
2153 		ofs = find_first_bit(kvm_second_dirty_bitmap(ms), ms->npages);
2154 	}
2155 	return ms->base_gfn + ofs;
2156 }
2157 
2158 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2159 			     u8 *res, unsigned long bufsize)
2160 {
2161 	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2162 	struct kvm_memslots *slots = kvm_memslots(kvm);
2163 	struct kvm_memory_slot *ms;
2164 
2165 	if (unlikely(kvm_memslots_empty(slots)))
2166 		return 0;
2167 
2168 	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2169 	ms = gfn_to_memslot(kvm, cur_gfn);
2170 	args->count = 0;
2171 	args->start_gfn = cur_gfn;
2172 	if (!ms)
2173 		return 0;
2174 	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2175 	mem_end = kvm_s390_get_gfn_end(slots);
2176 
2177 	while (args->count < bufsize) {
2178 		hva = gfn_to_hva(kvm, cur_gfn);
2179 		if (kvm_is_error_hva(hva))
2180 			return 0;
2181 		/* Decrement only if we actually flipped the bit to 0 */
2182 		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2183 			atomic64_dec(&kvm->arch.cmma_dirty_pages);
2184 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2185 			pgstev = 0;
2186 		/* Save the value */
2187 		res[args->count++] = (pgstev >> 24) & 0x43;
2188 		/* If the next bit is too far away, stop. */
2189 		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2190 			return 0;
2191 		/* If we reached the previous "next", find the next one */
2192 		if (cur_gfn == next_gfn)
2193 			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2194 		/* Reached the end of memory or of the buffer, stop */
2195 		if ((next_gfn >= mem_end) ||
2196 		    (next_gfn - args->start_gfn >= bufsize))
2197 			return 0;
2198 		cur_gfn++;
2199 		/* Reached the end of the current memslot, take the next one. */
2200 		if (cur_gfn - ms->base_gfn >= ms->npages) {
2201 			ms = gfn_to_memslot(kvm, cur_gfn);
2202 			if (!ms)
2203 				return 0;
2204 		}
2205 	}
2206 	return 0;
2207 }
2208 
2209 /*
2210  * This function searches for the next page with dirty CMMA attributes, and
2211  * saves the attributes in the buffer up to either the end of the buffer or
2212  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2213  * no trailing clean bytes are saved.
2214  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2215  * output buffer will indicate 0 as length.
2216  */
2217 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2218 				  struct kvm_s390_cmma_log *args)
2219 {
2220 	unsigned long bufsize;
2221 	int srcu_idx, peek, ret;
2222 	u8 *values;
2223 
2224 	if (!kvm->arch.use_cmma)
2225 		return -ENXIO;
2226 	/* Invalid/unsupported flags were specified */
2227 	if (args->flags & ~KVM_S390_CMMA_PEEK)
2228 		return -EINVAL;
2229 	/* Migration mode query, and we are not doing a migration */
2230 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2231 	if (!peek && !kvm->arch.migration_mode)
2232 		return -EINVAL;
2233 	/* CMMA is disabled or was not used, or the buffer has length zero */
2234 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2235 	if (!bufsize || !kvm->mm->context.uses_cmm) {
2236 		memset(args, 0, sizeof(*args));
2237 		return 0;
2238 	}
2239 	/* We are not peeking, and there are no dirty pages */
2240 	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2241 		memset(args, 0, sizeof(*args));
2242 		return 0;
2243 	}
2244 
2245 	values = vmalloc(bufsize);
2246 	if (!values)
2247 		return -ENOMEM;
2248 
2249 	mmap_read_lock(kvm->mm);
2250 	srcu_idx = srcu_read_lock(&kvm->srcu);
2251 	if (peek)
2252 		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2253 	else
2254 		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2255 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2256 	mmap_read_unlock(kvm->mm);
2257 
2258 	if (kvm->arch.migration_mode)
2259 		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2260 	else
2261 		args->remaining = 0;
2262 
2263 	if (copy_to_user((void __user *)args->values, values, args->count))
2264 		ret = -EFAULT;
2265 
2266 	vfree(values);
2267 	return ret;
2268 }
2269 
2270 /*
2271  * This function sets the CMMA attributes for the given pages. If the input
2272  * buffer has zero length, no action is taken, otherwise the attributes are
2273  * set and the mm->context.uses_cmm flag is set.
2274  */
2275 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2276 				  const struct kvm_s390_cmma_log *args)
2277 {
2278 	unsigned long hva, mask, pgstev, i;
2279 	uint8_t *bits;
2280 	int srcu_idx, r = 0;
2281 
2282 	mask = args->mask;
2283 
2284 	if (!kvm->arch.use_cmma)
2285 		return -ENXIO;
2286 	/* invalid/unsupported flags */
2287 	if (args->flags != 0)
2288 		return -EINVAL;
2289 	/* Enforce sane limit on memory allocation */
2290 	if (args->count > KVM_S390_CMMA_SIZE_MAX)
2291 		return -EINVAL;
2292 	/* Nothing to do */
2293 	if (args->count == 0)
2294 		return 0;
2295 
2296 	bits = vmalloc(array_size(sizeof(*bits), args->count));
2297 	if (!bits)
2298 		return -ENOMEM;
2299 
2300 	r = copy_from_user(bits, (void __user *)args->values, args->count);
2301 	if (r) {
2302 		r = -EFAULT;
2303 		goto out;
2304 	}
2305 
2306 	mmap_read_lock(kvm->mm);
2307 	srcu_idx = srcu_read_lock(&kvm->srcu);
2308 	for (i = 0; i < args->count; i++) {
2309 		hva = gfn_to_hva(kvm, args->start_gfn + i);
2310 		if (kvm_is_error_hva(hva)) {
2311 			r = -EFAULT;
2312 			break;
2313 		}
2314 
2315 		pgstev = bits[i];
2316 		pgstev = pgstev << 24;
2317 		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2318 		set_pgste_bits(kvm->mm, hva, mask, pgstev);
2319 	}
2320 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2321 	mmap_read_unlock(kvm->mm);
2322 
2323 	if (!kvm->mm->context.uses_cmm) {
2324 		mmap_write_lock(kvm->mm);
2325 		kvm->mm->context.uses_cmm = 1;
2326 		mmap_write_unlock(kvm->mm);
2327 	}
2328 out:
2329 	vfree(bits);
2330 	return r;
2331 }
2332 
2333 /**
2334  * kvm_s390_cpus_from_pv - Convert all protected vCPUs in a protected VM to
2335  * non protected.
2336  * @kvm: the VM whose protected vCPUs are to be converted
2337  * @rc: return value for the RC field of the UVC (in case of error)
2338  * @rrc: return value for the RRC field of the UVC (in case of error)
2339  *
2340  * Does not stop in case of error, tries to convert as many
2341  * CPUs as possible. In case of error, the RC and RRC of the last error are
2342  * returned.
2343  *
2344  * Return: 0 in case of success, otherwise -EIO
2345  */
2346 int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2347 {
2348 	struct kvm_vcpu *vcpu;
2349 	unsigned long i;
2350 	u16 _rc, _rrc;
2351 	int ret = 0;
2352 
2353 	/*
2354 	 * We ignore failures and try to destroy as many CPUs as possible.
2355 	 * At the same time we must not free the assigned resources when
2356 	 * this fails, as the ultravisor has still access to that memory.
2357 	 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2358 	 * behind.
2359 	 * We want to return the first failure rc and rrc, though.
2360 	 */
2361 	kvm_for_each_vcpu(i, vcpu, kvm) {
2362 		mutex_lock(&vcpu->mutex);
2363 		if (kvm_s390_pv_destroy_cpu(vcpu, &_rc, &_rrc) && !ret) {
2364 			*rc = _rc;
2365 			*rrc = _rrc;
2366 			ret = -EIO;
2367 		}
2368 		mutex_unlock(&vcpu->mutex);
2369 	}
2370 	/* Ensure that we re-enable gisa if the non-PV guest used it but the PV guest did not. */
2371 	if (use_gisa)
2372 		kvm_s390_gisa_enable(kvm);
2373 	return ret;
2374 }
2375 
2376 /**
2377  * kvm_s390_cpus_to_pv - Convert all non-protected vCPUs in a protected VM
2378  * to protected.
2379  * @kvm: the VM whose protected vCPUs are to be converted
2380  * @rc: return value for the RC field of the UVC (in case of error)
2381  * @rrc: return value for the RRC field of the UVC (in case of error)
2382  *
2383  * Tries to undo the conversion in case of error.
2384  *
2385  * Return: 0 in case of success, otherwise -EIO
2386  */
2387 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2388 {
2389 	unsigned long i;
2390 	int r = 0;
2391 	u16 dummy;
2392 
2393 	struct kvm_vcpu *vcpu;
2394 
2395 	/* Disable the GISA if the ultravisor does not support AIV. */
2396 	if (!test_bit_inv(BIT_UV_FEAT_AIV, &uv_info.uv_feature_indications))
2397 		kvm_s390_gisa_disable(kvm);
2398 
2399 	kvm_for_each_vcpu(i, vcpu, kvm) {
2400 		mutex_lock(&vcpu->mutex);
2401 		r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2402 		mutex_unlock(&vcpu->mutex);
2403 		if (r)
2404 			break;
2405 	}
2406 	if (r)
2407 		kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2408 	return r;
2409 }
2410 
2411 /*
2412  * Here we provide user space with a direct interface to query UV
2413  * related data like UV maxima and available features as well as
2414  * feature specific data.
2415  *
2416  * To facilitate future extension of the data structures we'll try to
2417  * write data up to the maximum requested length.
2418  */
2419 static ssize_t kvm_s390_handle_pv_info(struct kvm_s390_pv_info *info)
2420 {
2421 	ssize_t len_min;
2422 
2423 	switch (info->header.id) {
2424 	case KVM_PV_INFO_VM: {
2425 		len_min =  sizeof(info->header) + sizeof(info->vm);
2426 
2427 		if (info->header.len_max < len_min)
2428 			return -EINVAL;
2429 
2430 		memcpy(info->vm.inst_calls_list,
2431 		       uv_info.inst_calls_list,
2432 		       sizeof(uv_info.inst_calls_list));
2433 
2434 		/* It's max cpuid not max cpus, so it's off by one */
2435 		info->vm.max_cpus = uv_info.max_guest_cpu_id + 1;
2436 		info->vm.max_guests = uv_info.max_num_sec_conf;
2437 		info->vm.max_guest_addr = uv_info.max_sec_stor_addr;
2438 		info->vm.feature_indication = uv_info.uv_feature_indications;
2439 
2440 		return len_min;
2441 	}
2442 	case KVM_PV_INFO_DUMP: {
2443 		len_min =  sizeof(info->header) + sizeof(info->dump);
2444 
2445 		if (info->header.len_max < len_min)
2446 			return -EINVAL;
2447 
2448 		info->dump.dump_cpu_buffer_len = uv_info.guest_cpu_stor_len;
2449 		info->dump.dump_config_mem_buffer_per_1m = uv_info.conf_dump_storage_state_len;
2450 		info->dump.dump_config_finalize_len = uv_info.conf_dump_finalize_len;
2451 		return len_min;
2452 	}
2453 	default:
2454 		return -EINVAL;
2455 	}
2456 }
2457 
2458 static int kvm_s390_pv_dmp(struct kvm *kvm, struct kvm_pv_cmd *cmd,
2459 			   struct kvm_s390_pv_dmp dmp)
2460 {
2461 	int r = -EINVAL;
2462 	void __user *result_buff = (void __user *)dmp.buff_addr;
2463 
2464 	switch (dmp.subcmd) {
2465 	case KVM_PV_DUMP_INIT: {
2466 		if (kvm->arch.pv.dumping)
2467 			break;
2468 
2469 		/*
2470 		 * Block SIE entry as concurrent dump UVCs could lead
2471 		 * to validities.
2472 		 */
2473 		kvm_s390_vcpu_block_all(kvm);
2474 
2475 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2476 				  UVC_CMD_DUMP_INIT, &cmd->rc, &cmd->rrc);
2477 		KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP INIT: rc %x rrc %x",
2478 			     cmd->rc, cmd->rrc);
2479 		if (!r) {
2480 			kvm->arch.pv.dumping = true;
2481 		} else {
2482 			kvm_s390_vcpu_unblock_all(kvm);
2483 			r = -EINVAL;
2484 		}
2485 		break;
2486 	}
2487 	case KVM_PV_DUMP_CONFIG_STOR_STATE: {
2488 		if (!kvm->arch.pv.dumping)
2489 			break;
2490 
2491 		/*
2492 		 * gaddr is an output parameter since we might stop
2493 		 * early. As dmp will be copied back in our caller, we
2494 		 * don't need to do it ourselves.
2495 		 */
2496 		r = kvm_s390_pv_dump_stor_state(kvm, result_buff, &dmp.gaddr, dmp.buff_len,
2497 						&cmd->rc, &cmd->rrc);
2498 		break;
2499 	}
2500 	case KVM_PV_DUMP_COMPLETE: {
2501 		if (!kvm->arch.pv.dumping)
2502 			break;
2503 
2504 		r = -EINVAL;
2505 		if (dmp.buff_len < uv_info.conf_dump_finalize_len)
2506 			break;
2507 
2508 		r = kvm_s390_pv_dump_complete(kvm, result_buff,
2509 					      &cmd->rc, &cmd->rrc);
2510 		break;
2511 	}
2512 	default:
2513 		r = -ENOTTY;
2514 		break;
2515 	}
2516 
2517 	return r;
2518 }
2519 
2520 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2521 {
2522 	int r = 0;
2523 	u16 dummy;
2524 	void __user *argp = (void __user *)cmd->data;
2525 
2526 	switch (cmd->cmd) {
2527 	case KVM_PV_ENABLE: {
2528 		r = -EINVAL;
2529 		if (kvm_s390_pv_is_protected(kvm))
2530 			break;
2531 
2532 		/*
2533 		 *  FMT 4 SIE needs esca. As we never switch back to bsca from
2534 		 *  esca, we need no cleanup in the error cases below
2535 		 */
2536 		r = sca_switch_to_extended(kvm);
2537 		if (r)
2538 			break;
2539 
2540 		mmap_write_lock(current->mm);
2541 		r = gmap_mark_unmergeable();
2542 		mmap_write_unlock(current->mm);
2543 		if (r)
2544 			break;
2545 
2546 		r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2547 		if (r)
2548 			break;
2549 
2550 		r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2551 		if (r)
2552 			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2553 
2554 		/* we need to block service interrupts from now on */
2555 		set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2556 		break;
2557 	}
2558 	case KVM_PV_DISABLE: {
2559 		r = -EINVAL;
2560 		if (!kvm_s390_pv_is_protected(kvm))
2561 			break;
2562 
2563 		r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2564 		/*
2565 		 * If a CPU could not be destroyed, destroy VM will also fail.
2566 		 * There is no point in trying to destroy it. Instead return
2567 		 * the rc and rrc from the first CPU that failed destroying.
2568 		 */
2569 		if (r)
2570 			break;
2571 		r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2572 
2573 		/* no need to block service interrupts any more */
2574 		clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2575 		break;
2576 	}
2577 	case KVM_PV_SET_SEC_PARMS: {
2578 		struct kvm_s390_pv_sec_parm parms = {};
2579 		void *hdr;
2580 
2581 		r = -EINVAL;
2582 		if (!kvm_s390_pv_is_protected(kvm))
2583 			break;
2584 
2585 		r = -EFAULT;
2586 		if (copy_from_user(&parms, argp, sizeof(parms)))
2587 			break;
2588 
2589 		/* Currently restricted to 8KB */
2590 		r = -EINVAL;
2591 		if (parms.length > PAGE_SIZE * 2)
2592 			break;
2593 
2594 		r = -ENOMEM;
2595 		hdr = vmalloc(parms.length);
2596 		if (!hdr)
2597 			break;
2598 
2599 		r = -EFAULT;
2600 		if (!copy_from_user(hdr, (void __user *)parms.origin,
2601 				    parms.length))
2602 			r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2603 						      &cmd->rc, &cmd->rrc);
2604 
2605 		vfree(hdr);
2606 		break;
2607 	}
2608 	case KVM_PV_UNPACK: {
2609 		struct kvm_s390_pv_unp unp = {};
2610 
2611 		r = -EINVAL;
2612 		if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2613 			break;
2614 
2615 		r = -EFAULT;
2616 		if (copy_from_user(&unp, argp, sizeof(unp)))
2617 			break;
2618 
2619 		r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2620 				       &cmd->rc, &cmd->rrc);
2621 		break;
2622 	}
2623 	case KVM_PV_VERIFY: {
2624 		r = -EINVAL;
2625 		if (!kvm_s390_pv_is_protected(kvm))
2626 			break;
2627 
2628 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2629 				  UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2630 		KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2631 			     cmd->rrc);
2632 		break;
2633 	}
2634 	case KVM_PV_PREP_RESET: {
2635 		r = -EINVAL;
2636 		if (!kvm_s390_pv_is_protected(kvm))
2637 			break;
2638 
2639 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2640 				  UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2641 		KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2642 			     cmd->rc, cmd->rrc);
2643 		break;
2644 	}
2645 	case KVM_PV_UNSHARE_ALL: {
2646 		r = -EINVAL;
2647 		if (!kvm_s390_pv_is_protected(kvm))
2648 			break;
2649 
2650 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2651 				  UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2652 		KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2653 			     cmd->rc, cmd->rrc);
2654 		break;
2655 	}
2656 	case KVM_PV_INFO: {
2657 		struct kvm_s390_pv_info info = {};
2658 		ssize_t data_len;
2659 
2660 		/*
2661 		 * No need to check the VM protection here.
2662 		 *
2663 		 * Maybe user space wants to query some of the data
2664 		 * when the VM is still unprotected. If we see the
2665 		 * need to fence a new data command we can still
2666 		 * return an error in the info handler.
2667 		 */
2668 
2669 		r = -EFAULT;
2670 		if (copy_from_user(&info, argp, sizeof(info.header)))
2671 			break;
2672 
2673 		r = -EINVAL;
2674 		if (info.header.len_max < sizeof(info.header))
2675 			break;
2676 
2677 		data_len = kvm_s390_handle_pv_info(&info);
2678 		if (data_len < 0) {
2679 			r = data_len;
2680 			break;
2681 		}
2682 		/*
2683 		 * If a data command struct is extended (multiple
2684 		 * times) this can be used to determine how much of it
2685 		 * is valid.
2686 		 */
2687 		info.header.len_written = data_len;
2688 
2689 		r = -EFAULT;
2690 		if (copy_to_user(argp, &info, data_len))
2691 			break;
2692 
2693 		r = 0;
2694 		break;
2695 	}
2696 	case KVM_PV_DUMP: {
2697 		struct kvm_s390_pv_dmp dmp;
2698 
2699 		r = -EINVAL;
2700 		if (!kvm_s390_pv_is_protected(kvm))
2701 			break;
2702 
2703 		r = -EFAULT;
2704 		if (copy_from_user(&dmp, argp, sizeof(dmp)))
2705 			break;
2706 
2707 		r = kvm_s390_pv_dmp(kvm, cmd, dmp);
2708 		if (r)
2709 			break;
2710 
2711 		if (copy_to_user(argp, &dmp, sizeof(dmp))) {
2712 			r = -EFAULT;
2713 			break;
2714 		}
2715 
2716 		break;
2717 	}
2718 	default:
2719 		r = -ENOTTY;
2720 	}
2721 	return r;
2722 }
2723 
2724 static bool access_key_invalid(u8 access_key)
2725 {
2726 	return access_key > 0xf;
2727 }
2728 
2729 static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
2730 {
2731 	void __user *uaddr = (void __user *)mop->buf;
2732 	u64 supported_flags;
2733 	void *tmpbuf = NULL;
2734 	int r, srcu_idx;
2735 
2736 	supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION
2737 			  | KVM_S390_MEMOP_F_CHECK_ONLY;
2738 	if (mop->flags & ~supported_flags || !mop->size)
2739 		return -EINVAL;
2740 	if (mop->size > MEM_OP_MAX_SIZE)
2741 		return -E2BIG;
2742 	/*
2743 	 * This is technically a heuristic only, if the kvm->lock is not
2744 	 * taken, it is not guaranteed that the vm is/remains non-protected.
2745 	 * This is ok from a kernel perspective, wrongdoing is detected
2746 	 * on the access, -EFAULT is returned and the vm may crash the
2747 	 * next time it accesses the memory in question.
2748 	 * There is no sane usecase to do switching and a memop on two
2749 	 * different CPUs at the same time.
2750 	 */
2751 	if (kvm_s390_pv_get_handle(kvm))
2752 		return -EINVAL;
2753 	if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
2754 		if (access_key_invalid(mop->key))
2755 			return -EINVAL;
2756 	} else {
2757 		mop->key = 0;
2758 	}
2759 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2760 		tmpbuf = vmalloc(mop->size);
2761 		if (!tmpbuf)
2762 			return -ENOMEM;
2763 	}
2764 
2765 	srcu_idx = srcu_read_lock(&kvm->srcu);
2766 
2767 	if (kvm_is_error_gpa(kvm, mop->gaddr)) {
2768 		r = PGM_ADDRESSING;
2769 		goto out_unlock;
2770 	}
2771 
2772 	switch (mop->op) {
2773 	case KVM_S390_MEMOP_ABSOLUTE_READ: {
2774 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2775 			r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_FETCH, mop->key);
2776 		} else {
2777 			r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
2778 						      mop->size, GACC_FETCH, mop->key);
2779 			if (r == 0) {
2780 				if (copy_to_user(uaddr, tmpbuf, mop->size))
2781 					r = -EFAULT;
2782 			}
2783 		}
2784 		break;
2785 	}
2786 	case KVM_S390_MEMOP_ABSOLUTE_WRITE: {
2787 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2788 			r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_STORE, mop->key);
2789 		} else {
2790 			if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2791 				r = -EFAULT;
2792 				break;
2793 			}
2794 			r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
2795 						      mop->size, GACC_STORE, mop->key);
2796 		}
2797 		break;
2798 	}
2799 	default:
2800 		r = -EINVAL;
2801 	}
2802 
2803 out_unlock:
2804 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2805 
2806 	vfree(tmpbuf);
2807 	return r;
2808 }
2809 
2810 long kvm_arch_vm_ioctl(struct file *filp,
2811 		       unsigned int ioctl, unsigned long arg)
2812 {
2813 	struct kvm *kvm = filp->private_data;
2814 	void __user *argp = (void __user *)arg;
2815 	struct kvm_device_attr attr;
2816 	int r;
2817 
2818 	switch (ioctl) {
2819 	case KVM_S390_INTERRUPT: {
2820 		struct kvm_s390_interrupt s390int;
2821 
2822 		r = -EFAULT;
2823 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
2824 			break;
2825 		r = kvm_s390_inject_vm(kvm, &s390int);
2826 		break;
2827 	}
2828 	case KVM_CREATE_IRQCHIP: {
2829 		struct kvm_irq_routing_entry routing;
2830 
2831 		r = -EINVAL;
2832 		if (kvm->arch.use_irqchip) {
2833 			/* Set up dummy routing. */
2834 			memset(&routing, 0, sizeof(routing));
2835 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2836 		}
2837 		break;
2838 	}
2839 	case KVM_SET_DEVICE_ATTR: {
2840 		r = -EFAULT;
2841 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2842 			break;
2843 		r = kvm_s390_vm_set_attr(kvm, &attr);
2844 		break;
2845 	}
2846 	case KVM_GET_DEVICE_ATTR: {
2847 		r = -EFAULT;
2848 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2849 			break;
2850 		r = kvm_s390_vm_get_attr(kvm, &attr);
2851 		break;
2852 	}
2853 	case KVM_HAS_DEVICE_ATTR: {
2854 		r = -EFAULT;
2855 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2856 			break;
2857 		r = kvm_s390_vm_has_attr(kvm, &attr);
2858 		break;
2859 	}
2860 	case KVM_S390_GET_SKEYS: {
2861 		struct kvm_s390_skeys args;
2862 
2863 		r = -EFAULT;
2864 		if (copy_from_user(&args, argp,
2865 				   sizeof(struct kvm_s390_skeys)))
2866 			break;
2867 		r = kvm_s390_get_skeys(kvm, &args);
2868 		break;
2869 	}
2870 	case KVM_S390_SET_SKEYS: {
2871 		struct kvm_s390_skeys args;
2872 
2873 		r = -EFAULT;
2874 		if (copy_from_user(&args, argp,
2875 				   sizeof(struct kvm_s390_skeys)))
2876 			break;
2877 		r = kvm_s390_set_skeys(kvm, &args);
2878 		break;
2879 	}
2880 	case KVM_S390_GET_CMMA_BITS: {
2881 		struct kvm_s390_cmma_log args;
2882 
2883 		r = -EFAULT;
2884 		if (copy_from_user(&args, argp, sizeof(args)))
2885 			break;
2886 		mutex_lock(&kvm->slots_lock);
2887 		r = kvm_s390_get_cmma_bits(kvm, &args);
2888 		mutex_unlock(&kvm->slots_lock);
2889 		if (!r) {
2890 			r = copy_to_user(argp, &args, sizeof(args));
2891 			if (r)
2892 				r = -EFAULT;
2893 		}
2894 		break;
2895 	}
2896 	case KVM_S390_SET_CMMA_BITS: {
2897 		struct kvm_s390_cmma_log args;
2898 
2899 		r = -EFAULT;
2900 		if (copy_from_user(&args, argp, sizeof(args)))
2901 			break;
2902 		mutex_lock(&kvm->slots_lock);
2903 		r = kvm_s390_set_cmma_bits(kvm, &args);
2904 		mutex_unlock(&kvm->slots_lock);
2905 		break;
2906 	}
2907 	case KVM_S390_PV_COMMAND: {
2908 		struct kvm_pv_cmd args;
2909 
2910 		/* protvirt means user cpu state */
2911 		kvm_s390_set_user_cpu_state_ctrl(kvm);
2912 		r = 0;
2913 		if (!is_prot_virt_host()) {
2914 			r = -EINVAL;
2915 			break;
2916 		}
2917 		if (copy_from_user(&args, argp, sizeof(args))) {
2918 			r = -EFAULT;
2919 			break;
2920 		}
2921 		if (args.flags) {
2922 			r = -EINVAL;
2923 			break;
2924 		}
2925 		mutex_lock(&kvm->lock);
2926 		r = kvm_s390_handle_pv(kvm, &args);
2927 		mutex_unlock(&kvm->lock);
2928 		if (copy_to_user(argp, &args, sizeof(args))) {
2929 			r = -EFAULT;
2930 			break;
2931 		}
2932 		break;
2933 	}
2934 	case KVM_S390_MEM_OP: {
2935 		struct kvm_s390_mem_op mem_op;
2936 
2937 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
2938 			r = kvm_s390_vm_mem_op(kvm, &mem_op);
2939 		else
2940 			r = -EFAULT;
2941 		break;
2942 	}
2943 	case KVM_S390_ZPCI_OP: {
2944 		struct kvm_s390_zpci_op args;
2945 
2946 		r = -EINVAL;
2947 		if (!IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
2948 			break;
2949 		if (copy_from_user(&args, argp, sizeof(args))) {
2950 			r = -EFAULT;
2951 			break;
2952 		}
2953 		r = kvm_s390_pci_zpci_op(kvm, &args);
2954 		break;
2955 	}
2956 	default:
2957 		r = -ENOTTY;
2958 	}
2959 
2960 	return r;
2961 }
2962 
2963 static int kvm_s390_apxa_installed(void)
2964 {
2965 	struct ap_config_info info;
2966 
2967 	if (ap_instructions_available()) {
2968 		if (ap_qci(&info) == 0)
2969 			return info.apxa;
2970 	}
2971 
2972 	return 0;
2973 }
2974 
2975 /*
2976  * The format of the crypto control block (CRYCB) is specified in the 3 low
2977  * order bits of the CRYCB designation (CRYCBD) field as follows:
2978  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2979  *	     AP extended addressing (APXA) facility are installed.
2980  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2981  * Format 2: Both the APXA and MSAX3 facilities are installed
2982  */
2983 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2984 {
2985 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2986 
2987 	/* Clear the CRYCB format bits - i.e., set format 0 by default */
2988 	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2989 
2990 	/* Check whether MSAX3 is installed */
2991 	if (!test_kvm_facility(kvm, 76))
2992 		return;
2993 
2994 	if (kvm_s390_apxa_installed())
2995 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2996 	else
2997 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2998 }
2999 
3000 /*
3001  * kvm_arch_crypto_set_masks
3002  *
3003  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
3004  *	 to be set.
3005  * @apm: the mask identifying the accessible AP adapters
3006  * @aqm: the mask identifying the accessible AP domains
3007  * @adm: the mask identifying the accessible AP control domains
3008  *
3009  * Set the masks that identify the adapters, domains and control domains to
3010  * which the KVM guest is granted access.
3011  *
3012  * Note: The kvm->lock mutex must be locked by the caller before invoking this
3013  *	 function.
3014  */
3015 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
3016 			       unsigned long *aqm, unsigned long *adm)
3017 {
3018 	struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
3019 
3020 	kvm_s390_vcpu_block_all(kvm);
3021 
3022 	switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
3023 	case CRYCB_FORMAT2: /* APCB1 use 256 bits */
3024 		memcpy(crycb->apcb1.apm, apm, 32);
3025 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
3026 			 apm[0], apm[1], apm[2], apm[3]);
3027 		memcpy(crycb->apcb1.aqm, aqm, 32);
3028 		VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
3029 			 aqm[0], aqm[1], aqm[2], aqm[3]);
3030 		memcpy(crycb->apcb1.adm, adm, 32);
3031 		VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
3032 			 adm[0], adm[1], adm[2], adm[3]);
3033 		break;
3034 	case CRYCB_FORMAT1:
3035 	case CRYCB_FORMAT0: /* Fall through both use APCB0 */
3036 		memcpy(crycb->apcb0.apm, apm, 8);
3037 		memcpy(crycb->apcb0.aqm, aqm, 2);
3038 		memcpy(crycb->apcb0.adm, adm, 2);
3039 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
3040 			 apm[0], *((unsigned short *)aqm),
3041 			 *((unsigned short *)adm));
3042 		break;
3043 	default:	/* Can not happen */
3044 		break;
3045 	}
3046 
3047 	/* recreate the shadow crycb for each vcpu */
3048 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
3049 	kvm_s390_vcpu_unblock_all(kvm);
3050 }
3051 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
3052 
3053 /*
3054  * kvm_arch_crypto_clear_masks
3055  *
3056  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
3057  *	 to be cleared.
3058  *
3059  * Clear the masks that identify the adapters, domains and control domains to
3060  * which the KVM guest is granted access.
3061  *
3062  * Note: The kvm->lock mutex must be locked by the caller before invoking this
3063  *	 function.
3064  */
3065 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
3066 {
3067 	kvm_s390_vcpu_block_all(kvm);
3068 
3069 	memset(&kvm->arch.crypto.crycb->apcb0, 0,
3070 	       sizeof(kvm->arch.crypto.crycb->apcb0));
3071 	memset(&kvm->arch.crypto.crycb->apcb1, 0,
3072 	       sizeof(kvm->arch.crypto.crycb->apcb1));
3073 
3074 	VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
3075 	/* recreate the shadow crycb for each vcpu */
3076 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
3077 	kvm_s390_vcpu_unblock_all(kvm);
3078 }
3079 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
3080 
3081 static u64 kvm_s390_get_initial_cpuid(void)
3082 {
3083 	struct cpuid cpuid;
3084 
3085 	get_cpu_id(&cpuid);
3086 	cpuid.version = 0xff;
3087 	return *((u64 *) &cpuid);
3088 }
3089 
3090 static void kvm_s390_crypto_init(struct kvm *kvm)
3091 {
3092 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
3093 	kvm_s390_set_crycb_format(kvm);
3094 	init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);
3095 
3096 	if (!test_kvm_facility(kvm, 76))
3097 		return;
3098 
3099 	/* Enable AES/DEA protected key functions by default */
3100 	kvm->arch.crypto.aes_kw = 1;
3101 	kvm->arch.crypto.dea_kw = 1;
3102 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
3103 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
3104 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
3105 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
3106 }
3107 
3108 static void sca_dispose(struct kvm *kvm)
3109 {
3110 	if (kvm->arch.use_esca)
3111 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
3112 	else
3113 		free_page((unsigned long)(kvm->arch.sca));
3114 	kvm->arch.sca = NULL;
3115 }
3116 
3117 void kvm_arch_free_vm(struct kvm *kvm)
3118 {
3119 	if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
3120 		kvm_s390_pci_clear_list(kvm);
3121 
3122 	__kvm_arch_free_vm(kvm);
3123 }
3124 
3125 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
3126 {
3127 	gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
3128 	int i, rc;
3129 	char debug_name[16];
3130 	static unsigned long sca_offset;
3131 
3132 	rc = -EINVAL;
3133 #ifdef CONFIG_KVM_S390_UCONTROL
3134 	if (type & ~KVM_VM_S390_UCONTROL)
3135 		goto out_err;
3136 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
3137 		goto out_err;
3138 #else
3139 	if (type)
3140 		goto out_err;
3141 #endif
3142 
3143 	rc = s390_enable_sie();
3144 	if (rc)
3145 		goto out_err;
3146 
3147 	rc = -ENOMEM;
3148 
3149 	if (!sclp.has_64bscao)
3150 		alloc_flags |= GFP_DMA;
3151 	rwlock_init(&kvm->arch.sca_lock);
3152 	/* start with basic SCA */
3153 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
3154 	if (!kvm->arch.sca)
3155 		goto out_err;
3156 	mutex_lock(&kvm_lock);
3157 	sca_offset += 16;
3158 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
3159 		sca_offset = 0;
3160 	kvm->arch.sca = (struct bsca_block *)
3161 			((char *) kvm->arch.sca + sca_offset);
3162 	mutex_unlock(&kvm_lock);
3163 
3164 	sprintf(debug_name, "kvm-%u", current->pid);
3165 
3166 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
3167 	if (!kvm->arch.dbf)
3168 		goto out_err;
3169 
3170 	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
3171 	kvm->arch.sie_page2 =
3172 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
3173 	if (!kvm->arch.sie_page2)
3174 		goto out_err;
3175 
3176 	kvm->arch.sie_page2->kvm = kvm;
3177 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
3178 
3179 	for (i = 0; i < kvm_s390_fac_size(); i++) {
3180 		kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
3181 					      (kvm_s390_fac_base[i] |
3182 					       kvm_s390_fac_ext[i]);
3183 		kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
3184 					      kvm_s390_fac_base[i];
3185 	}
3186 	kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
3187 
3188 	/* we are always in czam mode - even on pre z14 machines */
3189 	set_kvm_facility(kvm->arch.model.fac_mask, 138);
3190 	set_kvm_facility(kvm->arch.model.fac_list, 138);
3191 	/* we emulate STHYI in kvm */
3192 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
3193 	set_kvm_facility(kvm->arch.model.fac_list, 74);
3194 	if (MACHINE_HAS_TLB_GUEST) {
3195 		set_kvm_facility(kvm->arch.model.fac_mask, 147);
3196 		set_kvm_facility(kvm->arch.model.fac_list, 147);
3197 	}
3198 
3199 	if (css_general_characteristics.aiv && test_facility(65))
3200 		set_kvm_facility(kvm->arch.model.fac_mask, 65);
3201 
3202 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
3203 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
3204 
3205 	kvm_s390_crypto_init(kvm);
3206 
3207 	if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
3208 		mutex_lock(&kvm->lock);
3209 		kvm_s390_pci_init_list(kvm);
3210 		kvm_s390_vcpu_pci_enable_interp(kvm);
3211 		mutex_unlock(&kvm->lock);
3212 	}
3213 
3214 	mutex_init(&kvm->arch.float_int.ais_lock);
3215 	spin_lock_init(&kvm->arch.float_int.lock);
3216 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
3217 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
3218 	init_waitqueue_head(&kvm->arch.ipte_wq);
3219 	mutex_init(&kvm->arch.ipte_mutex);
3220 
3221 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
3222 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
3223 
3224 	if (type & KVM_VM_S390_UCONTROL) {
3225 		kvm->arch.gmap = NULL;
3226 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
3227 	} else {
3228 		if (sclp.hamax == U64_MAX)
3229 			kvm->arch.mem_limit = TASK_SIZE_MAX;
3230 		else
3231 			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
3232 						    sclp.hamax + 1);
3233 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
3234 		if (!kvm->arch.gmap)
3235 			goto out_err;
3236 		kvm->arch.gmap->private = kvm;
3237 		kvm->arch.gmap->pfault_enabled = 0;
3238 	}
3239 
3240 	kvm->arch.use_pfmfi = sclp.has_pfmfi;
3241 	kvm->arch.use_skf = sclp.has_skey;
3242 	spin_lock_init(&kvm->arch.start_stop_lock);
3243 	kvm_s390_vsie_init(kvm);
3244 	if (use_gisa)
3245 		kvm_s390_gisa_init(kvm);
3246 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
3247 
3248 	return 0;
3249 out_err:
3250 	free_page((unsigned long)kvm->arch.sie_page2);
3251 	debug_unregister(kvm->arch.dbf);
3252 	sca_dispose(kvm);
3253 	KVM_EVENT(3, "creation of vm failed: %d", rc);
3254 	return rc;
3255 }
3256 
3257 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
3258 {
3259 	u16 rc, rrc;
3260 
3261 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
3262 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
3263 	kvm_s390_clear_local_irqs(vcpu);
3264 	kvm_clear_async_pf_completion_queue(vcpu);
3265 	if (!kvm_is_ucontrol(vcpu->kvm))
3266 		sca_del_vcpu(vcpu);
3267 	kvm_s390_update_topology_change_report(vcpu->kvm, 1);
3268 
3269 	if (kvm_is_ucontrol(vcpu->kvm))
3270 		gmap_remove(vcpu->arch.gmap);
3271 
3272 	if (vcpu->kvm->arch.use_cmma)
3273 		kvm_s390_vcpu_unsetup_cmma(vcpu);
3274 	/* We can not hold the vcpu mutex here, we are already dying */
3275 	if (kvm_s390_pv_cpu_get_handle(vcpu))
3276 		kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
3277 	free_page((unsigned long)(vcpu->arch.sie_block));
3278 }
3279 
3280 void kvm_arch_destroy_vm(struct kvm *kvm)
3281 {
3282 	u16 rc, rrc;
3283 
3284 	kvm_destroy_vcpus(kvm);
3285 	sca_dispose(kvm);
3286 	kvm_s390_gisa_destroy(kvm);
3287 	/*
3288 	 * We are already at the end of life and kvm->lock is not taken.
3289 	 * This is ok as the file descriptor is closed by now and nobody
3290 	 * can mess with the pv state. To avoid lockdep_assert_held from
3291 	 * complaining we do not use kvm_s390_pv_is_protected.
3292 	 */
3293 	if (kvm_s390_pv_get_handle(kvm))
3294 		kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
3295 	/*
3296 	 * Remove the mmu notifier only when the whole KVM VM is torn down,
3297 	 * and only if one was registered to begin with. If the VM is
3298 	 * currently not protected, but has been previously been protected,
3299 	 * then it's possible that the notifier is still registered.
3300 	 */
3301 	if (kvm->arch.pv.mmu_notifier.ops)
3302 		mmu_notifier_unregister(&kvm->arch.pv.mmu_notifier, kvm->mm);
3303 
3304 	debug_unregister(kvm->arch.dbf);
3305 	free_page((unsigned long)kvm->arch.sie_page2);
3306 	if (!kvm_is_ucontrol(kvm))
3307 		gmap_remove(kvm->arch.gmap);
3308 	kvm_s390_destroy_adapters(kvm);
3309 	kvm_s390_clear_float_irqs(kvm);
3310 	kvm_s390_vsie_destroy(kvm);
3311 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
3312 }
3313 
3314 /* Section: vcpu related */
3315 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
3316 {
3317 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
3318 	if (!vcpu->arch.gmap)
3319 		return -ENOMEM;
3320 	vcpu->arch.gmap->private = vcpu->kvm;
3321 
3322 	return 0;
3323 }
3324 
3325 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
3326 {
3327 	if (!kvm_s390_use_sca_entries())
3328 		return;
3329 	read_lock(&vcpu->kvm->arch.sca_lock);
3330 	if (vcpu->kvm->arch.use_esca) {
3331 		struct esca_block *sca = vcpu->kvm->arch.sca;
3332 
3333 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
3334 		sca->cpu[vcpu->vcpu_id].sda = 0;
3335 	} else {
3336 		struct bsca_block *sca = vcpu->kvm->arch.sca;
3337 
3338 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
3339 		sca->cpu[vcpu->vcpu_id].sda = 0;
3340 	}
3341 	read_unlock(&vcpu->kvm->arch.sca_lock);
3342 }
3343 
3344 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
3345 {
3346 	if (!kvm_s390_use_sca_entries()) {
3347 		struct bsca_block *sca = vcpu->kvm->arch.sca;
3348 
3349 		/* we still need the basic sca for the ipte control */
3350 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
3351 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
3352 		return;
3353 	}
3354 	read_lock(&vcpu->kvm->arch.sca_lock);
3355 	if (vcpu->kvm->arch.use_esca) {
3356 		struct esca_block *sca = vcpu->kvm->arch.sca;
3357 
3358 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
3359 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
3360 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
3361 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
3362 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
3363 	} else {
3364 		struct bsca_block *sca = vcpu->kvm->arch.sca;
3365 
3366 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
3367 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
3368 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
3369 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
3370 	}
3371 	read_unlock(&vcpu->kvm->arch.sca_lock);
3372 }
3373 
3374 /* Basic SCA to Extended SCA data copy routines */
3375 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
3376 {
3377 	d->sda = s->sda;
3378 	d->sigp_ctrl.c = s->sigp_ctrl.c;
3379 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
3380 }
3381 
3382 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
3383 {
3384 	int i;
3385 
3386 	d->ipte_control = s->ipte_control;
3387 	d->mcn[0] = s->mcn;
3388 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
3389 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
3390 }
3391 
3392 static int sca_switch_to_extended(struct kvm *kvm)
3393 {
3394 	struct bsca_block *old_sca = kvm->arch.sca;
3395 	struct esca_block *new_sca;
3396 	struct kvm_vcpu *vcpu;
3397 	unsigned long vcpu_idx;
3398 	u32 scaol, scaoh;
3399 
3400 	if (kvm->arch.use_esca)
3401 		return 0;
3402 
3403 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
3404 	if (!new_sca)
3405 		return -ENOMEM;
3406 
3407 	scaoh = (u32)((u64)(new_sca) >> 32);
3408 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
3409 
3410 	kvm_s390_vcpu_block_all(kvm);
3411 	write_lock(&kvm->arch.sca_lock);
3412 
3413 	sca_copy_b_to_e(new_sca, old_sca);
3414 
3415 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
3416 		vcpu->arch.sie_block->scaoh = scaoh;
3417 		vcpu->arch.sie_block->scaol = scaol;
3418 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
3419 	}
3420 	kvm->arch.sca = new_sca;
3421 	kvm->arch.use_esca = 1;
3422 
3423 	write_unlock(&kvm->arch.sca_lock);
3424 	kvm_s390_vcpu_unblock_all(kvm);
3425 
3426 	free_page((unsigned long)old_sca);
3427 
3428 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
3429 		 old_sca, kvm->arch.sca);
3430 	return 0;
3431 }
3432 
3433 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
3434 {
3435 	int rc;
3436 
3437 	if (!kvm_s390_use_sca_entries()) {
3438 		if (id < KVM_MAX_VCPUS)
3439 			return true;
3440 		return false;
3441 	}
3442 	if (id < KVM_S390_BSCA_CPU_SLOTS)
3443 		return true;
3444 	if (!sclp.has_esca || !sclp.has_64bscao)
3445 		return false;
3446 
3447 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
3448 
3449 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
3450 }
3451 
3452 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3453 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3454 {
3455 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
3456 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3457 	vcpu->arch.cputm_start = get_tod_clock_fast();
3458 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3459 }
3460 
3461 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3462 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3463 {
3464 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
3465 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3466 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3467 	vcpu->arch.cputm_start = 0;
3468 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3469 }
3470 
3471 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3472 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3473 {
3474 	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
3475 	vcpu->arch.cputm_enabled = true;
3476 	__start_cpu_timer_accounting(vcpu);
3477 }
3478 
3479 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3480 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3481 {
3482 	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
3483 	__stop_cpu_timer_accounting(vcpu);
3484 	vcpu->arch.cputm_enabled = false;
3485 }
3486 
3487 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3488 {
3489 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3490 	__enable_cpu_timer_accounting(vcpu);
3491 	preempt_enable();
3492 }
3493 
3494 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3495 {
3496 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3497 	__disable_cpu_timer_accounting(vcpu);
3498 	preempt_enable();
3499 }
3500 
3501 /* set the cpu timer - may only be called from the VCPU thread itself */
3502 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3503 {
3504 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3505 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3506 	if (vcpu->arch.cputm_enabled)
3507 		vcpu->arch.cputm_start = get_tod_clock_fast();
3508 	vcpu->arch.sie_block->cputm = cputm;
3509 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3510 	preempt_enable();
3511 }
3512 
3513 /* update and get the cpu timer - can also be called from other VCPU threads */
3514 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3515 {
3516 	unsigned int seq;
3517 	__u64 value;
3518 
3519 	if (unlikely(!vcpu->arch.cputm_enabled))
3520 		return vcpu->arch.sie_block->cputm;
3521 
3522 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3523 	do {
3524 		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3525 		/*
3526 		 * If the writer would ever execute a read in the critical
3527 		 * section, e.g. in irq context, we have a deadlock.
3528 		 */
3529 		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3530 		value = vcpu->arch.sie_block->cputm;
3531 		/* if cputm_start is 0, accounting is being started/stopped */
3532 		if (likely(vcpu->arch.cputm_start))
3533 			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3534 	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3535 	preempt_enable();
3536 	return value;
3537 }
3538 
3539 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3540 {
3541 
3542 	gmap_enable(vcpu->arch.enabled_gmap);
3543 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3544 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3545 		__start_cpu_timer_accounting(vcpu);
3546 	vcpu->cpu = cpu;
3547 }
3548 
3549 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3550 {
3551 	vcpu->cpu = -1;
3552 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3553 		__stop_cpu_timer_accounting(vcpu);
3554 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3555 	vcpu->arch.enabled_gmap = gmap_get_enabled();
3556 	gmap_disable(vcpu->arch.enabled_gmap);
3557 
3558 }
3559 
3560 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3561 {
3562 	mutex_lock(&vcpu->kvm->lock);
3563 	preempt_disable();
3564 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3565 	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3566 	preempt_enable();
3567 	mutex_unlock(&vcpu->kvm->lock);
3568 	if (!kvm_is_ucontrol(vcpu->kvm)) {
3569 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3570 		sca_add_vcpu(vcpu);
3571 	}
3572 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3573 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3574 	/* make vcpu_load load the right gmap on the first trigger */
3575 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3576 }
3577 
3578 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3579 {
3580 	if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3581 	    test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3582 		return true;
3583 	return false;
3584 }
3585 
3586 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3587 {
3588 	/* At least one ECC subfunction must be present */
3589 	return kvm_has_pckmo_subfunc(kvm, 32) ||
3590 	       kvm_has_pckmo_subfunc(kvm, 33) ||
3591 	       kvm_has_pckmo_subfunc(kvm, 34) ||
3592 	       kvm_has_pckmo_subfunc(kvm, 40) ||
3593 	       kvm_has_pckmo_subfunc(kvm, 41);
3594 
3595 }
3596 
3597 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3598 {
3599 	/*
3600 	 * If the AP instructions are not being interpreted and the MSAX3
3601 	 * facility is not configured for the guest, there is nothing to set up.
3602 	 */
3603 	if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3604 		return;
3605 
3606 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3607 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3608 	vcpu->arch.sie_block->eca &= ~ECA_APIE;
3609 	vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3610 
3611 	if (vcpu->kvm->arch.crypto.apie)
3612 		vcpu->arch.sie_block->eca |= ECA_APIE;
3613 
3614 	/* Set up protected key support */
3615 	if (vcpu->kvm->arch.crypto.aes_kw) {
3616 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3617 		/* ecc is also wrapped with AES key */
3618 		if (kvm_has_pckmo_ecc(vcpu->kvm))
3619 			vcpu->arch.sie_block->ecd |= ECD_ECC;
3620 	}
3621 
3622 	if (vcpu->kvm->arch.crypto.dea_kw)
3623 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3624 }
3625 
3626 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3627 {
3628 	free_page(vcpu->arch.sie_block->cbrlo);
3629 	vcpu->arch.sie_block->cbrlo = 0;
3630 }
3631 
3632 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3633 {
3634 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
3635 	if (!vcpu->arch.sie_block->cbrlo)
3636 		return -ENOMEM;
3637 	return 0;
3638 }
3639 
3640 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3641 {
3642 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3643 
3644 	vcpu->arch.sie_block->ibc = model->ibc;
3645 	if (test_kvm_facility(vcpu->kvm, 7))
3646 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3647 }
3648 
3649 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3650 {
3651 	int rc = 0;
3652 	u16 uvrc, uvrrc;
3653 
3654 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3655 						    CPUSTAT_SM |
3656 						    CPUSTAT_STOPPED);
3657 
3658 	if (test_kvm_facility(vcpu->kvm, 78))
3659 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3660 	else if (test_kvm_facility(vcpu->kvm, 8))
3661 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3662 
3663 	kvm_s390_vcpu_setup_model(vcpu);
3664 
3665 	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3666 	if (MACHINE_HAS_ESOP)
3667 		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3668 	if (test_kvm_facility(vcpu->kvm, 9))
3669 		vcpu->arch.sie_block->ecb |= ECB_SRSI;
3670 	if (test_kvm_facility(vcpu->kvm, 11))
3671 		vcpu->arch.sie_block->ecb |= ECB_PTF;
3672 	if (test_kvm_facility(vcpu->kvm, 73))
3673 		vcpu->arch.sie_block->ecb |= ECB_TE;
3674 	if (!kvm_is_ucontrol(vcpu->kvm))
3675 		vcpu->arch.sie_block->ecb |= ECB_SPECI;
3676 
3677 	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3678 		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3679 	if (test_kvm_facility(vcpu->kvm, 130))
3680 		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3681 	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3682 	if (sclp.has_cei)
3683 		vcpu->arch.sie_block->eca |= ECA_CEI;
3684 	if (sclp.has_ib)
3685 		vcpu->arch.sie_block->eca |= ECA_IB;
3686 	if (sclp.has_siif)
3687 		vcpu->arch.sie_block->eca |= ECA_SII;
3688 	if (sclp.has_sigpif)
3689 		vcpu->arch.sie_block->eca |= ECA_SIGPI;
3690 	if (test_kvm_facility(vcpu->kvm, 129)) {
3691 		vcpu->arch.sie_block->eca |= ECA_VX;
3692 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3693 	}
3694 	if (test_kvm_facility(vcpu->kvm, 139))
3695 		vcpu->arch.sie_block->ecd |= ECD_MEF;
3696 	if (test_kvm_facility(vcpu->kvm, 156))
3697 		vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3698 	if (vcpu->arch.sie_block->gd) {
3699 		vcpu->arch.sie_block->eca |= ECA_AIV;
3700 		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3701 			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3702 	}
3703 	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3704 					| SDNXC;
3705 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3706 
3707 	if (sclp.has_kss)
3708 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3709 	else
3710 		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3711 
3712 	if (vcpu->kvm->arch.use_cmma) {
3713 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
3714 		if (rc)
3715 			return rc;
3716 	}
3717 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3718 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3719 
3720 	vcpu->arch.sie_block->hpid = HPID_KVM;
3721 
3722 	kvm_s390_vcpu_crypto_setup(vcpu);
3723 
3724 	kvm_s390_vcpu_pci_setup(vcpu);
3725 
3726 	mutex_lock(&vcpu->kvm->lock);
3727 	if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3728 		rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3729 		if (rc)
3730 			kvm_s390_vcpu_unsetup_cmma(vcpu);
3731 	}
3732 	mutex_unlock(&vcpu->kvm->lock);
3733 
3734 	return rc;
3735 }
3736 
3737 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3738 {
3739 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3740 		return -EINVAL;
3741 	return 0;
3742 }
3743 
3744 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3745 {
3746 	struct sie_page *sie_page;
3747 	int rc;
3748 
3749 	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3750 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3751 	if (!sie_page)
3752 		return -ENOMEM;
3753 
3754 	vcpu->arch.sie_block = &sie_page->sie_block;
3755 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3756 
3757 	/* the real guest size will always be smaller than msl */
3758 	vcpu->arch.sie_block->mso = 0;
3759 	vcpu->arch.sie_block->msl = sclp.hamax;
3760 
3761 	vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3762 	spin_lock_init(&vcpu->arch.local_int.lock);
3763 	vcpu->arch.sie_block->gd = kvm_s390_get_gisa_desc(vcpu->kvm);
3764 	seqcount_init(&vcpu->arch.cputm_seqcount);
3765 
3766 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3767 	kvm_clear_async_pf_completion_queue(vcpu);
3768 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3769 				    KVM_SYNC_GPRS |
3770 				    KVM_SYNC_ACRS |
3771 				    KVM_SYNC_CRS |
3772 				    KVM_SYNC_ARCH0 |
3773 				    KVM_SYNC_PFAULT |
3774 				    KVM_SYNC_DIAG318;
3775 	kvm_s390_set_prefix(vcpu, 0);
3776 	if (test_kvm_facility(vcpu->kvm, 64))
3777 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3778 	if (test_kvm_facility(vcpu->kvm, 82))
3779 		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3780 	if (test_kvm_facility(vcpu->kvm, 133))
3781 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3782 	if (test_kvm_facility(vcpu->kvm, 156))
3783 		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3784 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
3785 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3786 	 */
3787 	if (MACHINE_HAS_VX)
3788 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3789 	else
3790 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3791 
3792 	if (kvm_is_ucontrol(vcpu->kvm)) {
3793 		rc = __kvm_ucontrol_vcpu_init(vcpu);
3794 		if (rc)
3795 			goto out_free_sie_block;
3796 	}
3797 
3798 	VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3799 		 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3800 	trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3801 
3802 	rc = kvm_s390_vcpu_setup(vcpu);
3803 	if (rc)
3804 		goto out_ucontrol_uninit;
3805 
3806 	kvm_s390_update_topology_change_report(vcpu->kvm, 1);
3807 	return 0;
3808 
3809 out_ucontrol_uninit:
3810 	if (kvm_is_ucontrol(vcpu->kvm))
3811 		gmap_remove(vcpu->arch.gmap);
3812 out_free_sie_block:
3813 	free_page((unsigned long)(vcpu->arch.sie_block));
3814 	return rc;
3815 }
3816 
3817 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3818 {
3819 	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
3820 	return kvm_s390_vcpu_has_irq(vcpu, 0);
3821 }
3822 
3823 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3824 {
3825 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3826 }
3827 
3828 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3829 {
3830 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3831 	exit_sie(vcpu);
3832 }
3833 
3834 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3835 {
3836 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3837 }
3838 
3839 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3840 {
3841 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3842 	exit_sie(vcpu);
3843 }
3844 
3845 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3846 {
3847 	return atomic_read(&vcpu->arch.sie_block->prog20) &
3848 	       (PROG_BLOCK_SIE | PROG_REQUEST);
3849 }
3850 
3851 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3852 {
3853 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3854 }
3855 
3856 /*
3857  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3858  * If the CPU is not running (e.g. waiting as idle) the function will
3859  * return immediately. */
3860 void exit_sie(struct kvm_vcpu *vcpu)
3861 {
3862 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3863 	kvm_s390_vsie_kick(vcpu);
3864 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3865 		cpu_relax();
3866 }
3867 
3868 /* Kick a guest cpu out of SIE to process a request synchronously */
3869 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3870 {
3871 	__kvm_make_request(req, vcpu);
3872 	kvm_s390_vcpu_request(vcpu);
3873 }
3874 
3875 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3876 			      unsigned long end)
3877 {
3878 	struct kvm *kvm = gmap->private;
3879 	struct kvm_vcpu *vcpu;
3880 	unsigned long prefix;
3881 	unsigned long i;
3882 
3883 	if (gmap_is_shadow(gmap))
3884 		return;
3885 	if (start >= 1UL << 31)
3886 		/* We are only interested in prefix pages */
3887 		return;
3888 	kvm_for_each_vcpu(i, vcpu, kvm) {
3889 		/* match against both prefix pages */
3890 		prefix = kvm_s390_get_prefix(vcpu);
3891 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3892 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3893 				   start, end);
3894 			kvm_s390_sync_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
3895 		}
3896 	}
3897 }
3898 
3899 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3900 {
3901 	/* do not poll with more than halt_poll_max_steal percent of steal time */
3902 	if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3903 	    READ_ONCE(halt_poll_max_steal)) {
3904 		vcpu->stat.halt_no_poll_steal++;
3905 		return true;
3906 	}
3907 	return false;
3908 }
3909 
3910 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3911 {
3912 	/* kvm common code refers to this, but never calls it */
3913 	BUG();
3914 	return 0;
3915 }
3916 
3917 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3918 					   struct kvm_one_reg *reg)
3919 {
3920 	int r = -EINVAL;
3921 
3922 	switch (reg->id) {
3923 	case KVM_REG_S390_TODPR:
3924 		r = put_user(vcpu->arch.sie_block->todpr,
3925 			     (u32 __user *)reg->addr);
3926 		break;
3927 	case KVM_REG_S390_EPOCHDIFF:
3928 		r = put_user(vcpu->arch.sie_block->epoch,
3929 			     (u64 __user *)reg->addr);
3930 		break;
3931 	case KVM_REG_S390_CPU_TIMER:
3932 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
3933 			     (u64 __user *)reg->addr);
3934 		break;
3935 	case KVM_REG_S390_CLOCK_COMP:
3936 		r = put_user(vcpu->arch.sie_block->ckc,
3937 			     (u64 __user *)reg->addr);
3938 		break;
3939 	case KVM_REG_S390_PFTOKEN:
3940 		r = put_user(vcpu->arch.pfault_token,
3941 			     (u64 __user *)reg->addr);
3942 		break;
3943 	case KVM_REG_S390_PFCOMPARE:
3944 		r = put_user(vcpu->arch.pfault_compare,
3945 			     (u64 __user *)reg->addr);
3946 		break;
3947 	case KVM_REG_S390_PFSELECT:
3948 		r = put_user(vcpu->arch.pfault_select,
3949 			     (u64 __user *)reg->addr);
3950 		break;
3951 	case KVM_REG_S390_PP:
3952 		r = put_user(vcpu->arch.sie_block->pp,
3953 			     (u64 __user *)reg->addr);
3954 		break;
3955 	case KVM_REG_S390_GBEA:
3956 		r = put_user(vcpu->arch.sie_block->gbea,
3957 			     (u64 __user *)reg->addr);
3958 		break;
3959 	default:
3960 		break;
3961 	}
3962 
3963 	return r;
3964 }
3965 
3966 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3967 					   struct kvm_one_reg *reg)
3968 {
3969 	int r = -EINVAL;
3970 	__u64 val;
3971 
3972 	switch (reg->id) {
3973 	case KVM_REG_S390_TODPR:
3974 		r = get_user(vcpu->arch.sie_block->todpr,
3975 			     (u32 __user *)reg->addr);
3976 		break;
3977 	case KVM_REG_S390_EPOCHDIFF:
3978 		r = get_user(vcpu->arch.sie_block->epoch,
3979 			     (u64 __user *)reg->addr);
3980 		break;
3981 	case KVM_REG_S390_CPU_TIMER:
3982 		r = get_user(val, (u64 __user *)reg->addr);
3983 		if (!r)
3984 			kvm_s390_set_cpu_timer(vcpu, val);
3985 		break;
3986 	case KVM_REG_S390_CLOCK_COMP:
3987 		r = get_user(vcpu->arch.sie_block->ckc,
3988 			     (u64 __user *)reg->addr);
3989 		break;
3990 	case KVM_REG_S390_PFTOKEN:
3991 		r = get_user(vcpu->arch.pfault_token,
3992 			     (u64 __user *)reg->addr);
3993 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3994 			kvm_clear_async_pf_completion_queue(vcpu);
3995 		break;
3996 	case KVM_REG_S390_PFCOMPARE:
3997 		r = get_user(vcpu->arch.pfault_compare,
3998 			     (u64 __user *)reg->addr);
3999 		break;
4000 	case KVM_REG_S390_PFSELECT:
4001 		r = get_user(vcpu->arch.pfault_select,
4002 			     (u64 __user *)reg->addr);
4003 		break;
4004 	case KVM_REG_S390_PP:
4005 		r = get_user(vcpu->arch.sie_block->pp,
4006 			     (u64 __user *)reg->addr);
4007 		break;
4008 	case KVM_REG_S390_GBEA:
4009 		r = get_user(vcpu->arch.sie_block->gbea,
4010 			     (u64 __user *)reg->addr);
4011 		break;
4012 	default:
4013 		break;
4014 	}
4015 
4016 	return r;
4017 }
4018 
4019 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
4020 {
4021 	vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
4022 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
4023 	memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
4024 
4025 	kvm_clear_async_pf_completion_queue(vcpu);
4026 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
4027 		kvm_s390_vcpu_stop(vcpu);
4028 	kvm_s390_clear_local_irqs(vcpu);
4029 }
4030 
4031 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
4032 {
4033 	/* Initial reset is a superset of the normal reset */
4034 	kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4035 
4036 	/*
4037 	 * This equals initial cpu reset in pop, but we don't switch to ESA.
4038 	 * We do not only reset the internal data, but also ...
4039 	 */
4040 	vcpu->arch.sie_block->gpsw.mask = 0;
4041 	vcpu->arch.sie_block->gpsw.addr = 0;
4042 	kvm_s390_set_prefix(vcpu, 0);
4043 	kvm_s390_set_cpu_timer(vcpu, 0);
4044 	vcpu->arch.sie_block->ckc = 0;
4045 	memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
4046 	vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
4047 	vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
4048 
4049 	/* ... the data in sync regs */
4050 	memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
4051 	vcpu->run->s.regs.ckc = 0;
4052 	vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
4053 	vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
4054 	vcpu->run->psw_addr = 0;
4055 	vcpu->run->psw_mask = 0;
4056 	vcpu->run->s.regs.todpr = 0;
4057 	vcpu->run->s.regs.cputm = 0;
4058 	vcpu->run->s.regs.ckc = 0;
4059 	vcpu->run->s.regs.pp = 0;
4060 	vcpu->run->s.regs.gbea = 1;
4061 	vcpu->run->s.regs.fpc = 0;
4062 	/*
4063 	 * Do not reset these registers in the protected case, as some of
4064 	 * them are overlayed and they are not accessible in this case
4065 	 * anyway.
4066 	 */
4067 	if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
4068 		vcpu->arch.sie_block->gbea = 1;
4069 		vcpu->arch.sie_block->pp = 0;
4070 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4071 		vcpu->arch.sie_block->todpr = 0;
4072 	}
4073 }
4074 
4075 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
4076 {
4077 	struct kvm_sync_regs *regs = &vcpu->run->s.regs;
4078 
4079 	/* Clear reset is a superset of the initial reset */
4080 	kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4081 
4082 	memset(&regs->gprs, 0, sizeof(regs->gprs));
4083 	memset(&regs->vrs, 0, sizeof(regs->vrs));
4084 	memset(&regs->acrs, 0, sizeof(regs->acrs));
4085 	memset(&regs->gscb, 0, sizeof(regs->gscb));
4086 
4087 	regs->etoken = 0;
4088 	regs->etoken_extension = 0;
4089 }
4090 
4091 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
4092 {
4093 	vcpu_load(vcpu);
4094 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
4095 	vcpu_put(vcpu);
4096 	return 0;
4097 }
4098 
4099 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
4100 {
4101 	vcpu_load(vcpu);
4102 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
4103 	vcpu_put(vcpu);
4104 	return 0;
4105 }
4106 
4107 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
4108 				  struct kvm_sregs *sregs)
4109 {
4110 	vcpu_load(vcpu);
4111 
4112 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
4113 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
4114 
4115 	vcpu_put(vcpu);
4116 	return 0;
4117 }
4118 
4119 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
4120 				  struct kvm_sregs *sregs)
4121 {
4122 	vcpu_load(vcpu);
4123 
4124 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
4125 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
4126 
4127 	vcpu_put(vcpu);
4128 	return 0;
4129 }
4130 
4131 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
4132 {
4133 	int ret = 0;
4134 
4135 	vcpu_load(vcpu);
4136 
4137 	if (test_fp_ctl(fpu->fpc)) {
4138 		ret = -EINVAL;
4139 		goto out;
4140 	}
4141 	vcpu->run->s.regs.fpc = fpu->fpc;
4142 	if (MACHINE_HAS_VX)
4143 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
4144 				 (freg_t *) fpu->fprs);
4145 	else
4146 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
4147 
4148 out:
4149 	vcpu_put(vcpu);
4150 	return ret;
4151 }
4152 
4153 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
4154 {
4155 	vcpu_load(vcpu);
4156 
4157 	/* make sure we have the latest values */
4158 	save_fpu_regs();
4159 	if (MACHINE_HAS_VX)
4160 		convert_vx_to_fp((freg_t *) fpu->fprs,
4161 				 (__vector128 *) vcpu->run->s.regs.vrs);
4162 	else
4163 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
4164 	fpu->fpc = vcpu->run->s.regs.fpc;
4165 
4166 	vcpu_put(vcpu);
4167 	return 0;
4168 }
4169 
4170 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
4171 {
4172 	int rc = 0;
4173 
4174 	if (!is_vcpu_stopped(vcpu))
4175 		rc = -EBUSY;
4176 	else {
4177 		vcpu->run->psw_mask = psw.mask;
4178 		vcpu->run->psw_addr = psw.addr;
4179 	}
4180 	return rc;
4181 }
4182 
4183 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
4184 				  struct kvm_translation *tr)
4185 {
4186 	return -EINVAL; /* not implemented yet */
4187 }
4188 
4189 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
4190 			      KVM_GUESTDBG_USE_HW_BP | \
4191 			      KVM_GUESTDBG_ENABLE)
4192 
4193 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
4194 					struct kvm_guest_debug *dbg)
4195 {
4196 	int rc = 0;
4197 
4198 	vcpu_load(vcpu);
4199 
4200 	vcpu->guest_debug = 0;
4201 	kvm_s390_clear_bp_data(vcpu);
4202 
4203 	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
4204 		rc = -EINVAL;
4205 		goto out;
4206 	}
4207 	if (!sclp.has_gpere) {
4208 		rc = -EINVAL;
4209 		goto out;
4210 	}
4211 
4212 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
4213 		vcpu->guest_debug = dbg->control;
4214 		/* enforce guest PER */
4215 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
4216 
4217 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
4218 			rc = kvm_s390_import_bp_data(vcpu, dbg);
4219 	} else {
4220 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
4221 		vcpu->arch.guestdbg.last_bp = 0;
4222 	}
4223 
4224 	if (rc) {
4225 		vcpu->guest_debug = 0;
4226 		kvm_s390_clear_bp_data(vcpu);
4227 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
4228 	}
4229 
4230 out:
4231 	vcpu_put(vcpu);
4232 	return rc;
4233 }
4234 
4235 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
4236 				    struct kvm_mp_state *mp_state)
4237 {
4238 	int ret;
4239 
4240 	vcpu_load(vcpu);
4241 
4242 	/* CHECK_STOP and LOAD are not supported yet */
4243 	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
4244 				      KVM_MP_STATE_OPERATING;
4245 
4246 	vcpu_put(vcpu);
4247 	return ret;
4248 }
4249 
4250 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
4251 				    struct kvm_mp_state *mp_state)
4252 {
4253 	int rc = 0;
4254 
4255 	vcpu_load(vcpu);
4256 
4257 	/* user space knows about this interface - let it control the state */
4258 	kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm);
4259 
4260 	switch (mp_state->mp_state) {
4261 	case KVM_MP_STATE_STOPPED:
4262 		rc = kvm_s390_vcpu_stop(vcpu);
4263 		break;
4264 	case KVM_MP_STATE_OPERATING:
4265 		rc = kvm_s390_vcpu_start(vcpu);
4266 		break;
4267 	case KVM_MP_STATE_LOAD:
4268 		if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
4269 			rc = -ENXIO;
4270 			break;
4271 		}
4272 		rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
4273 		break;
4274 	case KVM_MP_STATE_CHECK_STOP:
4275 		fallthrough;	/* CHECK_STOP and LOAD are not supported yet */
4276 	default:
4277 		rc = -ENXIO;
4278 	}
4279 
4280 	vcpu_put(vcpu);
4281 	return rc;
4282 }
4283 
4284 static bool ibs_enabled(struct kvm_vcpu *vcpu)
4285 {
4286 	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
4287 }
4288 
4289 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
4290 {
4291 retry:
4292 	kvm_s390_vcpu_request_handled(vcpu);
4293 	if (!kvm_request_pending(vcpu))
4294 		return 0;
4295 	/*
4296 	 * If the guest prefix changed, re-arm the ipte notifier for the
4297 	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
4298 	 * This ensures that the ipte instruction for this request has
4299 	 * already finished. We might race against a second unmapper that
4300 	 * wants to set the blocking bit. Lets just retry the request loop.
4301 	 */
4302 	if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) {
4303 		int rc;
4304 		rc = gmap_mprotect_notify(vcpu->arch.gmap,
4305 					  kvm_s390_get_prefix(vcpu),
4306 					  PAGE_SIZE * 2, PROT_WRITE);
4307 		if (rc) {
4308 			kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
4309 			return rc;
4310 		}
4311 		goto retry;
4312 	}
4313 
4314 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
4315 		vcpu->arch.sie_block->ihcpu = 0xffff;
4316 		goto retry;
4317 	}
4318 
4319 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
4320 		if (!ibs_enabled(vcpu)) {
4321 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
4322 			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
4323 		}
4324 		goto retry;
4325 	}
4326 
4327 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
4328 		if (ibs_enabled(vcpu)) {
4329 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
4330 			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
4331 		}
4332 		goto retry;
4333 	}
4334 
4335 	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
4336 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
4337 		goto retry;
4338 	}
4339 
4340 	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
4341 		/*
4342 		 * Disable CMM virtualization; we will emulate the ESSA
4343 		 * instruction manually, in order to provide additional
4344 		 * functionalities needed for live migration.
4345 		 */
4346 		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
4347 		goto retry;
4348 	}
4349 
4350 	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
4351 		/*
4352 		 * Re-enable CMM virtualization if CMMA is available and
4353 		 * CMM has been used.
4354 		 */
4355 		if ((vcpu->kvm->arch.use_cmma) &&
4356 		    (vcpu->kvm->mm->context.uses_cmm))
4357 			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
4358 		goto retry;
4359 	}
4360 
4361 	/* we left the vsie handler, nothing to do, just clear the request */
4362 	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
4363 
4364 	return 0;
4365 }
4366 
4367 static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
4368 {
4369 	struct kvm_vcpu *vcpu;
4370 	union tod_clock clk;
4371 	unsigned long i;
4372 
4373 	preempt_disable();
4374 
4375 	store_tod_clock_ext(&clk);
4376 
4377 	kvm->arch.epoch = gtod->tod - clk.tod;
4378 	kvm->arch.epdx = 0;
4379 	if (test_kvm_facility(kvm, 139)) {
4380 		kvm->arch.epdx = gtod->epoch_idx - clk.ei;
4381 		if (kvm->arch.epoch > gtod->tod)
4382 			kvm->arch.epdx -= 1;
4383 	}
4384 
4385 	kvm_s390_vcpu_block_all(kvm);
4386 	kvm_for_each_vcpu(i, vcpu, kvm) {
4387 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
4388 		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
4389 	}
4390 
4391 	kvm_s390_vcpu_unblock_all(kvm);
4392 	preempt_enable();
4393 }
4394 
4395 int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
4396 {
4397 	if (!mutex_trylock(&kvm->lock))
4398 		return 0;
4399 	__kvm_s390_set_tod_clock(kvm, gtod);
4400 	mutex_unlock(&kvm->lock);
4401 	return 1;
4402 }
4403 
4404 /**
4405  * kvm_arch_fault_in_page - fault-in guest page if necessary
4406  * @vcpu: The corresponding virtual cpu
4407  * @gpa: Guest physical address
4408  * @writable: Whether the page should be writable or not
4409  *
4410  * Make sure that a guest page has been faulted-in on the host.
4411  *
4412  * Return: Zero on success, negative error code otherwise.
4413  */
4414 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
4415 {
4416 	return gmap_fault(vcpu->arch.gmap, gpa,
4417 			  writable ? FAULT_FLAG_WRITE : 0);
4418 }
4419 
4420 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
4421 				      unsigned long token)
4422 {
4423 	struct kvm_s390_interrupt inti;
4424 	struct kvm_s390_irq irq;
4425 
4426 	if (start_token) {
4427 		irq.u.ext.ext_params2 = token;
4428 		irq.type = KVM_S390_INT_PFAULT_INIT;
4429 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
4430 	} else {
4431 		inti.type = KVM_S390_INT_PFAULT_DONE;
4432 		inti.parm64 = token;
4433 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
4434 	}
4435 }
4436 
4437 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
4438 				     struct kvm_async_pf *work)
4439 {
4440 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
4441 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
4442 
4443 	return true;
4444 }
4445 
4446 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
4447 				 struct kvm_async_pf *work)
4448 {
4449 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
4450 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
4451 }
4452 
4453 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
4454 			       struct kvm_async_pf *work)
4455 {
4456 	/* s390 will always inject the page directly */
4457 }
4458 
4459 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
4460 {
4461 	/*
4462 	 * s390 will always inject the page directly,
4463 	 * but we still want check_async_completion to cleanup
4464 	 */
4465 	return true;
4466 }
4467 
4468 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
4469 {
4470 	hva_t hva;
4471 	struct kvm_arch_async_pf arch;
4472 
4473 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4474 		return false;
4475 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
4476 	    vcpu->arch.pfault_compare)
4477 		return false;
4478 	if (psw_extint_disabled(vcpu))
4479 		return false;
4480 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
4481 		return false;
4482 	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
4483 		return false;
4484 	if (!vcpu->arch.gmap->pfault_enabled)
4485 		return false;
4486 
4487 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
4488 	hva += current->thread.gmap_addr & ~PAGE_MASK;
4489 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
4490 		return false;
4491 
4492 	return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
4493 }
4494 
4495 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
4496 {
4497 	int rc, cpuflags;
4498 
4499 	/*
4500 	 * On s390 notifications for arriving pages will be delivered directly
4501 	 * to the guest but the house keeping for completed pfaults is
4502 	 * handled outside the worker.
4503 	 */
4504 	kvm_check_async_pf_completion(vcpu);
4505 
4506 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4507 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4508 
4509 	if (need_resched())
4510 		schedule();
4511 
4512 	if (!kvm_is_ucontrol(vcpu->kvm)) {
4513 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
4514 		if (rc)
4515 			return rc;
4516 	}
4517 
4518 	rc = kvm_s390_handle_requests(vcpu);
4519 	if (rc)
4520 		return rc;
4521 
4522 	if (guestdbg_enabled(vcpu)) {
4523 		kvm_s390_backup_guest_per_regs(vcpu);
4524 		kvm_s390_patch_guest_per_regs(vcpu);
4525 	}
4526 
4527 	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
4528 
4529 	vcpu->arch.sie_block->icptcode = 0;
4530 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4531 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4532 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
4533 
4534 	return 0;
4535 }
4536 
4537 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4538 {
4539 	struct kvm_s390_pgm_info pgm_info = {
4540 		.code = PGM_ADDRESSING,
4541 	};
4542 	u8 opcode, ilen;
4543 	int rc;
4544 
4545 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4546 	trace_kvm_s390_sie_fault(vcpu);
4547 
4548 	/*
4549 	 * We want to inject an addressing exception, which is defined as a
4550 	 * suppressing or terminating exception. However, since we came here
4551 	 * by a DAT access exception, the PSW still points to the faulting
4552 	 * instruction since DAT exceptions are nullifying. So we've got
4553 	 * to look up the current opcode to get the length of the instruction
4554 	 * to be able to forward the PSW.
4555 	 */
4556 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4557 	ilen = insn_length(opcode);
4558 	if (rc < 0) {
4559 		return rc;
4560 	} else if (rc) {
4561 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
4562 		 * Forward by arbitrary ilc, injection will take care of
4563 		 * nullification if necessary.
4564 		 */
4565 		pgm_info = vcpu->arch.pgm;
4566 		ilen = 4;
4567 	}
4568 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4569 	kvm_s390_forward_psw(vcpu, ilen);
4570 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4571 }
4572 
4573 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4574 {
4575 	struct mcck_volatile_info *mcck_info;
4576 	struct sie_page *sie_page;
4577 
4578 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4579 		   vcpu->arch.sie_block->icptcode);
4580 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4581 
4582 	if (guestdbg_enabled(vcpu))
4583 		kvm_s390_restore_guest_per_regs(vcpu);
4584 
4585 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4586 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4587 
4588 	if (exit_reason == -EINTR) {
4589 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
4590 		sie_page = container_of(vcpu->arch.sie_block,
4591 					struct sie_page, sie_block);
4592 		mcck_info = &sie_page->mcck_info;
4593 		kvm_s390_reinject_machine_check(vcpu, mcck_info);
4594 		return 0;
4595 	}
4596 
4597 	if (vcpu->arch.sie_block->icptcode > 0) {
4598 		int rc = kvm_handle_sie_intercept(vcpu);
4599 
4600 		if (rc != -EOPNOTSUPP)
4601 			return rc;
4602 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4603 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4604 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4605 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4606 		return -EREMOTE;
4607 	} else if (exit_reason != -EFAULT) {
4608 		vcpu->stat.exit_null++;
4609 		return 0;
4610 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
4611 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4612 		vcpu->run->s390_ucontrol.trans_exc_code =
4613 						current->thread.gmap_addr;
4614 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
4615 		return -EREMOTE;
4616 	} else if (current->thread.gmap_pfault) {
4617 		trace_kvm_s390_major_guest_pfault(vcpu);
4618 		current->thread.gmap_pfault = 0;
4619 		if (kvm_arch_setup_async_pf(vcpu))
4620 			return 0;
4621 		vcpu->stat.pfault_sync++;
4622 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4623 	}
4624 	return vcpu_post_run_fault_in_sie(vcpu);
4625 }
4626 
4627 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4628 static int __vcpu_run(struct kvm_vcpu *vcpu)
4629 {
4630 	int rc, exit_reason;
4631 	struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4632 
4633 	/*
4634 	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
4635 	 * ning the guest), so that memslots (and other stuff) are protected
4636 	 */
4637 	kvm_vcpu_srcu_read_lock(vcpu);
4638 
4639 	do {
4640 		rc = vcpu_pre_run(vcpu);
4641 		if (rc)
4642 			break;
4643 
4644 		kvm_vcpu_srcu_read_unlock(vcpu);
4645 		/*
4646 		 * As PF_VCPU will be used in fault handler, between
4647 		 * guest_enter and guest_exit should be no uaccess.
4648 		 */
4649 		local_irq_disable();
4650 		guest_enter_irqoff();
4651 		__disable_cpu_timer_accounting(vcpu);
4652 		local_irq_enable();
4653 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4654 			memcpy(sie_page->pv_grregs,
4655 			       vcpu->run->s.regs.gprs,
4656 			       sizeof(sie_page->pv_grregs));
4657 		}
4658 		if (test_cpu_flag(CIF_FPU))
4659 			load_fpu_regs();
4660 		exit_reason = sie64a(vcpu->arch.sie_block,
4661 				     vcpu->run->s.regs.gprs);
4662 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4663 			memcpy(vcpu->run->s.regs.gprs,
4664 			       sie_page->pv_grregs,
4665 			       sizeof(sie_page->pv_grregs));
4666 			/*
4667 			 * We're not allowed to inject interrupts on intercepts
4668 			 * that leave the guest state in an "in-between" state
4669 			 * where the next SIE entry will do a continuation.
4670 			 * Fence interrupts in our "internal" PSW.
4671 			 */
4672 			if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4673 			    vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4674 				vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4675 			}
4676 		}
4677 		local_irq_disable();
4678 		__enable_cpu_timer_accounting(vcpu);
4679 		guest_exit_irqoff();
4680 		local_irq_enable();
4681 		kvm_vcpu_srcu_read_lock(vcpu);
4682 
4683 		rc = vcpu_post_run(vcpu, exit_reason);
4684 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4685 
4686 	kvm_vcpu_srcu_read_unlock(vcpu);
4687 	return rc;
4688 }
4689 
4690 static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4691 {
4692 	struct kvm_run *kvm_run = vcpu->run;
4693 	struct runtime_instr_cb *riccb;
4694 	struct gs_cb *gscb;
4695 
4696 	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4697 	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4698 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4699 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4700 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4701 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4702 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4703 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4704 	}
4705 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4706 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4707 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4708 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4709 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4710 			kvm_clear_async_pf_completion_queue(vcpu);
4711 	}
4712 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4713 		vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4714 		vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4715 		VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc);
4716 	}
4717 	/*
4718 	 * If userspace sets the riccb (e.g. after migration) to a valid state,
4719 	 * we should enable RI here instead of doing the lazy enablement.
4720 	 */
4721 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4722 	    test_kvm_facility(vcpu->kvm, 64) &&
4723 	    riccb->v &&
4724 	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4725 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4726 		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4727 	}
4728 	/*
4729 	 * If userspace sets the gscb (e.g. after migration) to non-zero,
4730 	 * we should enable GS here instead of doing the lazy enablement.
4731 	 */
4732 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4733 	    test_kvm_facility(vcpu->kvm, 133) &&
4734 	    gscb->gssm &&
4735 	    !vcpu->arch.gs_enabled) {
4736 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4737 		vcpu->arch.sie_block->ecb |= ECB_GS;
4738 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4739 		vcpu->arch.gs_enabled = 1;
4740 	}
4741 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4742 	    test_kvm_facility(vcpu->kvm, 82)) {
4743 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4744 		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4745 	}
4746 	if (MACHINE_HAS_GS) {
4747 		preempt_disable();
4748 		__ctl_set_bit(2, 4);
4749 		if (current->thread.gs_cb) {
4750 			vcpu->arch.host_gscb = current->thread.gs_cb;
4751 			save_gs_cb(vcpu->arch.host_gscb);
4752 		}
4753 		if (vcpu->arch.gs_enabled) {
4754 			current->thread.gs_cb = (struct gs_cb *)
4755 						&vcpu->run->s.regs.gscb;
4756 			restore_gs_cb(current->thread.gs_cb);
4757 		}
4758 		preempt_enable();
4759 	}
4760 	/* SIE will load etoken directly from SDNX and therefore kvm_run */
4761 }
4762 
4763 static void sync_regs(struct kvm_vcpu *vcpu)
4764 {
4765 	struct kvm_run *kvm_run = vcpu->run;
4766 
4767 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4768 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4769 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4770 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4771 		/* some control register changes require a tlb flush */
4772 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4773 	}
4774 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4775 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4776 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4777 	}
4778 	save_access_regs(vcpu->arch.host_acrs);
4779 	restore_access_regs(vcpu->run->s.regs.acrs);
4780 	/* save host (userspace) fprs/vrs */
4781 	save_fpu_regs();
4782 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4783 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4784 	if (MACHINE_HAS_VX)
4785 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4786 	else
4787 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4788 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4789 	if (test_fp_ctl(current->thread.fpu.fpc))
4790 		/* User space provided an invalid FPC, let's clear it */
4791 		current->thread.fpu.fpc = 0;
4792 
4793 	/* Sync fmt2 only data */
4794 	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4795 		sync_regs_fmt2(vcpu);
4796 	} else {
4797 		/*
4798 		 * In several places we have to modify our internal view to
4799 		 * not do things that are disallowed by the ultravisor. For
4800 		 * example we must not inject interrupts after specific exits
4801 		 * (e.g. 112 prefix page not secure). We do this by turning
4802 		 * off the machine check, external and I/O interrupt bits
4803 		 * of our PSW copy. To avoid getting validity intercepts, we
4804 		 * do only accept the condition code from userspace.
4805 		 */
4806 		vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4807 		vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4808 						   PSW_MASK_CC;
4809 	}
4810 
4811 	kvm_run->kvm_dirty_regs = 0;
4812 }
4813 
4814 static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4815 {
4816 	struct kvm_run *kvm_run = vcpu->run;
4817 
4818 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4819 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4820 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4821 	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4822 	kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4823 	if (MACHINE_HAS_GS) {
4824 		preempt_disable();
4825 		__ctl_set_bit(2, 4);
4826 		if (vcpu->arch.gs_enabled)
4827 			save_gs_cb(current->thread.gs_cb);
4828 		current->thread.gs_cb = vcpu->arch.host_gscb;
4829 		restore_gs_cb(vcpu->arch.host_gscb);
4830 		if (!vcpu->arch.host_gscb)
4831 			__ctl_clear_bit(2, 4);
4832 		vcpu->arch.host_gscb = NULL;
4833 		preempt_enable();
4834 	}
4835 	/* SIE will save etoken directly into SDNX and therefore kvm_run */
4836 }
4837 
4838 static void store_regs(struct kvm_vcpu *vcpu)
4839 {
4840 	struct kvm_run *kvm_run = vcpu->run;
4841 
4842 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4843 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4844 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4845 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4846 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4847 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4848 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4849 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4850 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4851 	save_access_regs(vcpu->run->s.regs.acrs);
4852 	restore_access_regs(vcpu->arch.host_acrs);
4853 	/* Save guest register state */
4854 	save_fpu_regs();
4855 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4856 	/* Restore will be done lazily at return */
4857 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4858 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4859 	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4860 		store_regs_fmt2(vcpu);
4861 }
4862 
4863 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4864 {
4865 	struct kvm_run *kvm_run = vcpu->run;
4866 	int rc;
4867 
4868 	/*
4869 	 * Running a VM while dumping always has the potential to
4870 	 * produce inconsistent dump data. But for PV vcpus a SIE
4871 	 * entry while dumping could also lead to a fatal validity
4872 	 * intercept which we absolutely want to avoid.
4873 	 */
4874 	if (vcpu->kvm->arch.pv.dumping)
4875 		return -EINVAL;
4876 
4877 	if (kvm_run->immediate_exit)
4878 		return -EINTR;
4879 
4880 	if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4881 	    kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4882 		return -EINVAL;
4883 
4884 	vcpu_load(vcpu);
4885 
4886 	if (guestdbg_exit_pending(vcpu)) {
4887 		kvm_s390_prepare_debug_exit(vcpu);
4888 		rc = 0;
4889 		goto out;
4890 	}
4891 
4892 	kvm_sigset_activate(vcpu);
4893 
4894 	/*
4895 	 * no need to check the return value of vcpu_start as it can only have
4896 	 * an error for protvirt, but protvirt means user cpu state
4897 	 */
4898 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4899 		kvm_s390_vcpu_start(vcpu);
4900 	} else if (is_vcpu_stopped(vcpu)) {
4901 		pr_err_ratelimited("can't run stopped vcpu %d\n",
4902 				   vcpu->vcpu_id);
4903 		rc = -EINVAL;
4904 		goto out;
4905 	}
4906 
4907 	sync_regs(vcpu);
4908 	enable_cpu_timer_accounting(vcpu);
4909 
4910 	might_fault();
4911 	rc = __vcpu_run(vcpu);
4912 
4913 	if (signal_pending(current) && !rc) {
4914 		kvm_run->exit_reason = KVM_EXIT_INTR;
4915 		rc = -EINTR;
4916 	}
4917 
4918 	if (guestdbg_exit_pending(vcpu) && !rc)  {
4919 		kvm_s390_prepare_debug_exit(vcpu);
4920 		rc = 0;
4921 	}
4922 
4923 	if (rc == -EREMOTE) {
4924 		/* userspace support is needed, kvm_run has been prepared */
4925 		rc = 0;
4926 	}
4927 
4928 	disable_cpu_timer_accounting(vcpu);
4929 	store_regs(vcpu);
4930 
4931 	kvm_sigset_deactivate(vcpu);
4932 
4933 	vcpu->stat.exit_userspace++;
4934 out:
4935 	vcpu_put(vcpu);
4936 	return rc;
4937 }
4938 
4939 /*
4940  * store status at address
4941  * we use have two special cases:
4942  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4943  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4944  */
4945 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4946 {
4947 	unsigned char archmode = 1;
4948 	freg_t fprs[NUM_FPRS];
4949 	unsigned int px;
4950 	u64 clkcomp, cputm;
4951 	int rc;
4952 
4953 	px = kvm_s390_get_prefix(vcpu);
4954 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4955 		if (write_guest_abs(vcpu, 163, &archmode, 1))
4956 			return -EFAULT;
4957 		gpa = 0;
4958 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4959 		if (write_guest_real(vcpu, 163, &archmode, 1))
4960 			return -EFAULT;
4961 		gpa = px;
4962 	} else
4963 		gpa -= __LC_FPREGS_SAVE_AREA;
4964 
4965 	/* manually convert vector registers if necessary */
4966 	if (MACHINE_HAS_VX) {
4967 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4968 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4969 				     fprs, 128);
4970 	} else {
4971 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4972 				     vcpu->run->s.regs.fprs, 128);
4973 	}
4974 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4975 			      vcpu->run->s.regs.gprs, 128);
4976 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4977 			      &vcpu->arch.sie_block->gpsw, 16);
4978 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4979 			      &px, 4);
4980 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4981 			      &vcpu->run->s.regs.fpc, 4);
4982 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4983 			      &vcpu->arch.sie_block->todpr, 4);
4984 	cputm = kvm_s390_get_cpu_timer(vcpu);
4985 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4986 			      &cputm, 8);
4987 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
4988 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4989 			      &clkcomp, 8);
4990 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4991 			      &vcpu->run->s.regs.acrs, 64);
4992 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4993 			      &vcpu->arch.sie_block->gcr, 128);
4994 	return rc ? -EFAULT : 0;
4995 }
4996 
4997 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4998 {
4999 	/*
5000 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
5001 	 * switch in the run ioctl. Let's update our copies before we save
5002 	 * it into the save area
5003 	 */
5004 	save_fpu_regs();
5005 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
5006 	save_access_regs(vcpu->run->s.regs.acrs);
5007 
5008 	return kvm_s390_store_status_unloaded(vcpu, addr);
5009 }
5010 
5011 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
5012 {
5013 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
5014 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
5015 }
5016 
5017 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
5018 {
5019 	unsigned long i;
5020 	struct kvm_vcpu *vcpu;
5021 
5022 	kvm_for_each_vcpu(i, vcpu, kvm) {
5023 		__disable_ibs_on_vcpu(vcpu);
5024 	}
5025 }
5026 
5027 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
5028 {
5029 	if (!sclp.has_ibs)
5030 		return;
5031 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
5032 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
5033 }
5034 
5035 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
5036 {
5037 	int i, online_vcpus, r = 0, started_vcpus = 0;
5038 
5039 	if (!is_vcpu_stopped(vcpu))
5040 		return 0;
5041 
5042 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
5043 	/* Only one cpu at a time may enter/leave the STOPPED state. */
5044 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
5045 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
5046 
5047 	/* Let's tell the UV that we want to change into the operating state */
5048 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5049 		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
5050 		if (r) {
5051 			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5052 			return r;
5053 		}
5054 	}
5055 
5056 	for (i = 0; i < online_vcpus; i++) {
5057 		if (!is_vcpu_stopped(kvm_get_vcpu(vcpu->kvm, i)))
5058 			started_vcpus++;
5059 	}
5060 
5061 	if (started_vcpus == 0) {
5062 		/* we're the only active VCPU -> speed it up */
5063 		__enable_ibs_on_vcpu(vcpu);
5064 	} else if (started_vcpus == 1) {
5065 		/*
5066 		 * As we are starting a second VCPU, we have to disable
5067 		 * the IBS facility on all VCPUs to remove potentially
5068 		 * outstanding ENABLE requests.
5069 		 */
5070 		__disable_ibs_on_all_vcpus(vcpu->kvm);
5071 	}
5072 
5073 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
5074 	/*
5075 	 * The real PSW might have changed due to a RESTART interpreted by the
5076 	 * ultravisor. We block all interrupts and let the next sie exit
5077 	 * refresh our view.
5078 	 */
5079 	if (kvm_s390_pv_cpu_is_protected(vcpu))
5080 		vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
5081 	/*
5082 	 * Another VCPU might have used IBS while we were offline.
5083 	 * Let's play safe and flush the VCPU at startup.
5084 	 */
5085 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
5086 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5087 	return 0;
5088 }
5089 
5090 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
5091 {
5092 	int i, online_vcpus, r = 0, started_vcpus = 0;
5093 	struct kvm_vcpu *started_vcpu = NULL;
5094 
5095 	if (is_vcpu_stopped(vcpu))
5096 		return 0;
5097 
5098 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
5099 	/* Only one cpu at a time may enter/leave the STOPPED state. */
5100 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
5101 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
5102 
5103 	/* Let's tell the UV that we want to change into the stopped state */
5104 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5105 		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
5106 		if (r) {
5107 			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5108 			return r;
5109 		}
5110 	}
5111 
5112 	/*
5113 	 * Set the VCPU to STOPPED and THEN clear the interrupt flag,
5114 	 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
5115 	 * have been fully processed. This will ensure that the VCPU
5116 	 * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
5117 	 */
5118 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
5119 	kvm_s390_clear_stop_irq(vcpu);
5120 
5121 	__disable_ibs_on_vcpu(vcpu);
5122 
5123 	for (i = 0; i < online_vcpus; i++) {
5124 		struct kvm_vcpu *tmp = kvm_get_vcpu(vcpu->kvm, i);
5125 
5126 		if (!is_vcpu_stopped(tmp)) {
5127 			started_vcpus++;
5128 			started_vcpu = tmp;
5129 		}
5130 	}
5131 
5132 	if (started_vcpus == 1) {
5133 		/*
5134 		 * As we only have one VCPU left, we want to enable the
5135 		 * IBS facility for that VCPU to speed it up.
5136 		 */
5137 		__enable_ibs_on_vcpu(started_vcpu);
5138 	}
5139 
5140 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5141 	return 0;
5142 }
5143 
5144 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
5145 				     struct kvm_enable_cap *cap)
5146 {
5147 	int r;
5148 
5149 	if (cap->flags)
5150 		return -EINVAL;
5151 
5152 	switch (cap->cap) {
5153 	case KVM_CAP_S390_CSS_SUPPORT:
5154 		if (!vcpu->kvm->arch.css_support) {
5155 			vcpu->kvm->arch.css_support = 1;
5156 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
5157 			trace_kvm_s390_enable_css(vcpu->kvm);
5158 		}
5159 		r = 0;
5160 		break;
5161 	default:
5162 		r = -EINVAL;
5163 		break;
5164 	}
5165 	return r;
5166 }
5167 
5168 static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu,
5169 				  struct kvm_s390_mem_op *mop)
5170 {
5171 	void __user *uaddr = (void __user *)mop->buf;
5172 	int r = 0;
5173 
5174 	if (mop->flags || !mop->size)
5175 		return -EINVAL;
5176 	if (mop->size + mop->sida_offset < mop->size)
5177 		return -EINVAL;
5178 	if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
5179 		return -E2BIG;
5180 	if (!kvm_s390_pv_cpu_is_protected(vcpu))
5181 		return -EINVAL;
5182 
5183 	switch (mop->op) {
5184 	case KVM_S390_MEMOP_SIDA_READ:
5185 		if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
5186 				 mop->sida_offset), mop->size))
5187 			r = -EFAULT;
5188 
5189 		break;
5190 	case KVM_S390_MEMOP_SIDA_WRITE:
5191 		if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
5192 				   mop->sida_offset), uaddr, mop->size))
5193 			r = -EFAULT;
5194 		break;
5195 	}
5196 	return r;
5197 }
5198 
5199 static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu,
5200 				 struct kvm_s390_mem_op *mop)
5201 {
5202 	void __user *uaddr = (void __user *)mop->buf;
5203 	void *tmpbuf = NULL;
5204 	int r = 0;
5205 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
5206 				    | KVM_S390_MEMOP_F_CHECK_ONLY
5207 				    | KVM_S390_MEMOP_F_SKEY_PROTECTION;
5208 
5209 	if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
5210 		return -EINVAL;
5211 	if (mop->size > MEM_OP_MAX_SIZE)
5212 		return -E2BIG;
5213 	if (kvm_s390_pv_cpu_is_protected(vcpu))
5214 		return -EINVAL;
5215 	if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
5216 		if (access_key_invalid(mop->key))
5217 			return -EINVAL;
5218 	} else {
5219 		mop->key = 0;
5220 	}
5221 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
5222 		tmpbuf = vmalloc(mop->size);
5223 		if (!tmpbuf)
5224 			return -ENOMEM;
5225 	}
5226 
5227 	switch (mop->op) {
5228 	case KVM_S390_MEMOP_LOGICAL_READ:
5229 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
5230 			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
5231 					    GACC_FETCH, mop->key);
5232 			break;
5233 		}
5234 		r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
5235 					mop->size, mop->key);
5236 		if (r == 0) {
5237 			if (copy_to_user(uaddr, tmpbuf, mop->size))
5238 				r = -EFAULT;
5239 		}
5240 		break;
5241 	case KVM_S390_MEMOP_LOGICAL_WRITE:
5242 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
5243 			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
5244 					    GACC_STORE, mop->key);
5245 			break;
5246 		}
5247 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
5248 			r = -EFAULT;
5249 			break;
5250 		}
5251 		r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
5252 					 mop->size, mop->key);
5253 		break;
5254 	}
5255 
5256 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
5257 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
5258 
5259 	vfree(tmpbuf);
5260 	return r;
5261 }
5262 
5263 static long kvm_s390_vcpu_memsida_op(struct kvm_vcpu *vcpu,
5264 				     struct kvm_s390_mem_op *mop)
5265 {
5266 	int r, srcu_idx;
5267 
5268 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
5269 
5270 	switch (mop->op) {
5271 	case KVM_S390_MEMOP_LOGICAL_READ:
5272 	case KVM_S390_MEMOP_LOGICAL_WRITE:
5273 		r = kvm_s390_vcpu_mem_op(vcpu, mop);
5274 		break;
5275 	case KVM_S390_MEMOP_SIDA_READ:
5276 	case KVM_S390_MEMOP_SIDA_WRITE:
5277 		/* we are locked against sida going away by the vcpu->mutex */
5278 		r = kvm_s390_vcpu_sida_op(vcpu, mop);
5279 		break;
5280 	default:
5281 		r = -EINVAL;
5282 	}
5283 
5284 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
5285 	return r;
5286 }
5287 
5288 long kvm_arch_vcpu_async_ioctl(struct file *filp,
5289 			       unsigned int ioctl, unsigned long arg)
5290 {
5291 	struct kvm_vcpu *vcpu = filp->private_data;
5292 	void __user *argp = (void __user *)arg;
5293 
5294 	switch (ioctl) {
5295 	case KVM_S390_IRQ: {
5296 		struct kvm_s390_irq s390irq;
5297 
5298 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
5299 			return -EFAULT;
5300 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
5301 	}
5302 	case KVM_S390_INTERRUPT: {
5303 		struct kvm_s390_interrupt s390int;
5304 		struct kvm_s390_irq s390irq = {};
5305 
5306 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
5307 			return -EFAULT;
5308 		if (s390int_to_s390irq(&s390int, &s390irq))
5309 			return -EINVAL;
5310 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
5311 	}
5312 	}
5313 	return -ENOIOCTLCMD;
5314 }
5315 
5316 static int kvm_s390_handle_pv_vcpu_dump(struct kvm_vcpu *vcpu,
5317 					struct kvm_pv_cmd *cmd)
5318 {
5319 	struct kvm_s390_pv_dmp dmp;
5320 	void *data;
5321 	int ret;
5322 
5323 	/* Dump initialization is a prerequisite */
5324 	if (!vcpu->kvm->arch.pv.dumping)
5325 		return -EINVAL;
5326 
5327 	if (copy_from_user(&dmp, (__u8 __user *)cmd->data, sizeof(dmp)))
5328 		return -EFAULT;
5329 
5330 	/* We only handle this subcmd right now */
5331 	if (dmp.subcmd != KVM_PV_DUMP_CPU)
5332 		return -EINVAL;
5333 
5334 	/* CPU dump length is the same as create cpu storage donation. */
5335 	if (dmp.buff_len != uv_info.guest_cpu_stor_len)
5336 		return -EINVAL;
5337 
5338 	data = kvzalloc(uv_info.guest_cpu_stor_len, GFP_KERNEL);
5339 	if (!data)
5340 		return -ENOMEM;
5341 
5342 	ret = kvm_s390_pv_dump_cpu(vcpu, data, &cmd->rc, &cmd->rrc);
5343 
5344 	VCPU_EVENT(vcpu, 3, "PROTVIRT DUMP CPU %d rc %x rrc %x",
5345 		   vcpu->vcpu_id, cmd->rc, cmd->rrc);
5346 
5347 	if (ret)
5348 		ret = -EINVAL;
5349 
5350 	/* On success copy over the dump data */
5351 	if (!ret && copy_to_user((__u8 __user *)dmp.buff_addr, data, uv_info.guest_cpu_stor_len))
5352 		ret = -EFAULT;
5353 
5354 	kvfree(data);
5355 	return ret;
5356 }
5357 
5358 long kvm_arch_vcpu_ioctl(struct file *filp,
5359 			 unsigned int ioctl, unsigned long arg)
5360 {
5361 	struct kvm_vcpu *vcpu = filp->private_data;
5362 	void __user *argp = (void __user *)arg;
5363 	int idx;
5364 	long r;
5365 	u16 rc, rrc;
5366 
5367 	vcpu_load(vcpu);
5368 
5369 	switch (ioctl) {
5370 	case KVM_S390_STORE_STATUS:
5371 		idx = srcu_read_lock(&vcpu->kvm->srcu);
5372 		r = kvm_s390_store_status_unloaded(vcpu, arg);
5373 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
5374 		break;
5375 	case KVM_S390_SET_INITIAL_PSW: {
5376 		psw_t psw;
5377 
5378 		r = -EFAULT;
5379 		if (copy_from_user(&psw, argp, sizeof(psw)))
5380 			break;
5381 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
5382 		break;
5383 	}
5384 	case KVM_S390_CLEAR_RESET:
5385 		r = 0;
5386 		kvm_arch_vcpu_ioctl_clear_reset(vcpu);
5387 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5388 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
5389 					  UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
5390 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
5391 				   rc, rrc);
5392 		}
5393 		break;
5394 	case KVM_S390_INITIAL_RESET:
5395 		r = 0;
5396 		kvm_arch_vcpu_ioctl_initial_reset(vcpu);
5397 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5398 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
5399 					  UVC_CMD_CPU_RESET_INITIAL,
5400 					  &rc, &rrc);
5401 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
5402 				   rc, rrc);
5403 		}
5404 		break;
5405 	case KVM_S390_NORMAL_RESET:
5406 		r = 0;
5407 		kvm_arch_vcpu_ioctl_normal_reset(vcpu);
5408 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5409 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
5410 					  UVC_CMD_CPU_RESET, &rc, &rrc);
5411 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
5412 				   rc, rrc);
5413 		}
5414 		break;
5415 	case KVM_SET_ONE_REG:
5416 	case KVM_GET_ONE_REG: {
5417 		struct kvm_one_reg reg;
5418 		r = -EINVAL;
5419 		if (kvm_s390_pv_cpu_is_protected(vcpu))
5420 			break;
5421 		r = -EFAULT;
5422 		if (copy_from_user(&reg, argp, sizeof(reg)))
5423 			break;
5424 		if (ioctl == KVM_SET_ONE_REG)
5425 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
5426 		else
5427 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
5428 		break;
5429 	}
5430 #ifdef CONFIG_KVM_S390_UCONTROL
5431 	case KVM_S390_UCAS_MAP: {
5432 		struct kvm_s390_ucas_mapping ucasmap;
5433 
5434 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
5435 			r = -EFAULT;
5436 			break;
5437 		}
5438 
5439 		if (!kvm_is_ucontrol(vcpu->kvm)) {
5440 			r = -EINVAL;
5441 			break;
5442 		}
5443 
5444 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
5445 				     ucasmap.vcpu_addr, ucasmap.length);
5446 		break;
5447 	}
5448 	case KVM_S390_UCAS_UNMAP: {
5449 		struct kvm_s390_ucas_mapping ucasmap;
5450 
5451 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
5452 			r = -EFAULT;
5453 			break;
5454 		}
5455 
5456 		if (!kvm_is_ucontrol(vcpu->kvm)) {
5457 			r = -EINVAL;
5458 			break;
5459 		}
5460 
5461 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
5462 			ucasmap.length);
5463 		break;
5464 	}
5465 #endif
5466 	case KVM_S390_VCPU_FAULT: {
5467 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
5468 		break;
5469 	}
5470 	case KVM_ENABLE_CAP:
5471 	{
5472 		struct kvm_enable_cap cap;
5473 		r = -EFAULT;
5474 		if (copy_from_user(&cap, argp, sizeof(cap)))
5475 			break;
5476 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
5477 		break;
5478 	}
5479 	case KVM_S390_MEM_OP: {
5480 		struct kvm_s390_mem_op mem_op;
5481 
5482 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
5483 			r = kvm_s390_vcpu_memsida_op(vcpu, &mem_op);
5484 		else
5485 			r = -EFAULT;
5486 		break;
5487 	}
5488 	case KVM_S390_SET_IRQ_STATE: {
5489 		struct kvm_s390_irq_state irq_state;
5490 
5491 		r = -EFAULT;
5492 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
5493 			break;
5494 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
5495 		    irq_state.len == 0 ||
5496 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
5497 			r = -EINVAL;
5498 			break;
5499 		}
5500 		/* do not use irq_state.flags, it will break old QEMUs */
5501 		r = kvm_s390_set_irq_state(vcpu,
5502 					   (void __user *) irq_state.buf,
5503 					   irq_state.len);
5504 		break;
5505 	}
5506 	case KVM_S390_GET_IRQ_STATE: {
5507 		struct kvm_s390_irq_state irq_state;
5508 
5509 		r = -EFAULT;
5510 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
5511 			break;
5512 		if (irq_state.len == 0) {
5513 			r = -EINVAL;
5514 			break;
5515 		}
5516 		/* do not use irq_state.flags, it will break old QEMUs */
5517 		r = kvm_s390_get_irq_state(vcpu,
5518 					   (__u8 __user *)  irq_state.buf,
5519 					   irq_state.len);
5520 		break;
5521 	}
5522 	case KVM_S390_PV_CPU_COMMAND: {
5523 		struct kvm_pv_cmd cmd;
5524 
5525 		r = -EINVAL;
5526 		if (!is_prot_virt_host())
5527 			break;
5528 
5529 		r = -EFAULT;
5530 		if (copy_from_user(&cmd, argp, sizeof(cmd)))
5531 			break;
5532 
5533 		r = -EINVAL;
5534 		if (cmd.flags)
5535 			break;
5536 
5537 		/* We only handle this cmd right now */
5538 		if (cmd.cmd != KVM_PV_DUMP)
5539 			break;
5540 
5541 		r = kvm_s390_handle_pv_vcpu_dump(vcpu, &cmd);
5542 
5543 		/* Always copy over UV rc / rrc data */
5544 		if (copy_to_user((__u8 __user *)argp, &cmd.rc,
5545 				 sizeof(cmd.rc) + sizeof(cmd.rrc)))
5546 			r = -EFAULT;
5547 		break;
5548 	}
5549 	default:
5550 		r = -ENOTTY;
5551 	}
5552 
5553 	vcpu_put(vcpu);
5554 	return r;
5555 }
5556 
5557 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
5558 {
5559 #ifdef CONFIG_KVM_S390_UCONTROL
5560 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
5561 		 && (kvm_is_ucontrol(vcpu->kvm))) {
5562 		vmf->page = virt_to_page(vcpu->arch.sie_block);
5563 		get_page(vmf->page);
5564 		return 0;
5565 	}
5566 #endif
5567 	return VM_FAULT_SIGBUS;
5568 }
5569 
5570 /* Section: memory related */
5571 int kvm_arch_prepare_memory_region(struct kvm *kvm,
5572 				   const struct kvm_memory_slot *old,
5573 				   struct kvm_memory_slot *new,
5574 				   enum kvm_mr_change change)
5575 {
5576 	gpa_t size;
5577 
5578 	/* When we are protected, we should not change the memory slots */
5579 	if (kvm_s390_pv_get_handle(kvm))
5580 		return -EINVAL;
5581 
5582 	if (change == KVM_MR_DELETE || change == KVM_MR_FLAGS_ONLY)
5583 		return 0;
5584 
5585 	/* A few sanity checks. We can have memory slots which have to be
5586 	   located/ended at a segment boundary (1MB). The memory in userland is
5587 	   ok to be fragmented into various different vmas. It is okay to mmap()
5588 	   and munmap() stuff in this slot after doing this call at any time */
5589 
5590 	if (new->userspace_addr & 0xffffful)
5591 		return -EINVAL;
5592 
5593 	size = new->npages * PAGE_SIZE;
5594 	if (size & 0xffffful)
5595 		return -EINVAL;
5596 
5597 	if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit)
5598 		return -EINVAL;
5599 
5600 	return 0;
5601 }
5602 
5603 void kvm_arch_commit_memory_region(struct kvm *kvm,
5604 				struct kvm_memory_slot *old,
5605 				const struct kvm_memory_slot *new,
5606 				enum kvm_mr_change change)
5607 {
5608 	int rc = 0;
5609 
5610 	switch (change) {
5611 	case KVM_MR_DELETE:
5612 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5613 					old->npages * PAGE_SIZE);
5614 		break;
5615 	case KVM_MR_MOVE:
5616 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5617 					old->npages * PAGE_SIZE);
5618 		if (rc)
5619 			break;
5620 		fallthrough;
5621 	case KVM_MR_CREATE:
5622 		rc = gmap_map_segment(kvm->arch.gmap, new->userspace_addr,
5623 				      new->base_gfn * PAGE_SIZE,
5624 				      new->npages * PAGE_SIZE);
5625 		break;
5626 	case KVM_MR_FLAGS_ONLY:
5627 		break;
5628 	default:
5629 		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5630 	}
5631 	if (rc)
5632 		pr_warn("failed to commit memory region\n");
5633 	return;
5634 }
5635 
5636 static inline unsigned long nonhyp_mask(int i)
5637 {
5638 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5639 
5640 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5641 }
5642 
5643 static int __init kvm_s390_init(void)
5644 {
5645 	int i;
5646 
5647 	if (!sclp.has_sief2) {
5648 		pr_info("SIE is not available\n");
5649 		return -ENODEV;
5650 	}
5651 
5652 	if (nested && hpage) {
5653 		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5654 		return -EINVAL;
5655 	}
5656 
5657 	for (i = 0; i < 16; i++)
5658 		kvm_s390_fac_base[i] |=
5659 			stfle_fac_list[i] & nonhyp_mask(i);
5660 
5661 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5662 }
5663 
5664 static void __exit kvm_s390_exit(void)
5665 {
5666 	kvm_exit();
5667 }
5668 
5669 module_init(kvm_s390_init);
5670 module_exit(kvm_s390_exit);
5671 
5672 /*
5673  * Enable autoloading of the kvm module.
5674  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5675  * since x86 takes a different approach.
5676  */
5677 #include <linux/miscdevice.h>
5678 MODULE_ALIAS_MISCDEV(KVM_MINOR);
5679 MODULE_ALIAS("devname:kvm");
5680