xref: /openbmc/linux/arch/s390/kvm/kvm-s390.c (revision dce8efa0575c8d9b5f9f9ae41437200c6d3e0bf3)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2018
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13 
14 #include <linux/compiler.h>
15 #include <linux/err.h>
16 #include <linux/fs.h>
17 #include <linux/hrtimer.h>
18 #include <linux/init.h>
19 #include <linux/kvm.h>
20 #include <linux/kvm_host.h>
21 #include <linux/mman.h>
22 #include <linux/module.h>
23 #include <linux/moduleparam.h>
24 #include <linux/random.h>
25 #include <linux/slab.h>
26 #include <linux/timer.h>
27 #include <linux/vmalloc.h>
28 #include <linux/bitmap.h>
29 #include <linux/sched/signal.h>
30 #include <linux/string.h>
31 
32 #include <asm/asm-offsets.h>
33 #include <asm/lowcore.h>
34 #include <asm/stp.h>
35 #include <asm/pgtable.h>
36 #include <asm/gmap.h>
37 #include <asm/nmi.h>
38 #include <asm/switch_to.h>
39 #include <asm/isc.h>
40 #include <asm/sclp.h>
41 #include <asm/cpacf.h>
42 #include <asm/timex.h>
43 #include "kvm-s390.h"
44 #include "gaccess.h"
45 
46 #define KMSG_COMPONENT "kvm-s390"
47 #undef pr_fmt
48 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
49 
50 #define CREATE_TRACE_POINTS
51 #include "trace.h"
52 #include "trace-s390.h"
53 
#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
/*
 * Size in bytes of a buffer holding (KVM_MAX_VCPUS + LOCAL_IRQS) entries of
 * struct kvm_s390_irq.
 */
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

/*
 * Expands to two comma-separated initializers: the offset of counter
 * stat.x inside struct kvm_vcpu, followed by KVM_STAT_VCPU.
 */
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
60 
/*
 * Table of per-vcpu statistics counters. Each entry pairs a name string
 * with the location of a counter in struct kvm_vcpu (via VCPU_STAT) and
 * the table is terminated by a { NULL } entry.
 *
 * Note that a few names differ from the field they refer to, e.g.
 * "userspace_handled" -> exit_userspace, "instruction_diag_*" ->
 * diagnose_* and "instruction_sigp_set_arch" -> instruction_sigp_arch.
 */
struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_pei", VCPU_STAT(exit_pei) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_epsw", VCPU_STAT(instruction_epsw) },
	{ "instruction_gs", VCPU_STAT(instruction_gs) },
	{ "instruction_io_other", VCPU_STAT(instruction_io_other) },
	{ "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
	{ "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_ptff", VCPU_STAT(instruction_ptff) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_sck", VCPU_STAT(instruction_sck) },
	{ "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_iske", VCPU_STAT(instruction_iske) },
	{ "instruction_ri", VCPU_STAT(instruction_ri) },
	{ "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
	{ "instruction_sske", VCPU_STAT(instruction_sske) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tb", VCPU_STAT(instruction_tb) },
	{ "instruction_tpi", VCPU_STAT(instruction_tpi) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_tsch", VCPU_STAT(instruction_tsch) },
	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
	{ "instruction_sie", VCPU_STAT(instruction_sie) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "instruction_diag_10", VCPU_STAT(diagnose_10) },
	{ "instruction_diag_44", VCPU_STAT(diagnose_44) },
	{ "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
	{ "instruction_diag_258", VCPU_STAT(diagnose_258) },
	{ "instruction_diag_308", VCPU_STAT(diagnose_308) },
	{ "instruction_diag_500", VCPU_STAT(diagnose_500) },
	{ "instruction_diag_other", VCPU_STAT(diagnose_other) },
	{ NULL }	/* end-of-table marker */
};
142 
/*
 * Packed 16-byte buffer that kvm_s390_get_tod_clock_ext() hands to
 * get_tod_clock_ext(): one epoch-index byte, the 64-bit TOD value and
 * seven reserved bytes.
 */
struct kvm_s390_tod_clock_ext {
	__u8 epoch_idx;
	__u64 tod;
	__u8 reserved[7];
} __packed;
148 
/*
 * allow nested virtualization in KVM (if enabled by user space)
 *
 * Exposed as a read-only (S_IRUGO) module parameter. When it is zero,
 * kvm_s390_cpu_feat_init() returns before advertising SIEF2 and the
 * features that depend on it.
 */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* upper facilities limit for kvm (first element initialized from FACILITIES_KVM) */
unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
156 
157 unsigned long kvm_s390_fac_list_mask_size(void)
158 {
159 	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
160 	return ARRAY_SIZE(kvm_s390_fac_list_mask);
161 }
162 
/* available cpu features supported by kvm (bits set via allow_cpu_feat()) */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

/* gmap pte notifiers, registered in kvm_arch_hardware_setup() */
static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
/* s390 debug feature handle, registered in kvm_arch_init() */
debug_info_t *kvm_s390_dbf;
171 
172 /* Section: not file related */
/*
 * Nothing needs to be switched on for s390, so this always reports
 * success (0).
 */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}
178 
179 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
180 			      unsigned long end);
181 
182 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
183 {
184 	u8 delta_idx = 0;
185 
186 	/*
187 	 * The TOD jumps by delta, we have to compensate this by adding
188 	 * -delta to the epoch.
189 	 */
190 	delta = -delta;
191 
192 	/* sign-extension - we're adding to signed values below */
193 	if ((s64)delta < 0)
194 		delta_idx = -1;
195 
196 	scb->epoch += delta;
197 	if (scb->ecd & ECD_MEF) {
198 		scb->epdx += delta_idx;
199 		if (scb->epoch < delta)
200 			scb->epdx += 1;
201 	}
202 }
203 
204 /*
205  * This callback is executed during stop_machine(). All CPUs are therefore
206  * temporarily stopped. In order not to change guest behavior, we have to
207  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
208  * so a CPU won't be stopped while calculating with the epoch.
209  */
210 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
211 			  void *v)
212 {
213 	struct kvm *kvm;
214 	struct kvm_vcpu *vcpu;
215 	int i;
216 	unsigned long long *delta = v;
217 
218 	list_for_each_entry(kvm, &vm_list, vm_list) {
219 		kvm_for_each_vcpu(i, vcpu, kvm) {
220 			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
221 			if (i == 0) {
222 				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
223 				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
224 			}
225 			if (vcpu->arch.cputm_enabled)
226 				vcpu->arch.cputm_start += *delta;
227 			if (vcpu->arch.vsie_block)
228 				kvm_clock_sync_scb(vcpu->arch.vsie_block,
229 						   *delta);
230 		}
231 	}
232 	return NOTIFY_OK;
233 }
234 
/*
 * Notifier block wrapping kvm_clock_sync(); it is added to
 * s390_epoch_delta_notifier in kvm_arch_hardware_setup().
 */
static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};
238 
/*
 * Hook KVM into the gmap and TOD-epoch notification chains.
 *
 * Registers the regular and the vsie gmap pte notifiers and the epoch
 * delta notifier. Always returns 0.
 */
int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}
249 
/*
 * Undo kvm_arch_hardware_setup(): unregister both gmap pte notifiers and
 * the epoch delta notifier.
 */
void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}
257 
/* Mark cpu feature @nr as available in kvm_s390_available_cpu_feat. */
static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}
262 
/*
 * Issue PLO in "test bit" mode for function code @nr.
 *
 * General register 0 is loaded with @nr with bit 0x100 set. The resulting
 * condition code is extracted with ipm/srl.
 *
 * Returns 1 if the condition code is 0, otherwise 0.
 */
static inline int plo_test_bit(unsigned char nr)
{
	/* bind the operand to general register 0, as used by the asm below */
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc;

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc == 0;
}
278 
279 static void kvm_s390_cpu_feat_init(void)
280 {
281 	int i;
282 
283 	for (i = 0; i < 256; ++i) {
284 		if (plo_test_bit(i))
285 			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
286 	}
287 
288 	if (test_facility(28)) /* TOD-clock steering */
289 		ptff(kvm_s390_available_subfunc.ptff,
290 		     sizeof(kvm_s390_available_subfunc.ptff),
291 		     PTFF_QAF);
292 
293 	if (test_facility(17)) { /* MSA */
294 		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
295 			      kvm_s390_available_subfunc.kmac);
296 		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
297 			      kvm_s390_available_subfunc.kmc);
298 		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
299 			      kvm_s390_available_subfunc.km);
300 		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
301 			      kvm_s390_available_subfunc.kimd);
302 		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
303 			      kvm_s390_available_subfunc.klmd);
304 	}
305 	if (test_facility(76)) /* MSA3 */
306 		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
307 			      kvm_s390_available_subfunc.pckmo);
308 	if (test_facility(77)) { /* MSA4 */
309 		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
310 			      kvm_s390_available_subfunc.kmctr);
311 		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
312 			      kvm_s390_available_subfunc.kmf);
313 		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
314 			      kvm_s390_available_subfunc.kmo);
315 		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
316 			      kvm_s390_available_subfunc.pcc);
317 	}
318 	if (test_facility(57)) /* MSA5 */
319 		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
320 			      kvm_s390_available_subfunc.ppno);
321 
322 	if (test_facility(146)) /* MSA8 */
323 		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
324 			      kvm_s390_available_subfunc.kma);
325 
326 	if (MACHINE_HAS_ESOP)
327 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
328 	/*
329 	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
330 	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
331 	 */
332 	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
333 	    !test_facility(3) || !nested)
334 		return;
335 	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
336 	if (sclp.has_64bscao)
337 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
338 	if (sclp.has_siif)
339 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
340 	if (sclp.has_gpere)
341 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
342 	if (sclp.has_gsls)
343 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
344 	if (sclp.has_ib)
345 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
346 	if (sclp.has_cei)
347 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
348 	if (sclp.has_ibs)
349 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
350 	if (sclp.has_kss)
351 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
352 	/*
353 	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
354 	 * all skey handling functions read/set the skey from the PGSTE
355 	 * instead of the real storage key.
356 	 *
357 	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
358 	 * pages being detected as preserved although they are resident.
359 	 *
360 	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
361 	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
362 	 *
363 	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
364 	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
365 	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
366 	 *
367 	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
368 	 * cannot easily shadow the SCA because of the ipte lock.
369 	 */
370 }
371 
372 int kvm_arch_init(void *opaque)
373 {
374 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
375 	if (!kvm_s390_dbf)
376 		return -ENOMEM;
377 
378 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
379 		debug_unregister(kvm_s390_dbf);
380 		return -ENOMEM;
381 	}
382 
383 	kvm_s390_cpu_feat_init();
384 
385 	/* Register floating interrupt controller interface. */
386 	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
387 }
388 
/* Module teardown: release the debug feature registered in kvm_arch_init(). */
void kvm_arch_exit(void)
{
	debug_unregister(kvm_s390_dbf);
}
393 
394 /* Section: device related */
395 long kvm_arch_dev_ioctl(struct file *filp,
396 			unsigned int ioctl, unsigned long arg)
397 {
398 	if (ioctl == KVM_S390_ENABLE_SIE)
399 		return s390_enable_sie();
400 	return -EINVAL;
401 }
402 
403 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
404 {
405 	int r;
406 
407 	switch (ext) {
408 	case KVM_CAP_S390_PSW:
409 	case KVM_CAP_S390_GMAP:
410 	case KVM_CAP_SYNC_MMU:
411 #ifdef CONFIG_KVM_S390_UCONTROL
412 	case KVM_CAP_S390_UCONTROL:
413 #endif
414 	case KVM_CAP_ASYNC_PF:
415 	case KVM_CAP_SYNC_REGS:
416 	case KVM_CAP_ONE_REG:
417 	case KVM_CAP_ENABLE_CAP:
418 	case KVM_CAP_S390_CSS_SUPPORT:
419 	case KVM_CAP_IOEVENTFD:
420 	case KVM_CAP_DEVICE_CTRL:
421 	case KVM_CAP_ENABLE_CAP_VM:
422 	case KVM_CAP_S390_IRQCHIP:
423 	case KVM_CAP_VM_ATTRIBUTES:
424 	case KVM_CAP_MP_STATE:
425 	case KVM_CAP_IMMEDIATE_EXIT:
426 	case KVM_CAP_S390_INJECT_IRQ:
427 	case KVM_CAP_S390_USER_SIGP:
428 	case KVM_CAP_S390_USER_STSI:
429 	case KVM_CAP_S390_SKEYS:
430 	case KVM_CAP_S390_IRQ_STATE:
431 	case KVM_CAP_S390_USER_INSTR0:
432 	case KVM_CAP_S390_CMMA_MIGRATION:
433 	case KVM_CAP_S390_AIS:
434 	case KVM_CAP_S390_AIS_MIGRATION:
435 		r = 1;
436 		break;
437 	case KVM_CAP_S390_MEM_OP:
438 		r = MEM_OP_MAX_SIZE;
439 		break;
440 	case KVM_CAP_NR_VCPUS:
441 	case KVM_CAP_MAX_VCPUS:
442 		r = KVM_S390_BSCA_CPU_SLOTS;
443 		if (!kvm_s390_use_sca_entries())
444 			r = KVM_MAX_VCPUS;
445 		else if (sclp.has_esca && sclp.has_64bscao)
446 			r = KVM_S390_ESCA_CPU_SLOTS;
447 		break;
448 	case KVM_CAP_NR_MEMSLOTS:
449 		r = KVM_USER_MEM_SLOTS;
450 		break;
451 	case KVM_CAP_S390_COW:
452 		r = MACHINE_HAS_ESOP;
453 		break;
454 	case KVM_CAP_S390_VECTOR_REGISTERS:
455 		r = MACHINE_HAS_VX;
456 		break;
457 	case KVM_CAP_S390_RI:
458 		r = test_facility(64);
459 		break;
460 	case KVM_CAP_S390_GS:
461 		r = test_facility(133);
462 		break;
463 	case KVM_CAP_S390_BPB:
464 		r = test_facility(82);
465 		break;
466 	default:
467 		r = 0;
468 	}
469 	return r;
470 }
471 
472 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
473 					struct kvm_memory_slot *memslot)
474 {
475 	gfn_t cur_gfn, last_gfn;
476 	unsigned long address;
477 	struct gmap *gmap = kvm->arch.gmap;
478 
479 	/* Loop over all guest pages */
480 	last_gfn = memslot->base_gfn + memslot->npages;
481 	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
482 		address = gfn_to_hva_memslot(memslot, cur_gfn);
483 
484 		if (test_and_clear_guest_dirty(gmap->mm, address))
485 			mark_page_dirty(kvm, cur_gfn);
486 		if (fatal_signal_pending(current))
487 			return;
488 		cond_resched();
489 	}
490 }
491 
492 /* Section: vm related */
493 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
494 
495 /*
496  * Get (and clear) the dirty memory log for a memory slot.
497  */
498 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
499 			       struct kvm_dirty_log *log)
500 {
501 	int r;
502 	unsigned long n;
503 	struct kvm_memslots *slots;
504 	struct kvm_memory_slot *memslot;
505 	int is_dirty = 0;
506 
507 	if (kvm_is_ucontrol(kvm))
508 		return -EINVAL;
509 
510 	mutex_lock(&kvm->slots_lock);
511 
512 	r = -EINVAL;
513 	if (log->slot >= KVM_USER_MEM_SLOTS)
514 		goto out;
515 
516 	slots = kvm_memslots(kvm);
517 	memslot = id_to_memslot(slots, log->slot);
518 	r = -ENOENT;
519 	if (!memslot->dirty_bitmap)
520 		goto out;
521 
522 	kvm_s390_sync_dirty_log(kvm, memslot);
523 	r = kvm_get_dirty_log(kvm, log, &is_dirty);
524 	if (r)
525 		goto out;
526 
527 	/* Clear the dirty log */
528 	if (is_dirty) {
529 		n = kvm_dirty_bitmap_bytes(memslot);
530 		memset(memslot->dirty_bitmap, 0, n);
531 	}
532 	r = 0;
533 out:
534 	mutex_unlock(&kvm->slots_lock);
535 	return r;
536 }
537 
538 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
539 {
540 	unsigned int i;
541 	struct kvm_vcpu *vcpu;
542 
543 	kvm_for_each_vcpu(i, vcpu, kvm) {
544 		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
545 	}
546 }
547 
548 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
549 {
550 	int r;
551 
552 	if (cap->flags)
553 		return -EINVAL;
554 
555 	switch (cap->cap) {
556 	case KVM_CAP_S390_IRQCHIP:
557 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
558 		kvm->arch.use_irqchip = 1;
559 		r = 0;
560 		break;
561 	case KVM_CAP_S390_USER_SIGP:
562 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
563 		kvm->arch.user_sigp = 1;
564 		r = 0;
565 		break;
566 	case KVM_CAP_S390_VECTOR_REGISTERS:
567 		mutex_lock(&kvm->lock);
568 		if (kvm->created_vcpus) {
569 			r = -EBUSY;
570 		} else if (MACHINE_HAS_VX) {
571 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
572 			set_kvm_facility(kvm->arch.model.fac_list, 129);
573 			if (test_facility(134)) {
574 				set_kvm_facility(kvm->arch.model.fac_mask, 134);
575 				set_kvm_facility(kvm->arch.model.fac_list, 134);
576 			}
577 			if (test_facility(135)) {
578 				set_kvm_facility(kvm->arch.model.fac_mask, 135);
579 				set_kvm_facility(kvm->arch.model.fac_list, 135);
580 			}
581 			r = 0;
582 		} else
583 			r = -EINVAL;
584 		mutex_unlock(&kvm->lock);
585 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
586 			 r ? "(not available)" : "(success)");
587 		break;
588 	case KVM_CAP_S390_RI:
589 		r = -EINVAL;
590 		mutex_lock(&kvm->lock);
591 		if (kvm->created_vcpus) {
592 			r = -EBUSY;
593 		} else if (test_facility(64)) {
594 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
595 			set_kvm_facility(kvm->arch.model.fac_list, 64);
596 			r = 0;
597 		}
598 		mutex_unlock(&kvm->lock);
599 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
600 			 r ? "(not available)" : "(success)");
601 		break;
602 	case KVM_CAP_S390_AIS:
603 		mutex_lock(&kvm->lock);
604 		if (kvm->created_vcpus) {
605 			r = -EBUSY;
606 		} else {
607 			set_kvm_facility(kvm->arch.model.fac_mask, 72);
608 			set_kvm_facility(kvm->arch.model.fac_list, 72);
609 			r = 0;
610 		}
611 		mutex_unlock(&kvm->lock);
612 		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
613 			 r ? "(not available)" : "(success)");
614 		break;
615 	case KVM_CAP_S390_GS:
616 		r = -EINVAL;
617 		mutex_lock(&kvm->lock);
618 		if (kvm->created_vcpus) {
619 			r = -EBUSY;
620 		} else if (test_facility(133)) {
621 			set_kvm_facility(kvm->arch.model.fac_mask, 133);
622 			set_kvm_facility(kvm->arch.model.fac_list, 133);
623 			r = 0;
624 		}
625 		mutex_unlock(&kvm->lock);
626 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
627 			 r ? "(not available)" : "(success)");
628 		break;
629 	case KVM_CAP_S390_USER_STSI:
630 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
631 		kvm->arch.user_stsi = 1;
632 		r = 0;
633 		break;
634 	case KVM_CAP_S390_USER_INSTR0:
635 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
636 		kvm->arch.user_instr0 = 1;
637 		icpt_operexc_on_all_vcpus(kvm);
638 		r = 0;
639 		break;
640 	default:
641 		r = -EINVAL;
642 		break;
643 	}
644 	return r;
645 }
646 
647 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
648 {
649 	int ret;
650 
651 	switch (attr->attr) {
652 	case KVM_S390_VM_MEM_LIMIT_SIZE:
653 		ret = 0;
654 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
655 			 kvm->arch.mem_limit);
656 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
657 			ret = -EFAULT;
658 		break;
659 	default:
660 		ret = -ENXIO;
661 		break;
662 	}
663 	return ret;
664 }
665 
666 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
667 {
668 	int ret;
669 	unsigned int idx;
670 	switch (attr->attr) {
671 	case KVM_S390_VM_MEM_ENABLE_CMMA:
672 		ret = -ENXIO;
673 		if (!sclp.has_cmma)
674 			break;
675 
676 		ret = -EBUSY;
677 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
678 		mutex_lock(&kvm->lock);
679 		if (!kvm->created_vcpus) {
680 			kvm->arch.use_cmma = 1;
681 			ret = 0;
682 		}
683 		mutex_unlock(&kvm->lock);
684 		break;
685 	case KVM_S390_VM_MEM_CLR_CMMA:
686 		ret = -ENXIO;
687 		if (!sclp.has_cmma)
688 			break;
689 		ret = -EINVAL;
690 		if (!kvm->arch.use_cmma)
691 			break;
692 
693 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
694 		mutex_lock(&kvm->lock);
695 		idx = srcu_read_lock(&kvm->srcu);
696 		s390_reset_cmma(kvm->arch.gmap->mm);
697 		srcu_read_unlock(&kvm->srcu, idx);
698 		mutex_unlock(&kvm->lock);
699 		ret = 0;
700 		break;
701 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
702 		unsigned long new_limit;
703 
704 		if (kvm_is_ucontrol(kvm))
705 			return -EINVAL;
706 
707 		if (get_user(new_limit, (u64 __user *)attr->addr))
708 			return -EFAULT;
709 
710 		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
711 		    new_limit > kvm->arch.mem_limit)
712 			return -E2BIG;
713 
714 		if (!new_limit)
715 			return -EINVAL;
716 
717 		/* gmap_create takes last usable address */
718 		if (new_limit != KVM_S390_NO_MEM_LIMIT)
719 			new_limit -= 1;
720 
721 		ret = -EBUSY;
722 		mutex_lock(&kvm->lock);
723 		if (!kvm->created_vcpus) {
724 			/* gmap_create will round the limit up */
725 			struct gmap *new = gmap_create(current->mm, new_limit);
726 
727 			if (!new) {
728 				ret = -ENOMEM;
729 			} else {
730 				gmap_remove(kvm->arch.gmap);
731 				new->private = kvm;
732 				kvm->arch.gmap = new;
733 				ret = 0;
734 			}
735 		}
736 		mutex_unlock(&kvm->lock);
737 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
738 		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
739 			 (void *) kvm->arch.gmap->asce);
740 		break;
741 	}
742 	default:
743 		ret = -ENXIO;
744 		break;
745 	}
746 	return ret;
747 }
748 
749 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
750 
751 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
752 {
753 	struct kvm_vcpu *vcpu;
754 	int i;
755 
756 	if (!test_kvm_facility(kvm, 76))
757 		return -EINVAL;
758 
759 	mutex_lock(&kvm->lock);
760 	switch (attr->attr) {
761 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
762 		get_random_bytes(
763 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
764 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
765 		kvm->arch.crypto.aes_kw = 1;
766 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
767 		break;
768 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
769 		get_random_bytes(
770 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
771 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
772 		kvm->arch.crypto.dea_kw = 1;
773 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
774 		break;
775 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
776 		kvm->arch.crypto.aes_kw = 0;
777 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
778 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
779 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
780 		break;
781 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
782 		kvm->arch.crypto.dea_kw = 0;
783 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
784 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
785 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
786 		break;
787 	default:
788 		mutex_unlock(&kvm->lock);
789 		return -ENXIO;
790 	}
791 
792 	kvm_for_each_vcpu(i, vcpu, kvm) {
793 		kvm_s390_vcpu_crypto_setup(vcpu);
794 		exit_sie(vcpu);
795 	}
796 	mutex_unlock(&kvm->lock);
797 	return 0;
798 }
799 
800 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
801 {
802 	int cx;
803 	struct kvm_vcpu *vcpu;
804 
805 	kvm_for_each_vcpu(cx, vcpu, kvm)
806 		kvm_s390_sync_request(req, vcpu);
807 }
808 
809 /*
810  * Must be called with kvm->srcu held to avoid races on memslots, and with
811  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
812  */
813 static int kvm_s390_vm_start_migration(struct kvm *kvm)
814 {
815 	struct kvm_s390_migration_state *mgs;
816 	struct kvm_memory_slot *ms;
817 	/* should be the only one */
818 	struct kvm_memslots *slots;
819 	unsigned long ram_pages;
820 	int slotnr;
821 
822 	/* migration mode already enabled */
823 	if (kvm->arch.migration_state)
824 		return 0;
825 
826 	slots = kvm_memslots(kvm);
827 	if (!slots || !slots->used_slots)
828 		return -EINVAL;
829 
830 	mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
831 	if (!mgs)
832 		return -ENOMEM;
833 	kvm->arch.migration_state = mgs;
834 
835 	if (kvm->arch.use_cmma) {
836 		/*
837 		 * Get the first slot. They are reverse sorted by base_gfn, so
838 		 * the first slot is also the one at the end of the address
839 		 * space. We have verified above that at least one slot is
840 		 * present.
841 		 */
842 		ms = slots->memslots;
843 		/* round up so we only use full longs */
844 		ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
845 		/* allocate enough bytes to store all the bits */
846 		mgs->pgste_bitmap = vmalloc(ram_pages / 8);
847 		if (!mgs->pgste_bitmap) {
848 			kfree(mgs);
849 			kvm->arch.migration_state = NULL;
850 			return -ENOMEM;
851 		}
852 
853 		mgs->bitmap_size = ram_pages;
854 		atomic64_set(&mgs->dirty_pages, ram_pages);
855 		/* mark all the pages in active slots as dirty */
856 		for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
857 			ms = slots->memslots + slotnr;
858 			bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
859 		}
860 
861 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
862 	}
863 	return 0;
864 }
865 
866 /*
867  * Must be called with kvm->slots_lock to avoid races with ourselves and
868  * kvm_s390_vm_start_migration.
869  */
870 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
871 {
872 	struct kvm_s390_migration_state *mgs;
873 
874 	/* migration mode already disabled */
875 	if (!kvm->arch.migration_state)
876 		return 0;
877 	mgs = kvm->arch.migration_state;
878 	kvm->arch.migration_state = NULL;
879 
880 	if (kvm->arch.use_cmma) {
881 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
882 		/* We have to wait for the essa emulation to finish */
883 		synchronize_srcu(&kvm->srcu);
884 		vfree(mgs->pgste_bitmap);
885 	}
886 	kfree(mgs);
887 	return 0;
888 }
889 
890 static int kvm_s390_vm_set_migration(struct kvm *kvm,
891 				     struct kvm_device_attr *attr)
892 {
893 	int res = -ENXIO;
894 
895 	mutex_lock(&kvm->slots_lock);
896 	switch (attr->attr) {
897 	case KVM_S390_VM_MIGRATION_START:
898 		res = kvm_s390_vm_start_migration(kvm);
899 		break;
900 	case KVM_S390_VM_MIGRATION_STOP:
901 		res = kvm_s390_vm_stop_migration(kvm);
902 		break;
903 	default:
904 		break;
905 	}
906 	mutex_unlock(&kvm->slots_lock);
907 
908 	return res;
909 }
910 
911 static int kvm_s390_vm_get_migration(struct kvm *kvm,
912 				     struct kvm_device_attr *attr)
913 {
914 	u64 mig = (kvm->arch.migration_state != NULL);
915 
916 	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
917 		return -ENXIO;
918 
919 	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
920 		return -EFAULT;
921 	return 0;
922 }
923 
924 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
925 {
926 	struct kvm_s390_vm_tod_clock gtod;
927 
928 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
929 		return -EFAULT;
930 
931 	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
932 		return -EINVAL;
933 	kvm_s390_set_tod_clock(kvm, &gtod);
934 
935 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
936 		gtod.epoch_idx, gtod.tod);
937 
938 	return 0;
939 }
940 
941 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
942 {
943 	u8 gtod_high;
944 
945 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
946 					   sizeof(gtod_high)))
947 		return -EFAULT;
948 
949 	if (gtod_high != 0)
950 		return -EINVAL;
951 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
952 
953 	return 0;
954 }
955 
956 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
957 {
958 	struct kvm_s390_vm_tod_clock gtod = { 0 };
959 
960 	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
961 			   sizeof(gtod.tod)))
962 		return -EFAULT;
963 
964 	kvm_s390_set_tod_clock(kvm, &gtod);
965 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
966 	return 0;
967 }
968 
969 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
970 {
971 	int ret;
972 
973 	if (attr->flags)
974 		return -EINVAL;
975 
976 	switch (attr->attr) {
977 	case KVM_S390_VM_TOD_EXT:
978 		ret = kvm_s390_set_tod_ext(kvm, attr);
979 		break;
980 	case KVM_S390_VM_TOD_HIGH:
981 		ret = kvm_s390_set_tod_high(kvm, attr);
982 		break;
983 	case KVM_S390_VM_TOD_LOW:
984 		ret = kvm_s390_set_tod_low(kvm, attr);
985 		break;
986 	default:
987 		ret = -ENXIO;
988 		break;
989 	}
990 	return ret;
991 }
992 
993 static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
994 					struct kvm_s390_vm_tod_clock *gtod)
995 {
996 	struct kvm_s390_tod_clock_ext htod;
997 
998 	preempt_disable();
999 
1000 	get_tod_clock_ext((char *)&htod);
1001 
1002 	gtod->tod = htod.tod + kvm->arch.epoch;
1003 	gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1004 
1005 	if (gtod->tod < htod.tod)
1006 		gtod->epoch_idx += 1;
1007 
1008 	preempt_enable();
1009 }
1010 
1011 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1012 {
1013 	struct kvm_s390_vm_tod_clock gtod;
1014 
1015 	memset(&gtod, 0, sizeof(gtod));
1016 
1017 	if (test_kvm_facility(kvm, 139))
1018 		kvm_s390_get_tod_clock_ext(kvm, &gtod);
1019 	else
1020 		gtod.tod = kvm_s390_get_tod_clock_fast(kvm);
1021 
1022 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1023 		return -EFAULT;
1024 
1025 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1026 		gtod.epoch_idx, gtod.tod);
1027 	return 0;
1028 }
1029 
1030 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1031 {
1032 	u8 gtod_high = 0;
1033 
1034 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
1035 					 sizeof(gtod_high)))
1036 		return -EFAULT;
1037 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1038 
1039 	return 0;
1040 }
1041 
1042 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1043 {
1044 	u64 gtod;
1045 
1046 	gtod = kvm_s390_get_tod_clock_fast(kvm);
1047 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1048 		return -EFAULT;
1049 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1050 
1051 	return 0;
1052 }
1053 
1054 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1055 {
1056 	int ret;
1057 
1058 	if (attr->flags)
1059 		return -EINVAL;
1060 
1061 	switch (attr->attr) {
1062 	case KVM_S390_VM_TOD_EXT:
1063 		ret = kvm_s390_get_tod_ext(kvm, attr);
1064 		break;
1065 	case KVM_S390_VM_TOD_HIGH:
1066 		ret = kvm_s390_get_tod_high(kvm, attr);
1067 		break;
1068 	case KVM_S390_VM_TOD_LOW:
1069 		ret = kvm_s390_get_tod_low(kvm, attr);
1070 		break;
1071 	default:
1072 		ret = -ENXIO;
1073 		break;
1074 	}
1075 	return ret;
1076 }
1077 
1078 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1079 {
1080 	struct kvm_s390_vm_cpu_processor *proc;
1081 	u16 lowest_ibc, unblocked_ibc;
1082 	int ret = 0;
1083 
1084 	mutex_lock(&kvm->lock);
1085 	if (kvm->created_vcpus) {
1086 		ret = -EBUSY;
1087 		goto out;
1088 	}
1089 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1090 	if (!proc) {
1091 		ret = -ENOMEM;
1092 		goto out;
1093 	}
1094 	if (!copy_from_user(proc, (void __user *)attr->addr,
1095 			    sizeof(*proc))) {
1096 		kvm->arch.model.cpuid = proc->cpuid;
1097 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
1098 		unblocked_ibc = sclp.ibc & 0xfff;
1099 		if (lowest_ibc && proc->ibc) {
1100 			if (proc->ibc > unblocked_ibc)
1101 				kvm->arch.model.ibc = unblocked_ibc;
1102 			else if (proc->ibc < lowest_ibc)
1103 				kvm->arch.model.ibc = lowest_ibc;
1104 			else
1105 				kvm->arch.model.ibc = proc->ibc;
1106 		}
1107 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
1108 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
1109 		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1110 			 kvm->arch.model.ibc,
1111 			 kvm->arch.model.cpuid);
1112 		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1113 			 kvm->arch.model.fac_list[0],
1114 			 kvm->arch.model.fac_list[1],
1115 			 kvm->arch.model.fac_list[2]);
1116 	} else
1117 		ret = -EFAULT;
1118 	kfree(proc);
1119 out:
1120 	mutex_unlock(&kvm->lock);
1121 	return ret;
1122 }
1123 
1124 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1125 				       struct kvm_device_attr *attr)
1126 {
1127 	struct kvm_s390_vm_cpu_feat data;
1128 
1129 	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1130 		return -EFAULT;
1131 	if (!bitmap_subset((unsigned long *) data.feat,
1132 			   kvm_s390_available_cpu_feat,
1133 			   KVM_S390_VM_CPU_FEAT_NR_BITS))
1134 		return -EINVAL;
1135 
1136 	mutex_lock(&kvm->lock);
1137 	if (kvm->created_vcpus) {
1138 		mutex_unlock(&kvm->lock);
1139 		return -EBUSY;
1140 	}
1141 	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1142 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1143 	mutex_unlock(&kvm->lock);
1144 	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1145 			 data.feat[0],
1146 			 data.feat[1],
1147 			 data.feat[2]);
1148 	return 0;
1149 }
1150 
/*
 * Placeholder for configuring the guest's CPU subfunctions.
 *
 * Not implemented: always returns -ENXIO and ignores both arguments.
 */
static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once supported by kernel + hw, we have to store the subfunctions
	 * in kvm->arch and remember that user space configured them.
	 */
	return -ENXIO;
}
1160 
1161 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1162 {
1163 	int ret = -ENXIO;
1164 
1165 	switch (attr->attr) {
1166 	case KVM_S390_VM_CPU_PROCESSOR:
1167 		ret = kvm_s390_set_processor(kvm, attr);
1168 		break;
1169 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1170 		ret = kvm_s390_set_processor_feat(kvm, attr);
1171 		break;
1172 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1173 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
1174 		break;
1175 	}
1176 	return ret;
1177 }
1178 
1179 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1180 {
1181 	struct kvm_s390_vm_cpu_processor *proc;
1182 	int ret = 0;
1183 
1184 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1185 	if (!proc) {
1186 		ret = -ENOMEM;
1187 		goto out;
1188 	}
1189 	proc->cpuid = kvm->arch.model.cpuid;
1190 	proc->ibc = kvm->arch.model.ibc;
1191 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1192 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1193 	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1194 		 kvm->arch.model.ibc,
1195 		 kvm->arch.model.cpuid);
1196 	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1197 		 kvm->arch.model.fac_list[0],
1198 		 kvm->arch.model.fac_list[1],
1199 		 kvm->arch.model.fac_list[2]);
1200 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1201 		ret = -EFAULT;
1202 	kfree(proc);
1203 out:
1204 	return ret;
1205 }
1206 
1207 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1208 {
1209 	struct kvm_s390_vm_cpu_machine *mach;
1210 	int ret = 0;
1211 
1212 	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1213 	if (!mach) {
1214 		ret = -ENOMEM;
1215 		goto out;
1216 	}
1217 	get_cpu_id((struct cpuid *) &mach->cpuid);
1218 	mach->ibc = sclp.ibc;
1219 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1220 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1221 	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1222 	       sizeof(S390_lowcore.stfle_fac_list));
1223 	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1224 		 kvm->arch.model.ibc,
1225 		 kvm->arch.model.cpuid);
1226 	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1227 		 mach->fac_mask[0],
1228 		 mach->fac_mask[1],
1229 		 mach->fac_mask[2]);
1230 	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1231 		 mach->fac_list[0],
1232 		 mach->fac_list[1],
1233 		 mach->fac_list[2]);
1234 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1235 		ret = -EFAULT;
1236 	kfree(mach);
1237 out:
1238 	return ret;
1239 }
1240 
1241 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1242 				       struct kvm_device_attr *attr)
1243 {
1244 	struct kvm_s390_vm_cpu_feat data;
1245 
1246 	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1247 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1248 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1249 		return -EFAULT;
1250 	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1251 			 data.feat[0],
1252 			 data.feat[1],
1253 			 data.feat[2]);
1254 	return 0;
1255 }
1256 
1257 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1258 				     struct kvm_device_attr *attr)
1259 {
1260 	struct kvm_s390_vm_cpu_feat data;
1261 
1262 	bitmap_copy((unsigned long *) data.feat,
1263 		    kvm_s390_available_cpu_feat,
1264 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1265 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1266 		return -EFAULT;
1267 	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1268 			 data.feat[0],
1269 			 data.feat[1],
1270 			 data.feat[2]);
1271 	return 0;
1272 }
1273 
/*
 * Placeholder for querying the guest's configured CPU subfunctions.
 *
 * Not implemented: always returns -ENXIO and ignores both arguments.
 */
static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once we can actually configure subfunctions (kernel + hw support),
	 * we have to check if they were already set by user space, if so copy
	 * them from kvm->arch.
	 */
	return -ENXIO;
}
1284 
1285 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1286 					struct kvm_device_attr *attr)
1287 {
1288 	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1289 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1290 		return -EFAULT;
1291 	return 0;
1292 }
1293 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1294 {
1295 	int ret = -ENXIO;
1296 
1297 	switch (attr->attr) {
1298 	case KVM_S390_VM_CPU_PROCESSOR:
1299 		ret = kvm_s390_get_processor(kvm, attr);
1300 		break;
1301 	case KVM_S390_VM_CPU_MACHINE:
1302 		ret = kvm_s390_get_machine(kvm, attr);
1303 		break;
1304 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1305 		ret = kvm_s390_get_processor_feat(kvm, attr);
1306 		break;
1307 	case KVM_S390_VM_CPU_MACHINE_FEAT:
1308 		ret = kvm_s390_get_machine_feat(kvm, attr);
1309 		break;
1310 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1311 		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1312 		break;
1313 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1314 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1315 		break;
1316 	}
1317 	return ret;
1318 }
1319 
1320 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1321 {
1322 	int ret;
1323 
1324 	switch (attr->group) {
1325 	case KVM_S390_VM_MEM_CTRL:
1326 		ret = kvm_s390_set_mem_control(kvm, attr);
1327 		break;
1328 	case KVM_S390_VM_TOD:
1329 		ret = kvm_s390_set_tod(kvm, attr);
1330 		break;
1331 	case KVM_S390_VM_CPU_MODEL:
1332 		ret = kvm_s390_set_cpu_model(kvm, attr);
1333 		break;
1334 	case KVM_S390_VM_CRYPTO:
1335 		ret = kvm_s390_vm_set_crypto(kvm, attr);
1336 		break;
1337 	case KVM_S390_VM_MIGRATION:
1338 		ret = kvm_s390_vm_set_migration(kvm, attr);
1339 		break;
1340 	default:
1341 		ret = -ENXIO;
1342 		break;
1343 	}
1344 
1345 	return ret;
1346 }
1347 
1348 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1349 {
1350 	int ret;
1351 
1352 	switch (attr->group) {
1353 	case KVM_S390_VM_MEM_CTRL:
1354 		ret = kvm_s390_get_mem_control(kvm, attr);
1355 		break;
1356 	case KVM_S390_VM_TOD:
1357 		ret = kvm_s390_get_tod(kvm, attr);
1358 		break;
1359 	case KVM_S390_VM_CPU_MODEL:
1360 		ret = kvm_s390_get_cpu_model(kvm, attr);
1361 		break;
1362 	case KVM_S390_VM_MIGRATION:
1363 		ret = kvm_s390_vm_get_migration(kvm, attr);
1364 		break;
1365 	default:
1366 		ret = -ENXIO;
1367 		break;
1368 	}
1369 
1370 	return ret;
1371 }
1372 
1373 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1374 {
1375 	int ret;
1376 
1377 	switch (attr->group) {
1378 	case KVM_S390_VM_MEM_CTRL:
1379 		switch (attr->attr) {
1380 		case KVM_S390_VM_MEM_ENABLE_CMMA:
1381 		case KVM_S390_VM_MEM_CLR_CMMA:
1382 			ret = sclp.has_cmma ? 0 : -ENXIO;
1383 			break;
1384 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1385 			ret = 0;
1386 			break;
1387 		default:
1388 			ret = -ENXIO;
1389 			break;
1390 		}
1391 		break;
1392 	case KVM_S390_VM_TOD:
1393 		switch (attr->attr) {
1394 		case KVM_S390_VM_TOD_LOW:
1395 		case KVM_S390_VM_TOD_HIGH:
1396 			ret = 0;
1397 			break;
1398 		default:
1399 			ret = -ENXIO;
1400 			break;
1401 		}
1402 		break;
1403 	case KVM_S390_VM_CPU_MODEL:
1404 		switch (attr->attr) {
1405 		case KVM_S390_VM_CPU_PROCESSOR:
1406 		case KVM_S390_VM_CPU_MACHINE:
1407 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1408 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1409 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1410 			ret = 0;
1411 			break;
1412 		/* configuring subfunctions is not supported yet */
1413 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1414 		default:
1415 			ret = -ENXIO;
1416 			break;
1417 		}
1418 		break;
1419 	case KVM_S390_VM_CRYPTO:
1420 		switch (attr->attr) {
1421 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1422 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1423 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1424 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1425 			ret = 0;
1426 			break;
1427 		default:
1428 			ret = -ENXIO;
1429 			break;
1430 		}
1431 		break;
1432 	case KVM_S390_VM_MIGRATION:
1433 		ret = 0;
1434 		break;
1435 	default:
1436 		ret = -ENXIO;
1437 		break;
1438 	}
1439 
1440 	return ret;
1441 }
1442 
1443 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1444 {
1445 	uint8_t *keys;
1446 	uint64_t hva;
1447 	int srcu_idx, i, r = 0;
1448 
1449 	if (args->flags != 0)
1450 		return -EINVAL;
1451 
1452 	/* Is this guest using storage keys? */
1453 	if (!mm_use_skey(current->mm))
1454 		return KVM_S390_GET_SKEYS_NONE;
1455 
1456 	/* Enforce sane limit on memory allocation */
1457 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1458 		return -EINVAL;
1459 
1460 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1461 	if (!keys)
1462 		return -ENOMEM;
1463 
1464 	down_read(&current->mm->mmap_sem);
1465 	srcu_idx = srcu_read_lock(&kvm->srcu);
1466 	for (i = 0; i < args->count; i++) {
1467 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1468 		if (kvm_is_error_hva(hva)) {
1469 			r = -EFAULT;
1470 			break;
1471 		}
1472 
1473 		r = get_guest_storage_key(current->mm, hva, &keys[i]);
1474 		if (r)
1475 			break;
1476 	}
1477 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1478 	up_read(&current->mm->mmap_sem);
1479 
1480 	if (!r) {
1481 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1482 				 sizeof(uint8_t) * args->count);
1483 		if (r)
1484 			r = -EFAULT;
1485 	}
1486 
1487 	kvfree(keys);
1488 	return r;
1489 }
1490 
1491 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1492 {
1493 	uint8_t *keys;
1494 	uint64_t hva;
1495 	int srcu_idx, i, r = 0;
1496 
1497 	if (args->flags != 0)
1498 		return -EINVAL;
1499 
1500 	/* Enforce sane limit on memory allocation */
1501 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1502 		return -EINVAL;
1503 
1504 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1505 	if (!keys)
1506 		return -ENOMEM;
1507 
1508 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1509 			   sizeof(uint8_t) * args->count);
1510 	if (r) {
1511 		r = -EFAULT;
1512 		goto out;
1513 	}
1514 
1515 	/* Enable storage key handling for the guest */
1516 	r = s390_enable_skey();
1517 	if (r)
1518 		goto out;
1519 
1520 	down_read(&current->mm->mmap_sem);
1521 	srcu_idx = srcu_read_lock(&kvm->srcu);
1522 	for (i = 0; i < args->count; i++) {
1523 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1524 		if (kvm_is_error_hva(hva)) {
1525 			r = -EFAULT;
1526 			break;
1527 		}
1528 
1529 		/* Lowest order bit is reserved */
1530 		if (keys[i] & 0x01) {
1531 			r = -EINVAL;
1532 			break;
1533 		}
1534 
1535 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1536 		if (r)
1537 			break;
1538 	}
1539 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1540 	up_read(&current->mm->mmap_sem);
1541 out:
1542 	kvfree(keys);
1543 	return r;
1544 }
1545 
/*
 * Base address and length must be sent at the start of each block, therefore
 * it's cheaper to send some clean data, as long as it's less than the size of
 * two longs.
 * NOTE(review): the value is derived from sizeof(void *), which presumably
 * equals sizeof(long) on this architecture -- confirm.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/*
 * Upper bound for the CMMA value buffers; for consistency this is the same
 * limit that the storage key ioctls apply to their buffers.
 */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1554 
1555 /*
1556  * This function searches for the next page with dirty CMMA attributes, and
1557  * saves the attributes in the buffer up to either the end of the buffer or
1558  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1559  * no trailing clean bytes are saved.
1560  * In case no dirty bits were found, or if CMMA was not enabled or used, the
1561  * output buffer will indicate 0 as length.
1562  */
1563 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1564 				  struct kvm_s390_cmma_log *args)
1565 {
1566 	struct kvm_s390_migration_state *s = kvm->arch.migration_state;
1567 	unsigned long bufsize, hva, pgstev, i, next, cur;
1568 	int srcu_idx, peek, r = 0, rr;
1569 	u8 *res;
1570 
1571 	cur = args->start_gfn;
1572 	i = next = pgstev = 0;
1573 
1574 	if (unlikely(!kvm->arch.use_cmma))
1575 		return -ENXIO;
1576 	/* Invalid/unsupported flags were specified */
1577 	if (args->flags & ~KVM_S390_CMMA_PEEK)
1578 		return -EINVAL;
1579 	/* Migration mode query, and we are not doing a migration */
1580 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1581 	if (!peek && !s)
1582 		return -EINVAL;
1583 	/* CMMA is disabled or was not used, or the buffer has length zero */
1584 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1585 	if (!bufsize || !kvm->mm->context.use_cmma) {
1586 		memset(args, 0, sizeof(*args));
1587 		return 0;
1588 	}
1589 
1590 	if (!peek) {
1591 		/* We are not peeking, and there are no dirty pages */
1592 		if (!atomic64_read(&s->dirty_pages)) {
1593 			memset(args, 0, sizeof(*args));
1594 			return 0;
1595 		}
1596 		cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
1597 				    args->start_gfn);
1598 		if (cur >= s->bitmap_size)	/* nothing found, loop back */
1599 			cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
1600 		if (cur >= s->bitmap_size) {	/* again! (very unlikely) */
1601 			memset(args, 0, sizeof(*args));
1602 			return 0;
1603 		}
1604 		next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
1605 	}
1606 
1607 	res = vmalloc(bufsize);
1608 	if (!res)
1609 		return -ENOMEM;
1610 
1611 	args->start_gfn = cur;
1612 
1613 	down_read(&kvm->mm->mmap_sem);
1614 	srcu_idx = srcu_read_lock(&kvm->srcu);
1615 	while (i < bufsize) {
1616 		hva = gfn_to_hva(kvm, cur);
1617 		if (kvm_is_error_hva(hva)) {
1618 			r = -EFAULT;
1619 			break;
1620 		}
1621 		/* decrement only if we actually flipped the bit to 0 */
1622 		if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
1623 			atomic64_dec(&s->dirty_pages);
1624 		r = get_pgste(kvm->mm, hva, &pgstev);
1625 		if (r < 0)
1626 			pgstev = 0;
1627 		/* save the value */
1628 		res[i++] = (pgstev >> 24) & 0x43;
1629 		/*
1630 		 * if the next bit is too far away, stop.
1631 		 * if we reached the previous "next", find the next one
1632 		 */
1633 		if (!peek) {
1634 			if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
1635 				break;
1636 			if (cur == next)
1637 				next = find_next_bit(s->pgste_bitmap,
1638 						     s->bitmap_size, cur + 1);
1639 		/* reached the end of the bitmap or of the buffer, stop */
1640 			if ((next >= s->bitmap_size) ||
1641 			    (next >= args->start_gfn + bufsize))
1642 				break;
1643 		}
1644 		cur++;
1645 	}
1646 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1647 	up_read(&kvm->mm->mmap_sem);
1648 	args->count = i;
1649 	args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
1650 
1651 	rr = copy_to_user((void __user *)args->values, res, args->count);
1652 	if (rr)
1653 		r = -EFAULT;
1654 
1655 	vfree(res);
1656 	return r;
1657 }
1658 
1659 /*
1660  * This function sets the CMMA attributes for the given pages. If the input
1661  * buffer has zero length, no action is taken, otherwise the attributes are
1662  * set and the mm->context.use_cmma flag is set.
1663  */
1664 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1665 				  const struct kvm_s390_cmma_log *args)
1666 {
1667 	unsigned long hva, mask, pgstev, i;
1668 	uint8_t *bits;
1669 	int srcu_idx, r = 0;
1670 
1671 	mask = args->mask;
1672 
1673 	if (!kvm->arch.use_cmma)
1674 		return -ENXIO;
1675 	/* invalid/unsupported flags */
1676 	if (args->flags != 0)
1677 		return -EINVAL;
1678 	/* Enforce sane limit on memory allocation */
1679 	if (args->count > KVM_S390_CMMA_SIZE_MAX)
1680 		return -EINVAL;
1681 	/* Nothing to do */
1682 	if (args->count == 0)
1683 		return 0;
1684 
1685 	bits = vmalloc(sizeof(*bits) * args->count);
1686 	if (!bits)
1687 		return -ENOMEM;
1688 
1689 	r = copy_from_user(bits, (void __user *)args->values, args->count);
1690 	if (r) {
1691 		r = -EFAULT;
1692 		goto out;
1693 	}
1694 
1695 	down_read(&kvm->mm->mmap_sem);
1696 	srcu_idx = srcu_read_lock(&kvm->srcu);
1697 	for (i = 0; i < args->count; i++) {
1698 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1699 		if (kvm_is_error_hva(hva)) {
1700 			r = -EFAULT;
1701 			break;
1702 		}
1703 
1704 		pgstev = bits[i];
1705 		pgstev = pgstev << 24;
1706 		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
1707 		set_pgste_bits(kvm->mm, hva, mask, pgstev);
1708 	}
1709 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1710 	up_read(&kvm->mm->mmap_sem);
1711 
1712 	if (!kvm->mm->context.use_cmma) {
1713 		down_write(&kvm->mm->mmap_sem);
1714 		kvm->mm->context.use_cmma = 1;
1715 		up_write(&kvm->mm->mmap_sem);
1716 	}
1717 out:
1718 	vfree(bits);
1719 	return r;
1720 }
1721 
1722 long kvm_arch_vm_ioctl(struct file *filp,
1723 		       unsigned int ioctl, unsigned long arg)
1724 {
1725 	struct kvm *kvm = filp->private_data;
1726 	void __user *argp = (void __user *)arg;
1727 	struct kvm_device_attr attr;
1728 	int r;
1729 
1730 	switch (ioctl) {
1731 	case KVM_S390_INTERRUPT: {
1732 		struct kvm_s390_interrupt s390int;
1733 
1734 		r = -EFAULT;
1735 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
1736 			break;
1737 		r = kvm_s390_inject_vm(kvm, &s390int);
1738 		break;
1739 	}
1740 	case KVM_ENABLE_CAP: {
1741 		struct kvm_enable_cap cap;
1742 		r = -EFAULT;
1743 		if (copy_from_user(&cap, argp, sizeof(cap)))
1744 			break;
1745 		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1746 		break;
1747 	}
1748 	case KVM_CREATE_IRQCHIP: {
1749 		struct kvm_irq_routing_entry routing;
1750 
1751 		r = -EINVAL;
1752 		if (kvm->arch.use_irqchip) {
1753 			/* Set up dummy routing. */
1754 			memset(&routing, 0, sizeof(routing));
1755 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1756 		}
1757 		break;
1758 	}
1759 	case KVM_SET_DEVICE_ATTR: {
1760 		r = -EFAULT;
1761 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1762 			break;
1763 		r = kvm_s390_vm_set_attr(kvm, &attr);
1764 		break;
1765 	}
1766 	case KVM_GET_DEVICE_ATTR: {
1767 		r = -EFAULT;
1768 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1769 			break;
1770 		r = kvm_s390_vm_get_attr(kvm, &attr);
1771 		break;
1772 	}
1773 	case KVM_HAS_DEVICE_ATTR: {
1774 		r = -EFAULT;
1775 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1776 			break;
1777 		r = kvm_s390_vm_has_attr(kvm, &attr);
1778 		break;
1779 	}
1780 	case KVM_S390_GET_SKEYS: {
1781 		struct kvm_s390_skeys args;
1782 
1783 		r = -EFAULT;
1784 		if (copy_from_user(&args, argp,
1785 				   sizeof(struct kvm_s390_skeys)))
1786 			break;
1787 		r = kvm_s390_get_skeys(kvm, &args);
1788 		break;
1789 	}
1790 	case KVM_S390_SET_SKEYS: {
1791 		struct kvm_s390_skeys args;
1792 
1793 		r = -EFAULT;
1794 		if (copy_from_user(&args, argp,
1795 				   sizeof(struct kvm_s390_skeys)))
1796 			break;
1797 		r = kvm_s390_set_skeys(kvm, &args);
1798 		break;
1799 	}
1800 	case KVM_S390_GET_CMMA_BITS: {
1801 		struct kvm_s390_cmma_log args;
1802 
1803 		r = -EFAULT;
1804 		if (copy_from_user(&args, argp, sizeof(args)))
1805 			break;
1806 		mutex_lock(&kvm->slots_lock);
1807 		r = kvm_s390_get_cmma_bits(kvm, &args);
1808 		mutex_unlock(&kvm->slots_lock);
1809 		if (!r) {
1810 			r = copy_to_user(argp, &args, sizeof(args));
1811 			if (r)
1812 				r = -EFAULT;
1813 		}
1814 		break;
1815 	}
1816 	case KVM_S390_SET_CMMA_BITS: {
1817 		struct kvm_s390_cmma_log args;
1818 
1819 		r = -EFAULT;
1820 		if (copy_from_user(&args, argp, sizeof(args)))
1821 			break;
1822 		mutex_lock(&kvm->slots_lock);
1823 		r = kvm_s390_set_cmma_bits(kvm, &args);
1824 		mutex_unlock(&kvm->slots_lock);
1825 		break;
1826 	}
1827 	default:
1828 		r = -ENOTTY;
1829 	}
1830 
1831 	return r;
1832 }
1833 
/*
 * Issue the PQAP(QCI) instruction to query the AP configuration.
 *
 * @config: buffer of at least 128 bytes; it is zeroed before the
 *	    instruction is issued and passed to it in register 2.
 *
 * Returns the condition code extracted after the instruction.
 */
static int kvm_s390_query_ap_config(u8 *config)
{
	/* function code, loaded into register 0 for the instruction */
	u32 fcn_code = 0x04000000UL;
	u32 cc = 0;

	memset(config, 0, 128);
	/*
	 * The instruction is emitted as a raw opcode. The exception table
	 * entry maps label 0 to label 1, i.e. a fault reported at label 0
	 * skips the condition code extraction so that cc keeps its initial
	 * value of 0.
	 * NOTE(review): presumably this covers machines on which the
	 * instruction raises a program exception -- confirm.
	 */
	asm volatile(
		"lgr 0,%1\n"
		"lgr 2,%2\n"
		".long 0xb2af0000\n"		/* PQAP(QCI) */
		"0: ipm %0\n"
		"srl %0,28\n"
		"1:\n"
		EX_TABLE(0b, 1b)
		: "+r" (cc)
		: "r" (fcn_code), "r" (config)
		: "cc", "0", "2", "memory"
	);

	return cc;
}
1855 
1856 static int kvm_s390_apxa_installed(void)
1857 {
1858 	u8 config[128];
1859 	int cc;
1860 
1861 	if (test_facility(12)) {
1862 		cc = kvm_s390_query_ap_config(config);
1863 
1864 		if (cc)
1865 			pr_err("PQAP(QCI) failed with cc=%d", cc);
1866 		else
1867 			return config[0] & 0x40;
1868 	}
1869 
1870 	return 0;
1871 }
1872 
1873 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1874 {
1875 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1876 
1877 	if (kvm_s390_apxa_installed())
1878 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1879 	else
1880 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1881 }
1882 
1883 static u64 kvm_s390_get_initial_cpuid(void)
1884 {
1885 	struct cpuid cpuid;
1886 
1887 	get_cpu_id(&cpuid);
1888 	cpuid.version = 0xff;
1889 	return *((u64 *) &cpuid);
1890 }
1891 
1892 static void kvm_s390_crypto_init(struct kvm *kvm)
1893 {
1894 	if (!test_kvm_facility(kvm, 76))
1895 		return;
1896 
1897 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1898 	kvm_s390_set_crycb_format(kvm);
1899 
1900 	/* Enable AES/DEA protected key functions by default */
1901 	kvm->arch.crypto.aes_kw = 1;
1902 	kvm->arch.crypto.dea_kw = 1;
1903 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1904 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1905 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1906 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1907 }
1908 
1909 static void sca_dispose(struct kvm *kvm)
1910 {
1911 	if (kvm->arch.use_esca)
1912 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1913 	else
1914 		free_page((unsigned long)(kvm->arch.sca));
1915 	kvm->arch.sca = NULL;
1916 }
1917 
/*
 * Architecture specific part of VM creation.
 *
 * Validates the VM type, enables SIE for the current mm, allocates the
 * basic SCA, the debug feature area and sie_page2, initializes the CPU
 * model (facility mask/list, cpuid, ibc), crypto and floating interrupt
 * state and, unless this is a user controlled VM, creates the guest address
 * space (gmap).
 *
 * Returns 0 on success, -EINVAL for an unsupported type (or a user
 * controlled VM requested without CAP_SYS_ADMIN), the error from
 * s390_enable_sie(), or -ENOMEM if one of the allocations fails. On
 * failure everything allocated so far is released again.
 */
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	gfp_t alloc_flags = GFP_KERNEL;
	int i, rc;
	char debug_name[16];
	/* shared by all VMs; modified under kvm_lock below */
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	/* every failure from here on is reported as -ENOMEM */
	rc = -ENOMEM;

	kvm->arch.use_esca = 0; /* start with basic SCA */
	if (!sclp.has_64bscao)
		alloc_flags |= GFP_DMA;
	rwlock_init(&kvm->arch.sca_lock);
	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
	if (!kvm->arch.sca)
		goto out_err;
	/*
	 * Place the SCA at an offset within its page that advances by 16
	 * bytes for every new VM and wraps to 0 once the block would no
	 * longer fit into the page.
	 */
	spin_lock(&kvm_lock);
	sca_offset += 16;
	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
		sca_offset = 0;
	kvm->arch.sca = (struct bsca_block *)
			((char *) kvm->arch.sca + sca_offset);
	spin_unlock(&kvm_lock);

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_err;

	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
	kvm->arch.sie_page2 =
	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.sie_page2)
		goto out_err;

	/* Populate the facility mask initially. */
	memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
		if (i < kvm_s390_fac_list_mask_size())
			kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
		else
			kvm->arch.model.fac_mask[i] = 0UL;
	}

	/* Populate the facility list initially. */
	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
	memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);

	/* we are always in czam mode - even on pre z14 machines */
	set_kvm_facility(kvm->arch.model.fac_mask, 138);
	set_kvm_facility(kvm->arch.model.fac_list, 138);
	/* we emulate STHYI in kvm */
	set_kvm_facility(kvm->arch.model.fac_mask, 74);
	set_kvm_facility(kvm->arch.model.fac_list, 74);
	if (MACHINE_HAS_TLB_GUEST) {
		set_kvm_facility(kvm->arch.model.fac_mask, 147);
		set_kvm_facility(kvm->arch.model.fac_list, 147);
	}

	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
	kvm->arch.model.ibc = sclp.ibc & 0x0fff;

	kvm_s390_crypto_init(kvm);

	mutex_init(&kvm->arch.float_int.ais_lock);
	kvm->arch.float_int.simm = 0;
	kvm->arch.float_int.nimm = 0;
	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "vm created with type %lu", type);

	if (type & KVM_VM_S390_UCONTROL) {
		/* user controlled VMs get no VM-wide gmap and no memory limit */
		kvm->arch.gmap = NULL;
		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
	} else {
		/* limit guest memory to what sclp.hamax and the task size allow */
		if (sclp.hamax == U64_MAX)
			kvm->arch.mem_limit = TASK_SIZE_MAX;
		else
			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
						    sclp.hamax + 1);
		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
		if (!kvm->arch.gmap)
			goto out_err;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.css_support = 0;
	kvm->arch.use_irqchip = 0;
	kvm->arch.epoch = 0;

	spin_lock_init(&kvm->arch.start_stop_lock);
	kvm_s390_vsie_init(kvm);
	kvm_s390_gisa_init(kvm);
	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);

	return 0;
out_err:
	/*
	 * Common error exit for all failure points above.
	 * NOTE(review): this relies on the members that were not set up yet
	 * still being zero/NULL -- confirm against the allocation of
	 * struct kvm.
	 */
	free_page((unsigned long)kvm->arch.sie_page2);
	debug_unregister(kvm->arch.dbf);
	sca_dispose(kvm);
	KVM_EVENT(3, "creation of vm failed: %d", rc);
	return rc;
}
2045 
/* Always returns false: no architecture specific vcpu debugfs entries. */
bool kvm_arch_has_vcpu_debugfs(void)
{
	return false;
}
2050 
/* Nothing to create for a vcpu; always returns 0. */
int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
	return 0;
}
2055 
2056 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2057 {
2058 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2059 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2060 	kvm_s390_clear_local_irqs(vcpu);
2061 	kvm_clear_async_pf_completion_queue(vcpu);
2062 	if (!kvm_is_ucontrol(vcpu->kvm))
2063 		sca_del_vcpu(vcpu);
2064 
2065 	if (kvm_is_ucontrol(vcpu->kvm))
2066 		gmap_remove(vcpu->arch.gmap);
2067 
2068 	if (vcpu->kvm->arch.use_cmma)
2069 		kvm_s390_vcpu_unsetup_cmma(vcpu);
2070 	free_page((unsigned long)(vcpu->arch.sie_block));
2071 
2072 	kvm_vcpu_uninit(vcpu);
2073 	kmem_cache_free(kvm_vcpu_cache, vcpu);
2074 }
2075 
2076 static void kvm_free_vcpus(struct kvm *kvm)
2077 {
2078 	unsigned int i;
2079 	struct kvm_vcpu *vcpu;
2080 
2081 	kvm_for_each_vcpu(i, vcpu, kvm)
2082 		kvm_arch_vcpu_destroy(vcpu);
2083 
2084 	mutex_lock(&kvm->lock);
2085 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2086 		kvm->vcpus[i] = NULL;
2087 
2088 	atomic_set(&kvm->online_vcpus, 0);
2089 	mutex_unlock(&kvm->lock);
2090 }
2091 
2092 void kvm_arch_destroy_vm(struct kvm *kvm)
2093 {
2094 	kvm_free_vcpus(kvm);
2095 	sca_dispose(kvm);
2096 	debug_unregister(kvm->arch.dbf);
2097 	kvm_s390_gisa_destroy(kvm);
2098 	free_page((unsigned long)kvm->arch.sie_page2);
2099 	if (!kvm_is_ucontrol(kvm))
2100 		gmap_remove(kvm->arch.gmap);
2101 	kvm_s390_destroy_adapters(kvm);
2102 	kvm_s390_clear_float_irqs(kvm);
2103 	kvm_s390_vsie_destroy(kvm);
2104 	if (kvm->arch.migration_state) {
2105 		vfree(kvm->arch.migration_state->pgste_bitmap);
2106 		kfree(kvm->arch.migration_state);
2107 	}
2108 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2109 }
2110 
2111 /* Section: vcpu related */
2112 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2113 {
2114 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2115 	if (!vcpu->arch.gmap)
2116 		return -ENOMEM;
2117 	vcpu->arch.gmap->private = vcpu->kvm;
2118 
2119 	return 0;
2120 }
2121 
2122 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2123 {
2124 	if (!kvm_s390_use_sca_entries())
2125 		return;
2126 	read_lock(&vcpu->kvm->arch.sca_lock);
2127 	if (vcpu->kvm->arch.use_esca) {
2128 		struct esca_block *sca = vcpu->kvm->arch.sca;
2129 
2130 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2131 		sca->cpu[vcpu->vcpu_id].sda = 0;
2132 	} else {
2133 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2134 
2135 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2136 		sca->cpu[vcpu->vcpu_id].sda = 0;
2137 	}
2138 	read_unlock(&vcpu->kvm->arch.sca_lock);
2139 }
2140 
2141 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2142 {
2143 	if (!kvm_s390_use_sca_entries()) {
2144 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2145 
2146 		/* we still need the basic sca for the ipte control */
2147 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2148 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2149 	}
2150 	read_lock(&vcpu->kvm->arch.sca_lock);
2151 	if (vcpu->kvm->arch.use_esca) {
2152 		struct esca_block *sca = vcpu->kvm->arch.sca;
2153 
2154 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2155 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2156 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2157 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2158 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2159 	} else {
2160 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2161 
2162 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2163 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2164 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2165 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2166 	}
2167 	read_unlock(&vcpu->kvm->arch.sca_lock);
2168 }
2169 
2170 /* Basic SCA to Extended SCA data copy routines */
/*
 * Copy one basic SCA cpu entry @s into the extended SCA entry @d: the sda
 * field and the c and scn parts of the SIGP control field.
 */
static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
{
	d->sda = s->sda;
	d->sigp_ctrl.c = s->sigp_ctrl.c;
	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
}
2177 
2178 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2179 {
2180 	int i;
2181 
2182 	d->ipte_control = s->ipte_control;
2183 	d->mcn[0] = s->mcn;
2184 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2185 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2186 }
2187 
2188 static int sca_switch_to_extended(struct kvm *kvm)
2189 {
2190 	struct bsca_block *old_sca = kvm->arch.sca;
2191 	struct esca_block *new_sca;
2192 	struct kvm_vcpu *vcpu;
2193 	unsigned int vcpu_idx;
2194 	u32 scaol, scaoh;
2195 
2196 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2197 	if (!new_sca)
2198 		return -ENOMEM;
2199 
2200 	scaoh = (u32)((u64)(new_sca) >> 32);
2201 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
2202 
2203 	kvm_s390_vcpu_block_all(kvm);
2204 	write_lock(&kvm->arch.sca_lock);
2205 
2206 	sca_copy_b_to_e(new_sca, old_sca);
2207 
2208 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2209 		vcpu->arch.sie_block->scaoh = scaoh;
2210 		vcpu->arch.sie_block->scaol = scaol;
2211 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2212 	}
2213 	kvm->arch.sca = new_sca;
2214 	kvm->arch.use_esca = 1;
2215 
2216 	write_unlock(&kvm->arch.sca_lock);
2217 	kvm_s390_vcpu_unblock_all(kvm);
2218 
2219 	free_page((unsigned long)old_sca);
2220 
2221 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2222 		 old_sca, kvm->arch.sca);
2223 	return 0;
2224 }
2225 
2226 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2227 {
2228 	int rc;
2229 
2230 	if (!kvm_s390_use_sca_entries()) {
2231 		if (id < KVM_MAX_VCPUS)
2232 			return true;
2233 		return false;
2234 	}
2235 	if (id < KVM_S390_BSCA_CPU_SLOTS)
2236 		return true;
2237 	if (!sclp.has_esca || !sclp.has_64bscao)
2238 		return false;
2239 
2240 	mutex_lock(&kvm->lock);
2241 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2242 	mutex_unlock(&kvm->lock);
2243 
2244 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2245 }
2246 
2247 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2248 {
2249 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2250 	kvm_clear_async_pf_completion_queue(vcpu);
2251 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2252 				    KVM_SYNC_GPRS |
2253 				    KVM_SYNC_ACRS |
2254 				    KVM_SYNC_CRS |
2255 				    KVM_SYNC_ARCH0 |
2256 				    KVM_SYNC_PFAULT;
2257 	kvm_s390_set_prefix(vcpu, 0);
2258 	if (test_kvm_facility(vcpu->kvm, 64))
2259 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2260 	if (test_kvm_facility(vcpu->kvm, 82))
2261 		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2262 	if (test_kvm_facility(vcpu->kvm, 133))
2263 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2264 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
2265 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2266 	 */
2267 	if (MACHINE_HAS_VX)
2268 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2269 	else
2270 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2271 
2272 	if (kvm_is_ucontrol(vcpu->kvm))
2273 		return __kvm_ucontrol_vcpu_init(vcpu);
2274 
2275 	return 0;
2276 }
2277 
2278 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2279 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2280 {
2281 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2282 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2283 	vcpu->arch.cputm_start = get_tod_clock_fast();
2284 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2285 }
2286 
2287 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2288 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2289 {
2290 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2291 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2292 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2293 	vcpu->arch.cputm_start = 0;
2294 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2295 }
2296 
/*
 * Mark cpu timer accounting as enabled for @vcpu and start an accounting
 * period. Warns once if accounting was already enabled.
 *
 * Needs disabled preemption to protect from TOD sync and vcpu_load/put.
 */
static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
	vcpu->arch.cputm_enabled = true;
	__start_cpu_timer_accounting(vcpu);
}
2304 
/*
 * Stop the running accounting period and mark cpu timer accounting as
 * disabled for @vcpu. Warns once if accounting was not enabled.
 *
 * Needs disabled preemption to protect from TOD sync and vcpu_load/put.
 */
static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
	__stop_cpu_timer_accounting(vcpu);
	vcpu->arch.cputm_enabled = false;
}
2312 
/* Preemption-safe wrapper around __enable_cpu_timer_accounting(). */
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
2319 
/* Preemption-safe wrapper around __disable_cpu_timer_accounting(). */
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
2326 
2327 /* set the cpu timer - may only be called from the VCPU thread itself */
2328 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2329 {
2330 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2331 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2332 	if (vcpu->arch.cputm_enabled)
2333 		vcpu->arch.cputm_start = get_tod_clock_fast();
2334 	vcpu->arch.sie_block->cputm = cputm;
2335 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2336 	preempt_enable();
2337 }
2338 
/*
 * Update and get the cpu timer - can also be called from other VCPU threads.
 *
 * With accounting disabled the value stored in the SIE block is returned
 * as is. Otherwise the stored value, reduced by the TOD time elapsed since
 * cputm_start, is computed in a cputm_seqcount read loop that is repeated
 * until read_seqcount_retry() reports a consistent snapshot.
 */
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
{
	unsigned int seq;
	__u64 value;

	if (unlikely(!vcpu->arch.cputm_enabled))
		return vcpu->arch.sie_block->cputm;

	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	do {
		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
		/*
		 * If the writer would ever execute a read in the critical
		 * section, e.g. in irq context, we have a deadlock.
		 */
		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
		value = vcpu->arch.sie_block->cputm;
		/* if cputm_start is 0, accounting is being started/stopped */
		if (likely(vcpu->arch.cputm_start))
			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
		/* the retry check is done against seq with its lowest bit cleared */
	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
	preempt_enable();
	return value;
}
2364 
/*
 * Called when @vcpu is loaded onto host cpu @cpu: enable the gmap
 * remembered in enabled_gmap, flag the vcpu as RUNNING, resume cpu timer
 * accounting if it is enabled and the vcpu is not idle, and record @cpu.
 */
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{

	gmap_enable(vcpu->arch.enabled_gmap);
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__start_cpu_timer_accounting(vcpu);
	vcpu->cpu = cpu;
}
2374 
/*
 * Counterpart of kvm_arch_vcpu_load(): forget the host cpu, pause cpu
 * timer accounting if it is enabled and the vcpu is not idle, clear the
 * RUNNING flag, and remember the currently enabled gmap in enabled_gmap
 * before disabling it.
 */
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	vcpu->cpu = -1;
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__stop_cpu_timer_accounting(vcpu);
	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
	vcpu->arch.enabled_gmap = gmap_get_enabled();
	gmap_disable(vcpu->arch.enabled_gmap);

}
2385 
2386 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2387 {
2388 	/* this equals initial cpu reset in pop, but we don't switch to ESA */
2389 	vcpu->arch.sie_block->gpsw.mask = 0UL;
2390 	vcpu->arch.sie_block->gpsw.addr = 0UL;
2391 	kvm_s390_set_prefix(vcpu, 0);
2392 	kvm_s390_set_cpu_timer(vcpu, 0);
2393 	vcpu->arch.sie_block->ckc       = 0UL;
2394 	vcpu->arch.sie_block->todpr     = 0;
2395 	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2396 	vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
2397 	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2398 	/* make sure the new fpc will be lazily loaded */
2399 	save_fpu_regs();
2400 	current->thread.fpu.fpc = 0;
2401 	vcpu->arch.sie_block->gbea = 1;
2402 	vcpu->arch.sie_block->pp = 0;
2403 	vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2404 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2405 	kvm_clear_async_pf_completion_queue(vcpu);
2406 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2407 		kvm_s390_vcpu_stop(vcpu);
2408 	kvm_s390_clear_local_irqs(vcpu);
2409 }
2410 
2411 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2412 {
2413 	mutex_lock(&vcpu->kvm->lock);
2414 	preempt_disable();
2415 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2416 	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2417 	preempt_enable();
2418 	mutex_unlock(&vcpu->kvm->lock);
2419 	if (!kvm_is_ucontrol(vcpu->kvm)) {
2420 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2421 		sca_add_vcpu(vcpu);
2422 	}
2423 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2424 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2425 	/* make vcpu_load load the right gmap on the first trigger */
2426 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2427 }
2428 
2429 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2430 {
2431 	if (!test_kvm_facility(vcpu->kvm, 76))
2432 		return;
2433 
2434 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2435 
2436 	if (vcpu->kvm->arch.crypto.aes_kw)
2437 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2438 	if (vcpu->kvm->arch.crypto.dea_kw)
2439 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2440 
2441 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2442 }
2443 
2444 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2445 {
2446 	free_page(vcpu->arch.sie_block->cbrlo);
2447 	vcpu->arch.sie_block->cbrlo = 0;
2448 }
2449 
2450 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2451 {
2452 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2453 	if (!vcpu->arch.sie_block->cbrlo)
2454 		return -ENOMEM;
2455 
2456 	vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
2457 	return 0;
2458 }
2459 
2460 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2461 {
2462 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2463 
2464 	vcpu->arch.sie_block->ibc = model->ibc;
2465 	if (test_kvm_facility(vcpu->kvm, 7))
2466 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2467 }
2468 
2469 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2470 {
2471 	int rc = 0;
2472 
2473 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2474 						    CPUSTAT_SM |
2475 						    CPUSTAT_STOPPED);
2476 
2477 	if (test_kvm_facility(vcpu->kvm, 78))
2478 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2479 	else if (test_kvm_facility(vcpu->kvm, 8))
2480 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2481 
2482 	kvm_s390_vcpu_setup_model(vcpu);
2483 
2484 	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2485 	if (MACHINE_HAS_ESOP)
2486 		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2487 	if (test_kvm_facility(vcpu->kvm, 9))
2488 		vcpu->arch.sie_block->ecb |= ECB_SRSI;
2489 	if (test_kvm_facility(vcpu->kvm, 73))
2490 		vcpu->arch.sie_block->ecb |= ECB_TE;
2491 
2492 	if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
2493 		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2494 	if (test_kvm_facility(vcpu->kvm, 130))
2495 		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2496 	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2497 	if (sclp.has_cei)
2498 		vcpu->arch.sie_block->eca |= ECA_CEI;
2499 	if (sclp.has_ib)
2500 		vcpu->arch.sie_block->eca |= ECA_IB;
2501 	if (sclp.has_siif)
2502 		vcpu->arch.sie_block->eca |= ECA_SII;
2503 	if (sclp.has_sigpif)
2504 		vcpu->arch.sie_block->eca |= ECA_SIGPI;
2505 	if (test_kvm_facility(vcpu->kvm, 129)) {
2506 		vcpu->arch.sie_block->eca |= ECA_VX;
2507 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2508 	}
2509 	if (test_kvm_facility(vcpu->kvm, 139))
2510 		vcpu->arch.sie_block->ecd |= ECD_MEF;
2511 
2512 	if (vcpu->arch.sie_block->gd) {
2513 		vcpu->arch.sie_block->eca |= ECA_AIV;
2514 		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
2515 			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
2516 	}
2517 	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2518 					| SDNXC;
2519 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2520 
2521 	if (sclp.has_kss)
2522 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
2523 	else
2524 		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2525 
2526 	if (vcpu->kvm->arch.use_cmma) {
2527 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
2528 		if (rc)
2529 			return rc;
2530 	}
2531 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2532 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2533 
2534 	kvm_s390_vcpu_crypto_setup(vcpu);
2535 
2536 	return rc;
2537 }
2538 
2539 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2540 				      unsigned int id)
2541 {
2542 	struct kvm_vcpu *vcpu;
2543 	struct sie_page *sie_page;
2544 	int rc = -EINVAL;
2545 
2546 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2547 		goto out;
2548 
2549 	rc = -ENOMEM;
2550 
2551 	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2552 	if (!vcpu)
2553 		goto out;
2554 
2555 	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2556 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2557 	if (!sie_page)
2558 		goto out_free_cpu;
2559 
2560 	vcpu->arch.sie_block = &sie_page->sie_block;
2561 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2562 
2563 	/* the real guest size will always be smaller than msl */
2564 	vcpu->arch.sie_block->mso = 0;
2565 	vcpu->arch.sie_block->msl = sclp.hamax;
2566 
2567 	vcpu->arch.sie_block->icpua = id;
2568 	spin_lock_init(&vcpu->arch.local_int.lock);
2569 	vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa;
2570 	if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
2571 		vcpu->arch.sie_block->gd |= GISA_FORMAT1;
2572 	seqcount_init(&vcpu->arch.cputm_seqcount);
2573 
2574 	rc = kvm_vcpu_init(vcpu, kvm, id);
2575 	if (rc)
2576 		goto out_free_sie_block;
2577 	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2578 		 vcpu->arch.sie_block);
2579 	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2580 
2581 	return vcpu;
2582 out_free_sie_block:
2583 	free_page((unsigned long)(vcpu->arch.sie_block));
2584 out_free_cpu:
2585 	kmem_cache_free(kvm_vcpu_cache, vcpu);
2586 out:
2587 	return ERR_PTR(rc);
2588 }
2589 
/* Returns the result of kvm_s390_vcpu_has_irq() for @vcpu (second argument 0). */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_s390_vcpu_has_irq(vcpu, 0);
}
2594 
2595 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2596 {
2597 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2598 }
2599 
/* Set PROG_BLOCK_SIE in prog20 of @vcpu's SIE block, then kick the vcpu out of SIE. */
void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}
2605 
/* Clear PROG_BLOCK_SIE in prog20 again; counterpart of kvm_s390_vcpu_block(). */
void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}
2610 
/* Set PROG_REQUEST in prog20 of @vcpu's SIE block, then kick the vcpu out of SIE. */
static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}
2616 
/* Clear PROG_REQUEST in prog20; counterpart of kvm_s390_vcpu_request(). */
static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
}
2621 
/*
 * Kick a guest cpu out of SIE and wait until SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 *
 * The kick is done by setting CPUSTAT_STOP_INT; the wait is a busy loop
 * on the PROG_IN_SIE bit of the SIE block's prog0c field.
 */
void exit_sie(struct kvm_vcpu *vcpu)
{
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}
2632 
/*
 * Kick a guest cpu out of SIE to process a request synchronously:
 * queue request @req for @vcpu, then set PROG_REQUEST and wait for the
 * vcpu to leave SIE via kvm_s390_vcpu_request().
 */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
2639 
2640 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2641 			      unsigned long end)
2642 {
2643 	struct kvm *kvm = gmap->private;
2644 	struct kvm_vcpu *vcpu;
2645 	unsigned long prefix;
2646 	int i;
2647 
2648 	if (gmap_is_shadow(gmap))
2649 		return;
2650 	if (start >= 1UL << 31)
2651 		/* We are only interested in prefix pages */
2652 		return;
2653 	kvm_for_each_vcpu(i, vcpu, kvm) {
2654 		/* match against both prefix pages */
2655 		prefix = kvm_s390_get_prefix(vcpu);
2656 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2657 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2658 				   start, end);
2659 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2660 		}
2661 	}
2662 }
2663 
/*
 * Not expected to be called here (see the comment in the body): hitting
 * it triggers BUG().
 */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}
2670 
2671 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2672 					   struct kvm_one_reg *reg)
2673 {
2674 	int r = -EINVAL;
2675 
2676 	switch (reg->id) {
2677 	case KVM_REG_S390_TODPR:
2678 		r = put_user(vcpu->arch.sie_block->todpr,
2679 			     (u32 __user *)reg->addr);
2680 		break;
2681 	case KVM_REG_S390_EPOCHDIFF:
2682 		r = put_user(vcpu->arch.sie_block->epoch,
2683 			     (u64 __user *)reg->addr);
2684 		break;
2685 	case KVM_REG_S390_CPU_TIMER:
2686 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
2687 			     (u64 __user *)reg->addr);
2688 		break;
2689 	case KVM_REG_S390_CLOCK_COMP:
2690 		r = put_user(vcpu->arch.sie_block->ckc,
2691 			     (u64 __user *)reg->addr);
2692 		break;
2693 	case KVM_REG_S390_PFTOKEN:
2694 		r = put_user(vcpu->arch.pfault_token,
2695 			     (u64 __user *)reg->addr);
2696 		break;
2697 	case KVM_REG_S390_PFCOMPARE:
2698 		r = put_user(vcpu->arch.pfault_compare,
2699 			     (u64 __user *)reg->addr);
2700 		break;
2701 	case KVM_REG_S390_PFSELECT:
2702 		r = put_user(vcpu->arch.pfault_select,
2703 			     (u64 __user *)reg->addr);
2704 		break;
2705 	case KVM_REG_S390_PP:
2706 		r = put_user(vcpu->arch.sie_block->pp,
2707 			     (u64 __user *)reg->addr);
2708 		break;
2709 	case KVM_REG_S390_GBEA:
2710 		r = put_user(vcpu->arch.sie_block->gbea,
2711 			     (u64 __user *)reg->addr);
2712 		break;
2713 	default:
2714 		break;
2715 	}
2716 
2717 	return r;
2718 }
2719 
2720 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2721 					   struct kvm_one_reg *reg)
2722 {
2723 	int r = -EINVAL;
2724 	__u64 val;
2725 
2726 	switch (reg->id) {
2727 	case KVM_REG_S390_TODPR:
2728 		r = get_user(vcpu->arch.sie_block->todpr,
2729 			     (u32 __user *)reg->addr);
2730 		break;
2731 	case KVM_REG_S390_EPOCHDIFF:
2732 		r = get_user(vcpu->arch.sie_block->epoch,
2733 			     (u64 __user *)reg->addr);
2734 		break;
2735 	case KVM_REG_S390_CPU_TIMER:
2736 		r = get_user(val, (u64 __user *)reg->addr);
2737 		if (!r)
2738 			kvm_s390_set_cpu_timer(vcpu, val);
2739 		break;
2740 	case KVM_REG_S390_CLOCK_COMP:
2741 		r = get_user(vcpu->arch.sie_block->ckc,
2742 			     (u64 __user *)reg->addr);
2743 		break;
2744 	case KVM_REG_S390_PFTOKEN:
2745 		r = get_user(vcpu->arch.pfault_token,
2746 			     (u64 __user *)reg->addr);
2747 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2748 			kvm_clear_async_pf_completion_queue(vcpu);
2749 		break;
2750 	case KVM_REG_S390_PFCOMPARE:
2751 		r = get_user(vcpu->arch.pfault_compare,
2752 			     (u64 __user *)reg->addr);
2753 		break;
2754 	case KVM_REG_S390_PFSELECT:
2755 		r = get_user(vcpu->arch.pfault_select,
2756 			     (u64 __user *)reg->addr);
2757 		break;
2758 	case KVM_REG_S390_PP:
2759 		r = get_user(vcpu->arch.sie_block->pp,
2760 			     (u64 __user *)reg->addr);
2761 		break;
2762 	case KVM_REG_S390_GBEA:
2763 		r = get_user(vcpu->arch.sie_block->gbea,
2764 			     (u64 __user *)reg->addr);
2765 		break;
2766 	default:
2767 		break;
2768 	}
2769 
2770 	return r;
2771 }
2772 
/* Run kvm_s390_vcpu_initial_reset() on @vcpu; always returns 0. */
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);
	return 0;
}
2778 
2779 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2780 {
2781 	vcpu_load(vcpu);
2782 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2783 	vcpu_put(vcpu);
2784 	return 0;
2785 }
2786 
2787 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2788 {
2789 	vcpu_load(vcpu);
2790 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2791 	vcpu_put(vcpu);
2792 	return 0;
2793 }
2794 
2795 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2796 				  struct kvm_sregs *sregs)
2797 {
2798 	vcpu_load(vcpu);
2799 
2800 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2801 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2802 
2803 	vcpu_put(vcpu);
2804 	return 0;
2805 }
2806 
2807 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2808 				  struct kvm_sregs *sregs)
2809 {
2810 	vcpu_load(vcpu);
2811 
2812 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2813 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2814 
2815 	vcpu_put(vcpu);
2816 	return 0;
2817 }
2818 
2819 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2820 {
2821 	int ret = 0;
2822 
2823 	vcpu_load(vcpu);
2824 
2825 	if (test_fp_ctl(fpu->fpc)) {
2826 		ret = -EINVAL;
2827 		goto out;
2828 	}
2829 	vcpu->run->s.regs.fpc = fpu->fpc;
2830 	if (MACHINE_HAS_VX)
2831 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2832 				 (freg_t *) fpu->fprs);
2833 	else
2834 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2835 
2836 out:
2837 	vcpu_put(vcpu);
2838 	return ret;
2839 }
2840 
2841 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2842 {
2843 	vcpu_load(vcpu);
2844 
2845 	/* make sure we have the latest values */
2846 	save_fpu_regs();
2847 	if (MACHINE_HAS_VX)
2848 		convert_vx_to_fp((freg_t *) fpu->fprs,
2849 				 (__vector128 *) vcpu->run->s.regs.vrs);
2850 	else
2851 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2852 	fpu->fpc = vcpu->run->s.regs.fpc;
2853 
2854 	vcpu_put(vcpu);
2855 	return 0;
2856 }
2857 
2858 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2859 {
2860 	int rc = 0;
2861 
2862 	if (!is_vcpu_stopped(vcpu))
2863 		rc = -EBUSY;
2864 	else {
2865 		vcpu->run->psw_mask = psw.mask;
2866 		vcpu->run->psw_addr = psw.addr;
2867 	}
2868 	return rc;
2869 }
2870 
/* Address translation ioctl: not implemented, always fails with -EINVAL. */
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}
2876 
2877 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2878 			      KVM_GUESTDBG_USE_HW_BP | \
2879 			      KVM_GUESTDBG_ENABLE)
2880 
2881 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2882 					struct kvm_guest_debug *dbg)
2883 {
2884 	int rc = 0;
2885 
2886 	vcpu_load(vcpu);
2887 
2888 	vcpu->guest_debug = 0;
2889 	kvm_s390_clear_bp_data(vcpu);
2890 
2891 	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
2892 		rc = -EINVAL;
2893 		goto out;
2894 	}
2895 	if (!sclp.has_gpere) {
2896 		rc = -EINVAL;
2897 		goto out;
2898 	}
2899 
2900 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
2901 		vcpu->guest_debug = dbg->control;
2902 		/* enforce guest PER */
2903 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
2904 
2905 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2906 			rc = kvm_s390_import_bp_data(vcpu, dbg);
2907 	} else {
2908 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
2909 		vcpu->arch.guestdbg.last_bp = 0;
2910 	}
2911 
2912 	if (rc) {
2913 		vcpu->guest_debug = 0;
2914 		kvm_s390_clear_bp_data(vcpu);
2915 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
2916 	}
2917 
2918 out:
2919 	vcpu_put(vcpu);
2920 	return rc;
2921 }
2922 
2923 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2924 				    struct kvm_mp_state *mp_state)
2925 {
2926 	int ret;
2927 
2928 	vcpu_load(vcpu);
2929 
2930 	/* CHECK_STOP and LOAD are not supported yet */
2931 	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2932 				      KVM_MP_STATE_OPERATING;
2933 
2934 	vcpu_put(vcpu);
2935 	return ret;
2936 }
2937 
2938 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2939 				    struct kvm_mp_state *mp_state)
2940 {
2941 	int rc = 0;
2942 
2943 	vcpu_load(vcpu);
2944 
2945 	/* user space knows about this interface - let it control the state */
2946 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2947 
2948 	switch (mp_state->mp_state) {
2949 	case KVM_MP_STATE_STOPPED:
2950 		kvm_s390_vcpu_stop(vcpu);
2951 		break;
2952 	case KVM_MP_STATE_OPERATING:
2953 		kvm_s390_vcpu_start(vcpu);
2954 		break;
2955 	case KVM_MP_STATE_LOAD:
2956 	case KVM_MP_STATE_CHECK_STOP:
2957 		/* fall through - CHECK_STOP and LOAD are not supported yet */
2958 	default:
2959 		rc = -ENXIO;
2960 	}
2961 
2962 	vcpu_put(vcpu);
2963 	return rc;
2964 }
2965 
/* True when the CPUSTAT_IBS cpu flag is currently set for @vcpu. */
static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
}
2970 
/*
 * Process the requests pending for @vcpu.
 *
 * Each pass starts at "retry", which clears PROG_REQUEST and returns 0 if
 * nothing is pending. Every handled request jumps back to "retry", so the
 * function only falls through to the end when none of the requests
 * checked here was set; in that case KVM_REQ_UNHALT is cleared.
 *
 * Returns 0, or the error from gmap_mprotect_notify(); on that error the
 * MMU_RELOAD request is queued again before returning.
 */
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	kvm_s390_vcpu_request_handled(vcpu);
	if (!kvm_request_pending(vcpu))
		return 0;
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Lets just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;
		rc = gmap_mprotect_notify(vcpu->arch.gmap,
					  kvm_s390_get_prefix(vcpu),
					  PAGE_SIZE * 2, PROT_WRITE);
		if (rc) {
			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
			return rc;
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		vcpu->arch.sie_block->ihcpu = 0xffff;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
		/*
		 * Disable CMMA virtualization; we will emulate the ESSA
		 * instruction manually, in order to provide additional
		 * functionalities needed for live migration.
		 */
		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
		/*
		 * Re-enable CMMA virtualization if CMMA is available and
		 * was used.
		 */
		if ((vcpu->kvm->arch.use_cmma) &&
		    (vcpu->kvm->mm->context.use_cmma))
			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
		goto retry;
	}

	/* nothing to do, just clear the request */
	kvm_clear_request(KVM_REQ_UNHALT, vcpu);

	return 0;
}
3048 
3049 void kvm_s390_set_tod_clock(struct kvm *kvm,
3050 			    const struct kvm_s390_vm_tod_clock *gtod)
3051 {
3052 	struct kvm_vcpu *vcpu;
3053 	struct kvm_s390_tod_clock_ext htod;
3054 	int i;
3055 
3056 	mutex_lock(&kvm->lock);
3057 	preempt_disable();
3058 
3059 	get_tod_clock_ext((char *)&htod);
3060 
3061 	kvm->arch.epoch = gtod->tod - htod.tod;
3062 	kvm->arch.epdx = 0;
3063 	if (test_kvm_facility(kvm, 139)) {
3064 		kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3065 		if (kvm->arch.epoch > gtod->tod)
3066 			kvm->arch.epdx -= 1;
3067 	}
3068 
3069 	kvm_s390_vcpu_block_all(kvm);
3070 	kvm_for_each_vcpu(i, vcpu, kvm) {
3071 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3072 		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3073 	}
3074 
3075 	kvm_s390_vcpu_unblock_all(kvm);
3076 	preempt_enable();
3077 	mutex_unlock(&kvm->lock);
3078 }
3079 
3080 /**
3081  * kvm_arch_fault_in_page - fault-in guest page if necessary
3082  * @vcpu: The corresponding virtual cpu
3083  * @gpa: Guest physical address
3084  * @writable: Whether the page should be writable or not
3085  *
3086  * Make sure that a guest page has been faulted-in on the host.
3087  *
3088  * Return: Zero on success, negative error code otherwise.
3089  */
3090 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3091 {
3092 	return gmap_fault(vcpu->arch.gmap, gpa,
3093 			  writable ? FAULT_FLAG_WRITE : 0);
3094 }
3095 
3096 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3097 				      unsigned long token)
3098 {
3099 	struct kvm_s390_interrupt inti;
3100 	struct kvm_s390_irq irq;
3101 
3102 	if (start_token) {
3103 		irq.u.ext.ext_params2 = token;
3104 		irq.type = KVM_S390_INT_PFAULT_INIT;
3105 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3106 	} else {
3107 		inti.type = KVM_S390_INT_PFAULT_DONE;
3108 		inti.parm64 = token;
3109 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3110 	}
3111 }
3112 
3113 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3114 				     struct kvm_async_pf *work)
3115 {
3116 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3117 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3118 }
3119 
3120 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3121 				 struct kvm_async_pf *work)
3122 {
3123 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3124 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3125 }
3126 
/*
 * Async page fault "page ready" hook. Intentionally a no-op on s390, see the
 * comment in the body.
 */
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}
3132 
/*
 * Tell the async page fault code whether a "page present" notification can
 * be injected right now. Unconditionally true on s390.
 */
bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to cleanup
	 */
	return true;
}
3141 
3142 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3143 {
3144 	hva_t hva;
3145 	struct kvm_arch_async_pf arch;
3146 	int rc;
3147 
3148 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3149 		return 0;
3150 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3151 	    vcpu->arch.pfault_compare)
3152 		return 0;
3153 	if (psw_extint_disabled(vcpu))
3154 		return 0;
3155 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
3156 		return 0;
3157 	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
3158 		return 0;
3159 	if (!vcpu->arch.gmap->pfault_enabled)
3160 		return 0;
3161 
3162 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3163 	hva += current->thread.gmap_addr & ~PAGE_MASK;
3164 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3165 		return 0;
3166 
3167 	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3168 	return rc;
3169 }
3170 
3171 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3172 {
3173 	int rc, cpuflags;
3174 
3175 	/*
3176 	 * On s390 notifications for arriving pages will be delivered directly
3177 	 * to the guest but the house keeping for completed pfaults is
3178 	 * handled outside the worker.
3179 	 */
3180 	kvm_check_async_pf_completion(vcpu);
3181 
3182 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3183 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3184 
3185 	if (need_resched())
3186 		schedule();
3187 
3188 	if (test_cpu_flag(CIF_MCCK_PENDING))
3189 		s390_handle_mcck();
3190 
3191 	if (!kvm_is_ucontrol(vcpu->kvm)) {
3192 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
3193 		if (rc)
3194 			return rc;
3195 	}
3196 
3197 	rc = kvm_s390_handle_requests(vcpu);
3198 	if (rc)
3199 		return rc;
3200 
3201 	if (guestdbg_enabled(vcpu)) {
3202 		kvm_s390_backup_guest_per_regs(vcpu);
3203 		kvm_s390_patch_guest_per_regs(vcpu);
3204 	}
3205 
3206 	vcpu->arch.sie_block->icptcode = 0;
3207 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3208 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3209 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
3210 
3211 	return 0;
3212 }
3213 
3214 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3215 {
3216 	struct kvm_s390_pgm_info pgm_info = {
3217 		.code = PGM_ADDRESSING,
3218 	};
3219 	u8 opcode, ilen;
3220 	int rc;
3221 
3222 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3223 	trace_kvm_s390_sie_fault(vcpu);
3224 
3225 	/*
3226 	 * We want to inject an addressing exception, which is defined as a
3227 	 * suppressing or terminating exception. However, since we came here
3228 	 * by a DAT access exception, the PSW still points to the faulting
3229 	 * instruction since DAT exceptions are nullifying. So we've got
3230 	 * to look up the current opcode to get the length of the instruction
3231 	 * to be able to forward the PSW.
3232 	 */
3233 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3234 	ilen = insn_length(opcode);
3235 	if (rc < 0) {
3236 		return rc;
3237 	} else if (rc) {
3238 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
3239 		 * Forward by arbitrary ilc, injection will take care of
3240 		 * nullification if necessary.
3241 		 */
3242 		pgm_info = vcpu->arch.pgm;
3243 		ilen = 4;
3244 	}
3245 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3246 	kvm_s390_forward_psw(vcpu, ilen);
3247 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3248 }
3249 
3250 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3251 {
3252 	struct mcck_volatile_info *mcck_info;
3253 	struct sie_page *sie_page;
3254 
3255 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3256 		   vcpu->arch.sie_block->icptcode);
3257 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3258 
3259 	if (guestdbg_enabled(vcpu))
3260 		kvm_s390_restore_guest_per_regs(vcpu);
3261 
3262 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3263 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3264 
3265 	if (exit_reason == -EINTR) {
3266 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
3267 		sie_page = container_of(vcpu->arch.sie_block,
3268 					struct sie_page, sie_block);
3269 		mcck_info = &sie_page->mcck_info;
3270 		kvm_s390_reinject_machine_check(vcpu, mcck_info);
3271 		return 0;
3272 	}
3273 
3274 	if (vcpu->arch.sie_block->icptcode > 0) {
3275 		int rc = kvm_handle_sie_intercept(vcpu);
3276 
3277 		if (rc != -EOPNOTSUPP)
3278 			return rc;
3279 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3280 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3281 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3282 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3283 		return -EREMOTE;
3284 	} else if (exit_reason != -EFAULT) {
3285 		vcpu->stat.exit_null++;
3286 		return 0;
3287 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
3288 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3289 		vcpu->run->s390_ucontrol.trans_exc_code =
3290 						current->thread.gmap_addr;
3291 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
3292 		return -EREMOTE;
3293 	} else if (current->thread.gmap_pfault) {
3294 		trace_kvm_s390_major_guest_pfault(vcpu);
3295 		current->thread.gmap_pfault = 0;
3296 		if (kvm_arch_setup_async_pf(vcpu))
3297 			return 0;
3298 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3299 	}
3300 	return vcpu_post_run_fault_in_sie(vcpu);
3301 }
3302 
/*
 * Main vcpu run loop: repeat vcpu_pre_run(), SIE entry via sie64a() and
 * vcpu_post_run() until a signal is pending for the current task, a guest
 * debug exit is pending, or one of the steps returns a non-zero code.
 *
 * Return: the last return code of vcpu_pre_run() or vcpu_post_run().
 */
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
	 * ning the guest), so that memslots (and other stuff) are protected
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		/* drop SRCU while the guest is running */
		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in fault handler, between
		 * guest_enter and guest_exit should be no uaccess.
		 */
		local_irq_disable();
		guest_enter_irqoff();
		__disable_cpu_timer_accounting(vcpu);
		local_irq_enable();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		local_irq_disable();
		__enable_cpu_timer_accounting(vcpu);
		guest_exit_irqoff();
		local_irq_enable();
		/* re-acquire SRCU before touching memslots in post-run */
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}
3341 
/*
 * Load the register state provided by userspace in @kvm_run into the vcpu
 * before running the guest.
 *
 * The PSW is always copied; the other register groups are copied only if
 * the matching KVM_SYNC_* bit is set in kvm_run->kvm_dirty_regs. Afterwards
 * the host access registers, fpu state and guarded storage control block of
 * the current thread are saved and replaced by the guest ones, and
 * kvm_dirty_regs is cleared.
 */
static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	struct runtime_instr_cb *riccb;
	struct gs_cb *gscb;

	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		/* an invalid token disables pfault: drop queued completions */
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	/*
	 * If userspace sets the riccb (e.g. after migration) to a valid state,
	 * we should enable RI here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
	    test_kvm_facility(vcpu->kvm, 64) &&
	    riccb->v &&
	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
	}
	/*
	 * If userspace sets the gscb (e.g. after migration) to non-zero,
	 * we should enable GS here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
	    test_kvm_facility(vcpu->kvm, 133) &&
	    gscb->gssm &&
	    !vcpu->arch.gs_enabled) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
		vcpu->arch.sie_block->ecb |= ECB_GS;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
		vcpu->arch.gs_enabled = 1;
	}
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
	    test_kvm_facility(vcpu->kvm, 82)) {
		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
	}
	/* switch access registers: save host copy, load guest copy */
	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	/* save host (userspace) fprs/vrs */
	save_fpu_regs();
	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
	/* point the thread's fpu save area at the guest registers in kvm_run */
	if (MACHINE_HAS_VX)
		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
	else
		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;
	if (MACHINE_HAS_GS) {
		/* the control register bit and the gs_cb switch must not be preempted */
		preempt_disable();
		__ctl_set_bit(2, 4);
		if (current->thread.gs_cb) {
			vcpu->arch.host_gscb = current->thread.gs_cb;
			save_gs_cb(vcpu->arch.host_gscb);
		}
		if (vcpu->arch.gs_enabled) {
			current->thread.gs_cb = (struct gs_cb *)
						&vcpu->run->s.regs.gscb;
			restore_gs_cb(current->thread.gs_cb);
		}
		preempt_enable();
	}

	kvm_run->kvm_dirty_regs = 0;
}
3432 
3433 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3434 {
3435 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3436 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3437 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3438 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3439 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3440 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3441 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3442 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3443 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3444 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3445 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3446 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3447 	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3448 	save_access_regs(vcpu->run->s.regs.acrs);
3449 	restore_access_regs(vcpu->arch.host_acrs);
3450 	/* Save guest register state */
3451 	save_fpu_regs();
3452 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3453 	/* Restore will be done lazily at return */
3454 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3455 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3456 	if (MACHINE_HAS_GS) {
3457 		__ctl_set_bit(2, 4);
3458 		if (vcpu->arch.gs_enabled)
3459 			save_gs_cb(current->thread.gs_cb);
3460 		preempt_disable();
3461 		current->thread.gs_cb = vcpu->arch.host_gscb;
3462 		restore_gs_cb(vcpu->arch.host_gscb);
3463 		preempt_enable();
3464 		if (!vcpu->arch.host_gscb)
3465 			__ctl_clear_bit(2, 4);
3466 		vcpu->arch.host_gscb = NULL;
3467 	}
3468 
3469 }
3470 
3471 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3472 {
3473 	int rc;
3474 
3475 	if (kvm_run->immediate_exit)
3476 		return -EINTR;
3477 
3478 	vcpu_load(vcpu);
3479 
3480 	if (guestdbg_exit_pending(vcpu)) {
3481 		kvm_s390_prepare_debug_exit(vcpu);
3482 		rc = 0;
3483 		goto out;
3484 	}
3485 
3486 	kvm_sigset_activate(vcpu);
3487 
3488 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3489 		kvm_s390_vcpu_start(vcpu);
3490 	} else if (is_vcpu_stopped(vcpu)) {
3491 		pr_err_ratelimited("can't run stopped vcpu %d\n",
3492 				   vcpu->vcpu_id);
3493 		rc = -EINVAL;
3494 		goto out;
3495 	}
3496 
3497 	sync_regs(vcpu, kvm_run);
3498 	enable_cpu_timer_accounting(vcpu);
3499 
3500 	might_fault();
3501 	rc = __vcpu_run(vcpu);
3502 
3503 	if (signal_pending(current) && !rc) {
3504 		kvm_run->exit_reason = KVM_EXIT_INTR;
3505 		rc = -EINTR;
3506 	}
3507 
3508 	if (guestdbg_exit_pending(vcpu) && !rc)  {
3509 		kvm_s390_prepare_debug_exit(vcpu);
3510 		rc = 0;
3511 	}
3512 
3513 	if (rc == -EREMOTE) {
3514 		/* userspace support is needed, kvm_run has been prepared */
3515 		rc = 0;
3516 	}
3517 
3518 	disable_cpu_timer_accounting(vcpu);
3519 	store_regs(vcpu, kvm_run);
3520 
3521 	kvm_sigset_deactivate(vcpu);
3522 
3523 	vcpu->stat.exit_userspace++;
3524 out:
3525 	vcpu_put(vcpu);
3526 	return rc;
3527 }
3528 
3529 /*
3530  * store status at address
3531  * we use have two special cases:
3532  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3533  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3534  */
3535 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3536 {
3537 	unsigned char archmode = 1;
3538 	freg_t fprs[NUM_FPRS];
3539 	unsigned int px;
3540 	u64 clkcomp, cputm;
3541 	int rc;
3542 
3543 	px = kvm_s390_get_prefix(vcpu);
3544 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3545 		if (write_guest_abs(vcpu, 163, &archmode, 1))
3546 			return -EFAULT;
3547 		gpa = 0;
3548 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3549 		if (write_guest_real(vcpu, 163, &archmode, 1))
3550 			return -EFAULT;
3551 		gpa = px;
3552 	} else
3553 		gpa -= __LC_FPREGS_SAVE_AREA;
3554 
3555 	/* manually convert vector registers if necessary */
3556 	if (MACHINE_HAS_VX) {
3557 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3558 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3559 				     fprs, 128);
3560 	} else {
3561 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3562 				     vcpu->run->s.regs.fprs, 128);
3563 	}
3564 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3565 			      vcpu->run->s.regs.gprs, 128);
3566 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3567 			      &vcpu->arch.sie_block->gpsw, 16);
3568 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3569 			      &px, 4);
3570 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3571 			      &vcpu->run->s.regs.fpc, 4);
3572 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3573 			      &vcpu->arch.sie_block->todpr, 4);
3574 	cputm = kvm_s390_get_cpu_timer(vcpu);
3575 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3576 			      &cputm, 8);
3577 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
3578 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3579 			      &clkcomp, 8);
3580 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3581 			      &vcpu->run->s.regs.acrs, 64);
3582 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3583 			      &vcpu->arch.sie_block->gcr, 128);
3584 	return rc ? -EFAULT : 0;
3585 }
3586 
/*
 * Store the status of a loaded vcpu at guest address @addr.
 *
 * Refreshes the fpc and access register copies in vcpu->run from the
 * current thread/CPU state and then delegates to
 * kvm_s390_store_status_unloaded(), whose return value is passed through.
 */
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * switch in the run ioctl. Let's update our copies before we save
	 * it into the save area
	 */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}
3600 
/*
 * Request IBS to be disabled on @vcpu.
 *
 * The kvm_check_request() call presumably consumes a still pending
 * ENABLE_IBS request so it cannot take effect after the DISABLE_IBS
 * request issued here - TODO confirm against kvm_check_request().
 */
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}
3606 
3607 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3608 {
3609 	unsigned int i;
3610 	struct kvm_vcpu *vcpu;
3611 
3612 	kvm_for_each_vcpu(i, vcpu, kvm) {
3613 		__disable_ibs_on_vcpu(vcpu);
3614 	}
3615 }
3616 
3617 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3618 {
3619 	if (!sclp.has_ibs)
3620 		return;
3621 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3622 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3623 }
3624 
3625 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3626 {
3627 	int i, online_vcpus, started_vcpus = 0;
3628 
3629 	if (!is_vcpu_stopped(vcpu))
3630 		return;
3631 
3632 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3633 	/* Only one cpu at a time may enter/leave the STOPPED state. */
3634 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
3635 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3636 
3637 	for (i = 0; i < online_vcpus; i++) {
3638 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3639 			started_vcpus++;
3640 	}
3641 
3642 	if (started_vcpus == 0) {
3643 		/* we're the only active VCPU -> speed it up */
3644 		__enable_ibs_on_vcpu(vcpu);
3645 	} else if (started_vcpus == 1) {
3646 		/*
3647 		 * As we are starting a second VCPU, we have to disable
3648 		 * the IBS facility on all VCPUs to remove potentially
3649 		 * oustanding ENABLE requests.
3650 		 */
3651 		__disable_ibs_on_all_vcpus(vcpu->kvm);
3652 	}
3653 
3654 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
3655 	/*
3656 	 * Another VCPU might have used IBS while we were offline.
3657 	 * Let's play safe and flush the VCPU at startup.
3658 	 */
3659 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3660 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3661 	return;
3662 }
3663 
3664 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3665 {
3666 	int i, online_vcpus, started_vcpus = 0;
3667 	struct kvm_vcpu *started_vcpu = NULL;
3668 
3669 	if (is_vcpu_stopped(vcpu))
3670 		return;
3671 
3672 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3673 	/* Only one cpu at a time may enter/leave the STOPPED state. */
3674 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
3675 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3676 
3677 	/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
3678 	kvm_s390_clear_stop_irq(vcpu);
3679 
3680 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
3681 	__disable_ibs_on_vcpu(vcpu);
3682 
3683 	for (i = 0; i < online_vcpus; i++) {
3684 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3685 			started_vcpus++;
3686 			started_vcpu = vcpu->kvm->vcpus[i];
3687 		}
3688 	}
3689 
3690 	if (started_vcpus == 1) {
3691 		/*
3692 		 * As we only have one VCPU left, we want to enable the
3693 		 * IBS facility for that VCPU to speed it up.
3694 		 */
3695 		__enable_ibs_on_vcpu(started_vcpu);
3696 	}
3697 
3698 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3699 	return;
3700 }
3701 
3702 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3703 				     struct kvm_enable_cap *cap)
3704 {
3705 	int r;
3706 
3707 	if (cap->flags)
3708 		return -EINVAL;
3709 
3710 	switch (cap->cap) {
3711 	case KVM_CAP_S390_CSS_SUPPORT:
3712 		if (!vcpu->kvm->arch.css_support) {
3713 			vcpu->kvm->arch.css_support = 1;
3714 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3715 			trace_kvm_s390_enable_css(vcpu->kvm);
3716 		}
3717 		r = 0;
3718 		break;
3719 	default:
3720 		r = -EINVAL;
3721 		break;
3722 	}
3723 	return r;
3724 }
3725 
3726 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3727 				  struct kvm_s390_mem_op *mop)
3728 {
3729 	void __user *uaddr = (void __user *)mop->buf;
3730 	void *tmpbuf = NULL;
3731 	int r, srcu_idx;
3732 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3733 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
3734 
3735 	if (mop->flags & ~supported_flags)
3736 		return -EINVAL;
3737 
3738 	if (mop->size > MEM_OP_MAX_SIZE)
3739 		return -E2BIG;
3740 
3741 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3742 		tmpbuf = vmalloc(mop->size);
3743 		if (!tmpbuf)
3744 			return -ENOMEM;
3745 	}
3746 
3747 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3748 
3749 	switch (mop->op) {
3750 	case KVM_S390_MEMOP_LOGICAL_READ:
3751 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3752 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3753 					    mop->size, GACC_FETCH);
3754 			break;
3755 		}
3756 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3757 		if (r == 0) {
3758 			if (copy_to_user(uaddr, tmpbuf, mop->size))
3759 				r = -EFAULT;
3760 		}
3761 		break;
3762 	case KVM_S390_MEMOP_LOGICAL_WRITE:
3763 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3764 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3765 					    mop->size, GACC_STORE);
3766 			break;
3767 		}
3768 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3769 			r = -EFAULT;
3770 			break;
3771 		}
3772 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3773 		break;
3774 	default:
3775 		r = -EINVAL;
3776 	}
3777 
3778 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3779 
3780 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3781 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3782 
3783 	vfree(tmpbuf);
3784 	return r;
3785 }
3786 
3787 long kvm_arch_vcpu_async_ioctl(struct file *filp,
3788 			       unsigned int ioctl, unsigned long arg)
3789 {
3790 	struct kvm_vcpu *vcpu = filp->private_data;
3791 	void __user *argp = (void __user *)arg;
3792 
3793 	switch (ioctl) {
3794 	case KVM_S390_IRQ: {
3795 		struct kvm_s390_irq s390irq;
3796 
3797 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3798 			return -EFAULT;
3799 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
3800 	}
3801 	case KVM_S390_INTERRUPT: {
3802 		struct kvm_s390_interrupt s390int;
3803 		struct kvm_s390_irq s390irq;
3804 
3805 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
3806 			return -EFAULT;
3807 		if (s390int_to_s390irq(&s390int, &s390irq))
3808 			return -EINVAL;
3809 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
3810 	}
3811 	}
3812 	return -ENOIOCTLCMD;
3813 }
3814 
/*
 * Dispatch the s390 specific vcpu ioctls.
 *
 * The vcpu is loaded for the duration of the call. Arguments passed by
 * pointer are copied from userspace first; a failing copy yields -EFAULT.
 *
 * Return: the result of the selected handler, or -ENOTTY for an ioctl that
 * is not handled here.
 */
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	vcpu_load(vcpu);

	switch (ioctl) {
	case KVM_S390_STORE_STATUS:
		/* arg is the guest address; guest writes need kvm->srcu */
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		/* both directions share the same argument structure */
		struct kvm_one_reg reg;
		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		/* only valid for user controlled VMs */
		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		/* only valid for user controlled VMs */
		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
			ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		/* arg is the guest address to resolve through the vcpu's gmap */
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		/*
		 * The buffer must be non-empty, a whole number of
		 * struct kvm_s390_irq entries and within VCPU_IRQS_MAX_BUF.
		 */
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *)  irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}

	vcpu_put(vcpu);
	return r;
}
3954 
3955 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3956 {
3957 #ifdef CONFIG_KVM_S390_UCONTROL
3958 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3959 		 && (kvm_is_ucontrol(vcpu->kvm))) {
3960 		vmf->page = virt_to_page(vcpu->arch.sie_block);
3961 		get_page(vmf->page);
3962 		return 0;
3963 	}
3964 #endif
3965 	return VM_FAULT_SIGBUS;
3966 }
3967 
/*
 * Architecture hook for memslot creation. Nothing to do here; always
 * returns 0.
 */
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}
3973 
3974 /* Section: memory related */
3975 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3976 				   struct kvm_memory_slot *memslot,
3977 				   const struct kvm_userspace_memory_region *mem,
3978 				   enum kvm_mr_change change)
3979 {
3980 	/* A few sanity checks. We can have memory slots which have to be
3981 	   located/ended at a segment boundary (1MB). The memory in userland is
3982 	   ok to be fragmented into various different vmas. It is okay to mmap()
3983 	   and munmap() stuff in this slot after doing this call at any time */
3984 
3985 	if (mem->userspace_addr & 0xffffful)
3986 		return -EINVAL;
3987 
3988 	if (mem->memory_size & 0xffffful)
3989 		return -EINVAL;
3990 
3991 	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3992 		return -EINVAL;
3993 
3994 	return 0;
3995 }
3996 
3997 void kvm_arch_commit_memory_region(struct kvm *kvm,
3998 				const struct kvm_userspace_memory_region *mem,
3999 				const struct kvm_memory_slot *old,
4000 				const struct kvm_memory_slot *new,
4001 				enum kvm_mr_change change)
4002 {
4003 	int rc;
4004 
4005 	/* If the basics of the memslot do not change, we do not want
4006 	 * to update the gmap. Every update causes several unnecessary
4007 	 * segment translation exceptions. This is usually handled just
4008 	 * fine by the normal fault handler + gmap, but it will also
4009 	 * cause faults on the prefix page of running guest CPUs.
4010 	 */
4011 	if (old->userspace_addr == mem->userspace_addr &&
4012 	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
4013 	    old->npages * PAGE_SIZE == mem->memory_size)
4014 		return;
4015 
4016 	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4017 		mem->guest_phys_addr, mem->memory_size);
4018 	if (rc)
4019 		pr_warn("failed to commit memory region\n");
4020 	return;
4021 }
4022 
4023 static inline unsigned long nonhyp_mask(int i)
4024 {
4025 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
4026 
4027 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
4028 }
4029 
/* Hook called when a vcpu finishes blocking: reset the valid_wakeup flag. */
void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
	vcpu->valid_wakeup = false;
}
4034 
4035 static int __init kvm_s390_init(void)
4036 {
4037 	int i;
4038 
4039 	if (!sclp.has_sief2) {
4040 		pr_info("SIE not available\n");
4041 		return -ENODEV;
4042 	}
4043 
4044 	for (i = 0; i < 16; i++)
4045 		kvm_s390_fac_list_mask[i] |=
4046 			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
4047 
4048 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
4049 }
4050 
/* Module exit: unregister from the generic KVM code. */
static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}
4055 
/* Register the module entry and exit points. */
module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");
4067