xref: /openbmc/linux/arch/s390/kvm/kvm-s390.c (revision 19114beb73f774e466d9e39b8e8b961812c9f881)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2018
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13 
14 #include <linux/compiler.h>
15 #include <linux/err.h>
16 #include <linux/fs.h>
17 #include <linux/hrtimer.h>
18 #include <linux/init.h>
19 #include <linux/kvm.h>
20 #include <linux/kvm_host.h>
21 #include <linux/mman.h>
22 #include <linux/module.h>
23 #include <linux/moduleparam.h>
24 #include <linux/random.h>
25 #include <linux/slab.h>
26 #include <linux/timer.h>
27 #include <linux/vmalloc.h>
28 #include <linux/bitmap.h>
29 #include <linux/sched/signal.h>
30 #include <linux/string.h>
31 
32 #include <asm/asm-offsets.h>
33 #include <asm/lowcore.h>
34 #include <asm/stp.h>
35 #include <asm/pgtable.h>
36 #include <asm/gmap.h>
37 #include <asm/nmi.h>
38 #include <asm/switch_to.h>
39 #include <asm/isc.h>
40 #include <asm/sclp.h>
41 #include <asm/cpacf.h>
42 #include <asm/timex.h>
43 #include "kvm-s390.h"
44 #include "gaccess.h"
45 
46 #define KMSG_COMPONENT "kvm-s390"
47 #undef pr_fmt
48 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
49 
50 #define CREATE_TRACE_POINTS
51 #include "trace.h"
52 #include "trace-s390.h"
53 
54 #define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
55 #define LOCAL_IRQS 32
56 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
57 			   (KVM_MAX_VCPUS + LOCAL_IRQS))
58 
59 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
60 
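/*
 * Per-vCPU statistics exported via debugfs. VCPU_STAT() records the offset
 * of each counter inside struct kvm_vcpu together with the KVM_STAT_VCPU
 * type, so the generic KVM code can create one debugfs file per entry.
 */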
61 struct kvm_stats_debugfs_item debugfs_entries[] = {
62 	{ "userspace_handled", VCPU_STAT(exit_userspace) },
63 	{ "exit_null", VCPU_STAT(exit_null) },
64 	{ "exit_validity", VCPU_STAT(exit_validity) },
65 	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
66 	{ "exit_external_request", VCPU_STAT(exit_external_request) },
67 	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
68 	{ "exit_instruction", VCPU_STAT(exit_instruction) },
69 	{ "exit_pei", VCPU_STAT(exit_pei) },
70 	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
71 	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
72 	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
73 	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
74 	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
75 	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
76 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
77 	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
78 	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
79 	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
80 	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
81 	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
82 	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
83 	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
84 	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
85 	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
86 	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
87 	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
88 	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
89 	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
90 	{ "instruction_epsw", VCPU_STAT(instruction_epsw) },
91 	{ "instruction_gs", VCPU_STAT(instruction_gs) },
92 	{ "instruction_io_other", VCPU_STAT(instruction_io_other) },
93 	{ "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
94 	{ "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
95 	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
96 	{ "instruction_ptff", VCPU_STAT(instruction_ptff) },
97 	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
98 	{ "instruction_sck", VCPU_STAT(instruction_sck) },
99 	{ "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
100 	{ "instruction_spx", VCPU_STAT(instruction_spx) },
101 	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
102 	{ "instruction_stap", VCPU_STAT(instruction_stap) },
103 	{ "instruction_iske", VCPU_STAT(instruction_iske) },
104 	{ "instruction_ri", VCPU_STAT(instruction_ri) },
105 	{ "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
106 	{ "instruction_sske", VCPU_STAT(instruction_sske) },
107 	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
108 	{ "instruction_essa", VCPU_STAT(instruction_essa) },
109 	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
110 	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
111 	{ "instruction_tb", VCPU_STAT(instruction_tb) },
112 	{ "instruction_tpi", VCPU_STAT(instruction_tpi) },
113 	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
114 	{ "instruction_tsch", VCPU_STAT(instruction_tsch) },
115 	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
116 	{ "instruction_sie", VCPU_STAT(instruction_sie) },
117 	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
118 	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
119 	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
120 	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
121 	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
122 	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
123 	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
124 	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
125 	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
126 	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
127 	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
128 	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
129 	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
130 	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
131 	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
132 	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
133 	{ "instruction_diag_10", VCPU_STAT(diagnose_10) },
134 	{ "instruction_diag_44", VCPU_STAT(diagnose_44) },
135 	{ "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
136 	{ "instruction_diag_258", VCPU_STAT(diagnose_258) },
137 	{ "instruction_diag_308", VCPU_STAT(diagnose_308) },
138 	{ "instruction_diag_500", VCPU_STAT(diagnose_500) },
139 	{ "instruction_diag_other", VCPU_STAT(diagnose_other) },
140 	{ NULL }
141 };
142 
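/*
 * 16 byte clock value as stored by STORE CLOCK EXTENDED (STCKE): a one byte
 * epoch index followed by the 64 bit TOD value. The trailing bytes are not
 * used here.
 */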
143 struct kvm_s390_tod_clock_ext {
144 	__u8 epoch_idx;
145 	__u64 tod;
146 	__u8 reserved[7];
147 } __packed;
148 
149 /* allow nested virtualization in KVM (if enabled by user space) */
150 static int nested;
151 module_param(nested, int, S_IRUGO);
152 MODULE_PARM_DESC(nested, "Nested virtualization support");
153 
154 /* upper facilities limit for kvm */
155 unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
156 
157 unsigned long kvm_s390_fac_list_mask_size(void)
158 {
159 	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
160 	return ARRAY_SIZE(kvm_s390_fac_list_mask);
161 }
162 
163 /* available cpu features supported by kvm */
164 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
165 /* available subfunctions indicated via query / "test bit" */
166 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
167 
168 static struct gmap_notifier gmap_notifier;
169 static struct gmap_notifier vsie_gmap_notifier;
170 debug_info_t *kvm_s390_dbf;
171 
172 /* Section: not file related */
173 int kvm_arch_hardware_enable(void)
174 {
175 	/* every s390 is virtualization enabled ;-) */
176 	return 0;
177 }
178 
179 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
180 			      unsigned long end);
181 
182 /*
183  * This callback is executed during stop_machine(). All CPUs are therefore
184  * temporarily stopped. In order not to change guest behavior, we have to
185  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
186  * so a CPU won't be stopped while calculating with the epoch.
187  */
188 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
189 			  void *v)
190 {
191 	struct kvm *kvm;
192 	struct kvm_vcpu *vcpu;
193 	int i;
194 	unsigned long long *delta = v;
195 
196 	list_for_each_entry(kvm, &vm_list, vm_list) {
197 		kvm->arch.epoch -= *delta;
198 		kvm_for_each_vcpu(i, vcpu, kvm) {
199 			vcpu->arch.sie_block->epoch -= *delta;
200 			if (vcpu->arch.cputm_enabled)
201 				vcpu->arch.cputm_start += *delta;
202 			if (vcpu->arch.vsie_block)
203 				vcpu->arch.vsie_block->epoch -= *delta;
204 		}
205 	}
206 	return NOTIFY_OK;
207 }
208 
209 static struct notifier_block kvm_clock_notifier = {
210 	.notifier_call = kvm_clock_sync,
211 };
212 
213 int kvm_arch_hardware_setup(void)
214 {
215 	gmap_notifier.notifier_call = kvm_gmap_notifier;
216 	gmap_register_pte_notifier(&gmap_notifier);
217 	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
218 	gmap_register_pte_notifier(&vsie_gmap_notifier);
219 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
220 				       &kvm_clock_notifier);
221 	return 0;
222 }
223 
224 void kvm_arch_hardware_unsetup(void)
225 {
226 	gmap_unregister_pte_notifier(&gmap_notifier);
227 	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
228 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
229 					 &kvm_clock_notifier);
230 }
231 
232 static void allow_cpu_feat(unsigned long nr)
233 {
234 	set_bit_inv(nr, kvm_s390_available_cpu_feat);
235 }
236 
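/*
 * Query availability of a PERFORM LOCKED OPERATION function: with bit 0x100
 * set in the function code, PLO only tests whether the function is provided
 * and sets the condition code. cc == 0 means the function is available.
 */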
237 static inline int plo_test_bit(unsigned char nr)
238 {
239 	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
240 	int cc;
241 
242 	asm volatile(
243 		/* Parameter registers are ignored for "test bit" */
244 		"	plo	0,0,0,0(0)\n"
245 		"	ipm	%0\n"
246 		"	srl	%0,28\n"
247 		: "=d" (cc)
248 		: "d" (r0)
249 		: "cc");
250 	return cc == 0;
251 }
252 
253 static void kvm_s390_cpu_feat_init(void)
254 {
255 	int i;
256 
257 	for (i = 0; i < 256; ++i) {
258 		if (plo_test_bit(i))
259 			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
260 	}
261 
262 	if (test_facility(28)) /* TOD-clock steering */
263 		ptff(kvm_s390_available_subfunc.ptff,
264 		     sizeof(kvm_s390_available_subfunc.ptff),
265 		     PTFF_QAF);
266 
267 	if (test_facility(17)) { /* MSA */
268 		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
269 			      kvm_s390_available_subfunc.kmac);
270 		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
271 			      kvm_s390_available_subfunc.kmc);
272 		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
273 			      kvm_s390_available_subfunc.km);
274 		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
275 			      kvm_s390_available_subfunc.kimd);
276 		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
277 			      kvm_s390_available_subfunc.klmd);
278 	}
279 	if (test_facility(76)) /* MSA3 */
280 		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
281 			      kvm_s390_available_subfunc.pckmo);
282 	if (test_facility(77)) { /* MSA4 */
283 		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
284 			      kvm_s390_available_subfunc.kmctr);
285 		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
286 			      kvm_s390_available_subfunc.kmf);
287 		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
288 			      kvm_s390_available_subfunc.kmo);
289 		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
290 			      kvm_s390_available_subfunc.pcc);
291 	}
292 	if (test_facility(57)) /* MSA5 */
293 		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
294 			      kvm_s390_available_subfunc.ppno);
295 
296 	if (test_facility(146)) /* MSA8 */
297 		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
298 			      kvm_s390_available_subfunc.kma);
299 
300 	if (MACHINE_HAS_ESOP)
301 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
302 	/*
303 	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
304 	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
305 	 */
306 	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
307 	    !test_facility(3) || !nested)
308 		return;
309 	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
310 	if (sclp.has_64bscao)
311 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
312 	if (sclp.has_siif)
313 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
314 	if (sclp.has_gpere)
315 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
316 	if (sclp.has_gsls)
317 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
318 	if (sclp.has_ib)
319 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
320 	if (sclp.has_cei)
321 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
322 	if (sclp.has_ibs)
323 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
324 	if (sclp.has_kss)
325 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
326 	/*
327 	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
328 	 * all skey handling functions read/set the skey from the PGSTE
329 	 * instead of the real storage key.
330 	 *
331 	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits can cause
332 	 * resident pages to be detected as preserved.
333 	 *
334 	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
335 	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
336 	 *
337 	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
338 	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
339 	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
340 	 *
341 	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
342 	 * cannot easily shadow the SCA because of the ipte lock.
343 	 */
344 }
345 
346 int kvm_arch_init(void *opaque)
347 {
348 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
349 	if (!kvm_s390_dbf)
350 		return -ENOMEM;
351 
352 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
353 		debug_unregister(kvm_s390_dbf);
354 		return -ENOMEM;
355 	}
356 
357 	kvm_s390_cpu_feat_init();
358 
359 	/* Register floating interrupt controller interface. */
360 	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
361 }
362 
363 void kvm_arch_exit(void)
364 {
365 	debug_unregister(kvm_s390_dbf);
366 }
367 
368 /* Section: device related */
369 long kvm_arch_dev_ioctl(struct file *filp,
370 			unsigned int ioctl, unsigned long arg)
371 {
372 	if (ioctl == KVM_S390_ENABLE_SIE)
373 		return s390_enable_sie();
374 	return -EINVAL;
375 }
376 
377 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
378 {
379 	int r;
380 
381 	switch (ext) {
382 	case KVM_CAP_S390_PSW:
383 	case KVM_CAP_S390_GMAP:
384 	case KVM_CAP_SYNC_MMU:
385 #ifdef CONFIG_KVM_S390_UCONTROL
386 	case KVM_CAP_S390_UCONTROL:
387 #endif
388 	case KVM_CAP_ASYNC_PF:
389 	case KVM_CAP_SYNC_REGS:
390 	case KVM_CAP_ONE_REG:
391 	case KVM_CAP_ENABLE_CAP:
392 	case KVM_CAP_S390_CSS_SUPPORT:
393 	case KVM_CAP_IOEVENTFD:
394 	case KVM_CAP_DEVICE_CTRL:
395 	case KVM_CAP_ENABLE_CAP_VM:
396 	case KVM_CAP_S390_IRQCHIP:
397 	case KVM_CAP_VM_ATTRIBUTES:
398 	case KVM_CAP_MP_STATE:
399 	case KVM_CAP_IMMEDIATE_EXIT:
400 	case KVM_CAP_S390_INJECT_IRQ:
401 	case KVM_CAP_S390_USER_SIGP:
402 	case KVM_CAP_S390_USER_STSI:
403 	case KVM_CAP_S390_SKEYS:
404 	case KVM_CAP_S390_IRQ_STATE:
405 	case KVM_CAP_S390_USER_INSTR0:
406 	case KVM_CAP_S390_CMMA_MIGRATION:
407 	case KVM_CAP_S390_AIS:
408 	case KVM_CAP_S390_AIS_MIGRATION:
409 		r = 1;
410 		break;
411 	case KVM_CAP_S390_MEM_OP:
412 		r = MEM_OP_MAX_SIZE;
413 		break;
414 	case KVM_CAP_NR_VCPUS:
415 	case KVM_CAP_MAX_VCPUS:
416 		r = KVM_S390_BSCA_CPU_SLOTS;
417 		if (!kvm_s390_use_sca_entries())
418 			r = KVM_MAX_VCPUS;
419 		else if (sclp.has_esca && sclp.has_64bscao)
420 			r = KVM_S390_ESCA_CPU_SLOTS;
421 		break;
422 	case KVM_CAP_NR_MEMSLOTS:
423 		r = KVM_USER_MEM_SLOTS;
424 		break;
425 	case KVM_CAP_S390_COW:
426 		r = MACHINE_HAS_ESOP;
427 		break;
428 	case KVM_CAP_S390_VECTOR_REGISTERS:
429 		r = MACHINE_HAS_VX;
430 		break;
431 	case KVM_CAP_S390_RI:
432 		r = test_facility(64);
433 		break;
434 	case KVM_CAP_S390_GS:
435 		r = test_facility(133);
436 		break;
437 	default:
438 		r = 0;
439 	}
440 	return r;
441 }
442 
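/*
 * Transfer the dirty state of all guest pages of a memslot from the host
 * page tables into the KVM dirty bitmap, rescheduling between pages and
 * bailing out early on a fatal signal.
 */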
443 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
444 					struct kvm_memory_slot *memslot)
445 {
446 	gfn_t cur_gfn, last_gfn;
447 	unsigned long address;
448 	struct gmap *gmap = kvm->arch.gmap;
449 
450 	/* Loop over all guest pages */
451 	last_gfn = memslot->base_gfn + memslot->npages;
452 	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
453 		address = gfn_to_hva_memslot(memslot, cur_gfn);
454 
455 		if (test_and_clear_guest_dirty(gmap->mm, address))
456 			mark_page_dirty(kvm, cur_gfn);
457 		if (fatal_signal_pending(current))
458 			return;
459 		cond_resched();
460 	}
461 }
462 
463 /* Section: vm related */
464 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
465 
466 /*
467  * Get (and clear) the dirty memory log for a memory slot.
468  */
469 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
470 			       struct kvm_dirty_log *log)
471 {
472 	int r;
473 	unsigned long n;
474 	struct kvm_memslots *slots;
475 	struct kvm_memory_slot *memslot;
476 	int is_dirty = 0;
477 
478 	if (kvm_is_ucontrol(kvm))
479 		return -EINVAL;
480 
481 	mutex_lock(&kvm->slots_lock);
482 
483 	r = -EINVAL;
484 	if (log->slot >= KVM_USER_MEM_SLOTS)
485 		goto out;
486 
487 	slots = kvm_memslots(kvm);
488 	memslot = id_to_memslot(slots, log->slot);
489 	r = -ENOENT;
490 	if (!memslot->dirty_bitmap)
491 		goto out;
492 
493 	kvm_s390_sync_dirty_log(kvm, memslot);
494 	r = kvm_get_dirty_log(kvm, log, &is_dirty);
495 	if (r)
496 		goto out;
497 
498 	/* Clear the dirty log */
499 	if (is_dirty) {
500 		n = kvm_dirty_bitmap_bytes(memslot);
501 		memset(memslot->dirty_bitmap, 0, n);
502 	}
503 	r = 0;
504 out:
505 	mutex_unlock(&kvm->slots_lock);
506 	return r;
507 }
508 
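/*
 * Ask every vCPU to enable interception of operation exceptions, so that
 * instruction 0x0000 can be forwarded to user space (see
 * KVM_CAP_S390_USER_INSTR0 below).
 */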
509 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
510 {
511 	unsigned int i;
512 	struct kvm_vcpu *vcpu;
513 
514 	kvm_for_each_vcpu(i, vcpu, kvm) {
515 		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
516 	}
517 }
518 
519 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
520 {
521 	int r;
522 
523 	if (cap->flags)
524 		return -EINVAL;
525 
526 	switch (cap->cap) {
527 	case KVM_CAP_S390_IRQCHIP:
528 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
529 		kvm->arch.use_irqchip = 1;
530 		r = 0;
531 		break;
532 	case KVM_CAP_S390_USER_SIGP:
533 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
534 		kvm->arch.user_sigp = 1;
535 		r = 0;
536 		break;
537 	case KVM_CAP_S390_VECTOR_REGISTERS:
538 		mutex_lock(&kvm->lock);
539 		if (kvm->created_vcpus) {
540 			r = -EBUSY;
541 		} else if (MACHINE_HAS_VX) {
542 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
543 			set_kvm_facility(kvm->arch.model.fac_list, 129);
544 			if (test_facility(134)) {
545 				set_kvm_facility(kvm->arch.model.fac_mask, 134);
546 				set_kvm_facility(kvm->arch.model.fac_list, 134);
547 			}
548 			if (test_facility(135)) {
549 				set_kvm_facility(kvm->arch.model.fac_mask, 135);
550 				set_kvm_facility(kvm->arch.model.fac_list, 135);
551 			}
552 			r = 0;
553 		} else
554 			r = -EINVAL;
555 		mutex_unlock(&kvm->lock);
556 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
557 			 r ? "(not available)" : "(success)");
558 		break;
559 	case KVM_CAP_S390_RI:
560 		r = -EINVAL;
561 		mutex_lock(&kvm->lock);
562 		if (kvm->created_vcpus) {
563 			r = -EBUSY;
564 		} else if (test_facility(64)) {
565 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
566 			set_kvm_facility(kvm->arch.model.fac_list, 64);
567 			r = 0;
568 		}
569 		mutex_unlock(&kvm->lock);
570 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
571 			 r ? "(not available)" : "(success)");
572 		break;
573 	case KVM_CAP_S390_AIS:
574 		mutex_lock(&kvm->lock);
575 		if (kvm->created_vcpus) {
576 			r = -EBUSY;
577 		} else {
578 			set_kvm_facility(kvm->arch.model.fac_mask, 72);
579 			set_kvm_facility(kvm->arch.model.fac_list, 72);
580 			r = 0;
581 		}
582 		mutex_unlock(&kvm->lock);
583 		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
584 			 r ? "(not available)" : "(success)");
585 		break;
586 	case KVM_CAP_S390_GS:
587 		r = -EINVAL;
588 		mutex_lock(&kvm->lock);
589 		if (kvm->created_vcpus) {
590 			r = -EBUSY;
591 		} else if (test_facility(133)) {
592 			set_kvm_facility(kvm->arch.model.fac_mask, 133);
593 			set_kvm_facility(kvm->arch.model.fac_list, 133);
594 			r = 0;
595 		}
596 		mutex_unlock(&kvm->lock);
597 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
598 			 r ? "(not available)" : "(success)");
599 		break;
600 	case KVM_CAP_S390_USER_STSI:
601 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
602 		kvm->arch.user_stsi = 1;
603 		r = 0;
604 		break;
605 	case KVM_CAP_S390_USER_INSTR0:
606 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
607 		kvm->arch.user_instr0 = 1;
608 		icpt_operexc_on_all_vcpus(kvm);
609 		r = 0;
610 		break;
611 	default:
612 		r = -EINVAL;
613 		break;
614 	}
615 	return r;
616 }
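
/*
 * Illustrative sketch only (not part of this file): user space enables one
 * of the capabilities handled above through the vm-level KVM_ENABLE_CAP
 * ioctl, e.g. for user controlled SIGP handling:
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *
 *	if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap))
 *		perror("KVM_ENABLE_CAP");
 */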
617 
618 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
619 {
620 	int ret;
621 
622 	switch (attr->attr) {
623 	case KVM_S390_VM_MEM_LIMIT_SIZE:
624 		ret = 0;
625 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
626 			 kvm->arch.mem_limit);
627 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
628 			ret = -EFAULT;
629 		break;
630 	default:
631 		ret = -ENXIO;
632 		break;
633 	}
634 	return ret;
635 }
636 
637 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
638 {
639 	int ret;
640 	unsigned int idx;
641 	switch (attr->attr) {
642 	case KVM_S390_VM_MEM_ENABLE_CMMA:
643 		ret = -ENXIO;
644 		if (!sclp.has_cmma)
645 			break;
646 
647 		ret = -EBUSY;
648 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
649 		mutex_lock(&kvm->lock);
650 		if (!kvm->created_vcpus) {
651 			kvm->arch.use_cmma = 1;
652 			ret = 0;
653 		}
654 		mutex_unlock(&kvm->lock);
655 		break;
656 	case KVM_S390_VM_MEM_CLR_CMMA:
657 		ret = -ENXIO;
658 		if (!sclp.has_cmma)
659 			break;
660 		ret = -EINVAL;
661 		if (!kvm->arch.use_cmma)
662 			break;
663 
664 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
665 		mutex_lock(&kvm->lock);
666 		idx = srcu_read_lock(&kvm->srcu);
667 		s390_reset_cmma(kvm->arch.gmap->mm);
668 		srcu_read_unlock(&kvm->srcu, idx);
669 		mutex_unlock(&kvm->lock);
670 		ret = 0;
671 		break;
672 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
673 		unsigned long new_limit;
674 
675 		if (kvm_is_ucontrol(kvm))
676 			return -EINVAL;
677 
678 		if (get_user(new_limit, (u64 __user *)attr->addr))
679 			return -EFAULT;
680 
681 		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
682 		    new_limit > kvm->arch.mem_limit)
683 			return -E2BIG;
684 
685 		if (!new_limit)
686 			return -EINVAL;
687 
688 		/* gmap_create takes last usable address */
689 		if (new_limit != KVM_S390_NO_MEM_LIMIT)
690 			new_limit -= 1;
691 
692 		ret = -EBUSY;
693 		mutex_lock(&kvm->lock);
694 		if (!kvm->created_vcpus) {
695 			/* gmap_create will round the limit up */
696 			struct gmap *new = gmap_create(current->mm, new_limit);
697 
698 			if (!new) {
699 				ret = -ENOMEM;
700 			} else {
701 				gmap_remove(kvm->arch.gmap);
702 				new->private = kvm;
703 				kvm->arch.gmap = new;
704 				ret = 0;
705 			}
706 		}
707 		mutex_unlock(&kvm->lock);
708 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
709 		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
710 			 (void *) kvm->arch.gmap->asce);
711 		break;
712 	}
713 	default:
714 		ret = -ENXIO;
715 		break;
716 	}
717 	return ret;
718 }
719 
720 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
721 
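/*
 * Handle the KVM_S390_VM_CRYPTO attributes: toggle AES/DEA protected key
 * wrapping and (re)generate or clear the wrapping key masks in the CRYCB.
 * All vCPUs are kicked out of SIE so they pick up the new crypto setup.
 */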
722 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
723 {
724 	struct kvm_vcpu *vcpu;
725 	int i;
726 
727 	if (!test_kvm_facility(kvm, 76))
728 		return -EINVAL;
729 
730 	mutex_lock(&kvm->lock);
731 	switch (attr->attr) {
732 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
733 		get_random_bytes(
734 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
735 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
736 		kvm->arch.crypto.aes_kw = 1;
737 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
738 		break;
739 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
740 		get_random_bytes(
741 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
742 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
743 		kvm->arch.crypto.dea_kw = 1;
744 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
745 		break;
746 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
747 		kvm->arch.crypto.aes_kw = 0;
748 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
749 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
750 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
751 		break;
752 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
753 		kvm->arch.crypto.dea_kw = 0;
754 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
755 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
756 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
757 		break;
758 	default:
759 		mutex_unlock(&kvm->lock);
760 		return -ENXIO;
761 	}
762 
763 	kvm_for_each_vcpu(i, vcpu, kvm) {
764 		kvm_s390_vcpu_crypto_setup(vcpu);
765 		exit_sie(vcpu);
766 	}
767 	mutex_unlock(&kvm->lock);
768 	return 0;
769 }
770 
771 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
772 {
773 	int cx;
774 	struct kvm_vcpu *vcpu;
775 
776 	kvm_for_each_vcpu(cx, vcpu, kvm)
777 		kvm_s390_sync_request(req, vcpu);
778 }
779 
780 /*
781  * Must be called with kvm->srcu held to avoid races on memslots, and with
782  * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
783  */
784 static int kvm_s390_vm_start_migration(struct kvm *kvm)
785 {
786 	struct kvm_s390_migration_state *mgs;
787 	struct kvm_memory_slot *ms;
788 	/* should be the only one */
789 	struct kvm_memslots *slots;
790 	unsigned long ram_pages;
791 	int slotnr;
792 
793 	/* migration mode already enabled */
794 	if (kvm->arch.migration_state)
795 		return 0;
796 
797 	slots = kvm_memslots(kvm);
798 	if (!slots || !slots->used_slots)
799 		return -EINVAL;
800 
801 	mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
802 	if (!mgs)
803 		return -ENOMEM;
804 	kvm->arch.migration_state = mgs;
805 
806 	if (kvm->arch.use_cmma) {
807 		/*
808 		 * Get the last slot. They should be sorted by base_gfn, so the
809 		 * last slot is also the one at the end of the address space.
810 		 * We have verified above that at least one slot is present.
811 		 */
812 		ms = slots->memslots + slots->used_slots - 1;
813 		/* round up so we only use full longs */
814 		ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
815 		/* allocate enough bytes to store all the bits */
816 		mgs->pgste_bitmap = vmalloc(ram_pages / 8);
817 		if (!mgs->pgste_bitmap) {
818 			kfree(mgs);
819 			kvm->arch.migration_state = NULL;
820 			return -ENOMEM;
821 		}
822 
823 		mgs->bitmap_size = ram_pages;
824 		atomic64_set(&mgs->dirty_pages, ram_pages);
825 		/* mark all the pages in active slots as dirty */
826 		for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
827 			ms = slots->memslots + slotnr;
828 			bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
829 		}
830 
831 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
832 	}
833 	return 0;
834 }
835 
836 /*
837  * Must be called with kvm->lock to avoid races with ourselves and
838  * kvm_s390_vm_start_migration.
839  */
840 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
841 {
842 	struct kvm_s390_migration_state *mgs;
843 
844 	/* migration mode already disabled */
845 	if (!kvm->arch.migration_state)
846 		return 0;
847 	mgs = kvm->arch.migration_state;
848 	kvm->arch.migration_state = NULL;
849 
850 	if (kvm->arch.use_cmma) {
851 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
852 		vfree(mgs->pgste_bitmap);
853 	}
854 	kfree(mgs);
855 	return 0;
856 }
857 
858 static int kvm_s390_vm_set_migration(struct kvm *kvm,
859 				     struct kvm_device_attr *attr)
860 {
861 	int idx, res = -ENXIO;
862 
863 	mutex_lock(&kvm->lock);
864 	switch (attr->attr) {
865 	case KVM_S390_VM_MIGRATION_START:
866 		idx = srcu_read_lock(&kvm->srcu);
867 		res = kvm_s390_vm_start_migration(kvm);
868 		srcu_read_unlock(&kvm->srcu, idx);
869 		break;
870 	case KVM_S390_VM_MIGRATION_STOP:
871 		res = kvm_s390_vm_stop_migration(kvm);
872 		break;
873 	default:
874 		break;
875 	}
876 	mutex_unlock(&kvm->lock);
877 
878 	return res;
879 }
880 
881 static int kvm_s390_vm_get_migration(struct kvm *kvm,
882 				     struct kvm_device_attr *attr)
883 {
884 	u64 mig = (kvm->arch.migration_state != NULL);
885 
886 	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
887 		return -ENXIO;
888 
889 	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
890 		return -EFAULT;
891 	return 0;
892 }
893 
894 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
895 {
896 	struct kvm_s390_vm_tod_clock gtod;
897 
898 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
899 		return -EFAULT;
900 
901 	if (test_kvm_facility(kvm, 139))
902 		kvm_s390_set_tod_clock_ext(kvm, &gtod);
903 	else if (gtod.epoch_idx == 0)
904 		kvm_s390_set_tod_clock(kvm, gtod.tod);
905 	else
906 		return -EINVAL;
907 
908 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
909 		gtod.epoch_idx, gtod.tod);
910 
911 	return 0;
912 }
913 
914 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
915 {
916 	u8 gtod_high;
917 
918 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
919 					   sizeof(gtod_high)))
920 		return -EFAULT;
921 
922 	if (gtod_high != 0)
923 		return -EINVAL;
924 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
925 
926 	return 0;
927 }
928 
929 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
930 {
931 	u64 gtod;
932 
933 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
934 		return -EFAULT;
935 
936 	kvm_s390_set_tod_clock(kvm, gtod);
937 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
938 	return 0;
939 }
940 
941 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
942 {
943 	int ret;
944 
945 	if (attr->flags)
946 		return -EINVAL;
947 
948 	switch (attr->attr) {
949 	case KVM_S390_VM_TOD_EXT:
950 		ret = kvm_s390_set_tod_ext(kvm, attr);
951 		break;
952 	case KVM_S390_VM_TOD_HIGH:
953 		ret = kvm_s390_set_tod_high(kvm, attr);
954 		break;
955 	case KVM_S390_VM_TOD_LOW:
956 		ret = kvm_s390_set_tod_low(kvm, attr);
957 		break;
958 	default:
959 		ret = -ENXIO;
960 		break;
961 	}
962 	return ret;
963 }
964 
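/*
 * Compute the guest view of the extended TOD clock: add the per-VM epoch
 * (and epoch index) to the host clock, and carry into the epoch index if
 * the 64 bit TOD addition wrapped around.
 */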
965 static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
966 					struct kvm_s390_vm_tod_clock *gtod)
967 {
968 	struct kvm_s390_tod_clock_ext htod;
969 
970 	preempt_disable();
971 
972 	get_tod_clock_ext((char *)&htod);
973 
974 	gtod->tod = htod.tod + kvm->arch.epoch;
975 	gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
976 
977 	if (gtod->tod < htod.tod)
978 		gtod->epoch_idx += 1;
979 
980 	preempt_enable();
981 }
982 
983 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
984 {
985 	struct kvm_s390_vm_tod_clock gtod;
986 
987 	memset(&gtod, 0, sizeof(gtod));
988 
989 	if (test_kvm_facility(kvm, 139))
990 		kvm_s390_get_tod_clock_ext(kvm, &gtod);
991 	else
992 		gtod.tod = kvm_s390_get_tod_clock_fast(kvm);
993 
994 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
995 		return -EFAULT;
996 
997 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
998 		gtod.epoch_idx, gtod.tod);
999 	return 0;
1000 }
1001 
1002 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1003 {
1004 	u8 gtod_high = 0;
1005 
1006 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
1007 					 sizeof(gtod_high)))
1008 		return -EFAULT;
1009 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1010 
1011 	return 0;
1012 }
1013 
1014 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1015 {
1016 	u64 gtod;
1017 
1018 	gtod = kvm_s390_get_tod_clock_fast(kvm);
1019 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1020 		return -EFAULT;
1021 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1022 
1023 	return 0;
1024 }
1025 
1026 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1027 {
1028 	int ret;
1029 
1030 	if (attr->flags)
1031 		return -EINVAL;
1032 
1033 	switch (attr->attr) {
1034 	case KVM_S390_VM_TOD_EXT:
1035 		ret = kvm_s390_get_tod_ext(kvm, attr);
1036 		break;
1037 	case KVM_S390_VM_TOD_HIGH:
1038 		ret = kvm_s390_get_tod_high(kvm, attr);
1039 		break;
1040 	case KVM_S390_VM_TOD_LOW:
1041 		ret = kvm_s390_get_tod_low(kvm, attr);
1042 		break;
1043 	default:
1044 		ret = -ENXIO;
1045 		break;
1046 	}
1047 	return ret;
1048 }
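
/*
 * Illustrative sketch only (not part of this file): the TOD attributes above
 * are reached through the vm-level KVM_SET/GET_DEVICE_ATTR ioctls, e.g.:
 *
 *	__u64 tod = 0;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_TOD,
 *		.attr  = KVM_S390_VM_TOD_LOW,
 *		.addr  = (__u64)&tod,
 *	};
 *
 *	if (ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr))
 *		perror("KVM_GET_DEVICE_ATTR");
 */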
1049 
1050 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1051 {
1052 	struct kvm_s390_vm_cpu_processor *proc;
1053 	u16 lowest_ibc, unblocked_ibc;
1054 	int ret = 0;
1055 
1056 	mutex_lock(&kvm->lock);
1057 	if (kvm->created_vcpus) {
1058 		ret = -EBUSY;
1059 		goto out;
1060 	}
1061 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1062 	if (!proc) {
1063 		ret = -ENOMEM;
1064 		goto out;
1065 	}
1066 	if (!copy_from_user(proc, (void __user *)attr->addr,
1067 			    sizeof(*proc))) {
1068 		kvm->arch.model.cpuid = proc->cpuid;
1069 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
1070 		unblocked_ibc = sclp.ibc & 0xfff;
1071 		if (lowest_ibc && proc->ibc) {
1072 			if (proc->ibc > unblocked_ibc)
1073 				kvm->arch.model.ibc = unblocked_ibc;
1074 			else if (proc->ibc < lowest_ibc)
1075 				kvm->arch.model.ibc = lowest_ibc;
1076 			else
1077 				kvm->arch.model.ibc = proc->ibc;
1078 		}
1079 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
1080 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
1081 		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1082 			 kvm->arch.model.ibc,
1083 			 kvm->arch.model.cpuid);
1084 		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1085 			 kvm->arch.model.fac_list[0],
1086 			 kvm->arch.model.fac_list[1],
1087 			 kvm->arch.model.fac_list[2]);
1088 	} else
1089 		ret = -EFAULT;
1090 	kfree(proc);
1091 out:
1092 	mutex_unlock(&kvm->lock);
1093 	return ret;
1094 }
1095 
1096 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1097 				       struct kvm_device_attr *attr)
1098 {
1099 	struct kvm_s390_vm_cpu_feat data;
1100 
1101 	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1102 		return -EFAULT;
1103 	if (!bitmap_subset((unsigned long *) data.feat,
1104 			   kvm_s390_available_cpu_feat,
1105 			   KVM_S390_VM_CPU_FEAT_NR_BITS))
1106 		return -EINVAL;
1107 
1108 	mutex_lock(&kvm->lock);
1109 	if (kvm->created_vcpus) {
1110 		mutex_unlock(&kvm->lock);
1111 		return -EBUSY;
1112 	}
1113 	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1114 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1115 	mutex_unlock(&kvm->lock);
1116 	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1117 			 data.feat[0],
1118 			 data.feat[1],
1119 			 data.feat[2]);
1120 	return 0;
1121 }
1122 
1123 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1124 					  struct kvm_device_attr *attr)
1125 {
1126 	/*
1127 	 * Once supported by kernel + hw, we have to store the subfunctions
1128 	 * in kvm->arch and remember that user space configured them.
1129 	 */
1130 	return -ENXIO;
1131 }
1132 
1133 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1134 {
1135 	int ret = -ENXIO;
1136 
1137 	switch (attr->attr) {
1138 	case KVM_S390_VM_CPU_PROCESSOR:
1139 		ret = kvm_s390_set_processor(kvm, attr);
1140 		break;
1141 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1142 		ret = kvm_s390_set_processor_feat(kvm, attr);
1143 		break;
1144 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1145 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
1146 		break;
1147 	}
1148 	return ret;
1149 }
1150 
1151 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1152 {
1153 	struct kvm_s390_vm_cpu_processor *proc;
1154 	int ret = 0;
1155 
1156 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1157 	if (!proc) {
1158 		ret = -ENOMEM;
1159 		goto out;
1160 	}
1161 	proc->cpuid = kvm->arch.model.cpuid;
1162 	proc->ibc = kvm->arch.model.ibc;
1163 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1164 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1165 	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1166 		 kvm->arch.model.ibc,
1167 		 kvm->arch.model.cpuid);
1168 	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1169 		 kvm->arch.model.fac_list[0],
1170 		 kvm->arch.model.fac_list[1],
1171 		 kvm->arch.model.fac_list[2]);
1172 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1173 		ret = -EFAULT;
1174 	kfree(proc);
1175 out:
1176 	return ret;
1177 }
1178 
1179 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1180 {
1181 	struct kvm_s390_vm_cpu_machine *mach;
1182 	int ret = 0;
1183 
1184 	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1185 	if (!mach) {
1186 		ret = -ENOMEM;
1187 		goto out;
1188 	}
1189 	get_cpu_id((struct cpuid *) &mach->cpuid);
1190 	mach->ibc = sclp.ibc;
1191 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1192 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1193 	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1194 	       sizeof(S390_lowcore.stfle_fac_list));
1195 	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1196 		 kvm->arch.model.ibc,
1197 		 kvm->arch.model.cpuid);
1198 	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1199 		 mach->fac_mask[0],
1200 		 mach->fac_mask[1],
1201 		 mach->fac_mask[2]);
1202 	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1203 		 mach->fac_list[0],
1204 		 mach->fac_list[1],
1205 		 mach->fac_list[2]);
1206 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1207 		ret = -EFAULT;
1208 	kfree(mach);
1209 out:
1210 	return ret;
1211 }
1212 
1213 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1214 				       struct kvm_device_attr *attr)
1215 {
1216 	struct kvm_s390_vm_cpu_feat data;
1217 
1218 	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1219 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1220 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1221 		return -EFAULT;
1222 	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1223 			 data.feat[0],
1224 			 data.feat[1],
1225 			 data.feat[2]);
1226 	return 0;
1227 }
1228 
1229 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1230 				     struct kvm_device_attr *attr)
1231 {
1232 	struct kvm_s390_vm_cpu_feat data;
1233 
1234 	bitmap_copy((unsigned long *) data.feat,
1235 		    kvm_s390_available_cpu_feat,
1236 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1237 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1238 		return -EFAULT;
1239 	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1240 			 data.feat[0],
1241 			 data.feat[1],
1242 			 data.feat[2]);
1243 	return 0;
1244 }
1245 
1246 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1247 					  struct kvm_device_attr *attr)
1248 {
1249 	/*
1250 	 * Once we can actually configure subfunctions (kernel + hw support),
1251 	 * we have to check if they were already set by user space, if so copy
1252 	 * them from kvm->arch.
1253 	 */
1254 	return -ENXIO;
1255 }
1256 
1257 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1258 					struct kvm_device_attr *attr)
1259 {
1260 	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1261 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1262 		return -EFAULT;
1263 	return 0;
1264 }
1265 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1266 {
1267 	int ret = -ENXIO;
1268 
1269 	switch (attr->attr) {
1270 	case KVM_S390_VM_CPU_PROCESSOR:
1271 		ret = kvm_s390_get_processor(kvm, attr);
1272 		break;
1273 	case KVM_S390_VM_CPU_MACHINE:
1274 		ret = kvm_s390_get_machine(kvm, attr);
1275 		break;
1276 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1277 		ret = kvm_s390_get_processor_feat(kvm, attr);
1278 		break;
1279 	case KVM_S390_VM_CPU_MACHINE_FEAT:
1280 		ret = kvm_s390_get_machine_feat(kvm, attr);
1281 		break;
1282 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1283 		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1284 		break;
1285 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1286 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1287 		break;
1288 	}
1289 	return ret;
1290 }
1291 
1292 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1293 {
1294 	int ret;
1295 
1296 	switch (attr->group) {
1297 	case KVM_S390_VM_MEM_CTRL:
1298 		ret = kvm_s390_set_mem_control(kvm, attr);
1299 		break;
1300 	case KVM_S390_VM_TOD:
1301 		ret = kvm_s390_set_tod(kvm, attr);
1302 		break;
1303 	case KVM_S390_VM_CPU_MODEL:
1304 		ret = kvm_s390_set_cpu_model(kvm, attr);
1305 		break;
1306 	case KVM_S390_VM_CRYPTO:
1307 		ret = kvm_s390_vm_set_crypto(kvm, attr);
1308 		break;
1309 	case KVM_S390_VM_MIGRATION:
1310 		ret = kvm_s390_vm_set_migration(kvm, attr);
1311 		break;
1312 	default:
1313 		ret = -ENXIO;
1314 		break;
1315 	}
1316 
1317 	return ret;
1318 }
1319 
1320 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1321 {
1322 	int ret;
1323 
1324 	switch (attr->group) {
1325 	case KVM_S390_VM_MEM_CTRL:
1326 		ret = kvm_s390_get_mem_control(kvm, attr);
1327 		break;
1328 	case KVM_S390_VM_TOD:
1329 		ret = kvm_s390_get_tod(kvm, attr);
1330 		break;
1331 	case KVM_S390_VM_CPU_MODEL:
1332 		ret = kvm_s390_get_cpu_model(kvm, attr);
1333 		break;
1334 	case KVM_S390_VM_MIGRATION:
1335 		ret = kvm_s390_vm_get_migration(kvm, attr);
1336 		break;
1337 	default:
1338 		ret = -ENXIO;
1339 		break;
1340 	}
1341 
1342 	return ret;
1343 }
1344 
1345 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1346 {
1347 	int ret;
1348 
1349 	switch (attr->group) {
1350 	case KVM_S390_VM_MEM_CTRL:
1351 		switch (attr->attr) {
1352 		case KVM_S390_VM_MEM_ENABLE_CMMA:
1353 		case KVM_S390_VM_MEM_CLR_CMMA:
1354 			ret = sclp.has_cmma ? 0 : -ENXIO;
1355 			break;
1356 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1357 			ret = 0;
1358 			break;
1359 		default:
1360 			ret = -ENXIO;
1361 			break;
1362 		}
1363 		break;
1364 	case KVM_S390_VM_TOD:
1365 		switch (attr->attr) {
1366 		case KVM_S390_VM_TOD_LOW:
1367 		case KVM_S390_VM_TOD_HIGH:
1368 			ret = 0;
1369 			break;
1370 		default:
1371 			ret = -ENXIO;
1372 			break;
1373 		}
1374 		break;
1375 	case KVM_S390_VM_CPU_MODEL:
1376 		switch (attr->attr) {
1377 		case KVM_S390_VM_CPU_PROCESSOR:
1378 		case KVM_S390_VM_CPU_MACHINE:
1379 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1380 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1381 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1382 			ret = 0;
1383 			break;
1384 		/* configuring subfunctions is not supported yet */
1385 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1386 		default:
1387 			ret = -ENXIO;
1388 			break;
1389 		}
1390 		break;
1391 	case KVM_S390_VM_CRYPTO:
1392 		switch (attr->attr) {
1393 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1394 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1395 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1396 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1397 			ret = 0;
1398 			break;
1399 		default:
1400 			ret = -ENXIO;
1401 			break;
1402 		}
1403 		break;
1404 	case KVM_S390_VM_MIGRATION:
1405 		ret = 0;
1406 		break;
1407 	default:
1408 		ret = -ENXIO;
1409 		break;
1410 	}
1411 
1412 	return ret;
1413 }
1414 
1415 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1416 {
1417 	uint8_t *keys;
1418 	uint64_t hva;
1419 	int srcu_idx, i, r = 0;
1420 
1421 	if (args->flags != 0)
1422 		return -EINVAL;
1423 
1424 	/* Is this guest using storage keys? */
1425 	if (!mm_use_skey(current->mm))
1426 		return KVM_S390_GET_SKEYS_NONE;
1427 
1428 	/* Enforce sane limit on memory allocation */
1429 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1430 		return -EINVAL;
1431 
1432 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1433 	if (!keys)
1434 		return -ENOMEM;
1435 
1436 	down_read(&current->mm->mmap_sem);
1437 	srcu_idx = srcu_read_lock(&kvm->srcu);
1438 	for (i = 0; i < args->count; i++) {
1439 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1440 		if (kvm_is_error_hva(hva)) {
1441 			r = -EFAULT;
1442 			break;
1443 		}
1444 
1445 		r = get_guest_storage_key(current->mm, hva, &keys[i]);
1446 		if (r)
1447 			break;
1448 	}
1449 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1450 	up_read(&current->mm->mmap_sem);
1451 
1452 	if (!r) {
1453 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1454 				 sizeof(uint8_t) * args->count);
1455 		if (r)
1456 			r = -EFAULT;
1457 	}
1458 
1459 	kvfree(keys);
1460 	return r;
1461 }
1462 
1463 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1464 {
1465 	uint8_t *keys;
1466 	uint64_t hva;
1467 	int srcu_idx, i, r = 0;
1468 
1469 	if (args->flags != 0)
1470 		return -EINVAL;
1471 
1472 	/* Enforce sane limit on memory allocation */
1473 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1474 		return -EINVAL;
1475 
1476 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1477 	if (!keys)
1478 		return -ENOMEM;
1479 
1480 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1481 			   sizeof(uint8_t) * args->count);
1482 	if (r) {
1483 		r = -EFAULT;
1484 		goto out;
1485 	}
1486 
1487 	/* Enable storage key handling for the guest */
1488 	r = s390_enable_skey();
1489 	if (r)
1490 		goto out;
1491 
1492 	down_read(&current->mm->mmap_sem);
1493 	srcu_idx = srcu_read_lock(&kvm->srcu);
1494 	for (i = 0; i < args->count; i++) {
1495 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1496 		if (kvm_is_error_hva(hva)) {
1497 			r = -EFAULT;
1498 			break;
1499 		}
1500 
1501 		/* Lowest order bit is reserved */
1502 		if (keys[i] & 0x01) {
1503 			r = -EINVAL;
1504 			break;
1505 		}
1506 
1507 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1508 		if (r)
1509 			break;
1510 	}
1511 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1512 	up_read(&current->mm->mmap_sem);
1513 out:
1514 	kvfree(keys);
1515 	return r;
1516 }
1517 
1518 /*
1519  * Base address and length must be sent at the start of each block; it is
1520  * therefore cheaper to send some clean data, as long as it is less than the
1521  * size of two longs.
1522  */
1523 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1524 /* for consistency */
1525 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1526 
1527 /*
1528  * This function searches for the next page with dirty CMMA attributes, and
1529  * saves the attributes in the buffer up to either the end of the buffer or
1530  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1531  * no trailing clean bytes are saved.
1532  * In case no dirty bits were found, or if CMMA was not enabled or used, the
1533  * output buffer will indicate 0 as length.
1534  */
1535 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1536 				  struct kvm_s390_cmma_log *args)
1537 {
1538 	struct kvm_s390_migration_state *s = kvm->arch.migration_state;
1539 	unsigned long bufsize, hva, pgstev, i, next, cur;
1540 	int srcu_idx, peek, r = 0, rr;
1541 	u8 *res;
1542 
1543 	cur = args->start_gfn;
1544 	i = next = pgstev = 0;
1545 
1546 	if (unlikely(!kvm->arch.use_cmma))
1547 		return -ENXIO;
1548 	/* Invalid/unsupported flags were specified */
1549 	if (args->flags & ~KVM_S390_CMMA_PEEK)
1550 		return -EINVAL;
1551 	/* Migration mode query, and we are not doing a migration */
1552 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1553 	if (!peek && !s)
1554 		return -EINVAL;
1555 	/* CMMA is disabled or was not used, or the buffer has length zero */
1556 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1557 	if (!bufsize || !kvm->mm->context.use_cmma) {
1558 		memset(args, 0, sizeof(*args));
1559 		return 0;
1560 	}
1561 
1562 	if (!peek) {
1563 		/* We are not peeking, and there are no dirty pages */
1564 		if (!atomic64_read(&s->dirty_pages)) {
1565 			memset(args, 0, sizeof(*args));
1566 			return 0;
1567 		}
1568 		cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
1569 				    args->start_gfn);
1570 		if (cur >= s->bitmap_size)	/* nothing found, loop back */
1571 			cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
1572 		if (cur >= s->bitmap_size) {	/* again! (very unlikely) */
1573 			memset(args, 0, sizeof(*args));
1574 			return 0;
1575 		}
1576 		next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
1577 	}
1578 
1579 	res = vmalloc(bufsize);
1580 	if (!res)
1581 		return -ENOMEM;
1582 
1583 	args->start_gfn = cur;
1584 
1585 	down_read(&kvm->mm->mmap_sem);
1586 	srcu_idx = srcu_read_lock(&kvm->srcu);
1587 	while (i < bufsize) {
1588 		hva = gfn_to_hva(kvm, cur);
1589 		if (kvm_is_error_hva(hva)) {
1590 			r = -EFAULT;
1591 			break;
1592 		}
1593 		/* decrement only if we actually flipped the bit to 0 */
1594 		if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
1595 			atomic64_dec(&s->dirty_pages);
1596 		r = get_pgste(kvm->mm, hva, &pgstev);
1597 		if (r < 0)
1598 			pgstev = 0;
1599 		/* save the value */
1600 		res[i++] = (pgstev >> 24) & 0x43;
1601 		/*
1602 		 * if the next bit is too far away, stop.
1603 		 * if we reached the previous "next", find the next one
1604 		 */
1605 		if (!peek) {
1606 			if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
1607 				break;
1608 			if (cur == next)
1609 				next = find_next_bit(s->pgste_bitmap,
1610 						     s->bitmap_size, cur + 1);
1611 			/* reached the end of the bitmap or of the buffer, stop */
1612 			if ((next >= s->bitmap_size) ||
1613 			    (next >= args->start_gfn + bufsize))
1614 				break;
1615 		}
1616 		cur++;
1617 	}
1618 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1619 	up_read(&kvm->mm->mmap_sem);
1620 	args->count = i;
1621 	args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
1622 
1623 	rr = copy_to_user((void __user *)args->values, res, args->count);
1624 	if (rr)
1625 		r = -EFAULT;
1626 
1627 	vfree(res);
1628 	return r;
1629 }
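
/*
 * Illustrative sketch only (not part of this file): user space retrieves the
 * CMMA values with the KVM_S390_GET_CMMA_BITS vm ioctl, roughly:
 *
 *	__u8 buf[4096];
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count = sizeof(buf),
 *		.flags = KVM_S390_CMMA_PEEK,
 *		.values = (__u64)buf,
 *	};
 *
 *	if (ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log))
 *		perror("KVM_S390_GET_CMMA_BITS");
 */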
1630 
1631 /*
1632  * This function sets the CMMA attributes for the given pages. If the input
1633  * buffer has zero length, no action is taken, otherwise the attributes are
1634  * set and the mm->context.use_cmma flag is set.
1635  */
1636 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1637 				  const struct kvm_s390_cmma_log *args)
1638 {
1639 	unsigned long hva, mask, pgstev, i;
1640 	uint8_t *bits;
1641 	int srcu_idx, r = 0;
1642 
1643 	mask = args->mask;
1644 
1645 	if (!kvm->arch.use_cmma)
1646 		return -ENXIO;
1647 	/* invalid/unsupported flags */
1648 	if (args->flags != 0)
1649 		return -EINVAL;
1650 	/* Enforce sane limit on memory allocation */
1651 	if (args->count > KVM_S390_CMMA_SIZE_MAX)
1652 		return -EINVAL;
1653 	/* Nothing to do */
1654 	if (args->count == 0)
1655 		return 0;
1656 
1657 	bits = vmalloc(sizeof(*bits) * args->count);
1658 	if (!bits)
1659 		return -ENOMEM;
1660 
1661 	r = copy_from_user(bits, (void __user *)args->values, args->count);
1662 	if (r) {
1663 		r = -EFAULT;
1664 		goto out;
1665 	}
1666 
1667 	down_read(&kvm->mm->mmap_sem);
1668 	srcu_idx = srcu_read_lock(&kvm->srcu);
1669 	for (i = 0; i < args->count; i++) {
1670 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1671 		if (kvm_is_error_hva(hva)) {
1672 			r = -EFAULT;
1673 			break;
1674 		}
1675 
1676 		pgstev = bits[i];
1677 		pgstev = pgstev << 24;
1678 		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
1679 		set_pgste_bits(kvm->mm, hva, mask, pgstev);
1680 	}
1681 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1682 	up_read(&kvm->mm->mmap_sem);
1683 
1684 	if (!kvm->mm->context.use_cmma) {
1685 		down_write(&kvm->mm->mmap_sem);
1686 		kvm->mm->context.use_cmma = 1;
1687 		up_write(&kvm->mm->mmap_sem);
1688 	}
1689 out:
1690 	vfree(bits);
1691 	return r;
1692 }
1693 
1694 long kvm_arch_vm_ioctl(struct file *filp,
1695 		       unsigned int ioctl, unsigned long arg)
1696 {
1697 	struct kvm *kvm = filp->private_data;
1698 	void __user *argp = (void __user *)arg;
1699 	struct kvm_device_attr attr;
1700 	int r;
1701 
1702 	switch (ioctl) {
1703 	case KVM_S390_INTERRUPT: {
1704 		struct kvm_s390_interrupt s390int;
1705 
1706 		r = -EFAULT;
1707 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
1708 			break;
1709 		r = kvm_s390_inject_vm(kvm, &s390int);
1710 		break;
1711 	}
1712 	case KVM_ENABLE_CAP: {
1713 		struct kvm_enable_cap cap;
1714 		r = -EFAULT;
1715 		if (copy_from_user(&cap, argp, sizeof(cap)))
1716 			break;
1717 		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1718 		break;
1719 	}
1720 	case KVM_CREATE_IRQCHIP: {
1721 		struct kvm_irq_routing_entry routing;
1722 
1723 		r = -EINVAL;
1724 		if (kvm->arch.use_irqchip) {
1725 			/* Set up dummy routing. */
1726 			memset(&routing, 0, sizeof(routing));
1727 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1728 		}
1729 		break;
1730 	}
1731 	case KVM_SET_DEVICE_ATTR: {
1732 		r = -EFAULT;
1733 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1734 			break;
1735 		r = kvm_s390_vm_set_attr(kvm, &attr);
1736 		break;
1737 	}
1738 	case KVM_GET_DEVICE_ATTR: {
1739 		r = -EFAULT;
1740 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1741 			break;
1742 		r = kvm_s390_vm_get_attr(kvm, &attr);
1743 		break;
1744 	}
1745 	case KVM_HAS_DEVICE_ATTR: {
1746 		r = -EFAULT;
1747 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1748 			break;
1749 		r = kvm_s390_vm_has_attr(kvm, &attr);
1750 		break;
1751 	}
1752 	case KVM_S390_GET_SKEYS: {
1753 		struct kvm_s390_skeys args;
1754 
1755 		r = -EFAULT;
1756 		if (copy_from_user(&args, argp,
1757 				   sizeof(struct kvm_s390_skeys)))
1758 			break;
1759 		r = kvm_s390_get_skeys(kvm, &args);
1760 		break;
1761 	}
1762 	case KVM_S390_SET_SKEYS: {
1763 		struct kvm_s390_skeys args;
1764 
1765 		r = -EFAULT;
1766 		if (copy_from_user(&args, argp,
1767 				   sizeof(struct kvm_s390_skeys)))
1768 			break;
1769 		r = kvm_s390_set_skeys(kvm, &args);
1770 		break;
1771 	}
1772 	case KVM_S390_GET_CMMA_BITS: {
1773 		struct kvm_s390_cmma_log args;
1774 
1775 		r = -EFAULT;
1776 		if (copy_from_user(&args, argp, sizeof(args)))
1777 			break;
1778 		r = kvm_s390_get_cmma_bits(kvm, &args);
1779 		if (!r) {
1780 			r = copy_to_user(argp, &args, sizeof(args));
1781 			if (r)
1782 				r = -EFAULT;
1783 		}
1784 		break;
1785 	}
1786 	case KVM_S390_SET_CMMA_BITS: {
1787 		struct kvm_s390_cmma_log args;
1788 
1789 		r = -EFAULT;
1790 		if (copy_from_user(&args, argp, sizeof(args)))
1791 			break;
1792 		r = kvm_s390_set_cmma_bits(kvm, &args);
1793 		break;
1794 	}
1795 	default:
1796 		r = -ENOTTY;
1797 	}
1798 
1799 	return r;
1800 }
1801 
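/*
 * Query the AP configuration information (QCI) via PQAP. The instruction is
 * emitted as a raw opcode (0xb2af) and fills a 128 byte buffer; the returned
 * condition code is non-zero if the query failed.
 */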
1802 static int kvm_s390_query_ap_config(u8 *config)
1803 {
1804 	u32 fcn_code = 0x04000000UL;
1805 	u32 cc = 0;
1806 
1807 	memset(config, 0, 128);
1808 	asm volatile(
1809 		"lgr 0,%1\n"
1810 		"lgr 2,%2\n"
1811 		".long 0xb2af0000\n"		/* PQAP(QCI) */
1812 		"0: ipm %0\n"
1813 		"srl %0,28\n"
1814 		"1:\n"
1815 		EX_TABLE(0b, 1b)
1816 		: "+r" (cc)
1817 		: "r" (fcn_code), "r" (config)
1818 		: "cc", "0", "2", "memory"
1819 	);
1820 
1821 	return cc;
1822 }
1823 
1824 static int kvm_s390_apxa_installed(void)
1825 {
1826 	u8 config[128];
1827 	int cc;
1828 
1829 	if (test_facility(12)) {
1830 		cc = kvm_s390_query_ap_config(config);
1831 
1832 		if (cc)
1833 			pr_err("PQAP(QCI) failed with cc=%d", cc);
1834 		else
1835 			return config[0] & 0x40;
1836 	}
1837 
1838 	return 0;
1839 }
1840 
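/*
 * Point the crypto control block designation at the CRYCB and select its
 * format: format 2 when the APXA facility is installed, format 1 otherwise.
 */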
1841 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1842 {
1843 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1844 
1845 	if (kvm_s390_apxa_installed())
1846 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1847 	else
1848 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1849 }
1850 
1851 static u64 kvm_s390_get_initial_cpuid(void)
1852 {
1853 	struct cpuid cpuid;
1854 
1855 	get_cpu_id(&cpuid);
1856 	cpuid.version = 0xff;
1857 	return *((u64 *) &cpuid);
1858 }
1859 
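/*
 * Set up the crypto control block for a new VM (only if the guest has
 * facility 76 / MSA3): AES and DEA protected key wrapping are enabled by
 * default with freshly generated random wrapping key masks.
 */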
1860 static void kvm_s390_crypto_init(struct kvm *kvm)
1861 {
1862 	if (!test_kvm_facility(kvm, 76))
1863 		return;
1864 
1865 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1866 	kvm_s390_set_crycb_format(kvm);
1867 
1868 	/* Enable AES/DEA protected key functions by default */
1869 	kvm->arch.crypto.aes_kw = 1;
1870 	kvm->arch.crypto.dea_kw = 1;
1871 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1872 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1873 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1874 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1875 }
1876 
1877 static void sca_dispose(struct kvm *kvm)
1878 {
1879 	if (kvm->arch.use_esca)
1880 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1881 	else
1882 		free_page((unsigned long)(kvm->arch.sca));
1883 	kvm->arch.sca = NULL;
1884 }
1885 
1886 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1887 {
1888 	gfp_t alloc_flags = GFP_KERNEL;
1889 	int i, rc;
1890 	char debug_name[16];
1891 	static unsigned long sca_offset;
1892 
1893 	rc = -EINVAL;
1894 #ifdef CONFIG_KVM_S390_UCONTROL
1895 	if (type & ~KVM_VM_S390_UCONTROL)
1896 		goto out_err;
1897 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1898 		goto out_err;
1899 #else
1900 	if (type)
1901 		goto out_err;
1902 #endif
1903 
1904 	rc = s390_enable_sie();
1905 	if (rc)
1906 		goto out_err;
1907 
1908 	rc = -ENOMEM;
1909 
1910 	kvm->arch.use_esca = 0; /* start with basic SCA */
1911 	if (!sclp.has_64bscao)
1912 		alloc_flags |= GFP_DMA;
1913 	rwlock_init(&kvm->arch.sca_lock);
1914 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1915 	if (!kvm->arch.sca)
1916 		goto out_err;
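	/*
	 * Stagger the SCAs of different VMs within their page in 16-byte
	 * steps so that they do not all start on the same cache lines.
	 */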
1917 	spin_lock(&kvm_lock);
1918 	sca_offset += 16;
1919 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1920 		sca_offset = 0;
1921 	kvm->arch.sca = (struct bsca_block *)
1922 			((char *) kvm->arch.sca + sca_offset);
1923 	spin_unlock(&kvm_lock);
1924 
1925 	sprintf(debug_name, "kvm-%u", current->pid);
1926 
1927 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1928 	if (!kvm->arch.dbf)
1929 		goto out_err;
1930 
1931 	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
1932 	kvm->arch.sie_page2 =
1933 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1934 	if (!kvm->arch.sie_page2)
1935 		goto out_err;
1936 
1937 	/* Populate the facility mask initially. */
1938 	memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1939 	       sizeof(S390_lowcore.stfle_fac_list));
1940 	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1941 		if (i < kvm_s390_fac_list_mask_size())
1942 			kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1943 		else
1944 			kvm->arch.model.fac_mask[i] = 0UL;
1945 	}
1946 
1947 	/* Populate the facility list initially. */
1948 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1949 	memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1950 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1951 
1952 	/* we are always in czam mode - even on pre-z14 machines */
1953 	set_kvm_facility(kvm->arch.model.fac_mask, 138);
1954 	set_kvm_facility(kvm->arch.model.fac_list, 138);
1955 	/* we emulate STHYI in kvm */
1956 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
1957 	set_kvm_facility(kvm->arch.model.fac_list, 74);
1958 	if (MACHINE_HAS_TLB_GUEST) {
1959 		set_kvm_facility(kvm->arch.model.fac_mask, 147);
1960 		set_kvm_facility(kvm->arch.model.fac_list, 147);
1961 	}
1962 
1963 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1964 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1965 
1966 	kvm_s390_crypto_init(kvm);
1967 
1968 	mutex_init(&kvm->arch.float_int.ais_lock);
1969 	kvm->arch.float_int.simm = 0;
1970 	kvm->arch.float_int.nimm = 0;
1971 	spin_lock_init(&kvm->arch.float_int.lock);
1972 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
1973 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1974 	init_waitqueue_head(&kvm->arch.ipte_wq);
1975 	mutex_init(&kvm->arch.ipte_mutex);
1976 
1977 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1978 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
1979 
1980 	if (type & KVM_VM_S390_UCONTROL) {
1981 		kvm->arch.gmap = NULL;
1982 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1983 	} else {
1984 		if (sclp.hamax == U64_MAX)
1985 			kvm->arch.mem_limit = TASK_SIZE_MAX;
1986 		else
1987 			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
1988 						    sclp.hamax + 1);
1989 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1990 		if (!kvm->arch.gmap)
1991 			goto out_err;
1992 		kvm->arch.gmap->private = kvm;
1993 		kvm->arch.gmap->pfault_enabled = 0;
1994 	}
1995 
1996 	kvm->arch.css_support = 0;
1997 	kvm->arch.use_irqchip = 0;
1998 	kvm->arch.epoch = 0;
1999 
2000 	spin_lock_init(&kvm->arch.start_stop_lock);
2001 	kvm_s390_vsie_init(kvm);
2002 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2003 
2004 	return 0;
2005 out_err:
2006 	free_page((unsigned long)kvm->arch.sie_page2);
2007 	debug_unregister(kvm->arch.dbf);
2008 	sca_dispose(kvm);
2009 	KVM_EVENT(3, "creation of vm failed: %d", rc);
2010 	return rc;
2011 }
2012 
2013 bool kvm_arch_has_vcpu_debugfs(void)
2014 {
2015 	return false;
2016 }
2017 
2018 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
2019 {
2020 	return 0;
2021 }
2022 
2023 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2024 {
2025 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2026 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2027 	kvm_s390_clear_local_irqs(vcpu);
2028 	kvm_clear_async_pf_completion_queue(vcpu);
2029 	if (!kvm_is_ucontrol(vcpu->kvm))
2030 		sca_del_vcpu(vcpu);
2031 
2032 	if (kvm_is_ucontrol(vcpu->kvm))
2033 		gmap_remove(vcpu->arch.gmap);
2034 
2035 	if (vcpu->kvm->arch.use_cmma)
2036 		kvm_s390_vcpu_unsetup_cmma(vcpu);
2037 	free_page((unsigned long)(vcpu->arch.sie_block));
2038 
2039 	kvm_vcpu_uninit(vcpu);
2040 	kmem_cache_free(kvm_vcpu_cache, vcpu);
2041 }
2042 
2043 static void kvm_free_vcpus(struct kvm *kvm)
2044 {
2045 	unsigned int i;
2046 	struct kvm_vcpu *vcpu;
2047 
2048 	kvm_for_each_vcpu(i, vcpu, kvm)
2049 		kvm_arch_vcpu_destroy(vcpu);
2050 
2051 	mutex_lock(&kvm->lock);
2052 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2053 		kvm->vcpus[i] = NULL;
2054 
2055 	atomic_set(&kvm->online_vcpus, 0);
2056 	mutex_unlock(&kvm->lock);
2057 }
2058 
2059 void kvm_arch_destroy_vm(struct kvm *kvm)
2060 {
2061 	kvm_free_vcpus(kvm);
2062 	sca_dispose(kvm);
2063 	debug_unregister(kvm->arch.dbf);
2064 	free_page((unsigned long)kvm->arch.sie_page2);
2065 	if (!kvm_is_ucontrol(kvm))
2066 		gmap_remove(kvm->arch.gmap);
2067 	kvm_s390_destroy_adapters(kvm);
2068 	kvm_s390_clear_float_irqs(kvm);
2069 	kvm_s390_vsie_destroy(kvm);
2070 	if (kvm->arch.migration_state) {
2071 		vfree(kvm->arch.migration_state->pgste_bitmap);
2072 		kfree(kvm->arch.migration_state);
2073 	}
2074 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2075 }
2076 
2077 /* Section: vcpu related */
2078 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2079 {
2080 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2081 	if (!vcpu->arch.gmap)
2082 		return -ENOMEM;
2083 	vcpu->arch.gmap->private = vcpu->kvm;
2084 
2085 	return 0;
2086 }
2087 
2088 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2089 {
2090 	if (!kvm_s390_use_sca_entries())
2091 		return;
2092 	read_lock(&vcpu->kvm->arch.sca_lock);
2093 	if (vcpu->kvm->arch.use_esca) {
2094 		struct esca_block *sca = vcpu->kvm->arch.sca;
2095 
2096 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2097 		sca->cpu[vcpu->vcpu_id].sda = 0;
2098 	} else {
2099 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2100 
2101 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2102 		sca->cpu[vcpu->vcpu_id].sda = 0;
2103 	}
2104 	read_unlock(&vcpu->kvm->arch.sca_lock);
2105 }
2106 
2107 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2108 {
2109 	if (!kvm_s390_use_sca_entries()) {
2110 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2111 
2112 		/* we still need the basic sca for the ipte control */
2113 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2114 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2115 	}
2116 	read_lock(&vcpu->kvm->arch.sca_lock);
2117 	if (vcpu->kvm->arch.use_esca) {
2118 		struct esca_block *sca = vcpu->kvm->arch.sca;
2119 
2120 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2121 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2122 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2123 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2124 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2125 	} else {
2126 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2127 
2128 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2129 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2130 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2131 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2132 	}
2133 	read_unlock(&vcpu->kvm->arch.sca_lock);
2134 }
2135 
2136 /* Basic SCA to Extended SCA data copy routines */
2137 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2138 {
2139 	d->sda = s->sda;
2140 	d->sigp_ctrl.c = s->sigp_ctrl.c;
2141 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2142 }
2143 
2144 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2145 {
2146 	int i;
2147 
2148 	d->ipte_control = s->ipte_control;
2149 	d->mcn[0] = s->mcn;
2150 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2151 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2152 }
2153 
2154 static int sca_switch_to_extended(struct kvm *kvm)
2155 {
2156 	struct bsca_block *old_sca = kvm->arch.sca;
2157 	struct esca_block *new_sca;
2158 	struct kvm_vcpu *vcpu;
2159 	unsigned int vcpu_idx;
2160 	u32 scaol, scaoh;
2161 
2162 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2163 	if (!new_sca)
2164 		return -ENOMEM;
2165 
2166 	scaoh = (u32)((u64)(new_sca) >> 32);
2167 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
2168 
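	/*
	 * Block all VCPUs and hold the SCA lock for writing while the SIE
	 * blocks are switched over, so nobody can still use the old SCA.
	 */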
2169 	kvm_s390_vcpu_block_all(kvm);
2170 	write_lock(&kvm->arch.sca_lock);
2171 
2172 	sca_copy_b_to_e(new_sca, old_sca);
2173 
2174 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2175 		vcpu->arch.sie_block->scaoh = scaoh;
2176 		vcpu->arch.sie_block->scaol = scaol;
2177 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2178 	}
2179 	kvm->arch.sca = new_sca;
2180 	kvm->arch.use_esca = 1;
2181 
2182 	write_unlock(&kvm->arch.sca_lock);
2183 	kvm_s390_vcpu_unblock_all(kvm);
2184 
2185 	free_page((unsigned long)old_sca);
2186 
2187 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2188 		 old_sca, kvm->arch.sca);
2189 	return 0;
2190 }
2191 
2192 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2193 {
2194 	int rc;
2195 
2196 	if (!kvm_s390_use_sca_entries()) {
2197 		if (id < KVM_MAX_VCPUS)
2198 			return true;
2199 		return false;
2200 	}
2201 	if (id < KVM_S390_BSCA_CPU_SLOTS)
2202 		return true;
2203 	if (!sclp.has_esca || !sclp.has_64bscao)
2204 		return false;
2205 
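	/* id does not fit into the basic SCA -> switch the VM to the ESCA */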
2206 	mutex_lock(&kvm->lock);
2207 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2208 	mutex_unlock(&kvm->lock);
2209 
2210 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2211 }
2212 
2213 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2214 {
2215 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2216 	kvm_clear_async_pf_completion_queue(vcpu);
2217 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2218 				    KVM_SYNC_GPRS |
2219 				    KVM_SYNC_ACRS |
2220 				    KVM_SYNC_CRS |
2221 				    KVM_SYNC_ARCH0 |
2222 				    KVM_SYNC_PFAULT;
2223 	kvm_s390_set_prefix(vcpu, 0);
2224 	if (test_kvm_facility(vcpu->kvm, 64))
2225 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2226 	if (test_kvm_facility(vcpu->kvm, 133))
2227 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2228 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
2229 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2230 	 */
2231 	if (MACHINE_HAS_VX)
2232 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2233 	else
2234 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2235 
2236 	if (kvm_is_ucontrol(vcpu->kvm))
2237 		return __kvm_ucontrol_vcpu_init(vcpu);
2238 
2239 	return 0;
2240 }
2241 
2242 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2243 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2244 {
2245 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2246 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2247 	vcpu->arch.cputm_start = get_tod_clock_fast();
2248 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2249 }
2250 
2251 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2252 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2253 {
2254 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2255 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2256 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2257 	vcpu->arch.cputm_start = 0;
2258 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2259 }
2260 
2261 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2262 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2263 {
2264 	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2265 	vcpu->arch.cputm_enabled = true;
2266 	__start_cpu_timer_accounting(vcpu);
2267 }
2268 
2269 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2270 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2271 {
2272 	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2273 	__stop_cpu_timer_accounting(vcpu);
2274 	vcpu->arch.cputm_enabled = false;
2275 }
2276 
2277 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2278 {
2279 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2280 	__enable_cpu_timer_accounting(vcpu);
2281 	preempt_enable();
2282 }
2283 
2284 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2285 {
2286 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2287 	__disable_cpu_timer_accounting(vcpu);
2288 	preempt_enable();
2289 }
2290 
2291 /* set the cpu timer - may only be called from the VCPU thread itself */
2292 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2293 {
2294 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2295 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2296 	if (vcpu->arch.cputm_enabled)
2297 		vcpu->arch.cputm_start = get_tod_clock_fast();
2298 	vcpu->arch.sie_block->cputm = cputm;
2299 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2300 	preempt_enable();
2301 }
2302 
2303 /* update and get the cpu timer - can also be called from other VCPU threads */
2304 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2305 {
2306 	unsigned int seq;
2307 	__u64 value;
2308 
2309 	if (unlikely(!vcpu->arch.cputm_enabled))
2310 		return vcpu->arch.sie_block->cputm;
2311 
2312 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2313 	do {
2314 		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2315 		/*
2316 		 * If the writer would ever execute a read in the critical
2317 		 * section, e.g. in irq context, we have a deadlock.
2318 		 */
2319 		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2320 		value = vcpu->arch.sie_block->cputm;
2321 		/* if cputm_start is 0, accounting is being started/stopped */
2322 		if (likely(vcpu->arch.cputm_start))
2323 			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2324 	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2325 	preempt_enable();
2326 	return value;
2327 }
2328 
2329 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2330 {
2332 	gmap_enable(vcpu->arch.enabled_gmap);
2333 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
2334 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2335 		__start_cpu_timer_accounting(vcpu);
2336 	vcpu->cpu = cpu;
2337 }
2338 
2339 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2340 {
2341 	vcpu->cpu = -1;
2342 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2343 		__stop_cpu_timer_accounting(vcpu);
2344 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
2345 	vcpu->arch.enabled_gmap = gmap_get_enabled();
2346 	gmap_disable(vcpu->arch.enabled_gmap);
2348 }
2349 
2350 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2351 {
2352 	/* this equals initial cpu reset in pop, but we don't switch to ESA */
2353 	vcpu->arch.sie_block->gpsw.mask = 0UL;
2354 	vcpu->arch.sie_block->gpsw.addr = 0UL;
2355 	kvm_s390_set_prefix(vcpu, 0);
2356 	kvm_s390_set_cpu_timer(vcpu, 0);
2357 	vcpu->arch.sie_block->ckc       = 0UL;
2358 	vcpu->arch.sie_block->todpr     = 0;
2359 	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2360 	vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
2361 	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2362 	/* make sure the new fpc will be lazily loaded */
2363 	save_fpu_regs();
2364 	current->thread.fpu.fpc = 0;
2365 	vcpu->arch.sie_block->gbea = 1;
2366 	vcpu->arch.sie_block->pp = 0;
2367 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2368 	kvm_clear_async_pf_completion_queue(vcpu);
2369 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2370 		kvm_s390_vcpu_stop(vcpu);
2371 	kvm_s390_clear_local_irqs(vcpu);
2372 }
2373 
2374 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2375 {
2376 	mutex_lock(&vcpu->kvm->lock);
2377 	preempt_disable();
2378 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2379 	preempt_enable();
2380 	mutex_unlock(&vcpu->kvm->lock);
2381 	if (!kvm_is_ucontrol(vcpu->kvm)) {
2382 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2383 		sca_add_vcpu(vcpu);
2384 	}
2385 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2386 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2387 	/* make vcpu_load load the right gmap on the first trigger */
2388 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2389 }
2390 
2391 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2392 {
2393 	if (!test_kvm_facility(vcpu->kvm, 76))
2394 		return;
2395 
2396 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2397 
2398 	if (vcpu->kvm->arch.crypto.aes_kw)
2399 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2400 	if (vcpu->kvm->arch.crypto.dea_kw)
2401 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2402 
2403 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2404 }
2405 
2406 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2407 {
2408 	free_page(vcpu->arch.sie_block->cbrlo);
2409 	vcpu->arch.sie_block->cbrlo = 0;
2410 }
2411 
2412 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2413 {
2414 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2415 	if (!vcpu->arch.sie_block->cbrlo)
2416 		return -ENOMEM;
2417 
2418 	vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
2419 	return 0;
2420 }
2421 
2422 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2423 {
2424 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2425 
2426 	vcpu->arch.sie_block->ibc = model->ibc;
2427 	if (test_kvm_facility(vcpu->kvm, 7))
2428 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2429 }
2430 
2431 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2432 {
2433 	int rc = 0;
2434 
2435 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2436 						    CPUSTAT_SM |
2437 						    CPUSTAT_STOPPED);
2438 
2439 	if (test_kvm_facility(vcpu->kvm, 78))
2440 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2441 	else if (test_kvm_facility(vcpu->kvm, 8))
2442 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2443 
2444 	kvm_s390_vcpu_setup_model(vcpu);
2445 
2446 	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2447 	if (MACHINE_HAS_ESOP)
2448 		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2449 	if (test_kvm_facility(vcpu->kvm, 9))
2450 		vcpu->arch.sie_block->ecb |= ECB_SRSI;
2451 	if (test_kvm_facility(vcpu->kvm, 73))
2452 		vcpu->arch.sie_block->ecb |= ECB_TE;
2453 
2454 	if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
2455 		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2456 	if (test_kvm_facility(vcpu->kvm, 130))
2457 		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2458 	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2459 	if (sclp.has_cei)
2460 		vcpu->arch.sie_block->eca |= ECA_CEI;
2461 	if (sclp.has_ib)
2462 		vcpu->arch.sie_block->eca |= ECA_IB;
2463 	if (sclp.has_siif)
2464 		vcpu->arch.sie_block->eca |= ECA_SII;
2465 	if (sclp.has_sigpif)
2466 		vcpu->arch.sie_block->eca |= ECA_SIGPI;
2467 	if (test_kvm_facility(vcpu->kvm, 129)) {
2468 		vcpu->arch.sie_block->eca |= ECA_VX;
2469 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2470 	}
2471 	if (test_kvm_facility(vcpu->kvm, 139))
2472 		vcpu->arch.sie_block->ecd |= ECD_MEF;
2473 
2474 	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2475 					| SDNXC;
2476 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2477 
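	/*
	 * With the keyless subset facility the guest starts without storage
	 * key handling; otherwise the key instructions (ISKE/SSKE/RRBE) are
	 * intercepted from the start.
	 */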
2478 	if (sclp.has_kss)
2479 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
2480 	else
2481 		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2482 
2483 	if (vcpu->kvm->arch.use_cmma) {
2484 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
2485 		if (rc)
2486 			return rc;
2487 	}
2488 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2489 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2490 
2491 	kvm_s390_vcpu_crypto_setup(vcpu);
2492 
2493 	return rc;
2494 }
2495 
2496 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2497 				      unsigned int id)
2498 {
2499 	struct kvm_vcpu *vcpu;
2500 	struct sie_page *sie_page;
2501 	int rc = -EINVAL;
2502 
2503 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2504 		goto out;
2505 
2506 	rc = -ENOMEM;
2507 
2508 	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2509 	if (!vcpu)
2510 		goto out;
2511 
2512 	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2513 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2514 	if (!sie_page)
2515 		goto out_free_cpu;
2516 
2517 	vcpu->arch.sie_block = &sie_page->sie_block;
2518 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2519 
2520 	/* the real guest size will always be smaller than msl */
2521 	vcpu->arch.sie_block->mso = 0;
2522 	vcpu->arch.sie_block->msl = sclp.hamax;
2523 
2524 	vcpu->arch.sie_block->icpua = id;
2525 	spin_lock_init(&vcpu->arch.local_int.lock);
2526 	seqcount_init(&vcpu->arch.cputm_seqcount);
2527 
2528 	rc = kvm_vcpu_init(vcpu, kvm, id);
2529 	if (rc)
2530 		goto out_free_sie_block;
2531 	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2532 		 vcpu->arch.sie_block);
2533 	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2534 
2535 	return vcpu;
2536 out_free_sie_block:
2537 	free_page((unsigned long)(vcpu->arch.sie_block));
2538 out_free_cpu:
2539 	kmem_cache_free(kvm_vcpu_cache, vcpu);
2540 out:
2541 	return ERR_PTR(rc);
2542 }
2543 
2544 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2545 {
2546 	return kvm_s390_vcpu_has_irq(vcpu, 0);
2547 }
2548 
2549 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2550 {
2551 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2552 }
2553 
2554 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2555 {
2556 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2557 	exit_sie(vcpu);
2558 }
2559 
2560 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2561 {
2562 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2563 }
2564 
2565 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2566 {
2567 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2568 	exit_sie(vcpu);
2569 }
2570 
2571 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2572 {
2573 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2574 }
2575 
2576 /*
2577  * Kick a guest cpu out of SIE and wait until SIE is not running.
2578  * If the CPU is not running (e.g. waiting as idle) the function will
2579  * return immediately. */
2580 void exit_sie(struct kvm_vcpu *vcpu)
2581 {
2582 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
2583 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2584 		cpu_relax();
2585 }
2586 
2587 /* Kick a guest cpu out of SIE to process a request synchronously */
2588 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2589 {
2590 	kvm_make_request(req, vcpu);
2591 	kvm_s390_vcpu_request(vcpu);
2592 }
2593 
2594 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2595 			      unsigned long end)
2596 {
2597 	struct kvm *kvm = gmap->private;
2598 	struct kvm_vcpu *vcpu;
2599 	unsigned long prefix;
2600 	int i;
2601 
2602 	if (gmap_is_shadow(gmap))
2603 		return;
2604 	if (start >= 1UL << 31)
2605 		/* We are only interested in prefix pages */
2606 		return;
2607 	kvm_for_each_vcpu(i, vcpu, kvm) {
2608 		/* match against both prefix pages */
2609 		prefix = kvm_s390_get_prefix(vcpu);
2610 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2611 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2612 				   start, end);
2613 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2614 		}
2615 	}
2616 }
2617 
2618 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2619 {
2620 	/* kvm common code refers to this, but never calls it */
2621 	BUG();
2622 	return 0;
2623 }
2624 
2625 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2626 					   struct kvm_one_reg *reg)
2627 {
2628 	int r = -EINVAL;
2629 
2630 	switch (reg->id) {
2631 	case KVM_REG_S390_TODPR:
2632 		r = put_user(vcpu->arch.sie_block->todpr,
2633 			     (u32 __user *)reg->addr);
2634 		break;
2635 	case KVM_REG_S390_EPOCHDIFF:
2636 		r = put_user(vcpu->arch.sie_block->epoch,
2637 			     (u64 __user *)reg->addr);
2638 		break;
2639 	case KVM_REG_S390_CPU_TIMER:
2640 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
2641 			     (u64 __user *)reg->addr);
2642 		break;
2643 	case KVM_REG_S390_CLOCK_COMP:
2644 		r = put_user(vcpu->arch.sie_block->ckc,
2645 			     (u64 __user *)reg->addr);
2646 		break;
2647 	case KVM_REG_S390_PFTOKEN:
2648 		r = put_user(vcpu->arch.pfault_token,
2649 			     (u64 __user *)reg->addr);
2650 		break;
2651 	case KVM_REG_S390_PFCOMPARE:
2652 		r = put_user(vcpu->arch.pfault_compare,
2653 			     (u64 __user *)reg->addr);
2654 		break;
2655 	case KVM_REG_S390_PFSELECT:
2656 		r = put_user(vcpu->arch.pfault_select,
2657 			     (u64 __user *)reg->addr);
2658 		break;
2659 	case KVM_REG_S390_PP:
2660 		r = put_user(vcpu->arch.sie_block->pp,
2661 			     (u64 __user *)reg->addr);
2662 		break;
2663 	case KVM_REG_S390_GBEA:
2664 		r = put_user(vcpu->arch.sie_block->gbea,
2665 			     (u64 __user *)reg->addr);
2666 		break;
2667 	default:
2668 		break;
2669 	}
2670 
2671 	return r;
2672 }
2673 
2674 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2675 					   struct kvm_one_reg *reg)
2676 {
2677 	int r = -EINVAL;
2678 	__u64 val;
2679 
2680 	switch (reg->id) {
2681 	case KVM_REG_S390_TODPR:
2682 		r = get_user(vcpu->arch.sie_block->todpr,
2683 			     (u32 __user *)reg->addr);
2684 		break;
2685 	case KVM_REG_S390_EPOCHDIFF:
2686 		r = get_user(vcpu->arch.sie_block->epoch,
2687 			     (u64 __user *)reg->addr);
2688 		break;
2689 	case KVM_REG_S390_CPU_TIMER:
2690 		r = get_user(val, (u64 __user *)reg->addr);
2691 		if (!r)
2692 			kvm_s390_set_cpu_timer(vcpu, val);
2693 		break;
2694 	case KVM_REG_S390_CLOCK_COMP:
2695 		r = get_user(vcpu->arch.sie_block->ckc,
2696 			     (u64 __user *)reg->addr);
2697 		break;
2698 	case KVM_REG_S390_PFTOKEN:
2699 		r = get_user(vcpu->arch.pfault_token,
2700 			     (u64 __user *)reg->addr);
2701 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2702 			kvm_clear_async_pf_completion_queue(vcpu);
2703 		break;
2704 	case KVM_REG_S390_PFCOMPARE:
2705 		r = get_user(vcpu->arch.pfault_compare,
2706 			     (u64 __user *)reg->addr);
2707 		break;
2708 	case KVM_REG_S390_PFSELECT:
2709 		r = get_user(vcpu->arch.pfault_select,
2710 			     (u64 __user *)reg->addr);
2711 		break;
2712 	case KVM_REG_S390_PP:
2713 		r = get_user(vcpu->arch.sie_block->pp,
2714 			     (u64 __user *)reg->addr);
2715 		break;
2716 	case KVM_REG_S390_GBEA:
2717 		r = get_user(vcpu->arch.sie_block->gbea,
2718 			     (u64 __user *)reg->addr);
2719 		break;
2720 	default:
2721 		break;
2722 	}
2723 
2724 	return r;
2725 }
2726 
2727 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2728 {
2729 	kvm_s390_vcpu_initial_reset(vcpu);
2730 	return 0;
2731 }
2732 
2733 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2734 {
2735 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2736 	return 0;
2737 }
2738 
2739 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2740 {
2741 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2742 	return 0;
2743 }
2744 
2745 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2746 				  struct kvm_sregs *sregs)
2747 {
2748 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2749 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2750 	return 0;
2751 }
2752 
2753 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2754 				  struct kvm_sregs *sregs)
2755 {
2756 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2757 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2758 	return 0;
2759 }
2760 
2761 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2762 {
2763 	if (test_fp_ctl(fpu->fpc))
2764 		return -EINVAL;
2765 	vcpu->run->s.regs.fpc = fpu->fpc;
2766 	if (MACHINE_HAS_VX)
2767 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2768 				 (freg_t *) fpu->fprs);
2769 	else
2770 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2771 	return 0;
2772 }
2773 
2774 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2775 {
2776 	/* make sure we have the latest values */
2777 	save_fpu_regs();
2778 	if (MACHINE_HAS_VX)
2779 		convert_vx_to_fp((freg_t *) fpu->fprs,
2780 				 (__vector128 *) vcpu->run->s.regs.vrs);
2781 	else
2782 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2783 	fpu->fpc = vcpu->run->s.regs.fpc;
2784 	return 0;
2785 }
2786 
2787 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2788 {
2789 	int rc = 0;
2790 
2791 	if (!is_vcpu_stopped(vcpu))
2792 		rc = -EBUSY;
2793 	else {
2794 		vcpu->run->psw_mask = psw.mask;
2795 		vcpu->run->psw_addr = psw.addr;
2796 	}
2797 	return rc;
2798 }
2799 
2800 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2801 				  struct kvm_translation *tr)
2802 {
2803 	return -EINVAL; /* not implemented yet */
2804 }
2805 
2806 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2807 			      KVM_GUESTDBG_USE_HW_BP | \
2808 			      KVM_GUESTDBG_ENABLE)
2809 
2810 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2811 					struct kvm_guest_debug *dbg)
2812 {
2813 	int rc = 0;
2814 
2815 	vcpu->guest_debug = 0;
2816 	kvm_s390_clear_bp_data(vcpu);
2817 
2818 	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2819 		return -EINVAL;
2820 	if (!sclp.has_gpere)
2821 		return -EINVAL;
2822 
2823 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
2824 		vcpu->guest_debug = dbg->control;
2825 		/* enforce guest PER */
2826 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
2827 
2828 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2829 			rc = kvm_s390_import_bp_data(vcpu, dbg);
2830 	} else {
2831 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
2832 		vcpu->arch.guestdbg.last_bp = 0;
2833 	}
2834 
2835 	if (rc) {
2836 		vcpu->guest_debug = 0;
2837 		kvm_s390_clear_bp_data(vcpu);
2838 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
2839 	}
2840 
2841 	return rc;
2842 }
2843 
2844 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2845 				    struct kvm_mp_state *mp_state)
2846 {
2847 	/* CHECK_STOP and LOAD are not supported yet */
2848 	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2849 				       KVM_MP_STATE_OPERATING;
2850 }
2851 
2852 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2853 				    struct kvm_mp_state *mp_state)
2854 {
2855 	int rc = 0;
2856 
2857 	/* user space knows about this interface - let it control the state */
2858 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2859 
2860 	switch (mp_state->mp_state) {
2861 	case KVM_MP_STATE_STOPPED:
2862 		kvm_s390_vcpu_stop(vcpu);
2863 		break;
2864 	case KVM_MP_STATE_OPERATING:
2865 		kvm_s390_vcpu_start(vcpu);
2866 		break;
2867 	case KVM_MP_STATE_LOAD:
2868 	case KVM_MP_STATE_CHECK_STOP:
2869 		/* fall through - CHECK_STOP and LOAD are not supported yet */
2870 	default:
2871 		rc = -ENXIO;
2872 	}
2873 
2874 	return rc;
2875 }
2876 
2877 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2878 {
2879 	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
2880 }
2881 
2882 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2883 {
2884 retry:
2885 	kvm_s390_vcpu_request_handled(vcpu);
2886 	if (!kvm_request_pending(vcpu))
2887 		return 0;
2888 	/*
2889 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2890 	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2891 	 * This ensures that the ipte instruction for this request has
2892 	 * already finished. We might race against a second unmapper that
2893 	 * wants to set the blocking bit. Let's just retry the request loop.
2894 	 */
2895 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2896 		int rc;
2897 		rc = gmap_mprotect_notify(vcpu->arch.gmap,
2898 					  kvm_s390_get_prefix(vcpu),
2899 					  PAGE_SIZE * 2, PROT_WRITE);
2900 		if (rc) {
2901 			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2902 			return rc;
2903 		}
2904 		goto retry;
2905 	}
2906 
2907 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2908 		vcpu->arch.sie_block->ihcpu = 0xffff;
2909 		goto retry;
2910 	}
2911 
2912 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2913 		if (!ibs_enabled(vcpu)) {
2914 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2915 			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
2916 		}
2917 		goto retry;
2918 	}
2919 
2920 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2921 		if (ibs_enabled(vcpu)) {
2922 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2923 			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
2924 		}
2925 		goto retry;
2926 	}
2927 
2928 	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2929 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2930 		goto retry;
2931 	}
2932 
2933 	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
2934 		/*
2935 		 * Disable CMMA virtualization; we will emulate the ESSA
2936 		 * instruction manually, in order to provide additional
2937 		 * functionalities needed for live migration.
2938 		 */
2939 		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
2940 		goto retry;
2941 	}
2942 
2943 	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
2944 		/*
2945 		 * Re-enable CMMA virtualization if CMMA is available and
2946 		 * was used.
2947 		 */
2948 		if ((vcpu->kvm->arch.use_cmma) &&
2949 		    (vcpu->kvm->mm->context.use_cmma))
2950 			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
2951 		goto retry;
2952 	}
2953 
2954 	/* nothing to do, just clear the request */
2955 	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
2956 
2957 	return 0;
2958 }
2959 
2960 void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
2961 				 const struct kvm_s390_vm_tod_clock *gtod)
2962 {
2963 	struct kvm_vcpu *vcpu;
2964 	struct kvm_s390_tod_clock_ext htod;
2965 	int i;
2966 
2967 	mutex_lock(&kvm->lock);
2968 	preempt_disable();
2969 
2970 	get_tod_clock_ext((char *)&htod);
2971 
2972 	kvm->arch.epoch = gtod->tod - htod.tod;
2973 	kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
2974 
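	/* propagate a borrow out of the low 64 TOD bits into the epoch index */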
2975 	if (kvm->arch.epoch > gtod->tod)
2976 		kvm->arch.epdx -= 1;
2977 
2978 	kvm_s390_vcpu_block_all(kvm);
2979 	kvm_for_each_vcpu(i, vcpu, kvm) {
2980 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2981 		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
2982 	}
2983 
2984 	kvm_s390_vcpu_unblock_all(kvm);
2985 	preempt_enable();
2986 	mutex_unlock(&kvm->lock);
2987 }
2988 
2989 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2990 {
2991 	struct kvm_vcpu *vcpu;
2992 	int i;
2993 
2994 	mutex_lock(&kvm->lock);
2995 	preempt_disable();
2996 	kvm->arch.epoch = tod - get_tod_clock();
2997 	kvm_s390_vcpu_block_all(kvm);
2998 	kvm_for_each_vcpu(i, vcpu, kvm)
2999 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3000 	kvm_s390_vcpu_unblock_all(kvm);
3001 	preempt_enable();
3002 	mutex_unlock(&kvm->lock);
3003 }
3004 
3005 /**
3006  * kvm_arch_fault_in_page - fault-in guest page if necessary
3007  * @vcpu: The corresponding virtual cpu
3008  * @gpa: Guest physical address
3009  * @writable: Whether the page should be writable or not
3010  *
3011  * Make sure that a guest page has been faulted-in on the host.
3012  *
3013  * Return: Zero on success, negative error code otherwise.
3014  */
3015 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3016 {
3017 	return gmap_fault(vcpu->arch.gmap, gpa,
3018 			  writable ? FAULT_FLAG_WRITE : 0);
3019 }
3020 
3021 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3022 				      unsigned long token)
3023 {
3024 	struct kvm_s390_interrupt inti;
3025 	struct kvm_s390_irq irq;
3026 
3027 	if (start_token) {
3028 		irq.u.ext.ext_params2 = token;
3029 		irq.type = KVM_S390_INT_PFAULT_INIT;
3030 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3031 	} else {
3032 		inti.type = KVM_S390_INT_PFAULT_DONE;
3033 		inti.parm64 = token;
3034 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3035 	}
3036 }
3037 
3038 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3039 				     struct kvm_async_pf *work)
3040 {
3041 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3042 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3043 }
3044 
3045 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3046 				 struct kvm_async_pf *work)
3047 {
3048 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3049 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3050 }
3051 
3052 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3053 			       struct kvm_async_pf *work)
3054 {
3055 	/* s390 will always inject the page directly */
3056 }
3057 
3058 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3059 {
3060 	/*
3061 	 * s390 will always inject the page directly,
3062 	 * but we still want check_async_completion to clean up
3063 	 */
3064 	return true;
3065 }
3066 
3067 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3068 {
3069 	hva_t hva;
3070 	struct kvm_arch_async_pf arch;
3071 	int rc;
3072 
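	/*
	 * Only use the async pfault mechanism if the guest has set it up and
	 * can currently take the completion interrupt; otherwise the caller
	 * falls back to a synchronous fault-in.
	 */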
3073 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3074 		return 0;
3075 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3076 	    vcpu->arch.pfault_compare)
3077 		return 0;
3078 	if (psw_extint_disabled(vcpu))
3079 		return 0;
3080 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
3081 		return 0;
3082 	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
3083 		return 0;
3084 	if (!vcpu->arch.gmap->pfault_enabled)
3085 		return 0;
3086 
3087 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3088 	hva += current->thread.gmap_addr & ~PAGE_MASK;
3089 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3090 		return 0;
3091 
3092 	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3093 	return rc;
3094 }
3095 
3096 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3097 {
3098 	int rc, cpuflags;
3099 
3100 	/*
3101 	 * On s390, notifications for arriving pages will be delivered directly
3102 	 * to the guest, but the housekeeping for completed pfaults is
3103 	 * handled outside the worker.
3104 	 */
3105 	kvm_check_async_pf_completion(vcpu);
3106 
3107 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3108 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3109 
3110 	if (need_resched())
3111 		schedule();
3112 
3113 	if (test_cpu_flag(CIF_MCCK_PENDING))
3114 		s390_handle_mcck();
3115 
3116 	if (!kvm_is_ucontrol(vcpu->kvm)) {
3117 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
3118 		if (rc)
3119 			return rc;
3120 	}
3121 
3122 	rc = kvm_s390_handle_requests(vcpu);
3123 	if (rc)
3124 		return rc;
3125 
3126 	if (guestdbg_enabled(vcpu)) {
3127 		kvm_s390_backup_guest_per_regs(vcpu);
3128 		kvm_s390_patch_guest_per_regs(vcpu);
3129 	}
3130 
3131 	vcpu->arch.sie_block->icptcode = 0;
3132 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3133 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3134 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
3135 
3136 	return 0;
3137 }
3138 
3139 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3140 {
3141 	struct kvm_s390_pgm_info pgm_info = {
3142 		.code = PGM_ADDRESSING,
3143 	};
3144 	u8 opcode, ilen;
3145 	int rc;
3146 
3147 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3148 	trace_kvm_s390_sie_fault(vcpu);
3149 
3150 	/*
3151 	 * We want to inject an addressing exception, which is defined as a
3152 	 * suppressing or terminating exception. However, since we came here
3153 	 * by a DAT access exception, the PSW still points to the faulting
3154 	 * instruction since DAT exceptions are nullifying. So we've got
3155 	 * to look up the current opcode to get the length of the instruction
3156 	 * to be able to forward the PSW.
3157 	 */
3158 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3159 	ilen = insn_length(opcode);
3160 	if (rc < 0) {
3161 		return rc;
3162 	} else if (rc) {
3163 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
3164 		 * Forward by arbitrary ilc, injection will take care of
3165 		 * nullification if necessary.
3166 		 */
3167 		pgm_info = vcpu->arch.pgm;
3168 		ilen = 4;
3169 	}
3170 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3171 	kvm_s390_forward_psw(vcpu, ilen);
3172 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3173 }
3174 
3175 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3176 {
3177 	struct mcck_volatile_info *mcck_info;
3178 	struct sie_page *sie_page;
3179 
3180 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3181 		   vcpu->arch.sie_block->icptcode);
3182 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3183 
3184 	if (guestdbg_enabled(vcpu))
3185 		kvm_s390_restore_guest_per_regs(vcpu);
3186 
3187 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3188 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3189 
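	/*
	 * -EINTR from sie64a means SIE was left because of a machine check
	 * targeting the guest; the machine check information was saved in
	 * the sie_page and is re-injected here.
	 */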
3190 	if (exit_reason == -EINTR) {
3191 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
3192 		sie_page = container_of(vcpu->arch.sie_block,
3193 					struct sie_page, sie_block);
3194 		mcck_info = &sie_page->mcck_info;
3195 		kvm_s390_reinject_machine_check(vcpu, mcck_info);
3196 		return 0;
3197 	}
3198 
3199 	if (vcpu->arch.sie_block->icptcode > 0) {
3200 		int rc = kvm_handle_sie_intercept(vcpu);
3201 
3202 		if (rc != -EOPNOTSUPP)
3203 			return rc;
3204 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3205 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3206 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3207 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3208 		return -EREMOTE;
3209 	} else if (exit_reason != -EFAULT) {
3210 		vcpu->stat.exit_null++;
3211 		return 0;
3212 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
3213 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3214 		vcpu->run->s390_ucontrol.trans_exc_code =
3215 						current->thread.gmap_addr;
3216 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
3217 		return -EREMOTE;
3218 	} else if (current->thread.gmap_pfault) {
3219 		trace_kvm_s390_major_guest_pfault(vcpu);
3220 		current->thread.gmap_pfault = 0;
3221 		if (kvm_arch_setup_async_pf(vcpu))
3222 			return 0;
3223 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3224 	}
3225 	return vcpu_post_run_fault_in_sie(vcpu);
3226 }
3227 
3228 static int __vcpu_run(struct kvm_vcpu *vcpu)
3229 {
3230 	int rc, exit_reason;
3231 
3232 	/*
3233 	 * We try to hold kvm->srcu during most of vcpu_run (except when
3234 	 * running the guest), so that memslots (and other stuff) are protected
3235 	 */
3236 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3237 
3238 	do {
3239 		rc = vcpu_pre_run(vcpu);
3240 		if (rc)
3241 			break;
3242 
3243 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3244 		/*
3245 		 * As PF_VCPU will be used in the fault handler, there must be
3246 		 * no uaccess between guest_enter and guest_exit.
3247 		 */
3248 		local_irq_disable();
3249 		guest_enter_irqoff();
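		/*
		 * While in SIE the guest CPU timer in the SIE block is
		 * stepped by the hardware, so stop the host-side accounting
		 * for this period and resume it after exit.
		 */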
3250 		__disable_cpu_timer_accounting(vcpu);
3251 		local_irq_enable();
3252 		exit_reason = sie64a(vcpu->arch.sie_block,
3253 				     vcpu->run->s.regs.gprs);
3254 		local_irq_disable();
3255 		__enable_cpu_timer_accounting(vcpu);
3256 		guest_exit_irqoff();
3257 		local_irq_enable();
3258 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3259 
3260 		rc = vcpu_post_run(vcpu, exit_reason);
3261 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3262 
3263 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3264 	return rc;
3265 }
3266 
3267 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3268 {
3269 	struct runtime_instr_cb *riccb;
3270 	struct gs_cb *gscb;
3271 
3272 	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3273 	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3274 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3275 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3276 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3277 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3278 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3279 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3280 		/* some control register changes require a tlb flush */
3281 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3282 	}
3283 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3284 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3285 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3286 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3287 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3288 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3289 	}
3290 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3291 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3292 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3293 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3294 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3295 			kvm_clear_async_pf_completion_queue(vcpu);
3296 	}
3297 	/*
3298 	 * If userspace sets the riccb (e.g. after migration) to a valid state,
3299 	 * we should enable RI here instead of doing the lazy enablement.
3300 	 */
3301 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3302 	    test_kvm_facility(vcpu->kvm, 64) &&
3303 	    riccb->v &&
3304 	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3305 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3306 		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3307 	}
3308 	/*
3309 	 * If userspace sets the gscb (e.g. after migration) to non-zero,
3310 	 * we should enable GS here instead of doing the lazy enablement.
3311 	 */
3312 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3313 	    test_kvm_facility(vcpu->kvm, 133) &&
3314 	    gscb->gssm &&
3315 	    !vcpu->arch.gs_enabled) {
3316 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3317 		vcpu->arch.sie_block->ecb |= ECB_GS;
3318 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3319 		vcpu->arch.gs_enabled = 1;
3320 	}
3321 	save_access_regs(vcpu->arch.host_acrs);
3322 	restore_access_regs(vcpu->run->s.regs.acrs);
3323 	/* save host (userspace) fprs/vrs */
3324 	save_fpu_regs();
3325 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3326 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3327 	if (MACHINE_HAS_VX)
3328 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3329 	else
3330 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3331 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3332 	if (test_fp_ctl(current->thread.fpu.fpc))
3333 		/* User space provided an invalid FPC, let's clear it */
3334 		current->thread.fpu.fpc = 0;
3335 	if (MACHINE_HAS_GS) {
3336 		preempt_disable();
3337 		__ctl_set_bit(2, 4);
3338 		if (current->thread.gs_cb) {
3339 			vcpu->arch.host_gscb = current->thread.gs_cb;
3340 			save_gs_cb(vcpu->arch.host_gscb);
3341 		}
3342 		if (vcpu->arch.gs_enabled) {
3343 			current->thread.gs_cb = (struct gs_cb *)
3344 						&vcpu->run->s.regs.gscb;
3345 			restore_gs_cb(current->thread.gs_cb);
3346 		}
3347 		preempt_enable();
3348 	}
3349 
3350 	kvm_run->kvm_dirty_regs = 0;
3351 }
3352 
3353 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3354 {
3355 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3356 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3357 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3358 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3359 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3360 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3361 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3362 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3363 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3364 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3365 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3366 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3367 	save_access_regs(vcpu->run->s.regs.acrs);
3368 	restore_access_regs(vcpu->arch.host_acrs);
3369 	/* Save guest register state */
3370 	save_fpu_regs();
3371 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3372 	/* Restore will be done lazily at return */
3373 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3374 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3375 	if (MACHINE_HAS_GS) {
3376 		__ctl_set_bit(2, 4);
3377 		if (vcpu->arch.gs_enabled)
3378 			save_gs_cb(current->thread.gs_cb);
3379 		preempt_disable();
3380 		current->thread.gs_cb = vcpu->arch.host_gscb;
3381 		restore_gs_cb(vcpu->arch.host_gscb);
3382 		preempt_enable();
3383 		if (!vcpu->arch.host_gscb)
3384 			__ctl_clear_bit(2, 4);
3385 		vcpu->arch.host_gscb = NULL;
3386 	}
3388 }
3389 
3390 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3391 {
3392 	int rc;
3393 
3394 	if (kvm_run->immediate_exit)
3395 		return -EINTR;
3396 
3397 	if (guestdbg_exit_pending(vcpu)) {
3398 		kvm_s390_prepare_debug_exit(vcpu);
3399 		return 0;
3400 	}
3401 
3402 	kvm_sigset_activate(vcpu);
3403 
3404 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3405 		kvm_s390_vcpu_start(vcpu);
3406 	} else if (is_vcpu_stopped(vcpu)) {
3407 		pr_err_ratelimited("can't run stopped vcpu %d\n",
3408 				   vcpu->vcpu_id);
3409 		return -EINVAL;
3410 	}
3411 
3412 	sync_regs(vcpu, kvm_run);
3413 	enable_cpu_timer_accounting(vcpu);
3414 
3415 	might_fault();
3416 	rc = __vcpu_run(vcpu);
3417 
3418 	if (signal_pending(current) && !rc) {
3419 		kvm_run->exit_reason = KVM_EXIT_INTR;
3420 		rc = -EINTR;
3421 	}
3422 
3423 	if (guestdbg_exit_pending(vcpu) && !rc)  {
3424 		kvm_s390_prepare_debug_exit(vcpu);
3425 		rc = 0;
3426 	}
3427 
3428 	if (rc == -EREMOTE) {
3429 		/* userspace support is needed, kvm_run has been prepared */
3430 		rc = 0;
3431 	}
3432 
3433 	disable_cpu_timer_accounting(vcpu);
3434 	store_regs(vcpu, kvm_run);
3435 
3436 	kvm_sigset_deactivate(vcpu);
3437 
3438 	vcpu->stat.exit_userspace++;
3439 	return rc;
3440 }
3441 
3442 /*
3443  * store status at address
3444  * we have two special cases:
3445  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3446  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3447  */
3448 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3449 {
3450 	unsigned char archmode = 1;
3451 	freg_t fprs[NUM_FPRS];
3452 	unsigned int px;
3453 	u64 clkcomp, cputm;
3454 	int rc;
3455 
3456 	px = kvm_s390_get_prefix(vcpu);
3457 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3458 		if (write_guest_abs(vcpu, 163, &archmode, 1))
3459 			return -EFAULT;
3460 		gpa = 0;
3461 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3462 		if (write_guest_real(vcpu, 163, &archmode, 1))
3463 			return -EFAULT;
3464 		gpa = px;
3465 	} else
3466 		gpa -= __LC_FPREGS_SAVE_AREA;
3467 
3468 	/* manually convert vector registers if necessary */
3469 	if (MACHINE_HAS_VX) {
3470 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3471 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3472 				     fprs, 128);
3473 	} else {
3474 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3475 				     vcpu->run->s.regs.fprs, 128);
3476 	}
3477 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3478 			      vcpu->run->s.regs.gprs, 128);
3479 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3480 			      &vcpu->arch.sie_block->gpsw, 16);
3481 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3482 			      &px, 4);
3483 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3484 			      &vcpu->run->s.regs.fpc, 4);
3485 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3486 			      &vcpu->arch.sie_block->todpr, 4);
3487 	cputm = kvm_s390_get_cpu_timer(vcpu);
3488 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3489 			      &cputm, 8);
3490 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
3491 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3492 			      &clkcomp, 8);
3493 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3494 			      &vcpu->run->s.regs.acrs, 64);
3495 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3496 			      &vcpu->arch.sie_block->gcr, 128);
3497 	return rc ? -EFAULT : 0;
3498 }
3499 
3500 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3501 {
3502 	/*
3503 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3504 	 * switch in the run ioctl. Let's update our copies before we save
3505 	 * them into the save area.
3506 	 */
3507 	save_fpu_regs();
3508 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3509 	save_access_regs(vcpu->run->s.regs.acrs);
3510 
3511 	return kvm_s390_store_status_unloaded(vcpu, addr);
3512 }
3513 
3514 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3515 {
3516 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3517 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3518 }
3519 
3520 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3521 {
3522 	unsigned int i;
3523 	struct kvm_vcpu *vcpu;
3524 
3525 	kvm_for_each_vcpu(i, vcpu, kvm) {
3526 		__disable_ibs_on_vcpu(vcpu);
3527 	}
3528 }
3529 
3530 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3531 {
3532 	if (!sclp.has_ibs)
3533 		return;
3534 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3535 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3536 }
3537 
3538 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3539 {
3540 	int i, online_vcpus, started_vcpus = 0;
3541 
3542 	if (!is_vcpu_stopped(vcpu))
3543 		return;
3544 
3545 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3546 	/* Only one cpu at a time may enter/leave the STOPPED state. */
3547 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
3548 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3549 
3550 	for (i = 0; i < online_vcpus; i++) {
3551 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3552 			started_vcpus++;
3553 	}
3554 
3555 	if (started_vcpus == 0) {
3556 		/* we're the only active VCPU -> speed it up */
3557 		__enable_ibs_on_vcpu(vcpu);
3558 	} else if (started_vcpus == 1) {
3559 		/*
3560 		 * As we are starting a second VCPU, we have to disable
3561 		 * the IBS facility on all VCPUs to remove potentially
3562 		 * outstanding ENABLE requests.
3563 		 */
3564 		__disable_ibs_on_all_vcpus(vcpu->kvm);
3565 	}
3566 
3567 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
3568 	/*
3569 	 * Another VCPU might have used IBS while we were offline.
3570 	 * Let's play safe and flush the VCPU at startup.
3571 	 */
3572 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3573 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3575 }
3576 
3577 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3578 {
3579 	int i, online_vcpus, started_vcpus = 0;
3580 	struct kvm_vcpu *started_vcpu = NULL;
3581 
3582 	if (is_vcpu_stopped(vcpu))
3583 		return;
3584 
3585 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3586 	/* Only one cpu at a time may enter/leave the STOPPED state. */
3587 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
3588 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3589 
3590 	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
3591 	kvm_s390_clear_stop_irq(vcpu);
3592 
3593 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
3594 	__disable_ibs_on_vcpu(vcpu);
3595 
3596 	for (i = 0; i < online_vcpus; i++) {
3597 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3598 			started_vcpus++;
3599 			started_vcpu = vcpu->kvm->vcpus[i];
3600 		}
3601 	}
3602 
3603 	if (started_vcpus == 1) {
3604 		/*
3605 		 * As we only have one VCPU left, we want to enable the
3606 		 * IBS facility for that VCPU to speed it up.
3607 		 */
3608 		__enable_ibs_on_vcpu(started_vcpu);
3609 	}
3610 
3611 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3613 }
3614 
3615 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3616 				     struct kvm_enable_cap *cap)
3617 {
3618 	int r;
3619 
3620 	if (cap->flags)
3621 		return -EINVAL;
3622 
3623 	switch (cap->cap) {
3624 	case KVM_CAP_S390_CSS_SUPPORT:
3625 		if (!vcpu->kvm->arch.css_support) {
3626 			vcpu->kvm->arch.css_support = 1;
3627 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3628 			trace_kvm_s390_enable_css(vcpu->kvm);
3629 		}
3630 		r = 0;
3631 		break;
3632 	default:
3633 		r = -EINVAL;
3634 		break;
3635 	}
3636 	return r;
3637 }
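
/*
 * Illustrative userspace sketch (an assumption, not part of this file):
 * enabling channel-subsystem support through the vCPU KVM_ENABLE_CAP
 * ioctl, which is dispatched to kvm_vcpu_ioctl_enable_cap() above.
 * vcpu_fd is made up for the example; cap.flags must stay zero or the
 * handler returns -EINVAL.
 */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int enable_css_support(int vcpu_fd)
{
	struct kvm_enable_cap cap;

	memset(&cap, 0, sizeof(cap));
	cap.cap = KVM_CAP_S390_CSS_SUPPORT;
	return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
}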
3638 
3639 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3640 				  struct kvm_s390_mem_op *mop)
3641 {
3642 	void __user *uaddr = (void __user *)mop->buf;
3643 	void *tmpbuf = NULL;
3644 	int r, srcu_idx;
3645 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3646 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
3647 
3648 	if (mop->flags & ~supported_flags)
3649 		return -EINVAL;
3650 
3651 	if (mop->size > MEM_OP_MAX_SIZE)
3652 		return -E2BIG;
3653 
3654 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3655 		tmpbuf = vmalloc(mop->size);
3656 		if (!tmpbuf)
3657 			return -ENOMEM;
3658 	}
3659 
3660 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3661 
3662 	switch (mop->op) {
3663 	case KVM_S390_MEMOP_LOGICAL_READ:
3664 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3665 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3666 					    mop->size, GACC_FETCH);
3667 			break;
3668 		}
3669 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3670 		if (r == 0) {
3671 			if (copy_to_user(uaddr, tmpbuf, mop->size))
3672 				r = -EFAULT;
3673 		}
3674 		break;
3675 	case KVM_S390_MEMOP_LOGICAL_WRITE:
3676 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3677 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3678 					    mop->size, GACC_STORE);
3679 			break;
3680 		}
3681 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3682 			r = -EFAULT;
3683 			break;
3684 		}
3685 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3686 		break;
3687 	default:
3688 		r = -EINVAL;
3689 	}
3690 
3691 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3692 
3693 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3694 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3695 
3696 	vfree(tmpbuf);
3697 	return r;
3698 }
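
/*
 * Illustrative userspace sketch (an assumption, not part of this file):
 * reading guest memory through the KVM_S390_MEM_OP handler above.  The
 * struct and flag names come from <linux/kvm.h>; vcpu_fd, the guest
 * address and the caller-provided buffer are made up for the example.
 */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int read_guest_logical(int vcpu_fd, __u64 guest_addr, void *buf,
			      __u32 len)
{
	struct kvm_s390_mem_op op;

	memset(&op, 0, sizeof(op));
	op.op = KVM_S390_MEMOP_LOGICAL_READ;
	op.gaddr = guest_addr;
	op.buf = (__u64)(unsigned long)buf;
	op.size = len;	/* the handler rejects sizes above MEM_OP_MAX_SIZE */
	op.ar = 0;	/* access register 0 */
	/* adding KVM_S390_MEMOP_F_CHECK_ONLY would only test accessibility */
	return ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
}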
3699 
3700 long kvm_arch_vcpu_ioctl(struct file *filp,
3701 			 unsigned int ioctl, unsigned long arg)
3702 {
3703 	struct kvm_vcpu *vcpu = filp->private_data;
3704 	void __user *argp = (void __user *)arg;
3705 	int idx;
3706 	long r;
3707 
3708 	switch (ioctl) {
3709 	case KVM_S390_IRQ: {
3710 		struct kvm_s390_irq s390irq;
3711 
3712 		r = -EFAULT;
3713 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3714 			break;
3715 		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3716 		break;
3717 	}
3718 	case KVM_S390_INTERRUPT: {
3719 		struct kvm_s390_interrupt s390int;
3720 		struct kvm_s390_irq s390irq;
3721 
3722 		r = -EFAULT;
3723 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
3724 			break;
3725 		if (s390int_to_s390irq(&s390int, &s390irq))
3726 			return -EINVAL;
3727 		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3728 		break;
3729 	}
3730 	case KVM_S390_STORE_STATUS:
3731 		idx = srcu_read_lock(&vcpu->kvm->srcu);
3732 		r = kvm_s390_vcpu_store_status(vcpu, arg);
3733 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
3734 		break;
3735 	case KVM_S390_SET_INITIAL_PSW: {
3736 		psw_t psw;
3737 
3738 		r = -EFAULT;
3739 		if (copy_from_user(&psw, argp, sizeof(psw)))
3740 			break;
3741 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3742 		break;
3743 	}
3744 	case KVM_S390_INITIAL_RESET:
3745 		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3746 		break;
3747 	case KVM_SET_ONE_REG:
3748 	case KVM_GET_ONE_REG: {
3749 		struct kvm_one_reg reg;
3750 		r = -EFAULT;
3751 		if (copy_from_user(&reg, argp, sizeof(reg)))
3752 			break;
3753 		if (ioctl == KVM_SET_ONE_REG)
3754 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3755 		else
3756 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3757 		break;
3758 	}
3759 #ifdef CONFIG_KVM_S390_UCONTROL
3760 	case KVM_S390_UCAS_MAP: {
3761 		struct kvm_s390_ucas_mapping ucasmap;
3762 
3763 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3764 			r = -EFAULT;
3765 			break;
3766 		}
3767 
3768 		if (!kvm_is_ucontrol(vcpu->kvm)) {
3769 			r = -EINVAL;
3770 			break;
3771 		}
3772 
3773 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3774 				     ucasmap.vcpu_addr, ucasmap.length);
3775 		break;
3776 	}
3777 	case KVM_S390_UCAS_UNMAP: {
3778 		struct kvm_s390_ucas_mapping ucasmap;
3779 
3780 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3781 			r = -EFAULT;
3782 			break;
3783 		}
3784 
3785 		if (!kvm_is_ucontrol(vcpu->kvm)) {
3786 			r = -EINVAL;
3787 			break;
3788 		}
3789 
3790 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3791 			ucasmap.length);
3792 		break;
3793 	}
3794 #endif
3795 	case KVM_S390_VCPU_FAULT: {
3796 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
3797 		break;
3798 	}
3799 	case KVM_ENABLE_CAP:
3800 	{
3801 		struct kvm_enable_cap cap;
3802 		r = -EFAULT;
3803 		if (copy_from_user(&cap, argp, sizeof(cap)))
3804 			break;
3805 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3806 		break;
3807 	}
3808 	case KVM_S390_MEM_OP: {
3809 		struct kvm_s390_mem_op mem_op;
3810 
3811 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3812 			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3813 		else
3814 			r = -EFAULT;
3815 		break;
3816 	}
3817 	case KVM_S390_SET_IRQ_STATE: {
3818 		struct kvm_s390_irq_state irq_state;
3819 
3820 		r = -EFAULT;
3821 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3822 			break;
3823 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3824 		    irq_state.len == 0 ||
3825 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3826 			r = -EINVAL;
3827 			break;
3828 		}
3829 		/* do not use irq_state.flags, it will break old QEMUs */
3830 		r = kvm_s390_set_irq_state(vcpu,
3831 					   (void __user *) irq_state.buf,
3832 					   irq_state.len);
3833 		break;
3834 	}
3835 	case KVM_S390_GET_IRQ_STATE: {
3836 		struct kvm_s390_irq_state irq_state;
3837 
3838 		r = -EFAULT;
3839 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3840 			break;
3841 		if (irq_state.len == 0) {
3842 			r = -EINVAL;
3843 			break;
3844 		}
3845 		/* do not use irq_state.flags, it will break old QEMUs */
3846 		r = kvm_s390_get_irq_state(vcpu,
3847 					   (__u8 __user *)  irq_state.buf,
3848 					   irq_state.len);
3849 		break;
3850 	}
3851 	default:
3852 		r = -ENOTTY;
3853 	}
3854 	return r;
3855 }
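
/*
 * Illustrative userspace sketch (an assumption, not part of this file):
 * the KVM_S390_IRQ case above takes a struct kvm_s390_irq directly, so
 * injecting a restart interrupt into a vCPU can look like this.  vcpu_fd
 * is made up for the example.
 */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int inject_restart(int vcpu_fd)
{
	struct kvm_s390_irq irq;

	memset(&irq, 0, sizeof(irq));
	irq.type = KVM_S390_RESTART;	/* no payload needed in irq.u */
	return ioctl(vcpu_fd, KVM_S390_IRQ, &irq);
}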
3856 
3857 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3858 {
3859 #ifdef CONFIG_KVM_S390_UCONTROL
3860 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3861 		 && (kvm_is_ucontrol(vcpu->kvm))) {
3862 		vmf->page = virt_to_page(vcpu->arch.sie_block);
3863 		get_page(vmf->page);
3864 		return 0;
3865 	}
3866 #endif
3867 	return VM_FAULT_SIGBUS;
3868 }
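
/*
 * Illustrative userspace sketch (an assumption, not part of this file):
 * for user-controlled (ucontrol) VMs the fault handler above hands out
 * the vCPU's SIE control block at page offset KVM_S390_SIE_PAGE_OFFSET;
 * every other VM type gets SIGBUS.  vcpu_fd and the protection flags are
 * assumptions for the example.
 */
#include <sys/mman.h>
#include <unistd.h>
#include <linux/kvm.h>

static void *map_sie_block(int vcpu_fd)
{
	long psize = sysconf(_SC_PAGESIZE);

	return mmap(NULL, psize, PROT_READ | PROT_WRITE, MAP_SHARED,
		    vcpu_fd, KVM_S390_SIE_PAGE_OFFSET * psize);
}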
3869 
3870 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3871 			    unsigned long npages)
3872 {
3873 	return 0;
3874 }
3875 
3876 /* Section: memory related */
3877 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3878 				   struct kvm_memory_slot *memslot,
3879 				   const struct kvm_userspace_memory_region *mem,
3880 				   enum kvm_mr_change change)
3881 {
3882 	/* A few sanity checks. Memory slots have to start and end on a segment
3883 	   boundary (1MB). The memory in userland may be fragmented across
3884 	   several different VMAs, and it is fine to mmap() and munmap() parts
3885 	   of this slot at any time after this call. */
3886 
3887 	if (mem->userspace_addr & 0xffffful)
3888 		return -EINVAL;
3889 
3890 	if (mem->memory_size & 0xffffful)
3891 		return -EINVAL;
3892 
3893 	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3894 		return -EINVAL;
3895 
3896 	return 0;
3897 }
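
/*
 * Illustrative userspace sketch (an assumption, not part of this file):
 * a memslot that passes the sanity checks above needs a 1MB-aligned
 * userspace address and size and must fit below the configured memory
 * limit.  vm_fd, the slot number and the backing allocation (which must
 * itself be 1MB aligned) are made up for the example.
 */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int add_main_memory(int vm_fd, void *host_mem, __u64 size)
{
	struct kvm_userspace_memory_region region;

	memset(&region, 0, sizeof(region));
	region.slot = 0;
	region.guest_phys_addr = 0;
	region.memory_size = size;	/* multiple of 1MB */
	region.userspace_addr = (__u64)(unsigned long)host_mem;
	return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
}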
3898 
3899 void kvm_arch_commit_memory_region(struct kvm *kvm,
3900 				const struct kvm_userspace_memory_region *mem,
3901 				const struct kvm_memory_slot *old,
3902 				const struct kvm_memory_slot *new,
3903 				enum kvm_mr_change change)
3904 {
3905 	int rc;
3906 
3907 	/* If the basics of the memslot do not change, we do not want
3908 	 * to update the gmap. Every update causes several unnecessary
3909 	 * segment translation exceptions. This is usually handled just
3910 	 * fine by the normal fault handler + gmap, but it will also
3911 	 * cause faults on the prefix page of running guest CPUs.
3912 	 */
3913 	if (old->userspace_addr == mem->userspace_addr &&
3914 	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3915 	    old->npages * PAGE_SIZE == mem->memory_size)
3916 		return;
3917 
3918 	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3919 		mem->guest_phys_addr, mem->memory_size);
3920 	if (rc)
3921 		pr_warn("failed to commit memory region\n");
3923 }
3924 
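/*
 * Build the mask of facility bits in doubleword i of the facility list
 * that kvm_s390_init() below may merge into kvm_s390_fac_list_mask: the
 * i-th 2-bit field of sclp.hmfai (counted from the most significant end)
 * selects how many additional 16-bit blocks at the top of the doubleword
 * are masked out on top of the 16 bits that are always cleared.
 */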
3925 static inline unsigned long nonhyp_mask(int i)
3926 {
3927 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3928 
3929 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3930 }
3931 
3932 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3933 {
3934 	vcpu->valid_wakeup = false;
3935 }
3936 
3937 static int __init kvm_s390_init(void)
3938 {
3939 	int i;
3940 
3941 	if (!sclp.has_sief2) {
3942 		pr_info("SIE not available\n");
3943 		return -ENODEV;
3944 	}
3945 
3946 	for (i = 0; i < 16; i++)
3947 		kvm_s390_fac_list_mask[i] |=
3948 			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3949 
3950 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3951 }
3952 
3953 static void __exit kvm_s390_exit(void)
3954 {
3955 	kvm_exit();
3956 }
3957 
3958 module_init(kvm_s390_init);
3959 module_exit(kvm_s390_exit);
3960 
3961 /*
3962  * Enable autoloading of the kvm module.
3963  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3964  * since x86 takes a different approach.
3965  */
3966 #include <linux/miscdevice.h>
3967 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3968 MODULE_ALIAS("devname:kvm");
3969