xref: /openbmc/linux/arch/s390/kvm/kvm-s390.c (revision e2c75e76)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2018
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13 
14 #include <linux/compiler.h>
15 #include <linux/err.h>
16 #include <linux/fs.h>
17 #include <linux/hrtimer.h>
18 #include <linux/init.h>
19 #include <linux/kvm.h>
20 #include <linux/kvm_host.h>
21 #include <linux/mman.h>
22 #include <linux/module.h>
23 #include <linux/moduleparam.h>
24 #include <linux/random.h>
25 #include <linux/slab.h>
26 #include <linux/timer.h>
27 #include <linux/vmalloc.h>
28 #include <linux/bitmap.h>
29 #include <linux/sched/signal.h>
30 #include <linux/string.h>
31 
32 #include <asm/asm-offsets.h>
33 #include <asm/lowcore.h>
34 #include <asm/stp.h>
35 #include <asm/pgtable.h>
36 #include <asm/gmap.h>
37 #include <asm/nmi.h>
38 #include <asm/switch_to.h>
39 #include <asm/isc.h>
40 #include <asm/sclp.h>
41 #include <asm/cpacf.h>
42 #include <asm/timex.h>
43 #include "kvm-s390.h"
44 #include "gaccess.h"
45 
46 #define KMSG_COMPONENT "kvm-s390"
47 #undef pr_fmt
48 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
49 
50 #define CREATE_TRACE_POINTS
51 #include "trace.h"
52 #include "trace-s390.h"
53 
54 #define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
55 #define LOCAL_IRQS 32
56 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
57 			   (KVM_MAX_VCPUS + LOCAL_IRQS))
58 
59 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
60 
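/*
 * Each entry below maps a debugfs file name to the offset of a per-VCPU
 * counter in struct kvm_vcpu's stat area; common KVM code exports these
 * counters read-only via debugfs.
 */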
61 struct kvm_stats_debugfs_item debugfs_entries[] = {
62 	{ "userspace_handled", VCPU_STAT(exit_userspace) },
63 	{ "exit_null", VCPU_STAT(exit_null) },
64 	{ "exit_validity", VCPU_STAT(exit_validity) },
65 	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
66 	{ "exit_external_request", VCPU_STAT(exit_external_request) },
67 	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
68 	{ "exit_instruction", VCPU_STAT(exit_instruction) },
69 	{ "exit_pei", VCPU_STAT(exit_pei) },
70 	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
71 	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
72 	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
73 	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
74 	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
75 	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
76 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
77 	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
78 	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
79 	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
80 	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
81 	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
82 	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
83 	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
84 	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
85 	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
86 	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
87 	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
88 	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
89 	{ "deliver_io_interrupt", VCPU_STAT(deliver_io_int) },
90 	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
91 	{ "instruction_epsw", VCPU_STAT(instruction_epsw) },
92 	{ "instruction_gs", VCPU_STAT(instruction_gs) },
93 	{ "instruction_io_other", VCPU_STAT(instruction_io_other) },
94 	{ "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
95 	{ "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
96 	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
97 	{ "instruction_ptff", VCPU_STAT(instruction_ptff) },
98 	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
99 	{ "instruction_sck", VCPU_STAT(instruction_sck) },
100 	{ "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
101 	{ "instruction_spx", VCPU_STAT(instruction_spx) },
102 	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
103 	{ "instruction_stap", VCPU_STAT(instruction_stap) },
104 	{ "instruction_iske", VCPU_STAT(instruction_iske) },
105 	{ "instruction_ri", VCPU_STAT(instruction_ri) },
106 	{ "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
107 	{ "instruction_sske", VCPU_STAT(instruction_sske) },
108 	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
109 	{ "instruction_essa", VCPU_STAT(instruction_essa) },
110 	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
111 	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
112 	{ "instruction_tb", VCPU_STAT(instruction_tb) },
113 	{ "instruction_tpi", VCPU_STAT(instruction_tpi) },
114 	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
115 	{ "instruction_tsch", VCPU_STAT(instruction_tsch) },
116 	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
117 	{ "instruction_sie", VCPU_STAT(instruction_sie) },
118 	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
119 	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
120 	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
121 	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
122 	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
123 	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
124 	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
125 	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
126 	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
127 	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
128 	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
129 	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
130 	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
131 	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
132 	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
133 	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
134 	{ "instruction_diag_10", VCPU_STAT(diagnose_10) },
135 	{ "instruction_diag_44", VCPU_STAT(diagnose_44) },
136 	{ "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
137 	{ "instruction_diag_258", VCPU_STAT(diagnose_258) },
138 	{ "instruction_diag_308", VCPU_STAT(diagnose_308) },
139 	{ "instruction_diag_500", VCPU_STAT(diagnose_500) },
140 	{ "instruction_diag_other", VCPU_STAT(diagnose_other) },
141 	{ NULL }
142 };
143 
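/*
 * Matches the first bytes stored by STORE CLOCK EXTENDED (STCKE): the
 * epoch index byte followed by the 64-bit TOD value; the remaining bytes
 * are not used here.
 */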
144 struct kvm_s390_tod_clock_ext {
145 	__u8 epoch_idx;
146 	__u64 tod;
147 	__u8 reserved[7];
148 } __packed;
149 
150 /* allow nested virtualization in KVM (if enabled by user space) */
151 static int nested;
152 module_param(nested, int, S_IRUGO);
153 MODULE_PARM_DESC(nested, "Nested virtualization support");
154 
155 /* upper facilities limit for kvm */
156 unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
157 
158 unsigned long kvm_s390_fac_list_mask_size(void)
159 {
160 	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
161 	return ARRAY_SIZE(kvm_s390_fac_list_mask);
162 }
163 
164 /* available cpu features supported by kvm */
165 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
166 /* available subfunctions indicated via query / "test bit" */
167 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
168 
169 static struct gmap_notifier gmap_notifier;
170 static struct gmap_notifier vsie_gmap_notifier;
171 debug_info_t *kvm_s390_dbf;
172 
173 /* Section: not file related */
174 int kvm_arch_hardware_enable(void)
175 {
176 	/* every s390 is virtualization enabled ;-) */
177 	return 0;
178 }
179 
180 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
181 			      unsigned long end);
182 
183 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
184 {
185 	u8 delta_idx = 0;
186 
187 	/*
188 	 * The TOD jumps by delta; we have to compensate for this by
189 	 * adding -delta to the epoch.
190 	 */
191 	delta = -delta;
192 
193 	/* sign-extension - we're adding to signed values below */
194 	if ((s64)delta < 0)
195 		delta_idx = -1;
196 
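	/*
	 * 128-bit add of the sign-extended delta to (epdx:epoch): epoch takes
	 * the low 64 bits; epdx takes the sign extension (delta_idx) plus a
	 * carry, which occurred iff the unsigned sum wrapped, i.e. iff the
	 * new epoch is smaller than delta.
	 */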
197 	scb->epoch += delta;
198 	if (scb->ecd & ECD_MEF) {
199 		scb->epdx += delta_idx;
200 		if (scb->epoch < delta)
201 			scb->epdx += 1;
202 	}
203 }
204 
205 /*
206  * This callback is executed during stop_machine(). All CPUs are therefore
207  * temporarily stopped. In order not to change guest behavior, we have to
208  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
209  * so that no CPU is stopped in the middle of an epoch calculation.
210  */
211 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
212 			  void *v)
213 {
214 	struct kvm *kvm;
215 	struct kvm_vcpu *vcpu;
216 	int i;
217 	unsigned long long *delta = v;
218 
219 	list_for_each_entry(kvm, &vm_list, vm_list) {
220 		kvm_for_each_vcpu(i, vcpu, kvm) {
221 			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
222 			if (i == 0) {
223 				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
224 				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
225 			}
226 			if (vcpu->arch.cputm_enabled)
227 				vcpu->arch.cputm_start += *delta;
228 			if (vcpu->arch.vsie_block)
229 				kvm_clock_sync_scb(vcpu->arch.vsie_block,
230 						   *delta);
231 		}
232 	}
233 	return NOTIFY_OK;
234 }
235 
236 static struct notifier_block kvm_clock_notifier = {
237 	.notifier_call = kvm_clock_sync,
238 };
239 
240 int kvm_arch_hardware_setup(void)
241 {
242 	gmap_notifier.notifier_call = kvm_gmap_notifier;
243 	gmap_register_pte_notifier(&gmap_notifier);
244 	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
245 	gmap_register_pte_notifier(&vsie_gmap_notifier);
246 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
247 				       &kvm_clock_notifier);
248 	return 0;
249 }
250 
251 void kvm_arch_hardware_unsetup(void)
252 {
253 	gmap_unregister_pte_notifier(&gmap_notifier);
254 	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
255 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
256 					 &kvm_clock_notifier);
257 }
258 
259 static void allow_cpu_feat(unsigned long nr)
260 {
261 	set_bit_inv(nr, kvm_s390_available_cpu_feat);
262 }
263 
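/*
 * Test whether PERFORM LOCKED OPERATION function code @nr is installed:
 * the 0x100 bit in general register 0 selects the "test bit" form of PLO,
 * which sets condition code 0 if the function is available.
 */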
264 static inline int plo_test_bit(unsigned char nr)
265 {
266 	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
267 	int cc;
268 
269 	asm volatile(
270 		/* Parameter registers are ignored for "test bit" */
271 		"	plo	0,0,0,0(0)\n"
272 		"	ipm	%0\n"
273 		"	srl	%0,28\n"
274 		: "=d" (cc)
275 		: "d" (r0)
276 		: "cc");
277 	return cc == 0;
278 }
279 
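/*
 * Probe which optional CPU features and which PLO/CPACF subfunctions the
 * host provides; the results are exposed to user space through the
 * KVM_S390_VM_CPU_MODEL machine attributes and gate what guests may use.
 */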
280 static void kvm_s390_cpu_feat_init(void)
281 {
282 	int i;
283 
284 	for (i = 0; i < 256; ++i) {
285 		if (plo_test_bit(i))
286 			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
287 	}
288 
289 	if (test_facility(28)) /* TOD-clock steering */
290 		ptff(kvm_s390_available_subfunc.ptff,
291 		     sizeof(kvm_s390_available_subfunc.ptff),
292 		     PTFF_QAF);
293 
294 	if (test_facility(17)) { /* MSA */
295 		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
296 			      kvm_s390_available_subfunc.kmac);
297 		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
298 			      kvm_s390_available_subfunc.kmc);
299 		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
300 			      kvm_s390_available_subfunc.km);
301 		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
302 			      kvm_s390_available_subfunc.kimd);
303 		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
304 			      kvm_s390_available_subfunc.klmd);
305 	}
306 	if (test_facility(76)) /* MSA3 */
307 		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
308 			      kvm_s390_available_subfunc.pckmo);
309 	if (test_facility(77)) { /* MSA4 */
310 		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
311 			      kvm_s390_available_subfunc.kmctr);
312 		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
313 			      kvm_s390_available_subfunc.kmf);
314 		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
315 			      kvm_s390_available_subfunc.kmo);
316 		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
317 			      kvm_s390_available_subfunc.pcc);
318 	}
319 	if (test_facility(57)) /* MSA5 */
320 		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
321 			      kvm_s390_available_subfunc.ppno);
322 
323 	if (test_facility(146)) /* MSA8 */
324 		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
325 			      kvm_s390_available_subfunc.kma);
326 
327 	if (MACHINE_HAS_ESOP)
328 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
329 	/*
330 	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
331 	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
332 	 */
333 	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
334 	    !test_facility(3) || !nested)
335 		return;
336 	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
337 	if (sclp.has_64bscao)
338 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
339 	if (sclp.has_siif)
340 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
341 	if (sclp.has_gpere)
342 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
343 	if (sclp.has_gsls)
344 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
345 	if (sclp.has_ib)
346 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
347 	if (sclp.has_cei)
348 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
349 	if (sclp.has_ibs)
350 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
351 	if (sclp.has_kss)
352 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
353 	/*
354 	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
355 	 * all skey handling functions read/set the skey from the PGSTE
356 	 * instead of the real storage key.
357 	 *
358 	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will cause
359 	 * pages to be detected as preserved although they are resident.
360 	 *
361 	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
362 	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
363 	 *
364 	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
365 	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
366 	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
367 	 *
368 	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
369 	 * cannot easily shadow the SCA because of the ipte lock.
370 	 */
371 }
372 
373 int kvm_arch_init(void *opaque)
374 {
375 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
376 	if (!kvm_s390_dbf)
377 		return -ENOMEM;
378 
379 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
380 		debug_unregister(kvm_s390_dbf);
381 		return -ENOMEM;
382 	}
383 
384 	kvm_s390_cpu_feat_init();
385 
386 	/* Register floating interrupt controller interface. */
387 	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
388 }
389 
390 void kvm_arch_exit(void)
391 {
392 	debug_unregister(kvm_s390_dbf);
393 }
394 
395 /* Section: device related */
396 long kvm_arch_dev_ioctl(struct file *filp,
397 			unsigned int ioctl, unsigned long arg)
398 {
399 	if (ioctl == KVM_S390_ENABLE_SIE)
400 		return s390_enable_sie();
401 	return -EINVAL;
402 }
403 
404 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
405 {
406 	int r;
407 
408 	switch (ext) {
409 	case KVM_CAP_S390_PSW:
410 	case KVM_CAP_S390_GMAP:
411 	case KVM_CAP_SYNC_MMU:
412 #ifdef CONFIG_KVM_S390_UCONTROL
413 	case KVM_CAP_S390_UCONTROL:
414 #endif
415 	case KVM_CAP_ASYNC_PF:
416 	case KVM_CAP_SYNC_REGS:
417 	case KVM_CAP_ONE_REG:
418 	case KVM_CAP_ENABLE_CAP:
419 	case KVM_CAP_S390_CSS_SUPPORT:
420 	case KVM_CAP_IOEVENTFD:
421 	case KVM_CAP_DEVICE_CTRL:
422 	case KVM_CAP_ENABLE_CAP_VM:
423 	case KVM_CAP_S390_IRQCHIP:
424 	case KVM_CAP_VM_ATTRIBUTES:
425 	case KVM_CAP_MP_STATE:
426 	case KVM_CAP_IMMEDIATE_EXIT:
427 	case KVM_CAP_S390_INJECT_IRQ:
428 	case KVM_CAP_S390_USER_SIGP:
429 	case KVM_CAP_S390_USER_STSI:
430 	case KVM_CAP_S390_SKEYS:
431 	case KVM_CAP_S390_IRQ_STATE:
432 	case KVM_CAP_S390_USER_INSTR0:
433 	case KVM_CAP_S390_CMMA_MIGRATION:
434 	case KVM_CAP_S390_AIS:
435 	case KVM_CAP_S390_AIS_MIGRATION:
436 		r = 1;
437 		break;
438 	case KVM_CAP_S390_MEM_OP:
439 		r = MEM_OP_MAX_SIZE;
440 		break;
441 	case KVM_CAP_NR_VCPUS:
442 	case KVM_CAP_MAX_VCPUS:
443 		r = KVM_S390_BSCA_CPU_SLOTS;
444 		if (!kvm_s390_use_sca_entries())
445 			r = KVM_MAX_VCPUS;
446 		else if (sclp.has_esca && sclp.has_64bscao)
447 			r = KVM_S390_ESCA_CPU_SLOTS;
448 		break;
449 	case KVM_CAP_NR_MEMSLOTS:
450 		r = KVM_USER_MEM_SLOTS;
451 		break;
452 	case KVM_CAP_S390_COW:
453 		r = MACHINE_HAS_ESOP;
454 		break;
455 	case KVM_CAP_S390_VECTOR_REGISTERS:
456 		r = MACHINE_HAS_VX;
457 		break;
458 	case KVM_CAP_S390_RI:
459 		r = test_facility(64);
460 		break;
461 	case KVM_CAP_S390_GS:
462 		r = test_facility(133);
463 		break;
464 	case KVM_CAP_S390_BPB:
465 		r = test_facility(82);
466 		break;
467 	default:
468 		r = 0;
469 	}
470 	return r;
471 }
472 
473 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
474 					struct kvm_memory_slot *memslot)
475 {
476 	gfn_t cur_gfn, last_gfn;
477 	unsigned long address;
478 	struct gmap *gmap = kvm->arch.gmap;
479 
480 	/* Loop over all guest pages */
481 	last_gfn = memslot->base_gfn + memslot->npages;
482 	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
483 		address = gfn_to_hva_memslot(memslot, cur_gfn);
484 
485 		if (test_and_clear_guest_dirty(gmap->mm, address))
486 			mark_page_dirty(kvm, cur_gfn);
487 		if (fatal_signal_pending(current))
488 			return;
489 		cond_resched();
490 	}
491 }
492 
493 /* Section: vm related */
494 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
495 
496 /*
497  * Get (and clear) the dirty memory log for a memory slot.
498  */
499 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
500 			       struct kvm_dirty_log *log)
501 {
502 	int r;
503 	unsigned long n;
504 	struct kvm_memslots *slots;
505 	struct kvm_memory_slot *memslot;
506 	int is_dirty = 0;
507 
508 	if (kvm_is_ucontrol(kvm))
509 		return -EINVAL;
510 
511 	mutex_lock(&kvm->slots_lock);
512 
513 	r = -EINVAL;
514 	if (log->slot >= KVM_USER_MEM_SLOTS)
515 		goto out;
516 
517 	slots = kvm_memslots(kvm);
518 	memslot = id_to_memslot(slots, log->slot);
519 	r = -ENOENT;
520 	if (!memslot->dirty_bitmap)
521 		goto out;
522 
523 	kvm_s390_sync_dirty_log(kvm, memslot);
524 	r = kvm_get_dirty_log(kvm, log, &is_dirty);
525 	if (r)
526 		goto out;
527 
528 	/* Clear the dirty log */
529 	if (is_dirty) {
530 		n = kvm_dirty_bitmap_bytes(memslot);
531 		memset(memslot->dirty_bitmap, 0, n);
532 	}
533 	r = 0;
534 out:
535 	mutex_unlock(&kvm->slots_lock);
536 	return r;
537 }
538 
539 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
540 {
541 	unsigned int i;
542 	struct kvm_vcpu *vcpu;
543 
544 	kvm_for_each_vcpu(i, vcpu, kvm) {
545 		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
546 	}
547 }
548 
549 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
550 {
551 	int r;
552 
553 	if (cap->flags)
554 		return -EINVAL;
555 
556 	switch (cap->cap) {
557 	case KVM_CAP_S390_IRQCHIP:
558 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
559 		kvm->arch.use_irqchip = 1;
560 		r = 0;
561 		break;
562 	case KVM_CAP_S390_USER_SIGP:
563 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
564 		kvm->arch.user_sigp = 1;
565 		r = 0;
566 		break;
567 	case KVM_CAP_S390_VECTOR_REGISTERS:
568 		mutex_lock(&kvm->lock);
569 		if (kvm->created_vcpus) {
570 			r = -EBUSY;
571 		} else if (MACHINE_HAS_VX) {
572 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
573 			set_kvm_facility(kvm->arch.model.fac_list, 129);
574 			if (test_facility(134)) {
575 				set_kvm_facility(kvm->arch.model.fac_mask, 134);
576 				set_kvm_facility(kvm->arch.model.fac_list, 134);
577 			}
578 			if (test_facility(135)) {
579 				set_kvm_facility(kvm->arch.model.fac_mask, 135);
580 				set_kvm_facility(kvm->arch.model.fac_list, 135);
581 			}
582 			r = 0;
583 		} else
584 			r = -EINVAL;
585 		mutex_unlock(&kvm->lock);
586 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
587 			 r ? "(not available)" : "(success)");
588 		break;
589 	case KVM_CAP_S390_RI:
590 		r = -EINVAL;
591 		mutex_lock(&kvm->lock);
592 		if (kvm->created_vcpus) {
593 			r = -EBUSY;
594 		} else if (test_facility(64)) {
595 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
596 			set_kvm_facility(kvm->arch.model.fac_list, 64);
597 			r = 0;
598 		}
599 		mutex_unlock(&kvm->lock);
600 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
601 			 r ? "(not available)" : "(success)");
602 		break;
603 	case KVM_CAP_S390_AIS:
604 		mutex_lock(&kvm->lock);
605 		if (kvm->created_vcpus) {
606 			r = -EBUSY;
607 		} else {
608 			set_kvm_facility(kvm->arch.model.fac_mask, 72);
609 			set_kvm_facility(kvm->arch.model.fac_list, 72);
610 			r = 0;
611 		}
612 		mutex_unlock(&kvm->lock);
613 		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
614 			 r ? "(not available)" : "(success)");
615 		break;
616 	case KVM_CAP_S390_GS:
617 		r = -EINVAL;
618 		mutex_lock(&kvm->lock);
619 		if (kvm->created_vcpus) {
620 			r = -EBUSY;
621 		} else if (test_facility(133)) {
622 			set_kvm_facility(kvm->arch.model.fac_mask, 133);
623 			set_kvm_facility(kvm->arch.model.fac_list, 133);
624 			r = 0;
625 		}
626 		mutex_unlock(&kvm->lock);
627 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
628 			 r ? "(not available)" : "(success)");
629 		break;
630 	case KVM_CAP_S390_USER_STSI:
631 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
632 		kvm->arch.user_stsi = 1;
633 		r = 0;
634 		break;
635 	case KVM_CAP_S390_USER_INSTR0:
636 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
637 		kvm->arch.user_instr0 = 1;
638 		icpt_operexc_on_all_vcpus(kvm);
639 		r = 0;
640 		break;
641 	default:
642 		r = -EINVAL;
643 		break;
644 	}
645 	return r;
646 }
647 
648 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
649 {
650 	int ret;
651 
652 	switch (attr->attr) {
653 	case KVM_S390_VM_MEM_LIMIT_SIZE:
654 		ret = 0;
655 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
656 			 kvm->arch.mem_limit);
657 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
658 			ret = -EFAULT;
659 		break;
660 	default:
661 		ret = -ENXIO;
662 		break;
663 	}
664 	return ret;
665 }
666 
667 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
668 {
669 	int ret;
670 	unsigned int idx;
671 	switch (attr->attr) {
672 	case KVM_S390_VM_MEM_ENABLE_CMMA:
673 		ret = -ENXIO;
674 		if (!sclp.has_cmma)
675 			break;
676 
677 		ret = -EBUSY;
678 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
679 		mutex_lock(&kvm->lock);
680 		if (!kvm->created_vcpus) {
681 			kvm->arch.use_cmma = 1;
682 			ret = 0;
683 		}
684 		mutex_unlock(&kvm->lock);
685 		break;
686 	case KVM_S390_VM_MEM_CLR_CMMA:
687 		ret = -ENXIO;
688 		if (!sclp.has_cmma)
689 			break;
690 		ret = -EINVAL;
691 		if (!kvm->arch.use_cmma)
692 			break;
693 
694 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
695 		mutex_lock(&kvm->lock);
696 		idx = srcu_read_lock(&kvm->srcu);
697 		s390_reset_cmma(kvm->arch.gmap->mm);
698 		srcu_read_unlock(&kvm->srcu, idx);
699 		mutex_unlock(&kvm->lock);
700 		ret = 0;
701 		break;
702 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
703 		unsigned long new_limit;
704 
705 		if (kvm_is_ucontrol(kvm))
706 			return -EINVAL;
707 
708 		if (get_user(new_limit, (u64 __user *)attr->addr))
709 			return -EFAULT;
710 
711 		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
712 		    new_limit > kvm->arch.mem_limit)
713 			return -E2BIG;
714 
715 		if (!new_limit)
716 			return -EINVAL;
717 
718 		/* gmap_create() takes the last usable address */
719 		if (new_limit != KVM_S390_NO_MEM_LIMIT)
720 			new_limit -= 1;
721 
722 		ret = -EBUSY;
723 		mutex_lock(&kvm->lock);
724 		if (!kvm->created_vcpus) {
725 			/* gmap_create will round the limit up */
726 			struct gmap *new = gmap_create(current->mm, new_limit);
727 
728 			if (!new) {
729 				ret = -ENOMEM;
730 			} else {
731 				gmap_remove(kvm->arch.gmap);
732 				new->private = kvm;
733 				kvm->arch.gmap = new;
734 				ret = 0;
735 			}
736 		}
737 		mutex_unlock(&kvm->lock);
738 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
739 		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
740 			 (void *) kvm->arch.gmap->asce);
741 		break;
742 	}
743 	default:
744 		ret = -ENXIO;
745 		break;
746 	}
747 	return ret;
748 }
749 
750 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
751 
752 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
753 {
754 	struct kvm_vcpu *vcpu;
755 	int i;
756 
757 	if (!test_kvm_facility(kvm, 76))
758 		return -EINVAL;
759 
760 	mutex_lock(&kvm->lock);
761 	switch (attr->attr) {
762 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
763 		get_random_bytes(
764 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
765 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
766 		kvm->arch.crypto.aes_kw = 1;
767 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
768 		break;
769 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
770 		get_random_bytes(
771 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
772 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
773 		kvm->arch.crypto.dea_kw = 1;
774 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
775 		break;
776 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
777 		kvm->arch.crypto.aes_kw = 0;
778 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
779 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
780 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
781 		break;
782 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
783 		kvm->arch.crypto.dea_kw = 0;
784 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
785 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
786 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
787 		break;
788 	default:
789 		mutex_unlock(&kvm->lock);
790 		return -ENXIO;
791 	}
792 
793 	kvm_for_each_vcpu(i, vcpu, kvm) {
794 		kvm_s390_vcpu_crypto_setup(vcpu);
795 		exit_sie(vcpu);
796 	}
797 	mutex_unlock(&kvm->lock);
798 	return 0;
799 }
800 
801 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
802 {
803 	int cx;
804 	struct kvm_vcpu *vcpu;
805 
806 	kvm_for_each_vcpu(cx, vcpu, kvm)
807 		kvm_s390_sync_request(req, vcpu);
808 }
809 
810 /*
811  * Must be called with kvm->srcu held to avoid races on memslots, and with
812  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
813  */
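/*
 * When CMMA is in use, starting migration allocates a PGSTE bitmap
 * covering the guest address space, marks every page of the active
 * memslots as dirty in it, and asks all VCPUs to switch to migration
 * mode (KVM_REQ_START_MIGRATION).
 */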
814 static int kvm_s390_vm_start_migration(struct kvm *kvm)
815 {
816 	struct kvm_s390_migration_state *mgs;
817 	struct kvm_memory_slot *ms;
818 	/* should be the only one */
819 	struct kvm_memslots *slots;
820 	unsigned long ram_pages;
821 	int slotnr;
822 
823 	/* migration mode already enabled */
824 	if (kvm->arch.migration_state)
825 		return 0;
826 
827 	slots = kvm_memslots(kvm);
828 	if (!slots || !slots->used_slots)
829 		return -EINVAL;
830 
831 	mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
832 	if (!mgs)
833 		return -ENOMEM;
834 	kvm->arch.migration_state = mgs;
835 
836 	if (kvm->arch.use_cmma) {
837 		/*
838 		 * Get the first slot. They are reverse sorted by base_gfn, so
839 		 * the first slot is also the one at the end of the address
840 		 * space. We have verified above that at least one slot is
841 		 * present.
842 		 */
843 		ms = slots->memslots;
844 		/* round up so we only use full longs */
845 		ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
846 		/* allocate enough bytes to store all the bits */
847 		mgs->pgste_bitmap = vmalloc(ram_pages / 8);
848 		if (!mgs->pgste_bitmap) {
849 			kfree(mgs);
850 			kvm->arch.migration_state = NULL;
851 			return -ENOMEM;
852 		}
853 
854 		mgs->bitmap_size = ram_pages;
855 		atomic64_set(&mgs->dirty_pages, ram_pages);
856 		/* mark all the pages in active slots as dirty */
857 		for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
858 			ms = slots->memslots + slotnr;
859 			bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
860 		}
861 
862 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
863 	}
864 	return 0;
865 }
866 
867 /*
868  * Must be called with kvm->slots_lock to avoid races with ourselves and
869  * kvm_s390_vm_start_migration.
870  */
871 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
872 {
873 	struct kvm_s390_migration_state *mgs;
874 
875 	/* migration mode already disabled */
876 	if (!kvm->arch.migration_state)
877 		return 0;
878 	mgs = kvm->arch.migration_state;
879 	kvm->arch.migration_state = NULL;
880 
881 	if (kvm->arch.use_cmma) {
882 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
883 		/* We have to wait for the essa emulation to finish */
884 		synchronize_srcu(&kvm->srcu);
885 		vfree(mgs->pgste_bitmap);
886 	}
887 	kfree(mgs);
888 	return 0;
889 }
890 
891 static int kvm_s390_vm_set_migration(struct kvm *kvm,
892 				     struct kvm_device_attr *attr)
893 {
894 	int res = -ENXIO;
895 
896 	mutex_lock(&kvm->slots_lock);
897 	switch (attr->attr) {
898 	case KVM_S390_VM_MIGRATION_START:
899 		res = kvm_s390_vm_start_migration(kvm);
900 		break;
901 	case KVM_S390_VM_MIGRATION_STOP:
902 		res = kvm_s390_vm_stop_migration(kvm);
903 		break;
904 	default:
905 		break;
906 	}
907 	mutex_unlock(&kvm->slots_lock);
908 
909 	return res;
910 }
911 
912 static int kvm_s390_vm_get_migration(struct kvm *kvm,
913 				     struct kvm_device_attr *attr)
914 {
915 	u64 mig = (kvm->arch.migration_state != NULL);
916 
917 	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
918 		return -ENXIO;
919 
920 	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
921 		return -EFAULT;
922 	return 0;
923 }
924 
925 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
926 {
927 	struct kvm_s390_vm_tod_clock gtod;
928 
929 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
930 		return -EFAULT;
931 
932 	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
933 		return -EINVAL;
934 	kvm_s390_set_tod_clock(kvm, &gtod);
935 
936 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
937 		gtod.epoch_idx, gtod.tod);
938 
939 	return 0;
940 }
941 
942 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
943 {
944 	u8 gtod_high;
945 
946 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
947 					   sizeof(gtod_high)))
948 		return -EFAULT;
949 
950 	if (gtod_high != 0)
951 		return -EINVAL;
952 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
953 
954 	return 0;
955 }
956 
957 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
958 {
959 	struct kvm_s390_vm_tod_clock gtod = { 0 };
960 
961 	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
962 			   sizeof(gtod.tod)))
963 		return -EFAULT;
964 
965 	kvm_s390_set_tod_clock(kvm, &gtod);
966 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
967 	return 0;
968 }
969 
970 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
971 {
972 	int ret;
973 
974 	if (attr->flags)
975 		return -EINVAL;
976 
977 	switch (attr->attr) {
978 	case KVM_S390_VM_TOD_EXT:
979 		ret = kvm_s390_set_tod_ext(kvm, attr);
980 		break;
981 	case KVM_S390_VM_TOD_HIGH:
982 		ret = kvm_s390_set_tod_high(kvm, attr);
983 		break;
984 	case KVM_S390_VM_TOD_LOW:
985 		ret = kvm_s390_set_tod_low(kvm, attr);
986 		break;
987 	default:
988 		ret = -ENXIO;
989 		break;
990 	}
991 	return ret;
992 }
993 
994 static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
995 					struct kvm_s390_vm_tod_clock *gtod)
996 {
997 	struct kvm_s390_tod_clock_ext htod;
998 
999 	preempt_disable();
1000 
1001 	get_tod_clock_ext((char *)&htod);
1002 
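	/* 128-bit add: carry into the epoch index if the low 64 bits wrapped */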
1003 	gtod->tod = htod.tod + kvm->arch.epoch;
1004 	gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1005 
1006 	if (gtod->tod < htod.tod)
1007 		gtod->epoch_idx += 1;
1008 
1009 	preempt_enable();
1010 }
1011 
1012 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1013 {
1014 	struct kvm_s390_vm_tod_clock gtod;
1015 
1016 	memset(&gtod, 0, sizeof(gtod));
1017 
1018 	if (test_kvm_facility(kvm, 139))
1019 		kvm_s390_get_tod_clock_ext(kvm, &gtod);
1020 	else
1021 		gtod.tod = kvm_s390_get_tod_clock_fast(kvm);
1022 
1023 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1024 		return -EFAULT;
1025 
1026 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1027 		gtod.epoch_idx, gtod.tod);
1028 	return 0;
1029 }
1030 
1031 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1032 {
1033 	u8 gtod_high = 0;
1034 
1035 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
1036 					 sizeof(gtod_high)))
1037 		return -EFAULT;
1038 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1039 
1040 	return 0;
1041 }
1042 
1043 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1044 {
1045 	u64 gtod;
1046 
1047 	gtod = kvm_s390_get_tod_clock_fast(kvm);
1048 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1049 		return -EFAULT;
1050 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1051 
1052 	return 0;
1053 }
1054 
1055 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1056 {
1057 	int ret;
1058 
1059 	if (attr->flags)
1060 		return -EINVAL;
1061 
1062 	switch (attr->attr) {
1063 	case KVM_S390_VM_TOD_EXT:
1064 		ret = kvm_s390_get_tod_ext(kvm, attr);
1065 		break;
1066 	case KVM_S390_VM_TOD_HIGH:
1067 		ret = kvm_s390_get_tod_high(kvm, attr);
1068 		break;
1069 	case KVM_S390_VM_TOD_LOW:
1070 		ret = kvm_s390_get_tod_low(kvm, attr);
1071 		break;
1072 	default:
1073 		ret = -ENXIO;
1074 		break;
1075 	}
1076 	return ret;
1077 }
1078 
1079 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1080 {
1081 	struct kvm_s390_vm_cpu_processor *proc;
1082 	u16 lowest_ibc, unblocked_ibc;
1083 	int ret = 0;
1084 
1085 	mutex_lock(&kvm->lock);
1086 	if (kvm->created_vcpus) {
1087 		ret = -EBUSY;
1088 		goto out;
1089 	}
1090 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1091 	if (!proc) {
1092 		ret = -ENOMEM;
1093 		goto out;
1094 	}
1095 	if (!copy_from_user(proc, (void __user *)attr->addr,
1096 			    sizeof(*proc))) {
1097 		kvm->arch.model.cpuid = proc->cpuid;
1098 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
1099 		unblocked_ibc = sclp.ibc & 0xfff;
1100 		if (lowest_ibc && proc->ibc) {
1101 			if (proc->ibc > unblocked_ibc)
1102 				kvm->arch.model.ibc = unblocked_ibc;
1103 			else if (proc->ibc < lowest_ibc)
1104 				kvm->arch.model.ibc = lowest_ibc;
1105 			else
1106 				kvm->arch.model.ibc = proc->ibc;
1107 		}
1108 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
1109 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
1110 		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1111 			 kvm->arch.model.ibc,
1112 			 kvm->arch.model.cpuid);
1113 		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1114 			 kvm->arch.model.fac_list[0],
1115 			 kvm->arch.model.fac_list[1],
1116 			 kvm->arch.model.fac_list[2]);
1117 	} else
1118 		ret = -EFAULT;
1119 	kfree(proc);
1120 out:
1121 	mutex_unlock(&kvm->lock);
1122 	return ret;
1123 }
1124 
1125 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1126 				       struct kvm_device_attr *attr)
1127 {
1128 	struct kvm_s390_vm_cpu_feat data;
1129 
1130 	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1131 		return -EFAULT;
1132 	if (!bitmap_subset((unsigned long *) data.feat,
1133 			   kvm_s390_available_cpu_feat,
1134 			   KVM_S390_VM_CPU_FEAT_NR_BITS))
1135 		return -EINVAL;
1136 
1137 	mutex_lock(&kvm->lock);
1138 	if (kvm->created_vcpus) {
1139 		mutex_unlock(&kvm->lock);
1140 		return -EBUSY;
1141 	}
1142 	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1143 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1144 	mutex_unlock(&kvm->lock);
1145 	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1146 			 data.feat[0],
1147 			 data.feat[1],
1148 			 data.feat[2]);
1149 	return 0;
1150 }
1151 
1152 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1153 					  struct kvm_device_attr *attr)
1154 {
1155 	/*
1156 	 * Once supported by kernel + hw, we have to store the subfunctions
1157 	 * in kvm->arch and remember that user space configured them.
1158 	 */
1159 	return -ENXIO;
1160 }
1161 
1162 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1163 {
1164 	int ret = -ENXIO;
1165 
1166 	switch (attr->attr) {
1167 	case KVM_S390_VM_CPU_PROCESSOR:
1168 		ret = kvm_s390_set_processor(kvm, attr);
1169 		break;
1170 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1171 		ret = kvm_s390_set_processor_feat(kvm, attr);
1172 		break;
1173 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1174 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
1175 		break;
1176 	}
1177 	return ret;
1178 }
1179 
1180 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1181 {
1182 	struct kvm_s390_vm_cpu_processor *proc;
1183 	int ret = 0;
1184 
1185 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1186 	if (!proc) {
1187 		ret = -ENOMEM;
1188 		goto out;
1189 	}
1190 	proc->cpuid = kvm->arch.model.cpuid;
1191 	proc->ibc = kvm->arch.model.ibc;
1192 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1193 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1194 	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1195 		 kvm->arch.model.ibc,
1196 		 kvm->arch.model.cpuid);
1197 	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1198 		 kvm->arch.model.fac_list[0],
1199 		 kvm->arch.model.fac_list[1],
1200 		 kvm->arch.model.fac_list[2]);
1201 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1202 		ret = -EFAULT;
1203 	kfree(proc);
1204 out:
1205 	return ret;
1206 }
1207 
1208 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1209 {
1210 	struct kvm_s390_vm_cpu_machine *mach;
1211 	int ret = 0;
1212 
1213 	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1214 	if (!mach) {
1215 		ret = -ENOMEM;
1216 		goto out;
1217 	}
1218 	get_cpu_id((struct cpuid *) &mach->cpuid);
1219 	mach->ibc = sclp.ibc;
1220 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1221 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1222 	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1223 	       sizeof(S390_lowcore.stfle_fac_list));
1224 	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1225 		 kvm->arch.model.ibc,
1226 		 kvm->arch.model.cpuid);
1227 	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1228 		 mach->fac_mask[0],
1229 		 mach->fac_mask[1],
1230 		 mach->fac_mask[2]);
1231 	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1232 		 mach->fac_list[0],
1233 		 mach->fac_list[1],
1234 		 mach->fac_list[2]);
1235 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1236 		ret = -EFAULT;
1237 	kfree(mach);
1238 out:
1239 	return ret;
1240 }
1241 
1242 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1243 				       struct kvm_device_attr *attr)
1244 {
1245 	struct kvm_s390_vm_cpu_feat data;
1246 
1247 	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1248 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1249 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1250 		return -EFAULT;
1251 	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1252 			 data.feat[0],
1253 			 data.feat[1],
1254 			 data.feat[2]);
1255 	return 0;
1256 }
1257 
1258 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1259 				     struct kvm_device_attr *attr)
1260 {
1261 	struct kvm_s390_vm_cpu_feat data;
1262 
1263 	bitmap_copy((unsigned long *) data.feat,
1264 		    kvm_s390_available_cpu_feat,
1265 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1266 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1267 		return -EFAULT;
1268 	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1269 			 data.feat[0],
1270 			 data.feat[1],
1271 			 data.feat[2]);
1272 	return 0;
1273 }
1274 
1275 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1276 					  struct kvm_device_attr *attr)
1277 {
1278 	/*
1279 	 * Once we can actually configure subfunctions (kernel + hw support),
1280 	 * we have to check if they were already set by user space, if so copy
1281 	 * them from kvm->arch.
1282 	 */
1283 	return -ENXIO;
1284 }
1285 
1286 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1287 					struct kvm_device_attr *attr)
1288 {
1289 	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1290 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1291 		return -EFAULT;
1292 	return 0;
1293 }
1294 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1295 {
1296 	int ret = -ENXIO;
1297 
1298 	switch (attr->attr) {
1299 	case KVM_S390_VM_CPU_PROCESSOR:
1300 		ret = kvm_s390_get_processor(kvm, attr);
1301 		break;
1302 	case KVM_S390_VM_CPU_MACHINE:
1303 		ret = kvm_s390_get_machine(kvm, attr);
1304 		break;
1305 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1306 		ret = kvm_s390_get_processor_feat(kvm, attr);
1307 		break;
1308 	case KVM_S390_VM_CPU_MACHINE_FEAT:
1309 		ret = kvm_s390_get_machine_feat(kvm, attr);
1310 		break;
1311 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1312 		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1313 		break;
1314 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1315 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1316 		break;
1317 	}
1318 	return ret;
1319 }
1320 
1321 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1322 {
1323 	int ret;
1324 
1325 	switch (attr->group) {
1326 	case KVM_S390_VM_MEM_CTRL:
1327 		ret = kvm_s390_set_mem_control(kvm, attr);
1328 		break;
1329 	case KVM_S390_VM_TOD:
1330 		ret = kvm_s390_set_tod(kvm, attr);
1331 		break;
1332 	case KVM_S390_VM_CPU_MODEL:
1333 		ret = kvm_s390_set_cpu_model(kvm, attr);
1334 		break;
1335 	case KVM_S390_VM_CRYPTO:
1336 		ret = kvm_s390_vm_set_crypto(kvm, attr);
1337 		break;
1338 	case KVM_S390_VM_MIGRATION:
1339 		ret = kvm_s390_vm_set_migration(kvm, attr);
1340 		break;
1341 	default:
1342 		ret = -ENXIO;
1343 		break;
1344 	}
1345 
1346 	return ret;
1347 }
1348 
1349 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1350 {
1351 	int ret;
1352 
1353 	switch (attr->group) {
1354 	case KVM_S390_VM_MEM_CTRL:
1355 		ret = kvm_s390_get_mem_control(kvm, attr);
1356 		break;
1357 	case KVM_S390_VM_TOD:
1358 		ret = kvm_s390_get_tod(kvm, attr);
1359 		break;
1360 	case KVM_S390_VM_CPU_MODEL:
1361 		ret = kvm_s390_get_cpu_model(kvm, attr);
1362 		break;
1363 	case KVM_S390_VM_MIGRATION:
1364 		ret = kvm_s390_vm_get_migration(kvm, attr);
1365 		break;
1366 	default:
1367 		ret = -ENXIO;
1368 		break;
1369 	}
1370 
1371 	return ret;
1372 }
1373 
1374 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1375 {
1376 	int ret;
1377 
1378 	switch (attr->group) {
1379 	case KVM_S390_VM_MEM_CTRL:
1380 		switch (attr->attr) {
1381 		case KVM_S390_VM_MEM_ENABLE_CMMA:
1382 		case KVM_S390_VM_MEM_CLR_CMMA:
1383 			ret = sclp.has_cmma ? 0 : -ENXIO;
1384 			break;
1385 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1386 			ret = 0;
1387 			break;
1388 		default:
1389 			ret = -ENXIO;
1390 			break;
1391 		}
1392 		break;
1393 	case KVM_S390_VM_TOD:
1394 		switch (attr->attr) {
1395 		case KVM_S390_VM_TOD_LOW:
1396 		case KVM_S390_VM_TOD_HIGH:
1397 			ret = 0;
1398 			break;
1399 		default:
1400 			ret = -ENXIO;
1401 			break;
1402 		}
1403 		break;
1404 	case KVM_S390_VM_CPU_MODEL:
1405 		switch (attr->attr) {
1406 		case KVM_S390_VM_CPU_PROCESSOR:
1407 		case KVM_S390_VM_CPU_MACHINE:
1408 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1409 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1410 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1411 			ret = 0;
1412 			break;
1413 		/* configuring subfunctions is not supported yet */
1414 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1415 		default:
1416 			ret = -ENXIO;
1417 			break;
1418 		}
1419 		break;
1420 	case KVM_S390_VM_CRYPTO:
1421 		switch (attr->attr) {
1422 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1423 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1424 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1425 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1426 			ret = 0;
1427 			break;
1428 		default:
1429 			ret = -ENXIO;
1430 			break;
1431 		}
1432 		break;
1433 	case KVM_S390_VM_MIGRATION:
1434 		ret = 0;
1435 		break;
1436 	default:
1437 		ret = -ENXIO;
1438 		break;
1439 	}
1440 
1441 	return ret;
1442 }
1443 
1444 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1445 {
1446 	uint8_t *keys;
1447 	uint64_t hva;
1448 	int srcu_idx, i, r = 0;
1449 
1450 	if (args->flags != 0)
1451 		return -EINVAL;
1452 
1453 	/* Is this guest using storage keys? */
1454 	if (!mm_use_skey(current->mm))
1455 		return KVM_S390_GET_SKEYS_NONE;
1456 
1457 	/* Enforce sane limit on memory allocation */
1458 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1459 		return -EINVAL;
1460 
1461 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1462 	if (!keys)
1463 		return -ENOMEM;
1464 
1465 	down_read(&current->mm->mmap_sem);
1466 	srcu_idx = srcu_read_lock(&kvm->srcu);
1467 	for (i = 0; i < args->count; i++) {
1468 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1469 		if (kvm_is_error_hva(hva)) {
1470 			r = -EFAULT;
1471 			break;
1472 		}
1473 
1474 		r = get_guest_storage_key(current->mm, hva, &keys[i]);
1475 		if (r)
1476 			break;
1477 	}
1478 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1479 	up_read(&current->mm->mmap_sem);
1480 
1481 	if (!r) {
1482 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1483 				 sizeof(uint8_t) * args->count);
1484 		if (r)
1485 			r = -EFAULT;
1486 	}
1487 
1488 	kvfree(keys);
1489 	return r;
1490 }
1491 
1492 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1493 {
1494 	uint8_t *keys;
1495 	uint64_t hva;
1496 	int srcu_idx, i, r = 0;
1497 
1498 	if (args->flags != 0)
1499 		return -EINVAL;
1500 
1501 	/* Enforce sane limit on memory allocation */
1502 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1503 		return -EINVAL;
1504 
1505 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1506 	if (!keys)
1507 		return -ENOMEM;
1508 
1509 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1510 			   sizeof(uint8_t) * args->count);
1511 	if (r) {
1512 		r = -EFAULT;
1513 		goto out;
1514 	}
1515 
1516 	/* Enable storage key handling for the guest */
1517 	r = s390_enable_skey();
1518 	if (r)
1519 		goto out;
1520 
1521 	down_read(&current->mm->mmap_sem);
1522 	srcu_idx = srcu_read_lock(&kvm->srcu);
1523 	for (i = 0; i < args->count; i++) {
1524 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1525 		if (kvm_is_error_hva(hva)) {
1526 			r = -EFAULT;
1527 			break;
1528 		}
1529 
1530 		/* Lowest order bit is reserved */
1531 		if (keys[i] & 0x01) {
1532 			r = -EINVAL;
1533 			break;
1534 		}
1535 
1536 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1537 		if (r)
1538 			break;
1539 	}
1540 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1541 	up_read(&current->mm->mmap_sem);
1542 out:
1543 	kvfree(keys);
1544 	return r;
1545 }
1546 
1547 /*
1548  * Base address and length must be sent at the start of each block; therefore
1549  * it's cheaper to send some clean data, as long as it's less than the size of
1550  * two longs.
1551  */
1552 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1553 /* for consistency */
1554 /* for consistency, use the same limit as for the storage keys interface */
1555 
1556 /*
1557  * This function searches for the next page with dirty CMMA attributes, and
1558  * saves the attributes in the buffer up to either the end of the buffer or
1559  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1560  * no trailing clean bytes are saved.
1561  * If no dirty bits were found, or if CMMA was not enabled or used, the
1562  * output buffer will indicate a length of 0.
1563  */
1564 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1565 				  struct kvm_s390_cmma_log *args)
1566 {
1567 	struct kvm_s390_migration_state *s = kvm->arch.migration_state;
1568 	unsigned long bufsize, hva, pgstev, i, next, cur;
1569 	int srcu_idx, peek, r = 0, rr;
1570 	u8 *res;
1571 
1572 	cur = args->start_gfn;
1573 	i = next = pgstev = 0;
1574 
1575 	if (unlikely(!kvm->arch.use_cmma))
1576 		return -ENXIO;
1577 	/* Invalid/unsupported flags were specified */
1578 	if (args->flags & ~KVM_S390_CMMA_PEEK)
1579 		return -EINVAL;
1580 	/* Non-peek requests are only valid while migration mode is enabled */
1581 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1582 	if (!peek && !s)
1583 		return -EINVAL;
1584 	/* CMMA is disabled or was not used, or the buffer has length zero */
1585 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1586 	if (!bufsize || !kvm->mm->context.use_cmma) {
1587 		memset(args, 0, sizeof(*args));
1588 		return 0;
1589 	}
1590 
1591 	if (!peek) {
1592 		/* We are not peeking, and there are no dirty pages */
1593 		if (!atomic64_read(&s->dirty_pages)) {
1594 			memset(args, 0, sizeof(*args));
1595 			return 0;
1596 		}
1597 		cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
1598 				    args->start_gfn);
1599 		if (cur >= s->bitmap_size)	/* nothing found, loop back */
1600 			cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
1601 		if (cur >= s->bitmap_size) {	/* again! (very unlikely) */
1602 			memset(args, 0, sizeof(*args));
1603 			return 0;
1604 		}
1605 		next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
1606 	}
1607 
1608 	res = vmalloc(bufsize);
1609 	if (!res)
1610 		return -ENOMEM;
1611 
1612 	args->start_gfn = cur;
1613 
1614 	down_read(&kvm->mm->mmap_sem);
1615 	srcu_idx = srcu_read_lock(&kvm->srcu);
1616 	while (i < bufsize) {
1617 		hva = gfn_to_hva(kvm, cur);
1618 		if (kvm_is_error_hva(hva)) {
1619 			r = -EFAULT;
1620 			break;
1621 		}
1622 		/* decrement only if we actually flipped the bit to 0 */
1623 		if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
1624 			atomic64_dec(&s->dirty_pages);
1625 		r = get_pgste(kvm->mm, hva, &pgstev);
1626 		if (r < 0)
1627 			pgstev = 0;
1628 		/* save the value */
1629 		res[i++] = (pgstev >> 24) & 0x43;
1630 		/*
1631 		 * If the next set bit is too far away, stop.
1632 		 * If we reached the previous "next", find the next one.
1633 		 */
1634 		if (!peek) {
1635 			if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
1636 				break;
1637 			if (cur == next)
1638 				next = find_next_bit(s->pgste_bitmap,
1639 						     s->bitmap_size, cur + 1);
1640 			/* Reached the end of the bitmap or of the buffer, stop */
1641 			if ((next >= s->bitmap_size) ||
1642 			    (next >= args->start_gfn + bufsize))
1643 				break;
1644 		}
1645 		cur++;
1646 	}
1647 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1648 	up_read(&kvm->mm->mmap_sem);
1649 	args->count = i;
1650 	args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
1651 
1652 	rr = copy_to_user((void __user *)args->values, res, args->count);
1653 	if (rr)
1654 		r = -EFAULT;
1655 
1656 	vfree(res);
1657 	return r;
1658 }
1659 
1660 /*
1661  * This function sets the CMMA attributes for the given pages. If the input
1662  * buffer has zero length, no action is taken, otherwise the attributes are
1663  * set and the mm->context.use_cmma flag is set.
1664  */
1665 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1666 				  const struct kvm_s390_cmma_log *args)
1667 {
1668 	unsigned long hva, mask, pgstev, i;
1669 	uint8_t *bits;
1670 	int srcu_idx, r = 0;
1671 
1672 	mask = args->mask;
1673 
1674 	if (!kvm->arch.use_cmma)
1675 		return -ENXIO;
1676 	/* invalid/unsupported flags */
1677 	if (args->flags != 0)
1678 		return -EINVAL;
1679 	/* Enforce sane limit on memory allocation */
1680 	if (args->count > KVM_S390_CMMA_SIZE_MAX)
1681 		return -EINVAL;
1682 	/* Nothing to do */
1683 	if (args->count == 0)
1684 		return 0;
1685 
1686 	bits = vmalloc(sizeof(*bits) * args->count);
1687 	if (!bits)
1688 		return -ENOMEM;
1689 
1690 	r = copy_from_user(bits, (void __user *)args->values, args->count);
1691 	if (r) {
1692 		r = -EFAULT;
1693 		goto out;
1694 	}
1695 
1696 	down_read(&kvm->mm->mmap_sem);
1697 	srcu_idx = srcu_read_lock(&kvm->srcu);
1698 	for (i = 0; i < args->count; i++) {
1699 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1700 		if (kvm_is_error_hva(hva)) {
1701 			r = -EFAULT;
1702 			break;
1703 		}
1704 
1705 		pgstev = bits[i];
1706 		pgstev = pgstev << 24;
1707 		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
1708 		set_pgste_bits(kvm->mm, hva, mask, pgstev);
1709 	}
1710 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1711 	up_read(&kvm->mm->mmap_sem);
1712 
1713 	if (!kvm->mm->context.use_cmma) {
1714 		down_write(&kvm->mm->mmap_sem);
1715 		kvm->mm->context.use_cmma = 1;
1716 		up_write(&kvm->mm->mmap_sem);
1717 	}
1718 out:
1719 	vfree(bits);
1720 	return r;
1721 }
1722 
1723 long kvm_arch_vm_ioctl(struct file *filp,
1724 		       unsigned int ioctl, unsigned long arg)
1725 {
1726 	struct kvm *kvm = filp->private_data;
1727 	void __user *argp = (void __user *)arg;
1728 	struct kvm_device_attr attr;
1729 	int r;
1730 
1731 	switch (ioctl) {
1732 	case KVM_S390_INTERRUPT: {
1733 		struct kvm_s390_interrupt s390int;
1734 
1735 		r = -EFAULT;
1736 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
1737 			break;
1738 		r = kvm_s390_inject_vm(kvm, &s390int);
1739 		break;
1740 	}
1741 	case KVM_ENABLE_CAP: {
1742 		struct kvm_enable_cap cap;
1743 		r = -EFAULT;
1744 		if (copy_from_user(&cap, argp, sizeof(cap)))
1745 			break;
1746 		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1747 		break;
1748 	}
1749 	case KVM_CREATE_IRQCHIP: {
1750 		struct kvm_irq_routing_entry routing;
1751 
1752 		r = -EINVAL;
1753 		if (kvm->arch.use_irqchip) {
1754 			/* Set up dummy routing. */
1755 			memset(&routing, 0, sizeof(routing));
1756 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1757 		}
1758 		break;
1759 	}
1760 	case KVM_SET_DEVICE_ATTR: {
1761 		r = -EFAULT;
1762 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1763 			break;
1764 		r = kvm_s390_vm_set_attr(kvm, &attr);
1765 		break;
1766 	}
1767 	case KVM_GET_DEVICE_ATTR: {
1768 		r = -EFAULT;
1769 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1770 			break;
1771 		r = kvm_s390_vm_get_attr(kvm, &attr);
1772 		break;
1773 	}
1774 	case KVM_HAS_DEVICE_ATTR: {
1775 		r = -EFAULT;
1776 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1777 			break;
1778 		r = kvm_s390_vm_has_attr(kvm, &attr);
1779 		break;
1780 	}
1781 	case KVM_S390_GET_SKEYS: {
1782 		struct kvm_s390_skeys args;
1783 
1784 		r = -EFAULT;
1785 		if (copy_from_user(&args, argp,
1786 				   sizeof(struct kvm_s390_skeys)))
1787 			break;
1788 		r = kvm_s390_get_skeys(kvm, &args);
1789 		break;
1790 	}
1791 	case KVM_S390_SET_SKEYS: {
1792 		struct kvm_s390_skeys args;
1793 
1794 		r = -EFAULT;
1795 		if (copy_from_user(&args, argp,
1796 				   sizeof(struct kvm_s390_skeys)))
1797 			break;
1798 		r = kvm_s390_set_skeys(kvm, &args);
1799 		break;
1800 	}
1801 	case KVM_S390_GET_CMMA_BITS: {
1802 		struct kvm_s390_cmma_log args;
1803 
1804 		r = -EFAULT;
1805 		if (copy_from_user(&args, argp, sizeof(args)))
1806 			break;
1807 		mutex_lock(&kvm->slots_lock);
1808 		r = kvm_s390_get_cmma_bits(kvm, &args);
1809 		mutex_unlock(&kvm->slots_lock);
1810 		if (!r) {
1811 			r = copy_to_user(argp, &args, sizeof(args));
1812 			if (r)
1813 				r = -EFAULT;
1814 		}
1815 		break;
1816 	}
1817 	case KVM_S390_SET_CMMA_BITS: {
1818 		struct kvm_s390_cmma_log args;
1819 
1820 		r = -EFAULT;
1821 		if (copy_from_user(&args, argp, sizeof(args)))
1822 			break;
1823 		mutex_lock(&kvm->slots_lock);
1824 		r = kvm_s390_set_cmma_bits(kvm, &args);
1825 		mutex_unlock(&kvm->slots_lock);
1826 		break;
1827 	}
1828 	default:
1829 		r = -ENOTTY;
1830 	}
1831 
1832 	return r;
1833 }
1834 
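/*
 * Execute PQAP with the QCI (query configuration information) function
 * and store the 128-byte AP configuration info block at @config; the
 * condition code of the instruction is returned (0 on success).
 */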
1835 static int kvm_s390_query_ap_config(u8 *config)
1836 {
1837 	u32 fcn_code = 0x04000000UL;
1838 	u32 cc = 0;
1839 
1840 	memset(config, 0, 128);
1841 	asm volatile(
1842 		"lgr 0,%1\n"
1843 		"lgr 2,%2\n"
1844 		".long 0xb2af0000\n"		/* PQAP(QCI) */
1845 		"0: ipm %0\n"
1846 		"srl %0,28\n"
1847 		"1:\n"
1848 		EX_TABLE(0b, 1b)
1849 		: "+r" (cc)
1850 		: "r" (fcn_code), "r" (config)
1851 		: "cc", "0", "2", "memory"
1852 	);
1853 
1854 	return cc;
1855 }
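
/*
 * Note on the inline assembly above: PQAP(QCI) is emitted as a raw opcode
 * (0xb2af0000) so the file builds independently of assembler support, and
 * the EX_TABLE entry gives the program-check handler a fixup target if the
 * instruction traps.  The caller only ever sees the condition code.
 */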
1856 
1857 static int kvm_s390_apxa_installed(void)
1858 {
1859 	u8 config[128];
1860 	int cc;
1861 
1862 	if (test_facility(12)) {
1863 		cc = kvm_s390_query_ap_config(config);
1864 
1865 		if (cc)
1866 			pr_err("PQAP(QCI) failed with cc=%d", cc);
1867 		else
1868 			return config[0] & 0x40;
1869 	}
1870 
1871 	return 0;
1872 }
1873 
1874 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1875 {
1876 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1877 
1878 	if (kvm_s390_apxa_installed())
1879 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1880 	else
1881 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1882 }
1883 
1884 static u64 kvm_s390_get_initial_cpuid(void)
1885 {
1886 	struct cpuid cpuid;
1887 
1888 	get_cpu_id(&cpuid);
1889 	cpuid.version = 0xff;
1890 	return *((u64 *) &cpuid);
1891 }
1892 
1893 static void kvm_s390_crypto_init(struct kvm *kvm)
1894 {
1895 	if (!test_kvm_facility(kvm, 76))
1896 		return;
1897 
1898 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1899 	kvm_s390_set_crycb_format(kvm);
1900 
1901 	/* Enable AES/DEA protected key functions by default */
1902 	kvm->arch.crypto.aes_kw = 1;
1903 	kvm->arch.crypto.dea_kw = 1;
1904 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1905 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1906 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1907 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1908 }
1909 
1910 static void sca_dispose(struct kvm *kvm)
1911 {
1912 	if (kvm->arch.use_esca)
1913 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1914 	else
1915 		free_page((unsigned long)(kvm->arch.sca));
1916 	kvm->arch.sca = NULL;
1917 }
1918 
1919 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1920 {
1921 	gfp_t alloc_flags = GFP_KERNEL;
1922 	int i, rc;
1923 	char debug_name[16];
1924 	static unsigned long sca_offset;
1925 
1926 	rc = -EINVAL;
1927 #ifdef CONFIG_KVM_S390_UCONTROL
1928 	if (type & ~KVM_VM_S390_UCONTROL)
1929 		goto out_err;
1930 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1931 		goto out_err;
1932 #else
1933 	if (type)
1934 		goto out_err;
1935 #endif
1936 
1937 	rc = s390_enable_sie();
1938 	if (rc)
1939 		goto out_err;
1940 
1941 	rc = -ENOMEM;
1942 
1943 	kvm->arch.use_esca = 0; /* start with basic SCA */
1944 	if (!sclp.has_64bscao)
1945 		alloc_flags |= GFP_DMA;
1946 	rwlock_init(&kvm->arch.sca_lock);
1947 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1948 	if (!kvm->arch.sca)
1949 		goto out_err;
1950 	spin_lock(&kvm_lock);
1951 	sca_offset += 16;
1952 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1953 		sca_offset = 0;
1954 	kvm->arch.sca = (struct bsca_block *)
1955 			((char *) kvm->arch.sca + sca_offset);
1956 	spin_unlock(&kvm_lock);
1957 
1958 	sprintf(debug_name, "kvm-%u", current->pid);
1959 
1960 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1961 	if (!kvm->arch.dbf)
1962 		goto out_err;
1963 
1964 	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
1965 	kvm->arch.sie_page2 =
1966 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1967 	if (!kvm->arch.sie_page2)
1968 		goto out_err;
1969 
1970 	/* Populate the facility mask initially. */
1971 	memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1972 	       sizeof(S390_lowcore.stfle_fac_list));
1973 	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1974 		if (i < kvm_s390_fac_list_mask_size())
1975 			kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1976 		else
1977 			kvm->arch.model.fac_mask[i] = 0UL;
1978 	}
1979 
1980 	/* Populate the facility list initially. */
1981 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1982 	memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1983 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1984 
1985 	/* we are always in czam mode - even on pre-z14 machines */
1986 	set_kvm_facility(kvm->arch.model.fac_mask, 138);
1987 	set_kvm_facility(kvm->arch.model.fac_list, 138);
1988 	/* we emulate STHYI in kvm */
1989 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
1990 	set_kvm_facility(kvm->arch.model.fac_list, 74);
1991 	if (MACHINE_HAS_TLB_GUEST) {
1992 		set_kvm_facility(kvm->arch.model.fac_mask, 147);
1993 		set_kvm_facility(kvm->arch.model.fac_list, 147);
1994 	}
1995 
1996 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1997 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1998 
1999 	kvm_s390_crypto_init(kvm);
2000 
2001 	mutex_init(&kvm->arch.float_int.ais_lock);
2002 	kvm->arch.float_int.simm = 0;
2003 	kvm->arch.float_int.nimm = 0;
2004 	spin_lock_init(&kvm->arch.float_int.lock);
2005 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
2006 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2007 	init_waitqueue_head(&kvm->arch.ipte_wq);
2008 	mutex_init(&kvm->arch.ipte_mutex);
2009 
2010 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2011 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
2012 
2013 	if (type & KVM_VM_S390_UCONTROL) {
2014 		kvm->arch.gmap = NULL;
2015 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2016 	} else {
2017 		if (sclp.hamax == U64_MAX)
2018 			kvm->arch.mem_limit = TASK_SIZE_MAX;
2019 		else
2020 			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2021 						    sclp.hamax + 1);
2022 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2023 		if (!kvm->arch.gmap)
2024 			goto out_err;
2025 		kvm->arch.gmap->private = kvm;
2026 		kvm->arch.gmap->pfault_enabled = 0;
2027 	}
2028 
2029 	kvm->arch.css_support = 0;
2030 	kvm->arch.use_irqchip = 0;
2031 	kvm->arch.epoch = 0;
2032 
2033 	spin_lock_init(&kvm->arch.start_stop_lock);
2034 	kvm_s390_vsie_init(kvm);
2035 	kvm_s390_gisa_init(kvm);
2036 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2037 
2038 	return 0;
2039 out_err:
2040 	free_page((unsigned long)kvm->arch.sie_page2);
2041 	debug_unregister(kvm->arch.dbf);
2042 	sca_dispose(kvm);
2043 	KVM_EVENT(3, "creation of vm failed: %d", rc);
2044 	return rc;
2045 }
2046 
2047 bool kvm_arch_has_vcpu_debugfs(void)
2048 {
2049 	return false;
2050 }
2051 
2052 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
2053 {
2054 	return 0;
2055 }
2056 
2057 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2058 {
2059 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2060 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2061 	kvm_s390_clear_local_irqs(vcpu);
2062 	kvm_clear_async_pf_completion_queue(vcpu);
2063 	if (!kvm_is_ucontrol(vcpu->kvm))
2064 		sca_del_vcpu(vcpu);
2065 
2066 	if (kvm_is_ucontrol(vcpu->kvm))
2067 		gmap_remove(vcpu->arch.gmap);
2068 
2069 	if (vcpu->kvm->arch.use_cmma)
2070 		kvm_s390_vcpu_unsetup_cmma(vcpu);
2071 	free_page((unsigned long)(vcpu->arch.sie_block));
2072 
2073 	kvm_vcpu_uninit(vcpu);
2074 	kmem_cache_free(kvm_vcpu_cache, vcpu);
2075 }
2076 
2077 static void kvm_free_vcpus(struct kvm *kvm)
2078 {
2079 	unsigned int i;
2080 	struct kvm_vcpu *vcpu;
2081 
2082 	kvm_for_each_vcpu(i, vcpu, kvm)
2083 		kvm_arch_vcpu_destroy(vcpu);
2084 
2085 	mutex_lock(&kvm->lock);
2086 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2087 		kvm->vcpus[i] = NULL;
2088 
2089 	atomic_set(&kvm->online_vcpus, 0);
2090 	mutex_unlock(&kvm->lock);
2091 }
2092 
2093 void kvm_arch_destroy_vm(struct kvm *kvm)
2094 {
2095 	kvm_free_vcpus(kvm);
2096 	sca_dispose(kvm);
2097 	debug_unregister(kvm->arch.dbf);
2098 	kvm_s390_gisa_destroy(kvm);
2099 	free_page((unsigned long)kvm->arch.sie_page2);
2100 	if (!kvm_is_ucontrol(kvm))
2101 		gmap_remove(kvm->arch.gmap);
2102 	kvm_s390_destroy_adapters(kvm);
2103 	kvm_s390_clear_float_irqs(kvm);
2104 	kvm_s390_vsie_destroy(kvm);
2105 	if (kvm->arch.migration_state) {
2106 		vfree(kvm->arch.migration_state->pgste_bitmap);
2107 		kfree(kvm->arch.migration_state);
2108 	}
2109 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2110 }
2111 
2112 /* Section: vcpu related */
2113 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2114 {
2115 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2116 	if (!vcpu->arch.gmap)
2117 		return -ENOMEM;
2118 	vcpu->arch.gmap->private = vcpu->kvm;
2119 
2120 	return 0;
2121 }
2122 
2123 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2124 {
2125 	if (!kvm_s390_use_sca_entries())
2126 		return;
2127 	read_lock(&vcpu->kvm->arch.sca_lock);
2128 	if (vcpu->kvm->arch.use_esca) {
2129 		struct esca_block *sca = vcpu->kvm->arch.sca;
2130 
2131 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2132 		sca->cpu[vcpu->vcpu_id].sda = 0;
2133 	} else {
2134 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2135 
2136 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2137 		sca->cpu[vcpu->vcpu_id].sda = 0;
2138 	}
2139 	read_unlock(&vcpu->kvm->arch.sca_lock);
2140 }
2141 
2142 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2143 {
2144 	if (!kvm_s390_use_sca_entries()) {
2145 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2146 
2147 		/* we still need the basic sca for the ipte control */
2148 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2149 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2150 		return;
2151 	}
2152 	read_lock(&vcpu->kvm->arch.sca_lock);
2153 	if (vcpu->kvm->arch.use_esca) {
2154 		struct esca_block *sca = vcpu->kvm->arch.sca;
2155 
2156 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2157 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2158 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2159 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2160 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2161 	} else {
2162 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2163 
2164 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2165 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2166 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2167 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2168 	}
2169 	read_unlock(&vcpu->kvm->arch.sca_lock);
2170 }
2171 
2172 /* Basic SCA to Extended SCA data copy routines */
2173 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2174 {
2175 	d->sda = s->sda;
2176 	d->sigp_ctrl.c = s->sigp_ctrl.c;
2177 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2178 }
2179 
2180 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2181 {
2182 	int i;
2183 
2184 	d->ipte_control = s->ipte_control;
2185 	d->mcn[0] = s->mcn;
2186 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2187 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2188 }
2189 
2190 static int sca_switch_to_extended(struct kvm *kvm)
2191 {
2192 	struct bsca_block *old_sca = kvm->arch.sca;
2193 	struct esca_block *new_sca;
2194 	struct kvm_vcpu *vcpu;
2195 	unsigned int vcpu_idx;
2196 	u32 scaol, scaoh;
2197 
2198 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2199 	if (!new_sca)
2200 		return -ENOMEM;
2201 
2202 	scaoh = (u32)((u64)(new_sca) >> 32);
2203 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
2204 
2205 	kvm_s390_vcpu_block_all(kvm);
2206 	write_lock(&kvm->arch.sca_lock);
2207 
2208 	sca_copy_b_to_e(new_sca, old_sca);
2209 
2210 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2211 		vcpu->arch.sie_block->scaoh = scaoh;
2212 		vcpu->arch.sie_block->scaol = scaol;
2213 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2214 	}
2215 	kvm->arch.sca = new_sca;
2216 	kvm->arch.use_esca = 1;
2217 
2218 	write_unlock(&kvm->arch.sca_lock);
2219 	kvm_s390_vcpu_unblock_all(kvm);
2220 
2221 	free_page((unsigned long)old_sca);
2222 
2223 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2224 		 old_sca, kvm->arch.sca);
2225 	return 0;
2226 }
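
/*
 * The BSCA -> ESCA switch above runs with all VCPUs blocked and sca_lock
 * held for writing: the entries are copied first, every sie_block is then
 * repointed to the new origin, and only afterwards is the old basic SCA
 * freed, so no CPU can enter SIE with a stale SCA address.
 */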
2227 
2228 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2229 {
2230 	int rc;
2231 
2232 	if (!kvm_s390_use_sca_entries()) {
2233 		if (id < KVM_MAX_VCPUS)
2234 			return true;
2235 		return false;
2236 	}
2237 	if (id < KVM_S390_BSCA_CPU_SLOTS)
2238 		return true;
2239 	if (!sclp.has_esca || !sclp.has_64bscao)
2240 		return false;
2241 
2242 	mutex_lock(&kvm->lock);
2243 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2244 	mutex_unlock(&kvm->lock);
2245 
2246 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2247 }
2248 
2249 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2250 {
2251 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2252 	kvm_clear_async_pf_completion_queue(vcpu);
2253 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2254 				    KVM_SYNC_GPRS |
2255 				    KVM_SYNC_ACRS |
2256 				    KVM_SYNC_CRS |
2257 				    KVM_SYNC_ARCH0 |
2258 				    KVM_SYNC_PFAULT;
2259 	kvm_s390_set_prefix(vcpu, 0);
2260 	if (test_kvm_facility(vcpu->kvm, 64))
2261 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2262 	if (test_kvm_facility(vcpu->kvm, 82))
2263 		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2264 	if (test_kvm_facility(vcpu->kvm, 133))
2265 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2266 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
2267 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2268 	 */
2269 	if (MACHINE_HAS_VX)
2270 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2271 	else
2272 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2273 
2274 	if (kvm_is_ucontrol(vcpu->kvm))
2275 		return __kvm_ucontrol_vcpu_init(vcpu);
2276 
2277 	return 0;
2278 }
2279 
2280 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2281 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2282 {
2283 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2284 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2285 	vcpu->arch.cputm_start = get_tod_clock_fast();
2286 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2287 }
2288 
2289 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2290 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2291 {
2292 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2293 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2294 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2295 	vcpu->arch.cputm_start = 0;
2296 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2297 }
2298 
2299 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2300 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2301 {
2302 	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2303 	vcpu->arch.cputm_enabled = true;
2304 	__start_cpu_timer_accounting(vcpu);
2305 }
2306 
2307 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2308 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2309 {
2310 	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2311 	__stop_cpu_timer_accounting(vcpu);
2312 	vcpu->arch.cputm_enabled = false;
2313 }
2314 
2315 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2316 {
2317 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2318 	__enable_cpu_timer_accounting(vcpu);
2319 	preempt_enable();
2320 }
2321 
2322 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2323 {
2324 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2325 	__disable_cpu_timer_accounting(vcpu);
2326 	preempt_enable();
2327 }
2328 
2329 /* set the cpu timer - may only be called from the VCPU thread itself */
2330 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2331 {
2332 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2333 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2334 	if (vcpu->arch.cputm_enabled)
2335 		vcpu->arch.cputm_start = get_tod_clock_fast();
2336 	vcpu->arch.sie_block->cputm = cputm;
2337 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2338 	preempt_enable();
2339 }
2340 
2341 /* update and get the cpu timer - can also be called from other VCPU threads */
2342 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2343 {
2344 	unsigned int seq;
2345 	__u64 value;
2346 
2347 	if (unlikely(!vcpu->arch.cputm_enabled))
2348 		return vcpu->arch.sie_block->cputm;
2349 
2350 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2351 	do {
2352 		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2353 		/*
2354 		 * If the writer would ever execute a read in the critical
2355 		 * section, e.g. in irq context, we have a deadlock.
2356 		 */
2357 		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2358 		value = vcpu->arch.sie_block->cputm;
2359 		/* if cputm_start is 0, accounting is being started/stopped */
2360 		if (likely(vcpu->arch.cputm_start))
2361 			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2362 	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2363 	preempt_enable();
2364 	return value;
2365 }
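
/*
 * Readers of the CPU timer rely on the seqcount above: a concurrent
 * kvm_s390_set_cpu_timer() or start/stop of accounting on the VCPU thread
 * simply forces a retry instead of exposing a half-updated value.  Since
 * only the VCPU thread itself writes, disabling preemption is enough on
 * the writer side and no lock is needed.
 */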
2366 
2367 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2368 {
2369 
2370 	gmap_enable(vcpu->arch.enabled_gmap);
2371 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
2372 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2373 		__start_cpu_timer_accounting(vcpu);
2374 	vcpu->cpu = cpu;
2375 }
2376 
2377 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2378 {
2379 	vcpu->cpu = -1;
2380 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2381 		__stop_cpu_timer_accounting(vcpu);
2382 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
2383 	vcpu->arch.enabled_gmap = gmap_get_enabled();
2384 	gmap_disable(vcpu->arch.enabled_gmap);
2385 
2386 }
2387 
2388 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2389 {
2390 	/* this equals initial cpu reset in pop, but we don't switch to ESA */
2391 	vcpu->arch.sie_block->gpsw.mask = 0UL;
2392 	vcpu->arch.sie_block->gpsw.addr = 0UL;
2393 	kvm_s390_set_prefix(vcpu, 0);
2394 	kvm_s390_set_cpu_timer(vcpu, 0);
2395 	vcpu->arch.sie_block->ckc       = 0UL;
2396 	vcpu->arch.sie_block->todpr     = 0;
2397 	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2398 	vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
2399 	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2400 	/* make sure the new fpc will be lazily loaded */
2401 	save_fpu_regs();
2402 	current->thread.fpu.fpc = 0;
2403 	vcpu->arch.sie_block->gbea = 1;
2404 	vcpu->arch.sie_block->pp = 0;
2405 	vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2406 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2407 	kvm_clear_async_pf_completion_queue(vcpu);
2408 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2409 		kvm_s390_vcpu_stop(vcpu);
2410 	kvm_s390_clear_local_irqs(vcpu);
2411 }
2412 
2413 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2414 {
2415 	mutex_lock(&vcpu->kvm->lock);
2416 	preempt_disable();
2417 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2418 	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2419 	preempt_enable();
2420 	mutex_unlock(&vcpu->kvm->lock);
2421 	if (!kvm_is_ucontrol(vcpu->kvm)) {
2422 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2423 		sca_add_vcpu(vcpu);
2424 	}
2425 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2426 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2427 	/* make vcpu_load load the right gmap on the first trigger */
2428 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2429 }
2430 
2431 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2432 {
2433 	if (!test_kvm_facility(vcpu->kvm, 76))
2434 		return;
2435 
2436 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2437 
2438 	if (vcpu->kvm->arch.crypto.aes_kw)
2439 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2440 	if (vcpu->kvm->arch.crypto.dea_kw)
2441 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2442 
2443 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2444 }
2445 
2446 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2447 {
2448 	free_page(vcpu->arch.sie_block->cbrlo);
2449 	vcpu->arch.sie_block->cbrlo = 0;
2450 }
2451 
2452 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2453 {
2454 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2455 	if (!vcpu->arch.sie_block->cbrlo)
2456 		return -ENOMEM;
2457 
2458 	vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
2459 	return 0;
2460 }
2461 
2462 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2463 {
2464 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2465 
2466 	vcpu->arch.sie_block->ibc = model->ibc;
2467 	if (test_kvm_facility(vcpu->kvm, 7))
2468 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2469 }
2470 
2471 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2472 {
2473 	int rc = 0;
2474 
2475 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2476 						    CPUSTAT_SM |
2477 						    CPUSTAT_STOPPED);
2478 
2479 	if (test_kvm_facility(vcpu->kvm, 78))
2480 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2481 	else if (test_kvm_facility(vcpu->kvm, 8))
2482 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2483 
2484 	kvm_s390_vcpu_setup_model(vcpu);
2485 
2486 	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2487 	if (MACHINE_HAS_ESOP)
2488 		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2489 	if (test_kvm_facility(vcpu->kvm, 9))
2490 		vcpu->arch.sie_block->ecb |= ECB_SRSI;
2491 	if (test_kvm_facility(vcpu->kvm, 73))
2492 		vcpu->arch.sie_block->ecb |= ECB_TE;
2493 
2494 	if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
2495 		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2496 	if (test_kvm_facility(vcpu->kvm, 130))
2497 		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2498 	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2499 	if (sclp.has_cei)
2500 		vcpu->arch.sie_block->eca |= ECA_CEI;
2501 	if (sclp.has_ib)
2502 		vcpu->arch.sie_block->eca |= ECA_IB;
2503 	if (sclp.has_siif)
2504 		vcpu->arch.sie_block->eca |= ECA_SII;
2505 	if (sclp.has_sigpif)
2506 		vcpu->arch.sie_block->eca |= ECA_SIGPI;
2507 	if (test_kvm_facility(vcpu->kvm, 129)) {
2508 		vcpu->arch.sie_block->eca |= ECA_VX;
2509 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2510 	}
2511 	if (test_kvm_facility(vcpu->kvm, 139))
2512 		vcpu->arch.sie_block->ecd |= ECD_MEF;
2513 
2514 	if (vcpu->arch.sie_block->gd) {
2515 		vcpu->arch.sie_block->eca |= ECA_AIV;
2516 		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
2517 			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
2518 	}
2519 	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2520 					| SDNXC;
2521 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2522 
2523 	if (sclp.has_kss)
2524 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
2525 	else
2526 		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2527 
2528 	if (vcpu->kvm->arch.use_cmma) {
2529 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
2530 		if (rc)
2531 			return rc;
2532 	}
2533 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2534 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2535 
2536 	kvm_s390_vcpu_crypto_setup(vcpu);
2537 
2538 	return rc;
2539 }
2540 
2541 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2542 				      unsigned int id)
2543 {
2544 	struct kvm_vcpu *vcpu;
2545 	struct sie_page *sie_page;
2546 	int rc = -EINVAL;
2547 
2548 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2549 		goto out;
2550 
2551 	rc = -ENOMEM;
2552 
2553 	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2554 	if (!vcpu)
2555 		goto out;
2556 
2557 	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2558 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2559 	if (!sie_page)
2560 		goto out_free_cpu;
2561 
2562 	vcpu->arch.sie_block = &sie_page->sie_block;
2563 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2564 
2565 	/* the real guest size will always be smaller than msl */
2566 	vcpu->arch.sie_block->mso = 0;
2567 	vcpu->arch.sie_block->msl = sclp.hamax;
2568 
2569 	vcpu->arch.sie_block->icpua = id;
2570 	spin_lock_init(&vcpu->arch.local_int.lock);
2571 	vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa;
2572 	if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
2573 		vcpu->arch.sie_block->gd |= GISA_FORMAT1;
2574 	seqcount_init(&vcpu->arch.cputm_seqcount);
2575 
2576 	rc = kvm_vcpu_init(vcpu, kvm, id);
2577 	if (rc)
2578 		goto out_free_sie_block;
2579 	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2580 		 vcpu->arch.sie_block);
2581 	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2582 
2583 	return vcpu;
2584 out_free_sie_block:
2585 	free_page((unsigned long)(vcpu->arch.sie_block));
2586 out_free_cpu:
2587 	kmem_cache_free(kvm_vcpu_cache, vcpu);
2588 out:
2589 	return ERR_PTR(rc);
2590 }
2591 
2592 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2593 {
2594 	return kvm_s390_vcpu_has_irq(vcpu, 0);
2595 }
2596 
2597 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2598 {
2599 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2600 }
2601 
2602 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2603 {
2604 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2605 	exit_sie(vcpu);
2606 }
2607 
2608 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2609 {
2610 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2611 }
2612 
2613 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2614 {
2615 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2616 	exit_sie(vcpu);
2617 }
2618 
2619 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2620 {
2621 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2622 }
2623 
2624 /*
2625  * Kick a guest cpu out of SIE and wait until SIE is not running.
2626  * If the CPU is not running (e.g. waiting as idle), the function will
2627  * return immediately. */
2628 void exit_sie(struct kvm_vcpu *vcpu)
2629 {
2630 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
2631 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2632 		cpu_relax();
2633 }
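
/*
 * exit_sie() works in two steps: CPUSTAT_STOP_INT makes the hardware drop
 * out of SIE at the next opportunity, and the busy loop then waits until
 * the entry/exit path has cleared PROG_IN_SIE in prog0c, i.e. until the
 * CPU has really left SIE, before returning to the caller.
 */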
2634 
2635 /* Kick a guest cpu out of SIE to process a request synchronously */
2636 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2637 {
2638 	kvm_make_request(req, vcpu);
2639 	kvm_s390_vcpu_request(vcpu);
2640 }
2641 
2642 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2643 			      unsigned long end)
2644 {
2645 	struct kvm *kvm = gmap->private;
2646 	struct kvm_vcpu *vcpu;
2647 	unsigned long prefix;
2648 	int i;
2649 
2650 	if (gmap_is_shadow(gmap))
2651 		return;
2652 	if (start >= 1UL << 31)
2653 		/* We are only interested in prefix pages */
2654 		return;
2655 	kvm_for_each_vcpu(i, vcpu, kvm) {
2656 		/* match against both prefix pages */
2657 		prefix = kvm_s390_get_prefix(vcpu);
2658 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2659 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2660 				   start, end);
2661 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2662 		}
2663 	}
2664 }
2665 
2666 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2667 {
2668 	/* kvm common code refers to this, but never calls it */
2669 	BUG();
2670 	return 0;
2671 }
2672 
2673 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2674 					   struct kvm_one_reg *reg)
2675 {
2676 	int r = -EINVAL;
2677 
2678 	switch (reg->id) {
2679 	case KVM_REG_S390_TODPR:
2680 		r = put_user(vcpu->arch.sie_block->todpr,
2681 			     (u32 __user *)reg->addr);
2682 		break;
2683 	case KVM_REG_S390_EPOCHDIFF:
2684 		r = put_user(vcpu->arch.sie_block->epoch,
2685 			     (u64 __user *)reg->addr);
2686 		break;
2687 	case KVM_REG_S390_CPU_TIMER:
2688 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
2689 			     (u64 __user *)reg->addr);
2690 		break;
2691 	case KVM_REG_S390_CLOCK_COMP:
2692 		r = put_user(vcpu->arch.sie_block->ckc,
2693 			     (u64 __user *)reg->addr);
2694 		break;
2695 	case KVM_REG_S390_PFTOKEN:
2696 		r = put_user(vcpu->arch.pfault_token,
2697 			     (u64 __user *)reg->addr);
2698 		break;
2699 	case KVM_REG_S390_PFCOMPARE:
2700 		r = put_user(vcpu->arch.pfault_compare,
2701 			     (u64 __user *)reg->addr);
2702 		break;
2703 	case KVM_REG_S390_PFSELECT:
2704 		r = put_user(vcpu->arch.pfault_select,
2705 			     (u64 __user *)reg->addr);
2706 		break;
2707 	case KVM_REG_S390_PP:
2708 		r = put_user(vcpu->arch.sie_block->pp,
2709 			     (u64 __user *)reg->addr);
2710 		break;
2711 	case KVM_REG_S390_GBEA:
2712 		r = put_user(vcpu->arch.sie_block->gbea,
2713 			     (u64 __user *)reg->addr);
2714 		break;
2715 	default:
2716 		break;
2717 	}
2718 
2719 	return r;
2720 }
2721 
2722 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2723 					   struct kvm_one_reg *reg)
2724 {
2725 	int r = -EINVAL;
2726 	__u64 val;
2727 
2728 	switch (reg->id) {
2729 	case KVM_REG_S390_TODPR:
2730 		r = get_user(vcpu->arch.sie_block->todpr,
2731 			     (u32 __user *)reg->addr);
2732 		break;
2733 	case KVM_REG_S390_EPOCHDIFF:
2734 		r = get_user(vcpu->arch.sie_block->epoch,
2735 			     (u64 __user *)reg->addr);
2736 		break;
2737 	case KVM_REG_S390_CPU_TIMER:
2738 		r = get_user(val, (u64 __user *)reg->addr);
2739 		if (!r)
2740 			kvm_s390_set_cpu_timer(vcpu, val);
2741 		break;
2742 	case KVM_REG_S390_CLOCK_COMP:
2743 		r = get_user(vcpu->arch.sie_block->ckc,
2744 			     (u64 __user *)reg->addr);
2745 		break;
2746 	case KVM_REG_S390_PFTOKEN:
2747 		r = get_user(vcpu->arch.pfault_token,
2748 			     (u64 __user *)reg->addr);
2749 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2750 			kvm_clear_async_pf_completion_queue(vcpu);
2751 		break;
2752 	case KVM_REG_S390_PFCOMPARE:
2753 		r = get_user(vcpu->arch.pfault_compare,
2754 			     (u64 __user *)reg->addr);
2755 		break;
2756 	case KVM_REG_S390_PFSELECT:
2757 		r = get_user(vcpu->arch.pfault_select,
2758 			     (u64 __user *)reg->addr);
2759 		break;
2760 	case KVM_REG_S390_PP:
2761 		r = get_user(vcpu->arch.sie_block->pp,
2762 			     (u64 __user *)reg->addr);
2763 		break;
2764 	case KVM_REG_S390_GBEA:
2765 		r = get_user(vcpu->arch.sie_block->gbea,
2766 			     (u64 __user *)reg->addr);
2767 		break;
2768 	default:
2769 		break;
2770 	}
2771 
2772 	return r;
2773 }
2774 
2775 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2776 {
2777 	kvm_s390_vcpu_initial_reset(vcpu);
2778 	return 0;
2779 }
2780 
2781 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2782 {
2783 	vcpu_load(vcpu);
2784 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2785 	vcpu_put(vcpu);
2786 	return 0;
2787 }
2788 
2789 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2790 {
2791 	vcpu_load(vcpu);
2792 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2793 	vcpu_put(vcpu);
2794 	return 0;
2795 }
2796 
2797 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2798 				  struct kvm_sregs *sregs)
2799 {
2800 	vcpu_load(vcpu);
2801 
2802 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2803 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2804 
2805 	vcpu_put(vcpu);
2806 	return 0;
2807 }
2808 
2809 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2810 				  struct kvm_sregs *sregs)
2811 {
2812 	vcpu_load(vcpu);
2813 
2814 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2815 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2816 
2817 	vcpu_put(vcpu);
2818 	return 0;
2819 }
2820 
2821 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2822 {
2823 	int ret = 0;
2824 
2825 	vcpu_load(vcpu);
2826 
2827 	if (test_fp_ctl(fpu->fpc)) {
2828 		ret = -EINVAL;
2829 		goto out;
2830 	}
2831 	vcpu->run->s.regs.fpc = fpu->fpc;
2832 	if (MACHINE_HAS_VX)
2833 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2834 				 (freg_t *) fpu->fprs);
2835 	else
2836 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2837 
2838 out:
2839 	vcpu_put(vcpu);
2840 	return ret;
2841 }
2842 
2843 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2844 {
2845 	vcpu_load(vcpu);
2846 
2847 	/* make sure we have the latest values */
2848 	save_fpu_regs();
2849 	if (MACHINE_HAS_VX)
2850 		convert_vx_to_fp((freg_t *) fpu->fprs,
2851 				 (__vector128 *) vcpu->run->s.regs.vrs);
2852 	else
2853 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2854 	fpu->fpc = vcpu->run->s.regs.fpc;
2855 
2856 	vcpu_put(vcpu);
2857 	return 0;
2858 }
2859 
2860 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2861 {
2862 	int rc = 0;
2863 
2864 	if (!is_vcpu_stopped(vcpu))
2865 		rc = -EBUSY;
2866 	else {
2867 		vcpu->run->psw_mask = psw.mask;
2868 		vcpu->run->psw_addr = psw.addr;
2869 	}
2870 	return rc;
2871 }
2872 
2873 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2874 				  struct kvm_translation *tr)
2875 {
2876 	return -EINVAL; /* not implemented yet */
2877 }
2878 
2879 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2880 			      KVM_GUESTDBG_USE_HW_BP | \
2881 			      KVM_GUESTDBG_ENABLE)
2882 
2883 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2884 					struct kvm_guest_debug *dbg)
2885 {
2886 	int rc = 0;
2887 
2888 	vcpu_load(vcpu);
2889 
2890 	vcpu->guest_debug = 0;
2891 	kvm_s390_clear_bp_data(vcpu);
2892 
2893 	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
2894 		rc = -EINVAL;
2895 		goto out;
2896 	}
2897 	if (!sclp.has_gpere) {
2898 		rc = -EINVAL;
2899 		goto out;
2900 	}
2901 
2902 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
2903 		vcpu->guest_debug = dbg->control;
2904 		/* enforce guest PER */
2905 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
2906 
2907 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2908 			rc = kvm_s390_import_bp_data(vcpu, dbg);
2909 	} else {
2910 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
2911 		vcpu->arch.guestdbg.last_bp = 0;
2912 	}
2913 
2914 	if (rc) {
2915 		vcpu->guest_debug = 0;
2916 		kvm_s390_clear_bp_data(vcpu);
2917 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
2918 	}
2919 
2920 out:
2921 	vcpu_put(vcpu);
2922 	return rc;
2923 }
2924 
2925 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2926 				    struct kvm_mp_state *mp_state)
2927 {
2928 	int ret;
2929 
2930 	vcpu_load(vcpu);
2931 
2932 	/* CHECK_STOP and LOAD are not supported yet */
2933 	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2934 				      KVM_MP_STATE_OPERATING;
2935 
2936 	vcpu_put(vcpu);
2937 	return ret;
2938 }
2939 
2940 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2941 				    struct kvm_mp_state *mp_state)
2942 {
2943 	int rc = 0;
2944 
2945 	vcpu_load(vcpu);
2946 
2947 	/* user space knows about this interface - let it control the state */
2948 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2949 
2950 	switch (mp_state->mp_state) {
2951 	case KVM_MP_STATE_STOPPED:
2952 		kvm_s390_vcpu_stop(vcpu);
2953 		break;
2954 	case KVM_MP_STATE_OPERATING:
2955 		kvm_s390_vcpu_start(vcpu);
2956 		break;
2957 	case KVM_MP_STATE_LOAD:
2958 	case KVM_MP_STATE_CHECK_STOP:
2959 		/* fall through - CHECK_STOP and LOAD are not supported yet */
2960 	default:
2961 		rc = -ENXIO;
2962 	}
2963 
2964 	vcpu_put(vcpu);
2965 	return rc;
2966 }
2967 
2968 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2969 {
2970 	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
2971 }
2972 
2973 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2974 {
2975 retry:
2976 	kvm_s390_vcpu_request_handled(vcpu);
2977 	if (!kvm_request_pending(vcpu))
2978 		return 0;
2979 	/*
2980 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2981 	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2982 	 * This ensures that the ipte instruction for this request has
2983 	 * already finished. We might race against a second unmapper that
2984 	 * wants to set the blocking bit. Let's just retry the request loop.
2985 	 */
2986 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2987 		int rc;
2988 		rc = gmap_mprotect_notify(vcpu->arch.gmap,
2989 					  kvm_s390_get_prefix(vcpu),
2990 					  PAGE_SIZE * 2, PROT_WRITE);
2991 		if (rc) {
2992 			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2993 			return rc;
2994 		}
2995 		goto retry;
2996 	}
2997 
2998 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2999 		vcpu->arch.sie_block->ihcpu = 0xffff;
3000 		goto retry;
3001 	}
3002 
3003 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3004 		if (!ibs_enabled(vcpu)) {
3005 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3006 			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3007 		}
3008 		goto retry;
3009 	}
3010 
3011 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3012 		if (ibs_enabled(vcpu)) {
3013 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3014 			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3015 		}
3016 		goto retry;
3017 	}
3018 
3019 	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3020 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3021 		goto retry;
3022 	}
3023 
3024 	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3025 		/*
3026 		 * Disable CMMA virtualization; we will emulate the ESSA
3027 		 * instruction manually, in order to provide additional
3028 		 * functionality needed for live migration.
3029 		 */
3030 		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3031 		goto retry;
3032 	}
3033 
3034 	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3035 		/*
3036 		 * Re-enable CMMA virtualization if CMMA is available and
3037 		 * was used.
3038 		 */
3039 		if ((vcpu->kvm->arch.use_cmma) &&
3040 		    (vcpu->kvm->mm->context.use_cmma))
3041 			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3042 		goto retry;
3043 	}
3044 
3045 	/* nothing to do, just clear the request */
3046 	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3047 
3048 	return 0;
3049 }
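
/*
 * Every handled request above jumps back to "retry", so one call drains
 * all pending requests before the VCPU may (re)enter SIE.  Only a failed
 * re-protection of the prefix pages bails out with an error, after
 * re-arming KVM_REQ_MMU_RELOAD for the next attempt.
 */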
3050 
3051 void kvm_s390_set_tod_clock(struct kvm *kvm,
3052 			    const struct kvm_s390_vm_tod_clock *gtod)
3053 {
3054 	struct kvm_vcpu *vcpu;
3055 	struct kvm_s390_tod_clock_ext htod;
3056 	int i;
3057 
3058 	mutex_lock(&kvm->lock);
3059 	preempt_disable();
3060 
3061 	get_tod_clock_ext((char *)&htod);
3062 
3063 	kvm->arch.epoch = gtod->tod - htod.tod;
3064 	kvm->arch.epdx = 0;
3065 	if (test_kvm_facility(kvm, 139)) {
3066 		kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3067 		if (kvm->arch.epoch > gtod->tod)
3068 			kvm->arch.epdx -= 1;
3069 	}
3070 
3071 	kvm_s390_vcpu_block_all(kvm);
3072 	kvm_for_each_vcpu(i, vcpu, kvm) {
3073 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3074 		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3075 	}
3076 
3077 	kvm_s390_vcpu_unblock_all(kvm);
3078 	preempt_enable();
3079 	mutex_unlock(&kvm->lock);
3080 }
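
/*
 * The guest epoch is the plain difference between the requested guest TOD
 * and the current host TOD.  For illustration (hypothetical values): with
 * a host TOD of 0x1000 and a requested guest TOD of 0x0800 the epoch wraps
 * to 0xfffffffffffff800, and on machines with the multiple-epoch facility
 * (139) the epoch index is decremented by one to account for the borrow.
 */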
3081 
3082 /**
3083  * kvm_arch_fault_in_page - fault-in guest page if necessary
3084  * @vcpu: The corresponding virtual cpu
3085  * @gpa: Guest physical address
3086  * @writable: Whether the page should be writable or not
3087  *
3088  * Make sure that a guest page has been faulted-in on the host.
3089  *
3090  * Return: Zero on success, negative error code otherwise.
3091  */
3092 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3093 {
3094 	return gmap_fault(vcpu->arch.gmap, gpa,
3095 			  writable ? FAULT_FLAG_WRITE : 0);
3096 }
3097 
3098 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3099 				      unsigned long token)
3100 {
3101 	struct kvm_s390_interrupt inti;
3102 	struct kvm_s390_irq irq;
3103 
3104 	if (start_token) {
3105 		irq.u.ext.ext_params2 = token;
3106 		irq.type = KVM_S390_INT_PFAULT_INIT;
3107 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3108 	} else {
3109 		inti.type = KVM_S390_INT_PFAULT_DONE;
3110 		inti.parm64 = token;
3111 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3112 	}
3113 }
3114 
3115 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3116 				     struct kvm_async_pf *work)
3117 {
3118 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3119 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3120 }
3121 
3122 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3123 				 struct kvm_async_pf *work)
3124 {
3125 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3126 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3127 }
3128 
3129 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3130 			       struct kvm_async_pf *work)
3131 {
3132 	/* s390 will always inject the page directly */
3133 }
3134 
3135 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3136 {
3137 	/*
3138 	 * s390 will always inject the page directly,
3139 	 * but we still want check_async_completion to clean up
3140 	 */
3141 	return true;
3142 }
3143 
3144 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3145 {
3146 	hva_t hva;
3147 	struct kvm_arch_async_pf arch;
3148 	int rc;
3149 
3150 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3151 		return 0;
3152 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3153 	    vcpu->arch.pfault_compare)
3154 		return 0;
3155 	if (psw_extint_disabled(vcpu))
3156 		return 0;
3157 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
3158 		return 0;
3159 	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
3160 		return 0;
3161 	if (!vcpu->arch.gmap->pfault_enabled)
3162 		return 0;
3163 
3164 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3165 	hva += current->thread.gmap_addr & ~PAGE_MASK;
3166 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3167 		return 0;
3168 
3169 	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3170 	return rc;
3171 }
3172 
3173 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3174 {
3175 	int rc, cpuflags;
3176 
3177 	/*
3178 	 * On s390, notifications for arriving pages will be delivered directly
3179 	 * to the guest, but the housekeeping for completed pfaults is
3180 	 * handled outside the worker.
3181 	 */
3182 	kvm_check_async_pf_completion(vcpu);
3183 
3184 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3185 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3186 
3187 	if (need_resched())
3188 		schedule();
3189 
3190 	if (test_cpu_flag(CIF_MCCK_PENDING))
3191 		s390_handle_mcck();
3192 
3193 	if (!kvm_is_ucontrol(vcpu->kvm)) {
3194 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
3195 		if (rc)
3196 			return rc;
3197 	}
3198 
3199 	rc = kvm_s390_handle_requests(vcpu);
3200 	if (rc)
3201 		return rc;
3202 
3203 	if (guestdbg_enabled(vcpu)) {
3204 		kvm_s390_backup_guest_per_regs(vcpu);
3205 		kvm_s390_patch_guest_per_regs(vcpu);
3206 	}
3207 
3208 	vcpu->arch.sie_block->icptcode = 0;
3209 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3210 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3211 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
3212 
3213 	return 0;
3214 }
3215 
3216 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3217 {
3218 	struct kvm_s390_pgm_info pgm_info = {
3219 		.code = PGM_ADDRESSING,
3220 	};
3221 	u8 opcode, ilen;
3222 	int rc;
3223 
3224 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3225 	trace_kvm_s390_sie_fault(vcpu);
3226 
3227 	/*
3228 	 * We want to inject an addressing exception, which is defined as a
3229 	 * suppressing or terminating exception. However, since we came here
3230 	 * by a DAT access exception, the PSW still points to the faulting
3231 	 * instruction, as DAT exceptions are nullifying. So we have to look
3232 	 * up the current opcode to get the length of the instruction to be
3233 	 * able to forward the PSW.
3234 	 */
3235 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3236 	ilen = insn_length(opcode);
3237 	if (rc < 0) {
3238 		return rc;
3239 	} else if (rc) {
3240 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
3241 		 * Forward by an arbitrary ilc; injection will take care of
3242 		 * nullification if necessary.
3243 		 */
3244 		pgm_info = vcpu->arch.pgm;
3245 		ilen = 4;
3246 	}
3247 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3248 	kvm_s390_forward_psw(vcpu, ilen);
3249 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3250 }
3251 
3252 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3253 {
3254 	struct mcck_volatile_info *mcck_info;
3255 	struct sie_page *sie_page;
3256 
3257 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3258 		   vcpu->arch.sie_block->icptcode);
3259 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3260 
3261 	if (guestdbg_enabled(vcpu))
3262 		kvm_s390_restore_guest_per_regs(vcpu);
3263 
3264 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3265 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3266 
3267 	if (exit_reason == -EINTR) {
3268 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
3269 		sie_page = container_of(vcpu->arch.sie_block,
3270 					struct sie_page, sie_block);
3271 		mcck_info = &sie_page->mcck_info;
3272 		kvm_s390_reinject_machine_check(vcpu, mcck_info);
3273 		return 0;
3274 	}
3275 
3276 	if (vcpu->arch.sie_block->icptcode > 0) {
3277 		int rc = kvm_handle_sie_intercept(vcpu);
3278 
3279 		if (rc != -EOPNOTSUPP)
3280 			return rc;
3281 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3282 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3283 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3284 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3285 		return -EREMOTE;
3286 	} else if (exit_reason != -EFAULT) {
3287 		vcpu->stat.exit_null++;
3288 		return 0;
3289 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
3290 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3291 		vcpu->run->s390_ucontrol.trans_exc_code =
3292 						current->thread.gmap_addr;
3293 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
3294 		return -EREMOTE;
3295 	} else if (current->thread.gmap_pfault) {
3296 		trace_kvm_s390_major_guest_pfault(vcpu);
3297 		current->thread.gmap_pfault = 0;
3298 		if (kvm_arch_setup_async_pf(vcpu))
3299 			return 0;
3300 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3301 	}
3302 	return vcpu_post_run_fault_in_sie(vcpu);
3303 }
3304 
3305 static int __vcpu_run(struct kvm_vcpu *vcpu)
3306 {
3307 	int rc, exit_reason;
3308 
3309 	/*
3310 	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
3311 	 * ning the guest), so that memslots (and other stuff) are protected
3312 	 */
3313 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3314 
3315 	do {
3316 		rc = vcpu_pre_run(vcpu);
3317 		if (rc)
3318 			break;
3319 
3320 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3321 		/*
3322 		 * As PF_VCPU will be used in the fault handler, there must be
3323 		 * no uaccess between guest_enter and guest_exit.
3324 		 */
3325 		local_irq_disable();
3326 		guest_enter_irqoff();
3327 		__disable_cpu_timer_accounting(vcpu);
3328 		local_irq_enable();
3329 		exit_reason = sie64a(vcpu->arch.sie_block,
3330 				     vcpu->run->s.regs.gprs);
3331 		local_irq_disable();
3332 		__enable_cpu_timer_accounting(vcpu);
3333 		guest_exit_irqoff();
3334 		local_irq_enable();
3335 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3336 
3337 		rc = vcpu_post_run(vcpu, exit_reason);
3338 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3339 
3340 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3341 	return rc;
3342 }
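
/*
 * Note the ordering in the run loop above: kvm->srcu is dropped before
 * entering SIE and re-taken afterwards, interrupts are disabled only
 * around guest_enter_irqoff()/guest_exit_irqoff(), and the software CPU
 * timer accounting is paused while sie64a() runs, since SIE itself
 * maintains the guest CPU timer in the SIE control block.
 */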
3343 
3344 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3345 {
3346 	struct runtime_instr_cb *riccb;
3347 	struct gs_cb *gscb;
3348 
3349 	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3350 	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3351 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3352 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3353 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3354 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3355 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3356 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3357 		/* some control register changes require a tlb flush */
3358 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3359 	}
3360 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3361 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3362 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3363 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3364 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3365 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3366 	}
3367 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3368 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3369 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3370 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3371 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3372 			kvm_clear_async_pf_completion_queue(vcpu);
3373 	}
3374 	/*
3375 	 * If userspace sets the riccb (e.g. after migration) to a valid state,
3376 	 * we should enable RI here instead of doing the lazy enablement.
3377 	 */
3378 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3379 	    test_kvm_facility(vcpu->kvm, 64) &&
3380 	    riccb->v &&
3381 	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3382 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3383 		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3384 	}
3385 	/*
3386 	 * If userspace sets the gscb (e.g. after migration) to non-zero,
3387 	 * we should enable GS here instead of doing the lazy enablement.
3388 	 */
3389 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3390 	    test_kvm_facility(vcpu->kvm, 133) &&
3391 	    gscb->gssm &&
3392 	    !vcpu->arch.gs_enabled) {
3393 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3394 		vcpu->arch.sie_block->ecb |= ECB_GS;
3395 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3396 		vcpu->arch.gs_enabled = 1;
3397 	}
3398 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3399 	    test_kvm_facility(vcpu->kvm, 82)) {
3400 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3401 		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
3402 	}
3403 	save_access_regs(vcpu->arch.host_acrs);
3404 	restore_access_regs(vcpu->run->s.regs.acrs);
3405 	/* save host (userspace) fprs/vrs */
3406 	save_fpu_regs();
3407 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3408 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3409 	if (MACHINE_HAS_VX)
3410 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3411 	else
3412 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3413 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3414 	if (test_fp_ctl(current->thread.fpu.fpc))
3415 		/* User space provided an invalid FPC, let's clear it */
3416 		current->thread.fpu.fpc = 0;
3417 	if (MACHINE_HAS_GS) {
3418 		preempt_disable();
3419 		__ctl_set_bit(2, 4);
3420 		if (current->thread.gs_cb) {
3421 			vcpu->arch.host_gscb = current->thread.gs_cb;
3422 			save_gs_cb(vcpu->arch.host_gscb);
3423 		}
3424 		if (vcpu->arch.gs_enabled) {
3425 			current->thread.gs_cb = (struct gs_cb *)
3426 						&vcpu->run->s.regs.gscb;
3427 			restore_gs_cb(current->thread.gs_cb);
3428 		}
3429 		preempt_enable();
3430 	}
3431 
3432 	kvm_run->kvm_dirty_regs = 0;
3433 }
3434 
3435 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3436 {
3437 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3438 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3439 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3440 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3441 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3442 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3443 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3444 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3445 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3446 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3447 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3448 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3449 	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3450 	save_access_regs(vcpu->run->s.regs.acrs);
3451 	restore_access_regs(vcpu->arch.host_acrs);
3452 	/* Save guest register state */
3453 	save_fpu_regs();
3454 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3455 	/* Restore will be done lazily at return */
3456 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3457 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3458 	if (MACHINE_HAS_GS) {
3459 		__ctl_set_bit(2, 4);
3460 		if (vcpu->arch.gs_enabled)
3461 			save_gs_cb(current->thread.gs_cb);
3462 		preempt_disable();
3463 		current->thread.gs_cb = vcpu->arch.host_gscb;
3464 		restore_gs_cb(vcpu->arch.host_gscb);
3465 		preempt_enable();
3466 		if (!vcpu->arch.host_gscb)
3467 			__ctl_clear_bit(2, 4);
3468 		vcpu->arch.host_gscb = NULL;
3469 	}
3470 
3471 }
3472 
3473 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3474 {
3475 	int rc;
3476 
3477 	if (kvm_run->immediate_exit)
3478 		return -EINTR;
3479 
3480 	vcpu_load(vcpu);
3481 
3482 	if (guestdbg_exit_pending(vcpu)) {
3483 		kvm_s390_prepare_debug_exit(vcpu);
3484 		rc = 0;
3485 		goto out;
3486 	}
3487 
3488 	kvm_sigset_activate(vcpu);
3489 
3490 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3491 		kvm_s390_vcpu_start(vcpu);
3492 	} else if (is_vcpu_stopped(vcpu)) {
3493 		pr_err_ratelimited("can't run stopped vcpu %d\n",
3494 				   vcpu->vcpu_id);
3495 		rc = -EINVAL;
3496 		goto out;
3497 	}
3498 
3499 	sync_regs(vcpu, kvm_run);
3500 	enable_cpu_timer_accounting(vcpu);
3501 
3502 	might_fault();
3503 	rc = __vcpu_run(vcpu);
3504 
3505 	if (signal_pending(current) && !rc) {
3506 		kvm_run->exit_reason = KVM_EXIT_INTR;
3507 		rc = -EINTR;
3508 	}
3509 
3510 	if (guestdbg_exit_pending(vcpu) && !rc)  {
3511 		kvm_s390_prepare_debug_exit(vcpu);
3512 		rc = 0;
3513 	}
3514 
3515 	if (rc == -EREMOTE) {
3516 		/* userspace support is needed, kvm_run has been prepared */
3517 		rc = 0;
3518 	}
3519 
3520 	disable_cpu_timer_accounting(vcpu);
3521 	store_regs(vcpu, kvm_run);
3522 
3523 	kvm_sigset_deactivate(vcpu);
3524 
3525 	vcpu->stat.exit_userspace++;
3526 out:
3527 	vcpu_put(vcpu);
3528 	return rc;
3529 }
3530 
3531 /*
3532  * store status at address
3533  * we use have two special cases:
3534  * we have two special cases:
3535  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3536  */
3537 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3538 {
3539 	unsigned char archmode = 1;
3540 	freg_t fprs[NUM_FPRS];
3541 	unsigned int px;
3542 	u64 clkcomp, cputm;
3543 	int rc;
3544 
3545 	px = kvm_s390_get_prefix(vcpu);
3546 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3547 		if (write_guest_abs(vcpu, 163, &archmode, 1))
3548 			return -EFAULT;
3549 		gpa = 0;
3550 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3551 		if (write_guest_real(vcpu, 163, &archmode, 1))
3552 			return -EFAULT;
3553 		gpa = px;
3554 	} else
3555 		gpa -= __LC_FPREGS_SAVE_AREA;
3556 
3557 	/* manually convert vector registers if necessary */
3558 	if (MACHINE_HAS_VX) {
3559 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3560 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3561 				     fprs, 128);
3562 	} else {
3563 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3564 				     vcpu->run->s.regs.fprs, 128);
3565 	}
3566 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3567 			      vcpu->run->s.regs.gprs, 128);
3568 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3569 			      &vcpu->arch.sie_block->gpsw, 16);
3570 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3571 			      &px, 4);
3572 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3573 			      &vcpu->run->s.regs.fpc, 4);
3574 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3575 			      &vcpu->arch.sie_block->todpr, 4);
3576 	cputm = kvm_s390_get_cpu_timer(vcpu);
3577 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3578 			      &cputm, 8);
3579 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
3580 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3581 			      &clkcomp, 8);
3582 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3583 			      &vcpu->run->s.regs.acrs, 64);
3584 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3585 			      &vcpu->arch.sie_block->gcr, 128);
3586 	return rc ? -EFAULT : 0;
3587 }
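
/*
 * The sequence above stores the register state at the architected
 * __LC_*_SAVE_AREA offsets of the save area.  The individual
 * write_guest_abs() return codes are OR-ed together, so any failed store
 * collapses into a single -EFAULT for the caller.
 */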
3588 
3589 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3590 {
3591 	/*
3592 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3593 	 * switch in the run ioctl. Let's update our copies before we save
3594 	 * them into the save area.
3595 	 */
3596 	save_fpu_regs();
3597 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3598 	save_access_regs(vcpu->run->s.regs.acrs);
3599 
3600 	return kvm_s390_store_status_unloaded(vcpu, addr);
3601 }
3602 
3603 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3604 {
3605 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3606 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3607 }
3608 
3609 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3610 {
3611 	unsigned int i;
3612 	struct kvm_vcpu *vcpu;
3613 
3614 	kvm_for_each_vcpu(i, vcpu, kvm) {
3615 		__disable_ibs_on_vcpu(vcpu);
3616 	}
3617 }
3618 
3619 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3620 {
3621 	if (!sclp.has_ibs)
3622 		return;
3623 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3624 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3625 }
3626 
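/*
 * Move a stopped VCPU to the operating state, adjusting IBS usage to
 * the new number of started VCPUs.
 */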
3627 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3628 {
3629 	int i, online_vcpus, started_vcpus = 0;
3630 
3631 	if (!is_vcpu_stopped(vcpu))
3632 		return;
3633 
3634 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3635 	/* Only one cpu at a time may enter/leave the STOPPED state. */
3636 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
3637 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3638 
3639 	for (i = 0; i < online_vcpus; i++) {
3640 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3641 			started_vcpus++;
3642 	}
3643 
3644 	if (started_vcpus == 0) {
3645 		/* we're the only active VCPU -> speed it up */
3646 		__enable_ibs_on_vcpu(vcpu);
3647 	} else if (started_vcpus == 1) {
3648 		/*
3649 		 * As we are starting a second VCPU, we have to disable
3650 		 * the IBS facility on all VCPUs to remove potentially
3651 		 * outstanding ENABLE requests.
3652 		 */
3653 		__disable_ibs_on_all_vcpus(vcpu->kvm);
3654 	}
3655 
3656 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
3657 	/*
3658 	 * Another VCPU might have used IBS while we were offline.
3659 	 * Let's play safe and flush the VCPU at startup.
3660 	 */
3661 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3662 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3663 	return;
3664 }
3665 
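/*
 * Move a running VCPU to the stopped state, adjusting IBS usage to the
 * new number of started VCPUs.
 */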
3666 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3667 {
3668 	int i, online_vcpus, started_vcpus = 0;
3669 	struct kvm_vcpu *started_vcpu = NULL;
3670 
3671 	if (is_vcpu_stopped(vcpu))
3672 		return;
3673 
3674 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3675 	/* Only one cpu at a time may enter/leave the STOPPED state. */
3676 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
3677 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3678 
3679 	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
3680 	kvm_s390_clear_stop_irq(vcpu);
3681 
3682 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
3683 	__disable_ibs_on_vcpu(vcpu);
3684 
3685 	for (i = 0; i < online_vcpus; i++) {
3686 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3687 			started_vcpus++;
3688 			started_vcpu = vcpu->kvm->vcpus[i];
3689 		}
3690 	}
3691 
3692 	if (started_vcpus == 1) {
3693 		/*
3694 		 * As we only have one VCPU left, we want to enable the
3695 		 * IBS facility for that VCPU to speed it up.
3696 		 */
3697 		__enable_ibs_on_vcpu(started_vcpu);
3698 	}
3699 
3700 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3701 	return;
3702 }
3703 
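/*
 * VCPU-level KVM_ENABLE_CAP handler. Only KVM_CAP_S390_CSS_SUPPORT is
 * handled here; although requested through a VCPU ioctl, it sets the
 * VM-wide css_support flag.
 */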
3704 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3705 				     struct kvm_enable_cap *cap)
3706 {
3707 	int r;
3708 
3709 	if (cap->flags)
3710 		return -EINVAL;
3711 
3712 	switch (cap->cap) {
3713 	case KVM_CAP_S390_CSS_SUPPORT:
3714 		if (!vcpu->kvm->arch.css_support) {
3715 			vcpu->kvm->arch.css_support = 1;
3716 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3717 			trace_kvm_s390_enable_css(vcpu->kvm);
3718 		}
3719 		r = 0;
3720 		break;
3721 	default:
3722 		r = -EINVAL;
3723 		break;
3724 	}
3725 	return r;
3726 }
3727 
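/*
 * Handle KVM_S390_MEM_OP: copy data between user space and guest
 * logical memory through a temporary buffer, or only check the guest
 * address range when KVM_S390_MEMOP_F_CHECK_ONLY is set. A resulting
 * access exception can optionally be injected into the guest.
 */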
3728 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3729 				  struct kvm_s390_mem_op *mop)
3730 {
3731 	void __user *uaddr = (void __user *)mop->buf;
3732 	void *tmpbuf = NULL;
3733 	int r, srcu_idx;
3734 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3735 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
3736 
3737 	if (mop->flags & ~supported_flags)
3738 		return -EINVAL;
3739 
3740 	if (mop->size > MEM_OP_MAX_SIZE)
3741 		return -E2BIG;
3742 
3743 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3744 		tmpbuf = vmalloc(mop->size);
3745 		if (!tmpbuf)
3746 			return -ENOMEM;
3747 	}
3748 
3749 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3750 
3751 	switch (mop->op) {
3752 	case KVM_S390_MEMOP_LOGICAL_READ:
3753 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3754 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3755 					    mop->size, GACC_FETCH);
3756 			break;
3757 		}
3758 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3759 		if (r == 0) {
3760 			if (copy_to_user(uaddr, tmpbuf, mop->size))
3761 				r = -EFAULT;
3762 		}
3763 		break;
3764 	case KVM_S390_MEMOP_LOGICAL_WRITE:
3765 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3766 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3767 					    mop->size, GACC_STORE);
3768 			break;
3769 		}
3770 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3771 			r = -EFAULT;
3772 			break;
3773 		}
3774 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3775 		break;
3776 	default:
3777 		r = -EINVAL;
3778 	}
3779 
3780 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3781 
3782 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3783 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3784 
3785 	vfree(tmpbuf);
3786 	return r;
3787 }
3788 
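/*
 * "Async" VCPU ioctls are handled before the vcpu mutex is taken, so
 * interrupts can be injected into a VCPU that is currently running.
 */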
3789 long kvm_arch_vcpu_async_ioctl(struct file *filp,
3790 			       unsigned int ioctl, unsigned long arg)
3791 {
3792 	struct kvm_vcpu *vcpu = filp->private_data;
3793 	void __user *argp = (void __user *)arg;
3794 
3795 	switch (ioctl) {
3796 	case KVM_S390_IRQ: {
3797 		struct kvm_s390_irq s390irq;
3798 
3799 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3800 			return -EFAULT;
3801 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
3802 	}
3803 	case KVM_S390_INTERRUPT: {
3804 		struct kvm_s390_interrupt s390int;
3805 		struct kvm_s390_irq s390irq;
3806 
3807 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
3808 			return -EFAULT;
3809 		if (s390int_to_s390irq(&s390int, &s390irq))
3810 			return -EINVAL;
3811 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
3812 	}
3813 	}
3814 	return -ENOIOCTLCMD;
3815 }
3816 
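/* All remaining VCPU ioctls run with the VCPU loaded (vcpu_load/vcpu_put). */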
3817 long kvm_arch_vcpu_ioctl(struct file *filp,
3818 			 unsigned int ioctl, unsigned long arg)
3819 {
3820 	struct kvm_vcpu *vcpu = filp->private_data;
3821 	void __user *argp = (void __user *)arg;
3822 	int idx;
3823 	long r;
3824 
3825 	vcpu_load(vcpu);
3826 
3827 	switch (ioctl) {
3828 	case KVM_S390_STORE_STATUS:
3829 		idx = srcu_read_lock(&vcpu->kvm->srcu);
3830 		r = kvm_s390_vcpu_store_status(vcpu, arg);
3831 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
3832 		break;
3833 	case KVM_S390_SET_INITIAL_PSW: {
3834 		psw_t psw;
3835 
3836 		r = -EFAULT;
3837 		if (copy_from_user(&psw, argp, sizeof(psw)))
3838 			break;
3839 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3840 		break;
3841 	}
3842 	case KVM_S390_INITIAL_RESET:
3843 		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3844 		break;
3845 	case KVM_SET_ONE_REG:
3846 	case KVM_GET_ONE_REG: {
3847 		struct kvm_one_reg reg;
3848 		r = -EFAULT;
3849 		if (copy_from_user(&reg, argp, sizeof(reg)))
3850 			break;
3851 		if (ioctl == KVM_SET_ONE_REG)
3852 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3853 		else
3854 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3855 		break;
3856 	}
3857 #ifdef CONFIG_KVM_S390_UCONTROL
3858 	case KVM_S390_UCAS_MAP: {
3859 		struct kvm_s390_ucas_mapping ucasmap;
3860 
3861 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3862 			r = -EFAULT;
3863 			break;
3864 		}
3865 
3866 		if (!kvm_is_ucontrol(vcpu->kvm)) {
3867 			r = -EINVAL;
3868 			break;
3869 		}
3870 
3871 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3872 				     ucasmap.vcpu_addr, ucasmap.length);
3873 		break;
3874 	}
3875 	case KVM_S390_UCAS_UNMAP: {
3876 		struct kvm_s390_ucas_mapping ucasmap;
3877 
3878 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3879 			r = -EFAULT;
3880 			break;
3881 		}
3882 
3883 		if (!kvm_is_ucontrol(vcpu->kvm)) {
3884 			r = -EINVAL;
3885 			break;
3886 		}
3887 
3888 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3889 			ucasmap.length);
3890 		break;
3891 	}
3892 #endif
3893 	case KVM_S390_VCPU_FAULT: {
3894 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
3895 		break;
3896 	}
3897 	case KVM_ENABLE_CAP:
3898 	{
3899 		struct kvm_enable_cap cap;
3900 		r = -EFAULT;
3901 		if (copy_from_user(&cap, argp, sizeof(cap)))
3902 			break;
3903 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3904 		break;
3905 	}
3906 	case KVM_S390_MEM_OP: {
3907 		struct kvm_s390_mem_op mem_op;
3908 
3909 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3910 			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3911 		else
3912 			r = -EFAULT;
3913 		break;
3914 	}
3915 	case KVM_S390_SET_IRQ_STATE: {
3916 		struct kvm_s390_irq_state irq_state;
3917 
3918 		r = -EFAULT;
3919 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3920 			break;
3921 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3922 		    irq_state.len == 0 ||
3923 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3924 			r = -EINVAL;
3925 			break;
3926 		}
3927 		/* do not use irq_state.flags, it will break old QEMUs */
3928 		r = kvm_s390_set_irq_state(vcpu,
3929 					   (void __user *) irq_state.buf,
3930 					   irq_state.len);
3931 		break;
3932 	}
3933 	case KVM_S390_GET_IRQ_STATE: {
3934 		struct kvm_s390_irq_state irq_state;
3935 
3936 		r = -EFAULT;
3937 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3938 			break;
3939 		if (irq_state.len == 0) {
3940 			r = -EINVAL;
3941 			break;
3942 		}
3943 		/* do not use irq_state.flags, it will break old QEMUs */
3944 		r = kvm_s390_get_irq_state(vcpu,
3945 					   (__u8 __user *)  irq_state.buf,
3946 					   irq_state.len);
3947 		break;
3948 	}
3949 	default:
3950 		r = -ENOTTY;
3951 	}
3952 
3953 	vcpu_put(vcpu);
3954 	return r;
3955 }
3956 
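/*
 * For user-controlled (ucontrol) VMs, user space may mmap the SIE
 * control block at KVM_S390_SIE_PAGE_OFFSET; all other faults on the
 * vcpu fd raise SIGBUS.
 */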
3957 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3958 {
3959 #ifdef CONFIG_KVM_S390_UCONTROL
3960 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3961 		 && (kvm_is_ucontrol(vcpu->kvm))) {
3962 		vmf->page = virt_to_page(vcpu->arch.sie_block);
3963 		get_page(vmf->page);
3964 		return 0;
3965 	}
3966 #endif
3967 	return VM_FAULT_SIGBUS;
3968 }
3969 
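/* No arch-specific memslot data needs to be allocated on s390. */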
3970 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3971 			    unsigned long npages)
3972 {
3973 	return 0;
3974 }
3975 
3976 /* Section: memory related */
3977 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3978 				   struct kvm_memory_slot *memslot,
3979 				   const struct kvm_userspace_memory_region *mem,
3980 				   enum kvm_mr_change change)
3981 {
3982 	/* A few sanity checks. Memory slots have to start and end on a
3983 	   segment boundary (1MB). The memory in userland may be fragmented
3984 	   into various different vmas. It is okay to mmap() and munmap()
3985 	   parts of this slot at any time after this call. */
3986 
3987 	if (mem->userspace_addr & 0xffffful)
3988 		return -EINVAL;
3989 
3990 	if (mem->memory_size & 0xffffful)
3991 		return -EINVAL;
3992 
3993 	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3994 		return -EINVAL;
3995 
3996 	return 0;
3997 }
3998 
3999 void kvm_arch_commit_memory_region(struct kvm *kvm,
4000 				const struct kvm_userspace_memory_region *mem,
4001 				const struct kvm_memory_slot *old,
4002 				const struct kvm_memory_slot *new,
4003 				enum kvm_mr_change change)
4004 {
4005 	int rc;
4006 
4007 	/* If the basics of the memslot do not change, we do not want
4008 	 * to update the gmap. Every update causes several unnecessary
4009 	 * segment translation exceptions. This is usually handled just
4010 	 * fine by the normal fault handler + gmap, but it will also
4011 	 * cause faults on the prefix page of running guest CPUs.
4012 	 */
4013 	if (old->userspace_addr == mem->userspace_addr &&
4014 	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
4015 	    old->npages * PAGE_SIZE == mem->memory_size)
4016 		return;
4017 
4018 	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4019 		mem->guest_phys_addr, mem->memory_size);
4020 	if (rc)
4021 		pr_warn("failed to commit memory region\n");
4022 	return;
4023 }
4024 
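/*
 * Build the mask of facility bits in facility-list word i that are not
 * hypervisor managed: the two hmfai bits for this word shrink the
 * usable part of the 48-bit mask in 16-bit steps.
 */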
4025 static inline unsigned long nonhyp_mask(int i)
4026 {
4027 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
4028 
4029 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
4030 }
4031 
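/* The VCPU has finished blocking; any earlier wakeup is no longer valid. */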
4032 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
4033 {
4034 	vcpu->valid_wakeup = false;
4035 }
4036 
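/*
 * Module init: refuse to load without SIE (sief2) and extend the
 * facility mask with all host facilities that do not require
 * hypervisor support.
 */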
4037 static int __init kvm_s390_init(void)
4038 {
4039 	int i;
4040 
4041 	if (!sclp.has_sief2) {
4042 		pr_info("SIE not available\n");
4043 		return -ENODEV;
4044 	}
4045 
4046 	for (i = 0; i < 16; i++)
4047 		kvm_s390_fac_list_mask[i] |=
4048 			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
4049 
4050 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
4051 }
4052 
4053 static void __exit kvm_s390_exit(void)
4054 {
4055 	kvm_exit();
4056 }
4057 
4058 module_init(kvm_s390_init);
4059 module_exit(kvm_s390_exit);
4060 
4061 /*
4062  * Enable autoloading of the kvm module.
4063  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
4064  * since x86 takes a different approach.
4065  */
4066 #include <linux/miscdevice.h>
4067 MODULE_ALIAS_MISCDEV(KVM_MINOR);
4068 MODULE_ALIAS("devname:kvm");
4069