/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define KMSG_COMPONENT "kvm-s390"
#undef pr_fmt
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

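/*
 * VCPU_STAT expands to the (offset, type) pair expected in
 * struct kvm_stats_debugfs_item, so every entry below only needs to
 * name the debugfs file and the vcpu stat field it exports.
 */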
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU

struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_pei", VCPU_STAT(exit_pei) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
	{ "instruction_sie", VCPU_STAT(instruction_sie) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "diagnose_10", VCPU_STAT(diagnose_10) },
	{ "diagnose_44", VCPU_STAT(diagnose_44) },
	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
	{ "diagnose_258", VCPU_STAT(diagnose_258) },
	{ "diagnose_308", VCPU_STAT(diagnose_308) },
	{ "diagnose_500", VCPU_STAT(diagnose_500) },
	{ NULL }
};

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };

unsigned long kvm_s390_fac_list_mask_size(void)
{
	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
	return ARRAY_SIZE(kvm_s390_fac_list_mask);
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm->arch.epoch -= *delta;
		kvm_for_each_vcpu(i, vcpu, kvm) {
			vcpu->arch.sie_block->epoch -= *delta;
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				vcpu->arch.vsie_block->epoch -= *delta;
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

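/*
 * PERFORM LOCKED OPERATION (PLO) with bit 0x100 set in general
 * register 0 acts as a "test bit" query: condition code 0 means the
 * function code in the low byte of r0 is installed. This is used
 * below to probe all 256 possible PLO function codes.
 */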
static inline int plo_test_bit(unsigned char nr)
{
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc;

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc == 0;
}

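/*
 * Probe the host for available subfunctions (PLO, PTFF and the CPACF
 * crypto queries) and record them for the CPU model interface. The
 * second half gates the vSIE related CPU features: without SIEF2,
 * ESOP, 64-bit SCAO and IDTE (or with nested=0), none of them are
 * made available.
 */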
static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64-bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will cause
	 * pages to be detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}

int kvm_arch_init(void *opaque)
{
	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		debug_unregister(kvm_s390_dbf);
		return -ENOMEM;
	}

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}

void kvm_arch_exit(void)
{
	debug_unregister(kvm_s390_dbf);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
		r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	default:
		r = 0;
	}
	return r;
}

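/*
 * Illustrative sketch (not part of this file): user space typically
 * probes these capabilities before relying on them, e.g.:
 *
 *	int r = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *	if (r > 0)
 *		max_mem_op_size = r;	(here: MEM_OP_MAX_SIZE, 64k)
 */
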
/*
 * Walk every page of the memslot and propagate the per-page dirty
 * state from the host page tables / PGSTEs into the memslot's dirty
 * bitmap.
 */
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
					struct kvm_memory_slot *memslot)
{
	gfn_t cur_gfn, last_gfn;
	unsigned long address;
	struct gmap *gmap = kvm->arch.gmap;

	/* Loop over all guest pages; last_gfn is the first gfn past the slot */
	last_gfn = memslot->base_gfn + memslot->npages;
	for (cur_gfn = memslot->base_gfn; cur_gfn < last_gfn; cur_gfn++) {
		address = gfn_to_hva_memslot(memslot, cur_gfn);

		if (test_and_clear_guest_dirty(gmap->mm, address))
			mark_page_dirty(kvm, cur_gfn);
		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}

static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}

static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
			r = 0;
		} else {
			r = -EINVAL;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_AIS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else {
			set_kvm_facility(kvm->arch.model.fac_mask, 72);
			set_kvm_facility(kvm->arch.model.fac_list, 72);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_GS:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus)) {
			r = -EBUSY;
		} else if (test_facility(133)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 133);
			set_kvm_facility(kvm->arch.model.fac_list, 133);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

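/*
 * Illustrative sketch (not part of this file): VM-wide capabilities
 * are enabled via KVM_ENABLE_CAP on the VM file descriptor, for most
 * of them before the first VCPU is created, e.g.:
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 */
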
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

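/*
 * Handle the KVM_S390_VM_MEM_CTRL attribute group: enabling CMMA and
 * changing the memory limit are only possible while no VCPUs exist;
 * resetting the CMMA state requires CMMA to have been enabled first.
 */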
static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		ret = -EBUSY;
		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			kvm->arch.use_cmma = 1;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_vcpu *vcpu;
	int i;

	if (!test_kvm_facility(kvm, 76))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		exit_sie(vcpu);
	}
	mutex_unlock(&kvm->lock);
	return 0;
}

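/*
 * Illustrative sketch (not part of this file): the key wrapping
 * controls above are driven through KVM_SET_DEVICE_ATTR, e.g.:
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CRYPTO,
 *		.attr  = KVM_S390_VM_CRYPTO_ENABLE_AES_KW,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */
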
static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
	int cx;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}

/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_s390_migration_state *mgs;
	struct kvm_memory_slot *ms;
	/* should be the only one */
	struct kvm_memslots *slots;
	unsigned long ram_pages;
	int slotnr;

	/* migration mode already enabled */
	if (kvm->arch.migration_state)
		return 0;

	slots = kvm_memslots(kvm);
	if (!slots || !slots->used_slots)
		return -EINVAL;

	mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
	if (!mgs)
		return -ENOMEM;
	kvm->arch.migration_state = mgs;

	if (kvm->arch.use_cmma) {
		/*
		 * Get the last slot. They should be sorted by base_gfn, so the
		 * last slot is also the one at the end of the address space.
		 * We have verified above that at least one slot is present.
		 */
		ms = slots->memslots + slots->used_slots - 1;
		/* round up so we only use full longs */
		ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
		/* allocate enough bytes to store all the bits */
		mgs->pgste_bitmap = vmalloc(ram_pages / 8);
		if (!mgs->pgste_bitmap) {
			kfree(mgs);
			kvm->arch.migration_state = NULL;
			return -ENOMEM;
		}

		mgs->bitmap_size = ram_pages;
		atomic64_set(&mgs->dirty_pages, ram_pages);
		/* mark all the pages in active slots as dirty */
		for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
			ms = slots->memslots + slotnr;
			bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
		}

		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	}
	return 0;
}

/*
 * Must be called with kvm->lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	struct kvm_s390_migration_state *mgs;

	/* migration mode already disabled */
	if (!kvm->arch.migration_state)
		return 0;
	mgs = kvm->arch.migration_state;
	kvm->arch.migration_state = NULL;

	if (kvm->arch.use_cmma) {
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
		vfree(mgs->pgste_bitmap);
	}
	kfree(mgs);
	return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int idx, res = -ENXIO;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		idx = srcu_read_lock(&kvm->srcu);
		res = kvm_s390_vm_start_migration(kvm);
		srcu_read_unlock(&kvm->srcu, idx);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->lock);

	return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = (kvm->arch.migration_state != NULL);

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}

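/*
 * Illustrative sketch (not part of this file): migration mode is
 * toggled through the same KVM_SET_DEVICE_ATTR interface, e.g.:
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MIGRATION,
 *		.attr  = KVM_S390_VM_MIGRATION_START,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */
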
static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
					   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
					 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else {
		ret = -EFAULT;
	}
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;
	int ret = -EBUSY;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (!atomic_read(&kvm->online_vcpus)) {
		bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
			    KVM_S390_VM_CPU_FEAT_NR_BITS);
		ret = 0;
	}
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once supported by kernel + hw, we have to store the subfunctions
	 * in kvm->arch and remember that user space configured them.
	 */
	return -ENXIO;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_list[0],
		 kvm->arch.model.fac_list[1],
		 kvm->arch.model.fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_mask[0],
		 mach->fac_mask[1],
		 mach->fac_mask[2]);
	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_list[0],
		 mach->fac_list[1],
		 mach->fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}

static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once we can actually configure subfunctions (kernel + hw support),
	 * we have to check if they were already set by user space, if so copy
	 * them from kvm->arch.
	 */
	return -ENXIO;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_set_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_get_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = sclp.has_cmma ? 0 : -ENXIO;
			break;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
			ret = 0;
			break;
		/* configuring subfunctions is not supported yet */
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_MIGRATION:
		ret = 0;
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_use_skey(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		r = get_guest_storage_key(current->mm, hva, &keys[i]);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);

	if (!r) {
		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
				 sizeof(uint8_t) * args->count);
		if (r)
			r = -EFAULT;
	}

	kvfree(keys);
	return r;
}

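/*
 * Illustrative sketch (not part of this file): reading the guest's
 * storage keys from user space, assuming buf holds npages bytes:
 *
 *	struct kvm_s390_skeys args = {
 *		.start_gfn     = 0,
 *		.count         = npages,
 *		.skeydata_addr = (__u64)(unsigned long)buf,
 *	};
 *	r = ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 *	if (r == KVM_S390_GET_SKEYS_NONE)
 *		... the guest does not use storage keys ...
 */
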
static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			break;
		}

		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);
out:
	kvfree(keys);
	return r;
}

/*
 * Base address and length must be sent at the start of each block, therefore
 * it's cheaper to send some clean data, as long as it's less than the size of
 * two longs.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
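/* i.e. 16 with 8-byte pointers: up to 16 clean values are sent inline */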
/* for consistency */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)

/*
 * This function searches for the next page with dirty CMMA attributes, and
 * saves the attributes in the buffer up to either the end of the buffer or
 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
 * no trailing clean bytes are saved.
 * In case no dirty bits were found, or if CMMA was not enabled or used, the
 * output buffer will indicate 0 as length.
 */
static int kvm_s390_get_cmma_bits(struct kvm *kvm,
				  struct kvm_s390_cmma_log *args)
{
	struct kvm_s390_migration_state *s = kvm->arch.migration_state;
	unsigned long bufsize, hva, pgstev, i, next, cur;
	int srcu_idx, peek, r = 0, rr;
	u8 *res;

	cur = args->start_gfn;
	i = next = pgstev = 0;

	if (unlikely(!kvm->arch.use_cmma))
		return -ENXIO;
	/* Invalid/unsupported flags were specified */
	if (args->flags & ~KVM_S390_CMMA_PEEK)
		return -EINVAL;
	/* Migration mode query, and we are not doing a migration */
	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
	if (!peek && !s)
		return -EINVAL;
	/* CMMA is disabled or was not used, or the buffer has length zero */
	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
	if (!bufsize || !kvm->mm->context.use_cmma) {
		memset(args, 0, sizeof(*args));
		return 0;
	}

	if (!peek) {
		/* We are not peeking, and there are no dirty pages */
		if (!atomic64_read(&s->dirty_pages)) {
			memset(args, 0, sizeof(*args));
			return 0;
		}
		cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
				    args->start_gfn);
		if (cur >= s->bitmap_size)	/* nothing found, loop back */
			cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
		if (cur >= s->bitmap_size) {	/* again! (very unlikely) */
			memset(args, 0, sizeof(*args));
			return 0;
		}
		next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
	}

	res = vmalloc(bufsize);
	if (!res)
		return -ENOMEM;

	args->start_gfn = cur;

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	while (i < bufsize) {
		hva = gfn_to_hva(kvm, cur);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}
		/* decrement only if we actually flipped the bit to 0 */
		if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
			atomic64_dec(&s->dirty_pages);
		r = get_pgste(kvm->mm, hva, &pgstev);
		if (r < 0)
			pgstev = 0;
		/* save the value */
		res[i++] = (pgstev >> 24) & 0x3;
		/*
		 * if the next bit is too far away, stop.
		 * if we reached the previous "next", find the next one
		 */
		if (!peek) {
			if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
				break;
			if (cur == next)
				next = find_next_bit(s->pgste_bitmap,
						     s->bitmap_size, cur + 1);
			/* reached the end of the bitmap or of the buffer, stop */
			if ((next >= s->bitmap_size) ||
			    (next >= args->start_gfn + bufsize))
				break;
		}
		cur++;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);
	args->count = i;
	args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;

	rr = copy_to_user((void __user *)args->values, res, args->count);
	if (rr)
		r = -EFAULT;

	vfree(res);
	return r;
}

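/*
 * Illustrative sketch (not part of this file): during migration, user
 * space can drain the dirty CMMA values in a loop roughly like:
 *
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count     = buflen,
 *		.values    = (__u64)(unsigned long)buf,
 *	};
 *	do {
 *		ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log);
 *		... transmit log.count values from log.start_gfn on ...
 *		log.start_gfn += log.count;
 *	} while (log.remaining);
 */
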
/*
 * This function sets the CMMA attributes for the given pages. If the input
 * buffer has zero length, no action is taken, otherwise the attributes are
 * set and the mm->context.use_cmma flag is set.
 */
static int kvm_s390_set_cmma_bits(struct kvm *kvm,
				  const struct kvm_s390_cmma_log *args)
{
	unsigned long hva, mask, pgstev, i;
	uint8_t *bits;
	int srcu_idx, r = 0;

	mask = args->mask;

	if (!kvm->arch.use_cmma)
		return -ENXIO;
	/* invalid/unsupported flags */
	if (args->flags != 0)
		return -EINVAL;
	/* Enforce sane limit on memory allocation */
	if (args->count > KVM_S390_CMMA_SIZE_MAX)
		return -EINVAL;
	/* Nothing to do */
	if (args->count == 0)
		return 0;

	bits = vmalloc(sizeof(*bits) * args->count);
	if (!bits)
		return -ENOMEM;

	r = copy_from_user(bits, (void __user *)args->values, args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		pgstev = bits[i];
		pgstev = pgstev << 24;
		mask &= _PGSTE_GPS_USAGE_MASK;
		set_pgste_bits(kvm->mm, hva, mask, pgstev);
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);

	if (!kvm->mm->context.use_cmma) {
		down_write(&kvm->mm->mmap_sem);
		kvm->mm->context.use_cmma = 1;
		up_write(&kvm->mm->mmap_sem);
	}
out:
	vfree(bits);
	return r;
}

long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_get_skeys(kvm, &args);
		break;
	}
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	case KVM_S390_GET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		r = kvm_s390_get_cmma_bits(kvm, &args);
		if (!r) {
			r = copy_to_user(argp, &args, sizeof(args));
			if (r)
				r = -EFAULT;
		}
		break;
	}
	case KVM_S390_SET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		r = kvm_s390_set_cmma_bits(kvm, &args);
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}

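/*
 * Query the AP configuration via PQAP(QCI). The instruction is coded
 * as ".long 0xb2af0000" so the file also assembles with toolchains
 * that do not know PQAP; the EX_TABLE entry catches a possible
 * program check, in which case the zeroed config buffer is reported
 * back unchanged.
 */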
static int kvm_s390_query_ap_config(u8 *config)
{
	u32 fcn_code = 0x04000000UL;
	u32 cc = 0;

	memset(config, 0, 128);
	asm volatile(
		"lgr 0,%1\n"
		"lgr 2,%2\n"
		".long 0xb2af0000\n"		/* PQAP(QCI) */
		"0: ipm %0\n"
		"srl %0,28\n"
		"1:\n"
		EX_TABLE(0b, 1b)
		: "+r" (cc)
		: "r" (fcn_code), "r" (config)
		: "cc", "0", "2", "memory"
	);

	return cc;
}

static int kvm_s390_apxa_installed(void)
{
	u8 config[128];
	int cc;

	if (test_facility(12)) {
		cc = kvm_s390_query_ap_config(config);

		if (cc)
			pr_err("PQAP(QCI) failed with cc=%d\n", cc);
		else
			return config[0] & 0x40;
	}

	return 0;
}

static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
	else
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}

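/*
 * Derive the initial guest CPUID from the host CPUID. The version
 * byte is forced to 0xff, presumably to mark the value as generated
 * by KVM rather than being a raw host version (an assumption; the
 * choice is not documented here).
 */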
static u64 kvm_s390_get_initial_cpuid(void)
{
	struct cpuid cpuid;

	get_cpu_id(&cpuid);
	cpuid.version = 0xff;
	return *((u64 *) &cpuid);
}

static void kvm_s390_crypto_init(struct kvm *kvm)
{
	if (!test_kvm_facility(kvm, 76))
		return;

	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
	kvm_s390_set_crycb_format(kvm);

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
}

static void sca_dispose(struct kvm *kvm)
{
	if (kvm->arch.use_esca)
		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
	else
		free_page((unsigned long)(kvm->arch.sca));
	kvm->arch.sca = NULL;
}

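/*
 * Create the architecture specific part of a VM: allocate and place
 * the SCA, register a debug feature, set up the facility mask/list
 * and the CPU model, initialize crypto and floating interrupts, and
 * create the gmap (guest address space) unless this is a ucontrol VM.
 */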
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	gfp_t alloc_flags = GFP_KERNEL;
	int i, rc;
	char debug_name[16];
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	rc = -ENOMEM;

	ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);

	kvm->arch.use_esca = 0; /* start with basic SCA */
	if (!sclp.has_64bscao)
		alloc_flags |= GFP_DMA;
	rwlock_init(&kvm->arch.sca_lock);
	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
	if (!kvm->arch.sca)
		goto out_err;
	spin_lock(&kvm_lock);
	/*
	 * Stagger the SCA from VM to VM within its page (it must not
	 * cross a page boundary), presumably so that the SCAs of
	 * different VMs end up on different cache lines (an
	 * assumption; the offset choice is not documented here).
	 */
	sca_offset += 16;
	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
		sca_offset = 0;
	kvm->arch.sca = (struct bsca_block *)
			((char *) kvm->arch.sca + sca_offset);
	spin_unlock(&kvm_lock);

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_err;

	kvm->arch.sie_page2 =
	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.sie_page2)
		goto out_err;

	/* Populate the facility mask initially. */
	memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
		if (i < kvm_s390_fac_list_mask_size())
			kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
		else
			kvm->arch.model.fac_mask[i] = 0UL;
	}

	/* Populate the facility list initially. */
	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
	memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);

	set_kvm_facility(kvm->arch.model.fac_mask, 74);
	set_kvm_facility(kvm->arch.model.fac_list, 74);

	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
	kvm->arch.model.ibc = sclp.ibc & 0x0fff;

	kvm_s390_crypto_init(kvm);

	mutex_init(&kvm->arch.float_int.ais_lock);
	kvm->arch.float_int.simm = 0;
	kvm->arch.float_int.nimm = 0;
	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "vm created with type %lu", type);

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
	} else {
		if (sclp.hamax == U64_MAX)
			kvm->arch.mem_limit = TASK_SIZE_MAX;
		else
			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
						    sclp.hamax + 1);
		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
		if (!kvm->arch.gmap)
			goto out_err;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.css_support = 0;
	kvm->arch.use_irqchip = 0;
	kvm->arch.epoch = 0;

	spin_lock_init(&kvm->arch.start_stop_lock);
	kvm_s390_vsie_init(kvm);
	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);

	return 0;
out_err:
	free_page((unsigned long)kvm->arch.sie_page2);
	debug_unregister(kvm->arch.dbf);
	sca_dispose(kvm);
	KVM_EVENT(3, "creation of vm failed: %d", rc);
	return rc;
}

1914 bool kvm_arch_has_vcpu_debugfs(void)
1915 {
1916 	return false;
1917 }
1918 
1919 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
1920 {
1921 	return 0;
1922 }
1923 
1924 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1925 {
1926 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1927 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1928 	kvm_s390_clear_local_irqs(vcpu);
1929 	kvm_clear_async_pf_completion_queue(vcpu);
1930 	if (!kvm_is_ucontrol(vcpu->kvm))
1931 		sca_del_vcpu(vcpu);
1932 
1933 	if (kvm_is_ucontrol(vcpu->kvm))
1934 		gmap_remove(vcpu->arch.gmap);
1935 
1936 	if (vcpu->kvm->arch.use_cmma)
1937 		kvm_s390_vcpu_unsetup_cmma(vcpu);
1938 	free_page((unsigned long)(vcpu->arch.sie_block));
1939 
1940 	kvm_vcpu_uninit(vcpu);
1941 	kmem_cache_free(kvm_vcpu_cache, vcpu);
1942 }
1943 
1944 static void kvm_free_vcpus(struct kvm *kvm)
1945 {
1946 	unsigned int i;
1947 	struct kvm_vcpu *vcpu;
1948 
1949 	kvm_for_each_vcpu(i, vcpu, kvm)
1950 		kvm_arch_vcpu_destroy(vcpu);
1951 
1952 	mutex_lock(&kvm->lock);
1953 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1954 		kvm->vcpus[i] = NULL;
1955 
1956 	atomic_set(&kvm->online_vcpus, 0);
1957 	mutex_unlock(&kvm->lock);
1958 }
1959 
1960 void kvm_arch_destroy_vm(struct kvm *kvm)
1961 {
1962 	kvm_free_vcpus(kvm);
1963 	sca_dispose(kvm);
1964 	debug_unregister(kvm->arch.dbf);
1965 	free_page((unsigned long)kvm->arch.sie_page2);
1966 	if (!kvm_is_ucontrol(kvm))
1967 		gmap_remove(kvm->arch.gmap);
1968 	kvm_s390_destroy_adapters(kvm);
1969 	kvm_s390_clear_float_irqs(kvm);
1970 	kvm_s390_vsie_destroy(kvm);
1971 	if (kvm->arch.migration_state) {
1972 		vfree(kvm->arch.migration_state->pgste_bitmap);
1973 		kfree(kvm->arch.migration_state);
1974 	}
1975 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1976 }
1977 
1978 /* Section: vcpu related */
1979 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1980 {
1981 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
1982 	if (!vcpu->arch.gmap)
1983 		return -ENOMEM;
1984 	vcpu->arch.gmap->private = vcpu->kvm;
1985 
1986 	return 0;
1987 }
1988 
1989 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1990 {
1991 	if (!kvm_s390_use_sca_entries())
1992 		return;
1993 	read_lock(&vcpu->kvm->arch.sca_lock);
1994 	if (vcpu->kvm->arch.use_esca) {
1995 		struct esca_block *sca = vcpu->kvm->arch.sca;
1996 
1997 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1998 		sca->cpu[vcpu->vcpu_id].sda = 0;
1999 	} else {
2000 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2001 
2002 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2003 		sca->cpu[vcpu->vcpu_id].sda = 0;
2004 	}
2005 	read_unlock(&vcpu->kvm->arch.sca_lock);
2006 }
2007 
2008 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2009 {
2010 	if (!kvm_s390_use_sca_entries()) {
2011 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2012 
2013 		/* we still need the basic sca for the ipte control */
2014 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2015 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2016 	}
2017 	read_lock(&vcpu->kvm->arch.sca_lock);
2018 	if (vcpu->kvm->arch.use_esca) {
2019 		struct esca_block *sca = vcpu->kvm->arch.sca;
2020 
2021 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2022 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2023 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2024 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2025 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2026 	} else {
2027 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2028 
2029 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2030 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2031 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2032 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2033 	}
2034 	read_unlock(&vcpu->kvm->arch.sca_lock);
2035 }
2036 
2037 /* Basic SCA to Extended SCA data copy routines */
2038 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2039 {
2040 	d->sda = s->sda;
2041 	d->sigp_ctrl.c = s->sigp_ctrl.c;
2042 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2043 }
2044 
2045 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2046 {
2047 	int i;
2048 
2049 	d->ipte_control = s->ipte_control;
2050 	d->mcn[0] = s->mcn;
2051 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2052 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2053 }
2054 
2055 static int sca_switch_to_extended(struct kvm *kvm)
2056 {
2057 	struct bsca_block *old_sca = kvm->arch.sca;
2058 	struct esca_block *new_sca;
2059 	struct kvm_vcpu *vcpu;
2060 	unsigned int vcpu_idx;
2061 	u32 scaol, scaoh;
2062 
2063 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2064 	if (!new_sca)
2065 		return -ENOMEM;
2066 
2067 	scaoh = (u32)((u64)(new_sca) >> 32);
2068 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
2069 
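	/*
	 * No VCPU may be in SIE while the SCA origin and the ECB2_ESCA bit
	 * are rewritten, so block all VCPUs for the duration of the swap.
	 */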
2070 	kvm_s390_vcpu_block_all(kvm);
2071 	write_lock(&kvm->arch.sca_lock);
2072 
2073 	sca_copy_b_to_e(new_sca, old_sca);
2074 
2075 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2076 		vcpu->arch.sie_block->scaoh = scaoh;
2077 		vcpu->arch.sie_block->scaol = scaol;
2078 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2079 	}
2080 	kvm->arch.sca = new_sca;
2081 	kvm->arch.use_esca = 1;
2082 
2083 	write_unlock(&kvm->arch.sca_lock);
2084 	kvm_s390_vcpu_unblock_all(kvm);
2085 
2086 	free_page((unsigned long)old_sca);
2087 
2088 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2089 		 old_sca, kvm->arch.sca);
2090 	return 0;
2091 }
2092 
2093 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2094 {
2095 	int rc;
2096 
2097 	if (!kvm_s390_use_sca_entries()) {
2098 		if (id < KVM_MAX_VCPUS)
2099 			return true;
2100 		return false;
2101 	}
2102 	if (id < KVM_S390_BSCA_CPU_SLOTS)
2103 		return true;
2104 	if (!sclp.has_esca || !sclp.has_64bscao)
2105 		return false;
2106 
2107 	mutex_lock(&kvm->lock);
2108 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2109 	mutex_unlock(&kvm->lock);
2110 
2111 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2112 }
2113 
2114 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2115 {
2116 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2117 	kvm_clear_async_pf_completion_queue(vcpu);
2118 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2119 				    KVM_SYNC_GPRS |
2120 				    KVM_SYNC_ACRS |
2121 				    KVM_SYNC_CRS |
2122 				    KVM_SYNC_ARCH0 |
2123 				    KVM_SYNC_PFAULT;
2124 	kvm_s390_set_prefix(vcpu, 0);
2125 	if (test_kvm_facility(vcpu->kvm, 64))
2126 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2127 	if (test_kvm_facility(vcpu->kvm, 133))
2128 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2129 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
2130 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2131 	 */
2132 	if (MACHINE_HAS_VX)
2133 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2134 	else
2135 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2136 
2137 	if (kvm_is_ucontrol(vcpu->kvm))
2138 		return __kvm_ucontrol_vcpu_init(vcpu);
2139 
2140 	return 0;
2141 }
2142 
2143 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2144 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2145 {
2146 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2147 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2148 	vcpu->arch.cputm_start = get_tod_clock_fast();
2149 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2150 }
2151 
2152 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2153 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2154 {
2155 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2156 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2157 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2158 	vcpu->arch.cputm_start = 0;
2159 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2160 }
2161 
2162 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2163 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2164 {
2165 	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2166 	vcpu->arch.cputm_enabled = true;
2167 	__start_cpu_timer_accounting(vcpu);
2168 }
2169 
2170 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2171 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2172 {
2173 	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2174 	__stop_cpu_timer_accounting(vcpu);
2175 	vcpu->arch.cputm_enabled = false;
2176 }
2177 
2178 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2179 {
2180 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2181 	__enable_cpu_timer_accounting(vcpu);
2182 	preempt_enable();
2183 }
2184 
2185 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2186 {
2187 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2188 	__disable_cpu_timer_accounting(vcpu);
2189 	preempt_enable();
2190 }
2191 
2192 /* set the cpu timer - may only be called from the VCPU thread itself */
2193 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2194 {
2195 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2196 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2197 	if (vcpu->arch.cputm_enabled)
2198 		vcpu->arch.cputm_start = get_tod_clock_fast();
2199 	vcpu->arch.sie_block->cputm = cputm;
2200 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2201 	preempt_enable();
2202 }
2203 
2204 /* update and get the cpu timer - can also be called from other VCPU threads */
2205 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2206 {
2207 	unsigned int seq;
2208 	__u64 value;
2209 
2210 	if (unlikely(!vcpu->arch.cputm_enabled))
2211 		return vcpu->arch.sie_block->cputm;
2212 
2213 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2214 	do {
2215 		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2216 		/*
2217 		 * If the writer would ever execute a read in the critical
2218 		 * section, e.g. in irq context, we have a deadlock.
2219 		 */
2220 		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2221 		value = vcpu->arch.sie_block->cputm;
2222 		/* if cputm_start is 0, accounting is being started/stopped */
2223 		if (likely(vcpu->arch.cputm_start))
2224 			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2225 	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2226 	preempt_enable();
2227 	return value;
2228 }
2229 
2230 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2231 {
2232 
2233 	gmap_enable(vcpu->arch.enabled_gmap);
2234 	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2235 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2236 		__start_cpu_timer_accounting(vcpu);
2237 	vcpu->cpu = cpu;
2238 }
2239 
2240 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2241 {
2242 	vcpu->cpu = -1;
2243 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2244 		__stop_cpu_timer_accounting(vcpu);
2245 	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2246 	vcpu->arch.enabled_gmap = gmap_get_enabled();
2247 	gmap_disable(vcpu->arch.enabled_gmap);
2248 
2249 }
2250 
2251 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2252 {
2253 	/* this equals the initial cpu reset in the PoP, but we don't switch to ESA */
2254 	vcpu->arch.sie_block->gpsw.mask = 0UL;
2255 	vcpu->arch.sie_block->gpsw.addr = 0UL;
2256 	kvm_s390_set_prefix(vcpu, 0);
2257 	kvm_s390_set_cpu_timer(vcpu, 0);
2258 	vcpu->arch.sie_block->ckc       = 0UL;
2259 	vcpu->arch.sie_block->todpr     = 0;
2260 	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2261 	vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
2262 	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2263 	/* make sure the new fpc will be lazily loaded */
2264 	save_fpu_regs();
2265 	current->thread.fpu.fpc = 0;
2266 	vcpu->arch.sie_block->gbea = 1;
2267 	vcpu->arch.sie_block->pp = 0;
2268 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2269 	kvm_clear_async_pf_completion_queue(vcpu);
2270 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2271 		kvm_s390_vcpu_stop(vcpu);
2272 	kvm_s390_clear_local_irqs(vcpu);
2273 }
2274 
2275 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2276 {
2277 	mutex_lock(&vcpu->kvm->lock);
2278 	preempt_disable();
2279 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2280 	preempt_enable();
2281 	mutex_unlock(&vcpu->kvm->lock);
2282 	if (!kvm_is_ucontrol(vcpu->kvm)) {
2283 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2284 		sca_add_vcpu(vcpu);
2285 	}
2286 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2287 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2288 	/* make vcpu_load load the right gmap on the first trigger */
2289 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2290 }
2291 
2292 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2293 {
2294 	if (!test_kvm_facility(vcpu->kvm, 76))
2295 		return;
2296 
2297 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2298 
2299 	if (vcpu->kvm->arch.crypto.aes_kw)
2300 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2301 	if (vcpu->kvm->arch.crypto.dea_kw)
2302 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2303 
2304 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2305 }
2306 
2307 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2308 {
2309 	free_page(vcpu->arch.sie_block->cbrlo);
2310 	vcpu->arch.sie_block->cbrlo = 0;
2311 }
2312 
2313 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2314 {
2315 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2316 	if (!vcpu->arch.sie_block->cbrlo)
2317 		return -ENOMEM;
2318 
2319 	vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
2320 	return 0;
2321 }
2322 
2323 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2324 {
2325 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2326 
2327 	vcpu->arch.sie_block->ibc = model->ibc;
2328 	if (test_kvm_facility(vcpu->kvm, 7))
2329 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2330 }
2331 
2332 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2333 {
2334 	int rc = 0;
2335 
2336 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2337 						    CPUSTAT_SM |
2338 						    CPUSTAT_STOPPED);
2339 
2340 	if (test_kvm_facility(vcpu->kvm, 78))
2341 		atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
2342 	else if (test_kvm_facility(vcpu->kvm, 8))
2343 		atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
2344 
2345 	kvm_s390_vcpu_setup_model(vcpu);
2346 
2347 	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2348 	if (MACHINE_HAS_ESOP)
2349 		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2350 	if (test_kvm_facility(vcpu->kvm, 9))
2351 		vcpu->arch.sie_block->ecb |= ECB_SRSI;
2352 	if (test_kvm_facility(vcpu->kvm, 73))
2353 		vcpu->arch.sie_block->ecb |= ECB_TE;
2354 
2355 	if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
2356 		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2357 	if (test_kvm_facility(vcpu->kvm, 130))
2358 		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2359 	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2360 	if (sclp.has_cei)
2361 		vcpu->arch.sie_block->eca |= ECA_CEI;
2362 	if (sclp.has_ib)
2363 		vcpu->arch.sie_block->eca |= ECA_IB;
2364 	if (sclp.has_siif)
2365 		vcpu->arch.sie_block->eca |= ECA_SII;
2366 	if (sclp.has_sigpif)
2367 		vcpu->arch.sie_block->eca |= ECA_SIGPI;
2368 	if (test_kvm_facility(vcpu->kvm, 129)) {
2369 		vcpu->arch.sie_block->eca |= ECA_VX;
2370 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2371 	}
2372 	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2373 					| SDNXC;
2374 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2375 
2376 	if (sclp.has_kss)
2377 		atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
2378 	else
2379 		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2380 
2381 	if (vcpu->kvm->arch.use_cmma) {
2382 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
2383 		if (rc)
2384 			return rc;
2385 	}
2386 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2387 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2388 
2389 	kvm_s390_vcpu_crypto_setup(vcpu);
2390 
2391 	return rc;
2392 }
2393 
2394 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2395 				      unsigned int id)
2396 {
2397 	struct kvm_vcpu *vcpu;
2398 	struct sie_page *sie_page;
2399 	int rc = -EINVAL;
2400 
2401 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2402 		goto out;
2403 
2404 	rc = -ENOMEM;
2405 
2406 	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2407 	if (!vcpu)
2408 		goto out;
2409 
2410 	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2411 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2412 	if (!sie_page)
2413 		goto out_free_cpu;
2414 
2415 	vcpu->arch.sie_block = &sie_page->sie_block;
2416 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2417 
2418 	/* the real guest size will always be smaller than msl */
2419 	vcpu->arch.sie_block->mso = 0;
2420 	vcpu->arch.sie_block->msl = sclp.hamax;
2421 
2422 	vcpu->arch.sie_block->icpua = id;
2423 	spin_lock_init(&vcpu->arch.local_int.lock);
2424 	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2425 	vcpu->arch.local_int.wq = &vcpu->wq;
2426 	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2427 	seqcount_init(&vcpu->arch.cputm_seqcount);
2428 
2429 	rc = kvm_vcpu_init(vcpu, kvm, id);
2430 	if (rc)
2431 		goto out_free_sie_block;
2432 	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2433 		 vcpu->arch.sie_block);
2434 	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2435 
2436 	return vcpu;
2437 out_free_sie_block:
2438 	free_page((unsigned long)(vcpu->arch.sie_block));
2439 out_free_cpu:
2440 	kmem_cache_free(kvm_vcpu_cache, vcpu);
2441 out:
2442 	return ERR_PTR(rc);
2443 }
2444 
2445 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2446 {
2447 	return kvm_s390_vcpu_has_irq(vcpu, 0);
2448 }
2449 
2450 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2451 {
2452 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2453 	exit_sie(vcpu);
2454 }
2455 
2456 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2457 {
2458 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2459 }
2460 
2461 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2462 {
2463 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2464 	exit_sie(vcpu);
2465 }
2466 
2467 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2468 {
2469 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2470 }
2471 
2472 /*
2473  * Kick a guest cpu out of SIE and wait until SIE is not running.
2474  * If the CPU is not running (e.g. waiting as idle) it returns immediately.
2475  */
2476 void exit_sie(struct kvm_vcpu *vcpu)
2477 {
2478 	atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2479 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2480 		cpu_relax();
2481 }
2482 
2483 /* Kick a guest cpu out of SIE to process a request synchronously */
2484 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2485 {
2486 	kvm_make_request(req, vcpu);
2487 	kvm_s390_vcpu_request(vcpu);
2488 }
2489 
2490 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2491 			      unsigned long end)
2492 {
2493 	struct kvm *kvm = gmap->private;
2494 	struct kvm_vcpu *vcpu;
2495 	unsigned long prefix;
2496 	int i;
2497 
2498 	if (gmap_is_shadow(gmap))
2499 		return;
2500 	if (start >= 1UL << 31)
2501 		/* We are only interested in prefix pages; prefixes are below 2 GB */
2502 		return;
2503 	kvm_for_each_vcpu(i, vcpu, kvm) {
2504 		/* match against both prefix pages */
2505 		prefix = kvm_s390_get_prefix(vcpu);
2506 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2507 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2508 				   start, end);
2509 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2510 		}
2511 	}
2512 }
2513 
2514 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2515 {
2516 	/* kvm common code refers to this, but never calls it */
2517 	BUG();
2518 	return 0;
2519 }
2520 
2521 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2522 					   struct kvm_one_reg *reg)
2523 {
2524 	int r = -EINVAL;
2525 
2526 	switch (reg->id) {
2527 	case KVM_REG_S390_TODPR:
2528 		r = put_user(vcpu->arch.sie_block->todpr,
2529 			     (u32 __user *)reg->addr);
2530 		break;
2531 	case KVM_REG_S390_EPOCHDIFF:
2532 		r = put_user(vcpu->arch.sie_block->epoch,
2533 			     (u64 __user *)reg->addr);
2534 		break;
2535 	case KVM_REG_S390_CPU_TIMER:
2536 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
2537 			     (u64 __user *)reg->addr);
2538 		break;
2539 	case KVM_REG_S390_CLOCK_COMP:
2540 		r = put_user(vcpu->arch.sie_block->ckc,
2541 			     (u64 __user *)reg->addr);
2542 		break;
2543 	case KVM_REG_S390_PFTOKEN:
2544 		r = put_user(vcpu->arch.pfault_token,
2545 			     (u64 __user *)reg->addr);
2546 		break;
2547 	case KVM_REG_S390_PFCOMPARE:
2548 		r = put_user(vcpu->arch.pfault_compare,
2549 			     (u64 __user *)reg->addr);
2550 		break;
2551 	case KVM_REG_S390_PFSELECT:
2552 		r = put_user(vcpu->arch.pfault_select,
2553 			     (u64 __user *)reg->addr);
2554 		break;
2555 	case KVM_REG_S390_PP:
2556 		r = put_user(vcpu->arch.sie_block->pp,
2557 			     (u64 __user *)reg->addr);
2558 		break;
2559 	case KVM_REG_S390_GBEA:
2560 		r = put_user(vcpu->arch.sie_block->gbea,
2561 			     (u64 __user *)reg->addr);
2562 		break;
2563 	default:
2564 		break;
2565 	}
2566 
2567 	return r;
2568 }
2569 
2570 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2571 					   struct kvm_one_reg *reg)
2572 {
2573 	int r = -EINVAL;
2574 	__u64 val;
2575 
2576 	switch (reg->id) {
2577 	case KVM_REG_S390_TODPR:
2578 		r = get_user(vcpu->arch.sie_block->todpr,
2579 			     (u32 __user *)reg->addr);
2580 		break;
2581 	case KVM_REG_S390_EPOCHDIFF:
2582 		r = get_user(vcpu->arch.sie_block->epoch,
2583 			     (u64 __user *)reg->addr);
2584 		break;
2585 	case KVM_REG_S390_CPU_TIMER:
2586 		r = get_user(val, (u64 __user *)reg->addr);
2587 		if (!r)
2588 			kvm_s390_set_cpu_timer(vcpu, val);
2589 		break;
2590 	case KVM_REG_S390_CLOCK_COMP:
2591 		r = get_user(vcpu->arch.sie_block->ckc,
2592 			     (u64 __user *)reg->addr);
2593 		break;
2594 	case KVM_REG_S390_PFTOKEN:
2595 		r = get_user(vcpu->arch.pfault_token,
2596 			     (u64 __user *)reg->addr);
2597 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2598 			kvm_clear_async_pf_completion_queue(vcpu);
2599 		break;
2600 	case KVM_REG_S390_PFCOMPARE:
2601 		r = get_user(vcpu->arch.pfault_compare,
2602 			     (u64 __user *)reg->addr);
2603 		break;
2604 	case KVM_REG_S390_PFSELECT:
2605 		r = get_user(vcpu->arch.pfault_select,
2606 			     (u64 __user *)reg->addr);
2607 		break;
2608 	case KVM_REG_S390_PP:
2609 		r = get_user(vcpu->arch.sie_block->pp,
2610 			     (u64 __user *)reg->addr);
2611 		break;
2612 	case KVM_REG_S390_GBEA:
2613 		r = get_user(vcpu->arch.sie_block->gbea,
2614 			     (u64 __user *)reg->addr);
2615 		break;
2616 	default:
2617 		break;
2618 	}
2619 
2620 	return r;
2621 }
2622 
2623 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2624 {
2625 	kvm_s390_vcpu_initial_reset(vcpu);
2626 	return 0;
2627 }
2628 
2629 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2630 {
2631 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2632 	return 0;
2633 }
2634 
2635 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2636 {
2637 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2638 	return 0;
2639 }
2640 
2641 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2642 				  struct kvm_sregs *sregs)
2643 {
2644 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2645 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2646 	return 0;
2647 }
2648 
2649 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2650 				  struct kvm_sregs *sregs)
2651 {
2652 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2653 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2654 	return 0;
2655 }
2656 
2657 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2658 {
2659 	if (test_fp_ctl(fpu->fpc))
2660 		return -EINVAL;
2661 	vcpu->run->s.regs.fpc = fpu->fpc;
2662 	if (MACHINE_HAS_VX)
2663 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2664 				 (freg_t *) fpu->fprs);
2665 	else
2666 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2667 	return 0;
2668 }
2669 
2670 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2671 {
2672 	/* make sure we have the latest values */
2673 	save_fpu_regs();
2674 	if (MACHINE_HAS_VX)
2675 		convert_vx_to_fp((freg_t *) fpu->fprs,
2676 				 (__vector128 *) vcpu->run->s.regs.vrs);
2677 	else
2678 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2679 	fpu->fpc = vcpu->run->s.regs.fpc;
2680 	return 0;
2681 }
2682 
2683 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2684 {
2685 	int rc = 0;
2686 
2687 	if (!is_vcpu_stopped(vcpu))
2688 		rc = -EBUSY;
2689 	else {
2690 		vcpu->run->psw_mask = psw.mask;
2691 		vcpu->run->psw_addr = psw.addr;
2692 	}
2693 	return rc;
2694 }
2695 
2696 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2697 				  struct kvm_translation *tr)
2698 {
2699 	return -EINVAL; /* not implemented yet */
2700 }
2701 
2702 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2703 			      KVM_GUESTDBG_USE_HW_BP | \
2704 			      KVM_GUESTDBG_ENABLE)
2705 
2706 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2707 					struct kvm_guest_debug *dbg)
2708 {
2709 	int rc = 0;
2710 
2711 	vcpu->guest_debug = 0;
2712 	kvm_s390_clear_bp_data(vcpu);
2713 
2714 	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2715 		return -EINVAL;
2716 	if (!sclp.has_gpere)
2717 		return -EINVAL;
2718 
2719 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
2720 		vcpu->guest_debug = dbg->control;
2721 		/* enforce guest PER */
2722 		atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2723 
2724 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2725 			rc = kvm_s390_import_bp_data(vcpu, dbg);
2726 	} else {
2727 		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2728 		vcpu->arch.guestdbg.last_bp = 0;
2729 	}
2730 
2731 	if (rc) {
2732 		vcpu->guest_debug = 0;
2733 		kvm_s390_clear_bp_data(vcpu);
2734 		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2735 	}
2736 
2737 	return rc;
2738 }
2739 
2740 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2741 				    struct kvm_mp_state *mp_state)
2742 {
2743 	/* CHECK_STOP and LOAD are not supported yet */
2744 	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2745 				       KVM_MP_STATE_OPERATING;
2746 }
2747 
2748 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2749 				    struct kvm_mp_state *mp_state)
2750 {
2751 	int rc = 0;
2752 
2753 	/* user space knows about this interface - let it control the state */
2754 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2755 
2756 	switch (mp_state->mp_state) {
2757 	case KVM_MP_STATE_STOPPED:
2758 		kvm_s390_vcpu_stop(vcpu);
2759 		break;
2760 	case KVM_MP_STATE_OPERATING:
2761 		kvm_s390_vcpu_start(vcpu);
2762 		break;
2763 	case KVM_MP_STATE_LOAD:
2764 	case KVM_MP_STATE_CHECK_STOP:
2765 		/* fall through - CHECK_STOP and LOAD are not supported yet */
2766 	default:
2767 		rc = -ENXIO;
2768 	}
2769 
2770 	return rc;
2771 }
2772 
2773 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2774 {
2775 	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2776 }
2777 
2778 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2779 {
2780 retry:
2781 	kvm_s390_vcpu_request_handled(vcpu);
2782 	if (!kvm_request_pending(vcpu))
2783 		return 0;
2784 	/*
2785 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2786 	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2787 	 * This ensures that the ipte instruction for this request has
2788 	 * already finished. We might race against a second unmapper that
2789 	 * wants to set the blocking bit. Lets just retry the request loop.
2790 	 */
2791 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2792 		int rc;
2793 		rc = gmap_mprotect_notify(vcpu->arch.gmap,
2794 					  kvm_s390_get_prefix(vcpu),
2795 					  PAGE_SIZE * 2, PROT_WRITE);
2796 		if (rc) {
2797 			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2798 			return rc;
2799 		}
2800 		goto retry;
2801 	}
2802 
2803 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2804 		vcpu->arch.sie_block->ihcpu = 0xffff;
2805 		goto retry;
2806 	}
2807 
2808 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2809 		if (!ibs_enabled(vcpu)) {
2810 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2811 			atomic_or(CPUSTAT_IBS,
2812 					&vcpu->arch.sie_block->cpuflags);
2813 		}
2814 		goto retry;
2815 	}
2816 
2817 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2818 		if (ibs_enabled(vcpu)) {
2819 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2820 			atomic_andnot(CPUSTAT_IBS,
2821 					  &vcpu->arch.sie_block->cpuflags);
2822 		}
2823 		goto retry;
2824 	}
2825 
2826 	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2827 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2828 		goto retry;
2829 	}
2830 
2831 	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
2832 		/*
2833 		 * Disable CMMA virtualization; we will emulate the ESSA
2834 		 * instruction manually, in order to provide additional
2835 		 * functionalities needed for live migration.
2836 		 */
2837 		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
2838 		goto retry;
2839 	}
2840 
2841 	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
2842 		/*
2843 		 * Re-enable CMMA virtualization if CMMA is available and
2844 		 * was used.
2845 		 */
2846 		if ((vcpu->kvm->arch.use_cmma) &&
2847 		    (vcpu->kvm->mm->context.use_cmma))
2848 			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
2849 		goto retry;
2850 	}
2851 
2852 	/* nothing to do, just clear the request */
2853 	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
2854 
2855 	return 0;
2856 }
2857 
2858 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2859 {
2860 	struct kvm_vcpu *vcpu;
2861 	int i;
2862 
2863 	mutex_lock(&kvm->lock);
2864 	preempt_disable();
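	/*
	 * The epoch is the guest-to-host TOD delta; SIE applies it whenever
	 * the guest reads the clock.
	 */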
2865 	kvm->arch.epoch = tod - get_tod_clock();
2866 	kvm_s390_vcpu_block_all(kvm);
2867 	kvm_for_each_vcpu(i, vcpu, kvm)
2868 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2869 	kvm_s390_vcpu_unblock_all(kvm);
2870 	preempt_enable();
2871 	mutex_unlock(&kvm->lock);
2872 }
2873 
2874 /**
2875  * kvm_arch_fault_in_page - fault-in guest page if necessary
2876  * @vcpu: The corresponding virtual cpu
2877  * @gpa: Guest physical address
2878  * @writable: Whether the page should be writable or not
2879  *
2880  * Make sure that a guest page has been faulted-in on the host.
2881  *
2882  * Return: Zero on success, negative error code otherwise.
2883  */
2884 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2885 {
2886 	return gmap_fault(vcpu->arch.gmap, gpa,
2887 			  writable ? FAULT_FLAG_WRITE : 0);
2888 }
2889 
2890 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2891 				      unsigned long token)
2892 {
2893 	struct kvm_s390_interrupt inti;
2894 	struct kvm_s390_irq irq;
2895 
2896 	if (start_token) {
2897 		irq.u.ext.ext_params2 = token;
2898 		irq.type = KVM_S390_INT_PFAULT_INIT;
2899 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2900 	} else {
2901 		inti.type = KVM_S390_INT_PFAULT_DONE;
2902 		inti.parm64 = token;
2903 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2904 	}
2905 }
2906 
2907 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2908 				     struct kvm_async_pf *work)
2909 {
2910 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2911 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2912 }
2913 
2914 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2915 				 struct kvm_async_pf *work)
2916 {
2917 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2918 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2919 }
2920 
2921 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
2922 			       struct kvm_async_pf *work)
2923 {
2924 	/* s390 will always inject the page directly */
2925 }
2926 
2927 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2928 {
2929 	/*
2930 	 * s390 will always inject the page directly,
2931 	 * but we still want check_async_completion to cleanup
2932 	 */
2933 	return true;
2934 }
2935 
2936 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2937 {
2938 	hva_t hva;
2939 	struct kvm_arch_async_pf arch;
2940 	int rc;
2941 
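	/*
	 * A pseudo-page-fault token is only injected if the guest opted in:
	 * the token must be valid, the current PSW must match the guest's
	 * select/compare mask, external interrupts (including the relevant
	 * CR0 subclass) must be enabled, and no other interrupt may be
	 * pending.
	 */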
2942 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2943 		return 0;
2944 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2945 	    vcpu->arch.pfault_compare)
2946 		return 0;
2947 	if (psw_extint_disabled(vcpu))
2948 		return 0;
2949 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
2950 		return 0;
2951 	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2952 		return 0;
2953 	if (!vcpu->arch.gmap->pfault_enabled)
2954 		return 0;
2955 
2956 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2957 	hva += current->thread.gmap_addr & ~PAGE_MASK;
2958 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2959 		return 0;
2960 
2961 	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2962 	return rc;
2963 }
2964 
2965 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2966 {
2967 	int rc, cpuflags;
2968 
2969 	/*
2970 	 * On s390, notifications for arriving pages are delivered directly
2971 	 * to the guest, but the housekeeping for completed pfaults is
2972 	 * handled outside the worker.
2973 	 */
2974 	kvm_check_async_pf_completion(vcpu);
2975 
2976 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2977 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2978 
2979 	if (need_resched())
2980 		schedule();
2981 
2982 	if (test_cpu_flag(CIF_MCCK_PENDING))
2983 		s390_handle_mcck();
2984 
2985 	if (!kvm_is_ucontrol(vcpu->kvm)) {
2986 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
2987 		if (rc)
2988 			return rc;
2989 	}
2990 
2991 	rc = kvm_s390_handle_requests(vcpu);
2992 	if (rc)
2993 		return rc;
2994 
2995 	if (guestdbg_enabled(vcpu)) {
2996 		kvm_s390_backup_guest_per_regs(vcpu);
2997 		kvm_s390_patch_guest_per_regs(vcpu);
2998 	}
2999 
3000 	vcpu->arch.sie_block->icptcode = 0;
3001 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3002 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3003 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
3004 
3005 	return 0;
3006 }
3007 
3008 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3009 {
3010 	struct kvm_s390_pgm_info pgm_info = {
3011 		.code = PGM_ADDRESSING,
3012 	};
3013 	u8 opcode, ilen;
3014 	int rc;
3015 
3016 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3017 	trace_kvm_s390_sie_fault(vcpu);
3018 
3019 	/*
3020 	 * We want to inject an addressing exception, which is defined as a
3021 	 * suppressing or terminating exception. However, since we came here
3022 	 * by a DAT access exception, the PSW still points to the faulting
3023 	 * instruction since DAT exceptions are nullifying. So we've got
3024 	 * to look up the current opcode to get the length of the instruction
3025 	 * to be able to forward the PSW.
3026 	 */
3027 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3028 	ilen = insn_length(opcode);
3029 	if (rc < 0) {
3030 		return rc;
3031 	} else if (rc) {
3032 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
3033 		 * Forward by arbitrary ilc, injection will take care of
3034 		 * nullification if necessary.
3035 		 */
3036 		pgm_info = vcpu->arch.pgm;
3037 		ilen = 4;
3038 	}
3039 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3040 	kvm_s390_forward_psw(vcpu, ilen);
3041 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3042 }
3043 
3044 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3045 {
3046 	struct mcck_volatile_info *mcck_info;
3047 	struct sie_page *sie_page;
3048 
3049 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3050 		   vcpu->arch.sie_block->icptcode);
3051 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3052 
3053 	if (guestdbg_enabled(vcpu))
3054 		kvm_s390_restore_guest_per_regs(vcpu);
3055 
3056 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3057 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3058 
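	/*
	 * sie64a() returning -EINTR means a machine check interrupted the
	 * guest; the low-level handler saved the volatile mcck data into
	 * the sie page, and it is forwarded to the guest here.
	 */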
3059 	if (exit_reason == -EINTR) {
3060 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
3061 		sie_page = container_of(vcpu->arch.sie_block,
3062 					struct sie_page, sie_block);
3063 		mcck_info = &sie_page->mcck_info;
3064 		kvm_s390_reinject_machine_check(vcpu, mcck_info);
3065 		return 0;
3066 	}
3067 
3068 	if (vcpu->arch.sie_block->icptcode > 0) {
3069 		int rc = kvm_handle_sie_intercept(vcpu);
3070 
3071 		if (rc != -EOPNOTSUPP)
3072 			return rc;
3073 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3074 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3075 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3076 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3077 		return -EREMOTE;
3078 	} else if (exit_reason != -EFAULT) {
3079 		vcpu->stat.exit_null++;
3080 		return 0;
3081 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
3082 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3083 		vcpu->run->s390_ucontrol.trans_exc_code =
3084 						current->thread.gmap_addr;
3085 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
3086 		return -EREMOTE;
3087 	} else if (current->thread.gmap_pfault) {
3088 		trace_kvm_s390_major_guest_pfault(vcpu);
3089 		current->thread.gmap_pfault = 0;
3090 		if (kvm_arch_setup_async_pf(vcpu))
3091 			return 0;
3092 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3093 	}
3094 	return vcpu_post_run_fault_in_sie(vcpu);
3095 }
3096 
3097 static int __vcpu_run(struct kvm_vcpu *vcpu)
3098 {
3099 	int rc, exit_reason;
3100 
3101 	/*
3102 	 * We try to hold kvm->srcu during most of vcpu_run (except when
3103 	 * running the guest), so that memslots (and other stuff) are protected
3104 	 */
3105 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3106 
3107 	do {
3108 		rc = vcpu_pre_run(vcpu);
3109 		if (rc)
3110 			break;
3111 
3112 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3113 		/*
3114 		 * As PF_VCPU will be used in fault handler, between
3115 		 * guest_enter and guest_exit should be no uaccess.
3116 		 */
3117 		local_irq_disable();
3118 		guest_enter_irqoff();
3119 		__disable_cpu_timer_accounting(vcpu);
3120 		local_irq_enable();
3121 		exit_reason = sie64a(vcpu->arch.sie_block,
3122 				     vcpu->run->s.regs.gprs);
3123 		local_irq_disable();
3124 		__enable_cpu_timer_accounting(vcpu);
3125 		guest_exit_irqoff();
3126 		local_irq_enable();
3127 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3128 
3129 		rc = vcpu_post_run(vcpu, exit_reason);
3130 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3131 
3132 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3133 	return rc;
3134 }
3135 
3136 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3137 {
3138 	struct runtime_instr_cb *riccb;
3139 	struct gs_cb *gscb;
3140 
3141 	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3142 	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3143 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3144 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3145 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3146 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3147 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3148 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3149 		/* some control register changes require a tlb flush */
3150 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3151 	}
3152 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3153 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3154 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3155 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3156 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3157 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3158 	}
3159 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3160 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3161 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3162 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3163 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3164 			kvm_clear_async_pf_completion_queue(vcpu);
3165 	}
3166 	/*
3167 	 * If userspace sets the riccb (e.g. after migration) to a valid state,
3168 	 * we should enable RI here instead of doing the lazy enablement.
3169 	 */
3170 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3171 	    test_kvm_facility(vcpu->kvm, 64) &&
3172 	    riccb->valid &&
3173 	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3174 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3175 		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3176 	}
3177 	/*
3178 	 * If userspace sets the gscb (e.g. after migration) to non-zero,
3179 	 * we should enable GS here instead of doing the lazy enablement.
3180 	 */
3181 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3182 	    test_kvm_facility(vcpu->kvm, 133) &&
3183 	    gscb->gssm &&
3184 	    !vcpu->arch.gs_enabled) {
3185 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3186 		vcpu->arch.sie_block->ecb |= ECB_GS;
3187 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3188 		vcpu->arch.gs_enabled = 1;
3189 	}
3190 	save_access_regs(vcpu->arch.host_acrs);
3191 	restore_access_regs(vcpu->run->s.regs.acrs);
3192 	/* save host (userspace) fprs/vrs */
3193 	save_fpu_regs();
3194 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3195 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3196 	if (MACHINE_HAS_VX)
3197 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3198 	else
3199 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3200 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3201 	if (test_fp_ctl(current->thread.fpu.fpc))
3202 		/* User space provided an invalid FPC, let's clear it */
3203 		current->thread.fpu.fpc = 0;
3204 	if (MACHINE_HAS_GS) {
3205 		preempt_disable();
3206 		__ctl_set_bit(2, 4);
3207 		if (current->thread.gs_cb) {
3208 			vcpu->arch.host_gscb = current->thread.gs_cb;
3209 			save_gs_cb(vcpu->arch.host_gscb);
3210 		}
3211 		if (vcpu->arch.gs_enabled) {
3212 			current->thread.gs_cb = (struct gs_cb *)
3213 						&vcpu->run->s.regs.gscb;
3214 			restore_gs_cb(current->thread.gs_cb);
3215 		}
3216 		preempt_enable();
3217 	}
3218 
3219 	kvm_run->kvm_dirty_regs = 0;
3220 }
3221 
3222 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3223 {
3224 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3225 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3226 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3227 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3228 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3229 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3230 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3231 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3232 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3233 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3234 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3235 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3236 	save_access_regs(vcpu->run->s.regs.acrs);
3237 	restore_access_regs(vcpu->arch.host_acrs);
3238 	/* Save guest register state */
3239 	save_fpu_regs();
3240 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3241 	/* Restore will be done lazily at return */
3242 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3243 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3244 	if (MACHINE_HAS_GS) {
3245 		__ctl_set_bit(2, 4);
3246 		if (vcpu->arch.gs_enabled)
3247 			save_gs_cb(current->thread.gs_cb);
3248 		preempt_disable();
3249 		current->thread.gs_cb = vcpu->arch.host_gscb;
3250 		restore_gs_cb(vcpu->arch.host_gscb);
3251 		preempt_enable();
3252 		if (!vcpu->arch.host_gscb)
3253 			__ctl_clear_bit(2, 4);
3254 		vcpu->arch.host_gscb = NULL;
3255 	}
3256 
3257 }
3258 
3259 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3260 {
3261 	int rc;
3262 	sigset_t sigsaved;
3263 
3264 	if (kvm_run->immediate_exit)
3265 		return -EINTR;
3266 
3267 	if (guestdbg_exit_pending(vcpu)) {
3268 		kvm_s390_prepare_debug_exit(vcpu);
3269 		return 0;
3270 	}
3271 
3272 	if (vcpu->sigset_active)
3273 		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
3274 
3275 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3276 		kvm_s390_vcpu_start(vcpu);
3277 	} else if (is_vcpu_stopped(vcpu)) {
3278 		pr_err_ratelimited("can't run stopped vcpu %d\n",
3279 				   vcpu->vcpu_id);
3280 		return -EINVAL;
3281 	}
3282 
3283 	sync_regs(vcpu, kvm_run);
3284 	enable_cpu_timer_accounting(vcpu);
3285 
3286 	might_fault();
3287 	rc = __vcpu_run(vcpu);
3288 
3289 	if (signal_pending(current) && !rc) {
3290 		kvm_run->exit_reason = KVM_EXIT_INTR;
3291 		rc = -EINTR;
3292 	}
3293 
3294 	if (guestdbg_exit_pending(vcpu) && !rc)  {
3295 		kvm_s390_prepare_debug_exit(vcpu);
3296 		rc = 0;
3297 	}
3298 
3299 	if (rc == -EREMOTE) {
3300 		/* userspace support is needed, kvm_run has been prepared */
3301 		rc = 0;
3302 	}
3303 
3304 	disable_cpu_timer_accounting(vcpu);
3305 	store_regs(vcpu, kvm_run);
3306 
3307 	if (vcpu->sigset_active)
3308 		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
3309 
3310 	vcpu->stat.exit_userspace++;
3311 	return rc;
3312 }
3313 
3314 /*
3315  * store status at address
3316  * we have two special cases:
3317  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3318  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3319  */
3320 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3321 {
3322 	unsigned char archmode = 1;
3323 	freg_t fprs[NUM_FPRS];
3324 	unsigned int px;
3325 	u64 clkcomp, cputm;
3326 	int rc;
3327 
3328 	px = kvm_s390_get_prefix(vcpu);
3329 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3330 		if (write_guest_abs(vcpu, 163, &archmode, 1))
3331 			return -EFAULT;
3332 		gpa = 0;
3333 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3334 		if (write_guest_real(vcpu, 163, &archmode, 1))
3335 			return -EFAULT;
3336 		gpa = px;
3337 	} else
3338 		gpa -= __LC_FPREGS_SAVE_AREA;
3339 
3340 	/* manually convert vector registers if necessary */
3341 	if (MACHINE_HAS_VX) {
3342 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3343 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3344 				     fprs, 128);
3345 	} else {
3346 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3347 				     vcpu->run->s.regs.fprs, 128);
3348 	}
3349 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3350 			      vcpu->run->s.regs.gprs, 128);
3351 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3352 			      &vcpu->arch.sie_block->gpsw, 16);
3353 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3354 			      &px, 4);
3355 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3356 			      &vcpu->run->s.regs.fpc, 4);
3357 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3358 			      &vcpu->arch.sie_block->todpr, 4);
3359 	cputm = kvm_s390_get_cpu_timer(vcpu);
3360 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3361 			      &cputm, 8);
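	/* per the architecture, only bits 0-55 of the clock comparator are stored */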
3362 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
3363 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3364 			      &clkcomp, 8);
3365 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3366 			      &vcpu->run->s.regs.acrs, 64);
3367 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3368 			      &vcpu->arch.sie_block->gcr, 128);
3369 	return rc ? -EFAULT : 0;
3370 }
3371 
3372 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3373 {
3374 	/*
3375 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3376 	 * switch in the run ioctl. Let's update our copies before we store
3377 	 * them in the save area.
3378 	 */
3379 	save_fpu_regs();
3380 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3381 	save_access_regs(vcpu->run->s.regs.acrs);
3382 
3383 	return kvm_s390_store_status_unloaded(vcpu, addr);
3384 }
3385 
3386 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3387 {
3388 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3389 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3390 }
3391 
3392 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3393 {
3394 	unsigned int i;
3395 	struct kvm_vcpu *vcpu;
3396 
3397 	kvm_for_each_vcpu(i, vcpu, kvm) {
3398 		__disable_ibs_on_vcpu(vcpu);
3399 	}
3400 }
3401 
3402 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3403 {
3404 	if (!sclp.has_ibs)
3405 		return;
3406 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3407 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3408 }
3409 
3410 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3411 {
3412 	int i, online_vcpus, started_vcpus = 0;
3413 
3414 	if (!is_vcpu_stopped(vcpu))
3415 		return;
3416 
3417 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3418 	/* Only one cpu at a time may enter/leave the STOPPED state. */
3419 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
3420 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3421 
3422 	for (i = 0; i < online_vcpus; i++) {
3423 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3424 			started_vcpus++;
3425 	}
3426 
3427 	if (started_vcpus == 0) {
3428 		/* we're the only active VCPU -> speed it up */
3429 		__enable_ibs_on_vcpu(vcpu);
3430 	} else if (started_vcpus == 1) {
3431 		/*
3432 		 * As we are starting a second VCPU, we have to disable
3433 		 * the IBS facility on all VCPUs to remove potentially
3434 		 * oustanding ENABLE requests.
3435 		 */
3436 		__disable_ibs_on_all_vcpus(vcpu->kvm);
3437 	}
3438 
3439 	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3440 	/*
3441 	 * Another VCPU might have used IBS while we were offline.
3442 	 * Let's play safe and flush the VCPU at startup.
3443 	 */
3444 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3445 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3446 	return;
3447 }
3448 
3449 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3450 {
3451 	int i, online_vcpus, started_vcpus = 0;
3452 	struct kvm_vcpu *started_vcpu = NULL;
3453 
3454 	if (is_vcpu_stopped(vcpu))
3455 		return;
3456 
3457 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3458 	/* Only one cpu at a time may enter/leave the STOPPED state. */
3459 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
3460 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3461 
3462 	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
3463 	kvm_s390_clear_stop_irq(vcpu);
3464 
3465 	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3466 	__disable_ibs_on_vcpu(vcpu);
3467 
3468 	for (i = 0; i < online_vcpus; i++) {
3469 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3470 			started_vcpus++;
3471 			started_vcpu = vcpu->kvm->vcpus[i];
3472 		}
3473 	}
3474 
3475 	if (started_vcpus == 1) {
3476 		/*
3477 		 * As we only have one VCPU left, we want to enable the
3478 		 * IBS facility for that VCPU to speed it up.
3479 		 */
3480 		__enable_ibs_on_vcpu(started_vcpu);
3481 	}
3482 
3483 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3484 	return;
3485 }
3486 
3487 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3488 				     struct kvm_enable_cap *cap)
3489 {
3490 	int r;
3491 
3492 	if (cap->flags)
3493 		return -EINVAL;
3494 
3495 	switch (cap->cap) {
3496 	case KVM_CAP_S390_CSS_SUPPORT:
3497 		if (!vcpu->kvm->arch.css_support) {
3498 			vcpu->kvm->arch.css_support = 1;
3499 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3500 			trace_kvm_s390_enable_css(vcpu->kvm);
3501 		}
3502 		r = 0;
3503 		break;
3504 	default:
3505 		r = -EINVAL;
3506 		break;
3507 	}
3508 	return r;
3509 }
3510 
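/*
 * Illustrative sketch of how userspace might drive KVM_S390_MEM_OP on a
 * vcpu fd; vcpu_fd, guest_addr, buf and len are placeholder names:
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = guest_addr,
 *		.size  = len,
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)(unsigned long)buf,
 *	};
 *	if (ioctl(vcpu_fd, KVM_S390_MEM_OP, &op) < 0)
 *		perror("KVM_S390_MEM_OP");
 */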
3511 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3512 				  struct kvm_s390_mem_op *mop)
3513 {
3514 	void __user *uaddr = (void __user *)mop->buf;
3515 	void *tmpbuf = NULL;
3516 	int r, srcu_idx;
3517 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3518 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
3519 
3520 	if (mop->flags & ~supported_flags)
3521 		return -EINVAL;
3522 
3523 	if (mop->size > MEM_OP_MAX_SIZE)
3524 		return -E2BIG;
3525 
3526 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3527 		tmpbuf = vmalloc(mop->size);
3528 		if (!tmpbuf)
3529 			return -ENOMEM;
3530 	}
3531 
3532 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3533 
3534 	switch (mop->op) {
3535 	case KVM_S390_MEMOP_LOGICAL_READ:
3536 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3537 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3538 					    mop->size, GACC_FETCH);
3539 			break;
3540 		}
3541 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3542 		if (r == 0) {
3543 			if (copy_to_user(uaddr, tmpbuf, mop->size))
3544 				r = -EFAULT;
3545 		}
3546 		break;
3547 	case KVM_S390_MEMOP_LOGICAL_WRITE:
3548 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3549 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3550 					    mop->size, GACC_STORE);
3551 			break;
3552 		}
3553 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3554 			r = -EFAULT;
3555 			break;
3556 		}
3557 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3558 		break;
3559 	default:
3560 		r = -EINVAL;
3561 	}
3562 
3563 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3564 
3565 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3566 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3567 
3568 	vfree(tmpbuf);
3569 	return r;
3570 }
3571 
3572 long kvm_arch_vcpu_ioctl(struct file *filp,
3573 			 unsigned int ioctl, unsigned long arg)
3574 {
3575 	struct kvm_vcpu *vcpu = filp->private_data;
3576 	void __user *argp = (void __user *)arg;
3577 	int idx;
3578 	long r;
3579 
3580 	switch (ioctl) {
3581 	case KVM_S390_IRQ: {
3582 		struct kvm_s390_irq s390irq;
3583 
3584 		r = -EFAULT;
3585 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3586 			break;
3587 		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3588 		break;
3589 	}
3590 	case KVM_S390_INTERRUPT: {
3591 		struct kvm_s390_interrupt s390int;
3592 		struct kvm_s390_irq s390irq;
3593 
3594 		r = -EFAULT;
3595 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
3596 			break;
3597 		if (s390int_to_s390irq(&s390int, &s390irq))
3598 			return -EINVAL;
3599 		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3600 		break;
3601 	}
3602 	case KVM_S390_STORE_STATUS:
3603 		idx = srcu_read_lock(&vcpu->kvm->srcu);
3604 		r = kvm_s390_vcpu_store_status(vcpu, arg);
3605 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
3606 		break;
3607 	case KVM_S390_SET_INITIAL_PSW: {
3608 		psw_t psw;
3609 
3610 		r = -EFAULT;
3611 		if (copy_from_user(&psw, argp, sizeof(psw)))
3612 			break;
3613 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3614 		break;
3615 	}
3616 	case KVM_S390_INITIAL_RESET:
3617 		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3618 		break;
3619 	case KVM_SET_ONE_REG:
3620 	case KVM_GET_ONE_REG: {
3621 		struct kvm_one_reg reg;

3622 		r = -EFAULT;
3623 		if (copy_from_user(&reg, argp, sizeof(reg)))
3624 			break;
3625 		if (ioctl == KVM_SET_ONE_REG)
3626 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3627 		else
3628 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3629 		break;
3630 	}
3631 #ifdef CONFIG_KVM_S390_UCONTROL
3632 	case KVM_S390_UCAS_MAP: {
3633 		struct kvm_s390_ucas_mapping ucasmap;
3634 
3635 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3636 			r = -EFAULT;
3637 			break;
3638 		}
3639 
3640 		if (!kvm_is_ucontrol(vcpu->kvm)) {
3641 			r = -EINVAL;
3642 			break;
3643 		}
3644 
3645 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3646 				     ucasmap.vcpu_addr, ucasmap.length);
3647 		break;
3648 	}
3649 	case KVM_S390_UCAS_UNMAP: {
3650 		struct kvm_s390_ucas_mapping ucasmap;
3651 
3652 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3653 			r = -EFAULT;
3654 			break;
3655 		}
3656 
3657 		if (!kvm_is_ucontrol(vcpu->kvm)) {
3658 			r = -EINVAL;
3659 			break;
3660 		}
3661 
3662 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3663 				       ucasmap.length);
3664 		break;
3665 	}
3666 #endif
3667 	case KVM_S390_VCPU_FAULT: {
3668 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
3669 		break;
3670 	}
3671 	case KVM_ENABLE_CAP: {
3672 		struct kvm_enable_cap cap;

3673 		r = -EFAULT;
3675 		if (copy_from_user(&cap, argp, sizeof(cap)))
3676 			break;
3677 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3678 		break;
3679 	}
3680 	case KVM_S390_MEM_OP: {
3681 		struct kvm_s390_mem_op mem_op;
3682 
3683 		r = -EFAULT;
3684 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)))
3685 			break;
3686 		r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3687 		break;
3688 	}
3689 	case KVM_S390_SET_IRQ_STATE: {
3690 		struct kvm_s390_irq_state irq_state;
3691 
3692 		r = -EFAULT;
3693 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3694 			break;
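		/* need a non-zero multiple of the irq size that fits our buffer */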
3695 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3696 		    irq_state.len == 0 ||
3697 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3698 			r = -EINVAL;
3699 			break;
3700 		}
3701 		r = kvm_s390_set_irq_state(vcpu,
3702 					   (void __user *) irq_state.buf,
3703 					   irq_state.len);
3704 		break;
3705 	}
3706 	case KVM_S390_GET_IRQ_STATE: {
3707 		struct kvm_s390_irq_state irq_state;
3708 
3709 		r = -EFAULT;
3710 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3711 			break;
3712 		if (irq_state.len == 0) {
3713 			r = -EINVAL;
3714 			break;
3715 		}
3716 		r = kvm_s390_get_irq_state(vcpu,
3717 					   (__u8 __user *)  irq_state.buf,
3718 					   irq_state.len);
3719 		break;
3720 	}
3721 	default:
3722 		r = -ENOTTY;
3723 	}
3724 	return r;
3725 }
3726 
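/*
 * For user-controlled VMs, userspace reaches the hardware SIE control
 * block by mmap()ing the vcpu fd at KVM_S390_SIE_PAGE_OFFSET; hand out
 * that page here. Any other fault (and any fault on a non-ucontrol VM)
 * gets SIGBUS.
 */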
3727 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3728 {
3729 #ifdef CONFIG_KVM_S390_UCONTROL
3730 	if (vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET &&
3731 	    kvm_is_ucontrol(vcpu->kvm)) {
3732 		vmf->page = virt_to_page(vcpu->arch.sie_block);
3733 		get_page(vmf->page);
3734 		return 0;
3735 	}
3736 #endif
3737 	return VM_FAULT_SIGBUS;
3738 }
3739 
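/* s390 keeps no arch-specific per-memslot state, nothing to allocate */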
3740 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3741 			    unsigned long npages)
3742 {
3743 	return 0;
3744 }
3745 
3746 /* Section: memory related */
3747 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3748 				   struct kvm_memory_slot *memslot,
3749 				   const struct kvm_userspace_memory_region *mem,
3750 				   enum kvm_mr_change change)
3751 {
3752 	/* A few sanity checks. Memory slots must start and end on a segment
3753 	 * boundary (1MB). The userland memory backing a slot may be split
3754 	 * across any number of vmas, and it is fine to mmap() and munmap()
3755 	 * parts of the slot at any time after this call. */
3756 
3757 	if (mem->userspace_addr & 0xffffful)
3758 		return -EINVAL;
3759 
3760 	if (mem->memory_size & 0xffffful)
3761 		return -EINVAL;
3762 
3763 	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3764 		return -EINVAL;
3765 
3766 	return 0;
3767 }
3768 
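/*
 * Propagate memslot changes to the guest address space by (re)mapping
 * the backing userspace range into the gmap.
 */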
3769 void kvm_arch_commit_memory_region(struct kvm *kvm,
3770 				const struct kvm_userspace_memory_region *mem,
3771 				const struct kvm_memory_slot *old,
3772 				const struct kvm_memory_slot *new,
3773 				enum kvm_mr_change change)
3774 {
3775 	int rc;
3776 
3777 	/* If the basics of the memslot do not change, we do not want
3778 	 * to update the gmap. Every update causes several unnecessary
3779 	 * segment translation exceptions. This is usually handled just
3780 	 * fine by the normal fault handler + gmap, but it will also
3781 	 * cause faults on the prefix page of running guest CPUs.
3782 	 */
3783 	if (old->userspace_addr == mem->userspace_addr &&
3784 	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3785 	    old->npages * PAGE_SIZE == mem->memory_size)
3786 		return;
3787 
3788 	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3789 			      mem->guest_phys_addr, mem->memory_size);
3790 	if (rc)
3791 		pr_warn("failed to commit memory region\n");
3793 }
3794 
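/*
 * Turn the per-word 2-bit indication from the SCLP hmfai field into a
 * mask of the facility bits in facility-list word i that can be handed
 * through to guests: the 16 most significant bits of each word are
 * always masked off, and every increment of the indication hides
 * another 16 bits.
 */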
3795 static inline unsigned long nonhyp_mask(int i)
3796 {
3797 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3798 
3799 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3800 }
3801 
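/* clear the valid-wakeup indication once the vcpu is done blocking */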
3802 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3803 {
3804 	vcpu->valid_wakeup = false;
3805 }
3806 
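/*
 * Module init: refuse to load when the SIE facility (sief2) is not
 * available, then fold the host facility bits, filtered through
 * nonhyp_mask(), into the mask of facilities offered to guests.
 */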
3807 static int __init kvm_s390_init(void)
3808 {
3809 	int i;
3810 
3811 	if (!sclp.has_sief2) {
3812 		pr_info("SIE not available\n");
3813 		return -ENODEV;
3814 	}
3815 
3816 	for (i = 0; i < 16; i++)
3817 		kvm_s390_fac_list_mask[i] |=
3818 			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3819 
3820 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3821 }
3822 
3823 static void __exit kvm_s390_exit(void)
3824 {
3825 	kvm_exit();
3826 }
3827 
3828 module_init(kvm_s390_init);
3829 module_exit(kvm_s390_exit);
3830 
3831 /*
3832  * Enable autoloading of the kvm module.
3833  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3834  * since x86 takes a different approach.
3835  */
3836 #include <linux/miscdevice.h>
3837 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3838 MODULE_ALIAS("devname:kvm");
3839