xref: /openbmc/linux/arch/s390/kvm/kvm-s390.c (revision 4da722ca19f30f7db250db808d1ab1703607a932)
1 /*
2  * hosting zSeries kernel virtual machines
3  *
4  * Copyright IBM Corp. 2008, 2009
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License (version 2 only)
8  * as published by the Free Software Foundation.
9  *
10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
11  *               Christian Borntraeger <borntraeger@de.ibm.com>
12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
14  *               Jason J. Herne <jjherne@us.ibm.com>
15  */
16 
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34 
35 #include <asm/asm-offsets.h>
36 #include <asm/lowcore.h>
37 #include <asm/stp.h>
38 #include <asm/pgtable.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include "kvm-s390.h"
47 #include "gaccess.h"
48 
49 #define KMSG_COMPONENT "kvm-s390"
50 #undef pr_fmt
51 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
52 
53 #define CREATE_TRACE_POINTS
54 #include "trace.h"
55 #include "trace-s390.h"
56 
/*
 * Upper bound for a single KVM_S390_MEM_OP transfer; this value is also what
 * kvm_vm_ioctl_check_extension() reports for KVM_CAP_S390_MEM_OP.
 */
#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
/* NOTE(review): presumably the number of VCPU-local interrupt slots - confirm */
#define LOCAL_IRQS 32
/*
 * Size in bytes of a buffer that can hold (KVM_MAX_VCPUS + LOCAL_IRQS)
 * struct kvm_s390_irq entries.
 */
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

/*
 * Expands to two initializer fields for a statistics table entry: the offset
 * of stat.x inside struct kvm_vcpu, followed by the KVM_STAT_VCPU kind.
 */
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
63 
/*
 * Table of per-VCPU statistics counters. Each entry pairs a user-visible
 * name with the offset of the matching field in struct kvm_vcpu's stat
 * member (see VCPU_STAT). Note that a few names differ from the field they
 * map to (e.g. "userspace_handled" -> exit_userspace). The table is
 * terminated by an entry whose name is NULL.
 * NOTE(review): the table is not referenced in this file's visible part;
 * presumably generic KVM code consumes it - confirm.
 */
struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_pei", VCPU_STAT(exit_pei) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
	{ "instruction_sie", VCPU_STAT(instruction_sie) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "diagnose_10", VCPU_STAT(diagnose_10) },
	{ "diagnose_44", VCPU_STAT(diagnose_44) },
	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
	{ "diagnose_258", VCPU_STAT(diagnose_258) },
	{ "diagnose_308", VCPU_STAT(diagnose_308) },
	{ "diagnose_500", VCPU_STAT(diagnose_500) },
	{ NULL }
};
132 
/*
 * allow nested virtualization in KVM (if enabled by user space)
 *
 * Exposed as a module parameter with S_IRUGO permissions. When it is zero,
 * kvm_s390_cpu_feat_init() returns before advertising any of the
 * nested-virtualization CPU features.
 */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/*
 * upper facilities limit for kvm
 *
 * 16 unsigned longs, initialized from FACILITIES_KVM; elements without an
 * explicit initializer are zero.
 */
unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
140 
141 unsigned long kvm_s390_fac_list_mask_size(void)
142 {
143 	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
144 	return ARRAY_SIZE(kvm_s390_fac_list_mask);
145 }
146 
/*
 * available cpu features supported by kvm
 *
 * Filled in by kvm_s390_cpu_feat_init() through allow_cpu_feat().
 */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/*
 * available subfunctions indicated via query / "test bit"
 *
 * The individual masks are populated by kvm_s390_cpu_feat_init().
 */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

/* gmap PTE notifiers; callbacks are assigned in kvm_arch_hardware_setup() */
static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
/* s390 debug feature area, registered as "kvm-trace" in kvm_arch_init() */
debug_info_t *kvm_s390_dbf;
155 
156 /* Section: not file related */
/*
 * Hardware-enable hook. Nothing has to be switched on here, so success (0)
 * is reported unconditionally.
 */
int kvm_arch_hardware_enable(void)
{
	return 0;	/* every s390 is virtualization enabled ;-) */
}
162 
/*
 * Forward declaration: installed as gmap_notifier.notifier_call in
 * kvm_arch_hardware_setup(); the definition is not in the visible part of
 * this file.
 */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
165 
166 /*
167  * This callback is executed during stop_machine(). All CPUs are therefore
168  * temporarily stopped. In order not to change guest behavior, we have to
169  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
170  * so a CPU won't be stopped while calculating with the epoch.
171  */
172 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
173 			  void *v)
174 {
175 	struct kvm *kvm;
176 	struct kvm_vcpu *vcpu;
177 	int i;
178 	unsigned long long *delta = v;
179 
180 	list_for_each_entry(kvm, &vm_list, vm_list) {
181 		kvm->arch.epoch -= *delta;
182 		kvm_for_each_vcpu(i, vcpu, kvm) {
183 			vcpu->arch.sie_block->epoch -= *delta;
184 			if (vcpu->arch.cputm_enabled)
185 				vcpu->arch.cputm_start += *delta;
186 			if (vcpu->arch.vsie_block)
187 				vcpu->arch.vsie_block->epoch -= *delta;
188 		}
189 	}
190 	return NOTIFY_OK;
191 }
192 
/*
 * Notifier block wrapping kvm_clock_sync(); it is added to and removed from
 * s390_epoch_delta_notifier in kvm_arch_hardware_setup() and
 * kvm_arch_hardware_unsetup().
 */
static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};
196 
197 int kvm_arch_hardware_setup(void)
198 {
199 	gmap_notifier.notifier_call = kvm_gmap_notifier;
200 	gmap_register_pte_notifier(&gmap_notifier);
201 	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
202 	gmap_register_pte_notifier(&vsie_gmap_notifier);
203 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
204 				       &kvm_clock_notifier);
205 	return 0;
206 }
207 
208 void kvm_arch_hardware_unsetup(void)
209 {
210 	gmap_unregister_pte_notifier(&gmap_notifier);
211 	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
212 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
213 					 &kvm_clock_notifier);
214 }
215 
216 static void allow_cpu_feat(unsigned long nr)
217 {
218 	set_bit_inv(nr, kvm_s390_available_cpu_feat);
219 }
220 
/*
 * Ask the PLO instruction, in its "test bit" form, about function code @nr.
 *
 * General register 0 is loaded with @nr | 0x100 before PLO is executed; the
 * resulting condition code is then extracted with IPM/SRL.
 *
 * Returns 1 when the condition code is 0, otherwise 0.
 */
static inline int plo_test_bit(unsigned char nr)
{
	/* pin the operand to general register 0, which PLO reads implicitly */
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc;

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		/* move the program mask / condition code into %0 ... */
		"	ipm	%0\n"
		/* ... and shift it down so only the condition code remains */
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc == 0;
}
236 
/*
 * Probe the host once and record what can be offered to guests:
 *  - the PLO, PTFF and CPACF subfunction masks are stored in
 *    kvm_s390_available_subfunc,
 *  - the CPU features are flagged in kvm_s390_available_cpu_feat via
 *    allow_cpu_feat().
 *
 * Everything after the ESOP feature is related to nested virtualization and
 * is only advertised when all prerequisites are met and the "nested" module
 * parameter is set.
 */
static void kvm_s390_cpu_feat_init(void)
{
	int i;

	/* one query per possible PLO function code; bit 0 is the MSB of byte 0 */
	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	/* the CPACF queries below are each gated by the matching MSA facility */
	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	/* note: the CPACF_PRNO result is stored in the field named "ppno" */
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	/* NOTE(review): always true here, the guard above already required it */
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}
329 
330 int kvm_arch_init(void *opaque)
331 {
332 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
333 	if (!kvm_s390_dbf)
334 		return -ENOMEM;
335 
336 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
337 		debug_unregister(kvm_s390_dbf);
338 		return -ENOMEM;
339 	}
340 
341 	kvm_s390_cpu_feat_init();
342 
343 	/* Register floating interrupt controller interface. */
344 	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
345 }
346 
347 void kvm_arch_exit(void)
348 {
349 	debug_unregister(kvm_s390_dbf);
350 }
351 
352 /* Section: device related */
353 long kvm_arch_dev_ioctl(struct file *filp,
354 			unsigned int ioctl, unsigned long arg)
355 {
356 	if (ioctl == KVM_S390_ENABLE_SIE)
357 		return s390_enable_sie();
358 	return -EINVAL;
359 }
360 
361 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
362 {
363 	int r;
364 
365 	switch (ext) {
366 	case KVM_CAP_S390_PSW:
367 	case KVM_CAP_S390_GMAP:
368 	case KVM_CAP_SYNC_MMU:
369 #ifdef CONFIG_KVM_S390_UCONTROL
370 	case KVM_CAP_S390_UCONTROL:
371 #endif
372 	case KVM_CAP_ASYNC_PF:
373 	case KVM_CAP_SYNC_REGS:
374 	case KVM_CAP_ONE_REG:
375 	case KVM_CAP_ENABLE_CAP:
376 	case KVM_CAP_S390_CSS_SUPPORT:
377 	case KVM_CAP_IOEVENTFD:
378 	case KVM_CAP_DEVICE_CTRL:
379 	case KVM_CAP_ENABLE_CAP_VM:
380 	case KVM_CAP_S390_IRQCHIP:
381 	case KVM_CAP_VM_ATTRIBUTES:
382 	case KVM_CAP_MP_STATE:
383 	case KVM_CAP_IMMEDIATE_EXIT:
384 	case KVM_CAP_S390_INJECT_IRQ:
385 	case KVM_CAP_S390_USER_SIGP:
386 	case KVM_CAP_S390_USER_STSI:
387 	case KVM_CAP_S390_SKEYS:
388 	case KVM_CAP_S390_IRQ_STATE:
389 	case KVM_CAP_S390_USER_INSTR0:
390 	case KVM_CAP_S390_CMMA_MIGRATION:
391 	case KVM_CAP_S390_AIS:
392 		r = 1;
393 		break;
394 	case KVM_CAP_S390_MEM_OP:
395 		r = MEM_OP_MAX_SIZE;
396 		break;
397 	case KVM_CAP_NR_VCPUS:
398 	case KVM_CAP_MAX_VCPUS:
399 		r = KVM_S390_BSCA_CPU_SLOTS;
400 		if (!kvm_s390_use_sca_entries())
401 			r = KVM_MAX_VCPUS;
402 		else if (sclp.has_esca && sclp.has_64bscao)
403 			r = KVM_S390_ESCA_CPU_SLOTS;
404 		break;
405 	case KVM_CAP_NR_MEMSLOTS:
406 		r = KVM_USER_MEM_SLOTS;
407 		break;
408 	case KVM_CAP_S390_COW:
409 		r = MACHINE_HAS_ESOP;
410 		break;
411 	case KVM_CAP_S390_VECTOR_REGISTERS:
412 		r = MACHINE_HAS_VX;
413 		break;
414 	case KVM_CAP_S390_RI:
415 		r = test_facility(64);
416 		break;
417 	case KVM_CAP_S390_GS:
418 		r = test_facility(133);
419 		break;
420 	default:
421 		r = 0;
422 	}
423 	return r;
424 }
425 
426 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
427 					struct kvm_memory_slot *memslot)
428 {
429 	gfn_t cur_gfn, last_gfn;
430 	unsigned long address;
431 	struct gmap *gmap = kvm->arch.gmap;
432 
433 	/* Loop over all guest pages */
434 	last_gfn = memslot->base_gfn + memslot->npages;
435 	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
436 		address = gfn_to_hva_memslot(memslot, cur_gfn);
437 
438 		if (test_and_clear_guest_dirty(gmap->mm, address))
439 			mark_page_dirty(kvm, cur_gfn);
440 		if (fatal_signal_pending(current))
441 			return;
442 		cond_resched();
443 	}
444 }
445 
446 /* Section: vm related */
/* Forward declaration; the definition is not in the visible part of this file. */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);
448 
449 /*
450  * Get (and clear) the dirty memory log for a memory slot.
451  */
452 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
453 			       struct kvm_dirty_log *log)
454 {
455 	int r;
456 	unsigned long n;
457 	struct kvm_memslots *slots;
458 	struct kvm_memory_slot *memslot;
459 	int is_dirty = 0;
460 
461 	if (kvm_is_ucontrol(kvm))
462 		return -EINVAL;
463 
464 	mutex_lock(&kvm->slots_lock);
465 
466 	r = -EINVAL;
467 	if (log->slot >= KVM_USER_MEM_SLOTS)
468 		goto out;
469 
470 	slots = kvm_memslots(kvm);
471 	memslot = id_to_memslot(slots, log->slot);
472 	r = -ENOENT;
473 	if (!memslot->dirty_bitmap)
474 		goto out;
475 
476 	kvm_s390_sync_dirty_log(kvm, memslot);
477 	r = kvm_get_dirty_log(kvm, log, &is_dirty);
478 	if (r)
479 		goto out;
480 
481 	/* Clear the dirty log */
482 	if (is_dirty) {
483 		n = kvm_dirty_bitmap_bytes(memslot);
484 		memset(memslot->dirty_bitmap, 0, n);
485 	}
486 	r = 0;
487 out:
488 	mutex_unlock(&kvm->slots_lock);
489 	return r;
490 }
491 
492 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
493 {
494 	unsigned int i;
495 	struct kvm_vcpu *vcpu;
496 
497 	kvm_for_each_vcpu(i, vcpu, kvm) {
498 		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
499 	}
500 }
501 
502 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
503 {
504 	int r;
505 
506 	if (cap->flags)
507 		return -EINVAL;
508 
509 	switch (cap->cap) {
510 	case KVM_CAP_S390_IRQCHIP:
511 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
512 		kvm->arch.use_irqchip = 1;
513 		r = 0;
514 		break;
515 	case KVM_CAP_S390_USER_SIGP:
516 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
517 		kvm->arch.user_sigp = 1;
518 		r = 0;
519 		break;
520 	case KVM_CAP_S390_VECTOR_REGISTERS:
521 		mutex_lock(&kvm->lock);
522 		if (kvm->created_vcpus) {
523 			r = -EBUSY;
524 		} else if (MACHINE_HAS_VX) {
525 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
526 			set_kvm_facility(kvm->arch.model.fac_list, 129);
527 			if (test_facility(134)) {
528 				set_kvm_facility(kvm->arch.model.fac_mask, 134);
529 				set_kvm_facility(kvm->arch.model.fac_list, 134);
530 			}
531 			if (test_facility(135)) {
532 				set_kvm_facility(kvm->arch.model.fac_mask, 135);
533 				set_kvm_facility(kvm->arch.model.fac_list, 135);
534 			}
535 			r = 0;
536 		} else
537 			r = -EINVAL;
538 		mutex_unlock(&kvm->lock);
539 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
540 			 r ? "(not available)" : "(success)");
541 		break;
542 	case KVM_CAP_S390_RI:
543 		r = -EINVAL;
544 		mutex_lock(&kvm->lock);
545 		if (kvm->created_vcpus) {
546 			r = -EBUSY;
547 		} else if (test_facility(64)) {
548 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
549 			set_kvm_facility(kvm->arch.model.fac_list, 64);
550 			r = 0;
551 		}
552 		mutex_unlock(&kvm->lock);
553 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
554 			 r ? "(not available)" : "(success)");
555 		break;
556 	case KVM_CAP_S390_AIS:
557 		mutex_lock(&kvm->lock);
558 		if (kvm->created_vcpus) {
559 			r = -EBUSY;
560 		} else {
561 			set_kvm_facility(kvm->arch.model.fac_mask, 72);
562 			set_kvm_facility(kvm->arch.model.fac_list, 72);
563 			r = 0;
564 		}
565 		mutex_unlock(&kvm->lock);
566 		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
567 			 r ? "(not available)" : "(success)");
568 		break;
569 	case KVM_CAP_S390_GS:
570 		r = -EINVAL;
571 		mutex_lock(&kvm->lock);
572 		if (atomic_read(&kvm->online_vcpus)) {
573 			r = -EBUSY;
574 		} else if (test_facility(133)) {
575 			set_kvm_facility(kvm->arch.model.fac_mask, 133);
576 			set_kvm_facility(kvm->arch.model.fac_list, 133);
577 			r = 0;
578 		}
579 		mutex_unlock(&kvm->lock);
580 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
581 			 r ? "(not available)" : "(success)");
582 		break;
583 	case KVM_CAP_S390_USER_STSI:
584 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
585 		kvm->arch.user_stsi = 1;
586 		r = 0;
587 		break;
588 	case KVM_CAP_S390_USER_INSTR0:
589 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
590 		kvm->arch.user_instr0 = 1;
591 		icpt_operexc_on_all_vcpus(kvm);
592 		r = 0;
593 		break;
594 	default:
595 		r = -EINVAL;
596 		break;
597 	}
598 	return r;
599 }
600 
601 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
602 {
603 	int ret;
604 
605 	switch (attr->attr) {
606 	case KVM_S390_VM_MEM_LIMIT_SIZE:
607 		ret = 0;
608 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
609 			 kvm->arch.mem_limit);
610 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
611 			ret = -EFAULT;
612 		break;
613 	default:
614 		ret = -ENXIO;
615 		break;
616 	}
617 	return ret;
618 }
619 
/*
 * Change a memory control attribute of the VM.
 *
 * KVM_S390_VM_MEM_ENABLE_CMMA: turn on CMMA usage; only possible while no
 *	VCPU has been created.
 * KVM_S390_VM_MEM_CLR_CMMA: reset the CMMA state of the guest address
 *	space; requires CMMA to be in use.
 * KVM_S390_VM_MEM_LIMIT_SIZE: replace the guest address space (gmap) by a
 *	new one created for the limit read from user space; only possible
 *	while no VCPU has been created.
 *
 * Returns 0 on success or a negative error code: -ENXIO (unknown attribute
 * or no CMMA on this machine), -EBUSY (VCPUs already created), -EINVAL,
 * -EFAULT, -E2BIG or -ENOMEM.
 */
static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;
	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		/* stays -EBUSY unless the VCPU check below succeeds */
		ret = -EBUSY;
		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			kvm->arch.use_cmma = 1;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		/* the reset runs under kvm->lock and inside an SRCU read section */
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		/* an existing limit may be lowered but never raised */
		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				/* drop the old gmap only once the new one exists */
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		/* NOTE(review): both events are logged even if the update failed */
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
702 
/*
 * Forward declaration; called for every VCPU by kvm_s390_vm_set_crypto().
 * The definition is not in the visible part of this file.
 */
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
704 
705 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
706 {
707 	struct kvm_vcpu *vcpu;
708 	int i;
709 
710 	if (!test_kvm_facility(kvm, 76))
711 		return -EINVAL;
712 
713 	mutex_lock(&kvm->lock);
714 	switch (attr->attr) {
715 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
716 		get_random_bytes(
717 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
718 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
719 		kvm->arch.crypto.aes_kw = 1;
720 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
721 		break;
722 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
723 		get_random_bytes(
724 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
725 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
726 		kvm->arch.crypto.dea_kw = 1;
727 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
728 		break;
729 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
730 		kvm->arch.crypto.aes_kw = 0;
731 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
732 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
733 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
734 		break;
735 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
736 		kvm->arch.crypto.dea_kw = 0;
737 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
738 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
739 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
740 		break;
741 	default:
742 		mutex_unlock(&kvm->lock);
743 		return -ENXIO;
744 	}
745 
746 	kvm_for_each_vcpu(i, vcpu, kvm) {
747 		kvm_s390_vcpu_crypto_setup(vcpu);
748 		exit_sie(vcpu);
749 	}
750 	mutex_unlock(&kvm->lock);
751 	return 0;
752 }
753 
754 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
755 {
756 	int cx;
757 	struct kvm_vcpu *vcpu;
758 
759 	kvm_for_each_vcpu(cx, vcpu, kvm)
760 		kvm_s390_sync_request(req, vcpu);
761 }
762 
763 /*
764  * Must be called with kvm->srcu held to avoid races on memslots, and with
765  * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
766  */
767 static int kvm_s390_vm_start_migration(struct kvm *kvm)
768 {
769 	struct kvm_s390_migration_state *mgs;
770 	struct kvm_memory_slot *ms;
771 	/* should be the only one */
772 	struct kvm_memslots *slots;
773 	unsigned long ram_pages;
774 	int slotnr;
775 
776 	/* migration mode already enabled */
777 	if (kvm->arch.migration_state)
778 		return 0;
779 
780 	slots = kvm_memslots(kvm);
781 	if (!slots || !slots->used_slots)
782 		return -EINVAL;
783 
784 	mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
785 	if (!mgs)
786 		return -ENOMEM;
787 	kvm->arch.migration_state = mgs;
788 
789 	if (kvm->arch.use_cmma) {
790 		/*
791 		 * Get the last slot. They should be sorted by base_gfn, so the
792 		 * last slot is also the one at the end of the address space.
793 		 * We have verified above that at least one slot is present.
794 		 */
795 		ms = slots->memslots + slots->used_slots - 1;
796 		/* round up so we only use full longs */
797 		ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
798 		/* allocate enough bytes to store all the bits */
799 		mgs->pgste_bitmap = vmalloc(ram_pages / 8);
800 		if (!mgs->pgste_bitmap) {
801 			kfree(mgs);
802 			kvm->arch.migration_state = NULL;
803 			return -ENOMEM;
804 		}
805 
806 		mgs->bitmap_size = ram_pages;
807 		atomic64_set(&mgs->dirty_pages, ram_pages);
808 		/* mark all the pages in active slots as dirty */
809 		for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
810 			ms = slots->memslots + slotnr;
811 			bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
812 		}
813 
814 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
815 	}
816 	return 0;
817 }
818 
819 /*
820  * Must be called with kvm->lock to avoid races with ourselves and
821  * kvm_s390_vm_start_migration.
822  */
823 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
824 {
825 	struct kvm_s390_migration_state *mgs;
826 
827 	/* migration mode already disabled */
828 	if (!kvm->arch.migration_state)
829 		return 0;
830 	mgs = kvm->arch.migration_state;
831 	kvm->arch.migration_state = NULL;
832 
833 	if (kvm->arch.use_cmma) {
834 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
835 		vfree(mgs->pgste_bitmap);
836 	}
837 	kfree(mgs);
838 	return 0;
839 }
840 
841 static int kvm_s390_vm_set_migration(struct kvm *kvm,
842 				     struct kvm_device_attr *attr)
843 {
844 	int idx, res = -ENXIO;
845 
846 	mutex_lock(&kvm->lock);
847 	switch (attr->attr) {
848 	case KVM_S390_VM_MIGRATION_START:
849 		idx = srcu_read_lock(&kvm->srcu);
850 		res = kvm_s390_vm_start_migration(kvm);
851 		srcu_read_unlock(&kvm->srcu, idx);
852 		break;
853 	case KVM_S390_VM_MIGRATION_STOP:
854 		res = kvm_s390_vm_stop_migration(kvm);
855 		break;
856 	default:
857 		break;
858 	}
859 	mutex_unlock(&kvm->lock);
860 
861 	return res;
862 }
863 
864 static int kvm_s390_vm_get_migration(struct kvm *kvm,
865 				     struct kvm_device_attr *attr)
866 {
867 	u64 mig = (kvm->arch.migration_state != NULL);
868 
869 	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
870 		return -ENXIO;
871 
872 	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
873 		return -EFAULT;
874 	return 0;
875 }
876 
877 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
878 {
879 	u8 gtod_high;
880 
881 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
882 					   sizeof(gtod_high)))
883 		return -EFAULT;
884 
885 	if (gtod_high != 0)
886 		return -EINVAL;
887 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
888 
889 	return 0;
890 }
891 
892 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
893 {
894 	u64 gtod;
895 
896 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
897 		return -EFAULT;
898 
899 	kvm_s390_set_tod_clock(kvm, gtod);
900 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
901 	return 0;
902 }
903 
904 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
905 {
906 	int ret;
907 
908 	if (attr->flags)
909 		return -EINVAL;
910 
911 	switch (attr->attr) {
912 	case KVM_S390_VM_TOD_HIGH:
913 		ret = kvm_s390_set_tod_high(kvm, attr);
914 		break;
915 	case KVM_S390_VM_TOD_LOW:
916 		ret = kvm_s390_set_tod_low(kvm, attr);
917 		break;
918 	default:
919 		ret = -ENXIO;
920 		break;
921 	}
922 	return ret;
923 }
924 
925 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
926 {
927 	u8 gtod_high = 0;
928 
929 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
930 					 sizeof(gtod_high)))
931 		return -EFAULT;
932 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
933 
934 	return 0;
935 }
936 
937 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
938 {
939 	u64 gtod;
940 
941 	gtod = kvm_s390_get_tod_clock_fast(kvm);
942 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
943 		return -EFAULT;
944 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
945 
946 	return 0;
947 }
948 
949 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
950 {
951 	int ret;
952 
953 	if (attr->flags)
954 		return -EINVAL;
955 
956 	switch (attr->attr) {
957 	case KVM_S390_VM_TOD_HIGH:
958 		ret = kvm_s390_get_tod_high(kvm, attr);
959 		break;
960 	case KVM_S390_VM_TOD_LOW:
961 		ret = kvm_s390_get_tod_low(kvm, attr);
962 		break;
963 	default:
964 		ret = -ENXIO;
965 		break;
966 	}
967 	return ret;
968 }
969 
970 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
971 {
972 	struct kvm_s390_vm_cpu_processor *proc;
973 	u16 lowest_ibc, unblocked_ibc;
974 	int ret = 0;
975 
976 	mutex_lock(&kvm->lock);
977 	if (kvm->created_vcpus) {
978 		ret = -EBUSY;
979 		goto out;
980 	}
981 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
982 	if (!proc) {
983 		ret = -ENOMEM;
984 		goto out;
985 	}
986 	if (!copy_from_user(proc, (void __user *)attr->addr,
987 			    sizeof(*proc))) {
988 		kvm->arch.model.cpuid = proc->cpuid;
989 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
990 		unblocked_ibc = sclp.ibc & 0xfff;
991 		if (lowest_ibc && proc->ibc) {
992 			if (proc->ibc > unblocked_ibc)
993 				kvm->arch.model.ibc = unblocked_ibc;
994 			else if (proc->ibc < lowest_ibc)
995 				kvm->arch.model.ibc = lowest_ibc;
996 			else
997 				kvm->arch.model.ibc = proc->ibc;
998 		}
999 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
1000 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
1001 		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1002 			 kvm->arch.model.ibc,
1003 			 kvm->arch.model.cpuid);
1004 		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1005 			 kvm->arch.model.fac_list[0],
1006 			 kvm->arch.model.fac_list[1],
1007 			 kvm->arch.model.fac_list[2]);
1008 	} else
1009 		ret = -EFAULT;
1010 	kfree(proc);
1011 out:
1012 	mutex_unlock(&kvm->lock);
1013 	return ret;
1014 }
1015 
1016 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1017 				       struct kvm_device_attr *attr)
1018 {
1019 	struct kvm_s390_vm_cpu_feat data;
1020 	int ret = -EBUSY;
1021 
1022 	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1023 		return -EFAULT;
1024 	if (!bitmap_subset((unsigned long *) data.feat,
1025 			   kvm_s390_available_cpu_feat,
1026 			   KVM_S390_VM_CPU_FEAT_NR_BITS))
1027 		return -EINVAL;
1028 
1029 	mutex_lock(&kvm->lock);
1030 	if (!atomic_read(&kvm->online_vcpus)) {
1031 		bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1032 			    KVM_S390_VM_CPU_FEAT_NR_BITS);
1033 		ret = 0;
1034 	}
1035 	mutex_unlock(&kvm->lock);
1036 	return ret;
1037 }
1038 
1039 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1040 					  struct kvm_device_attr *attr)
1041 {
1042 	/*
1043 	 * Once supported by kernel + hw, we have to store the subfunctions
1044 	 * in kvm->arch and remember that user space configured them.
1045 	 */
1046 	return -ENXIO;
1047 }
1048 
1049 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1050 {
1051 	int ret = -ENXIO;
1052 
1053 	switch (attr->attr) {
1054 	case KVM_S390_VM_CPU_PROCESSOR:
1055 		ret = kvm_s390_set_processor(kvm, attr);
1056 		break;
1057 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1058 		ret = kvm_s390_set_processor_feat(kvm, attr);
1059 		break;
1060 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1061 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
1062 		break;
1063 	}
1064 	return ret;
1065 }
1066 
1067 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1068 {
1069 	struct kvm_s390_vm_cpu_processor *proc;
1070 	int ret = 0;
1071 
1072 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1073 	if (!proc) {
1074 		ret = -ENOMEM;
1075 		goto out;
1076 	}
1077 	proc->cpuid = kvm->arch.model.cpuid;
1078 	proc->ibc = kvm->arch.model.ibc;
1079 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1080 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1081 	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1082 		 kvm->arch.model.ibc,
1083 		 kvm->arch.model.cpuid);
1084 	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1085 		 kvm->arch.model.fac_list[0],
1086 		 kvm->arch.model.fac_list[1],
1087 		 kvm->arch.model.fac_list[2]);
1088 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1089 		ret = -EFAULT;
1090 	kfree(proc);
1091 out:
1092 	return ret;
1093 }
1094 
1095 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1096 {
1097 	struct kvm_s390_vm_cpu_machine *mach;
1098 	int ret = 0;
1099 
1100 	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1101 	if (!mach) {
1102 		ret = -ENOMEM;
1103 		goto out;
1104 	}
1105 	get_cpu_id((struct cpuid *) &mach->cpuid);
1106 	mach->ibc = sclp.ibc;
1107 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1108 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1109 	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1110 	       sizeof(S390_lowcore.stfle_fac_list));
1111 	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1112 		 kvm->arch.model.ibc,
1113 		 kvm->arch.model.cpuid);
1114 	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1115 		 mach->fac_mask[0],
1116 		 mach->fac_mask[1],
1117 		 mach->fac_mask[2]);
1118 	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1119 		 mach->fac_list[0],
1120 		 mach->fac_list[1],
1121 		 mach->fac_list[2]);
1122 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1123 		ret = -EFAULT;
1124 	kfree(mach);
1125 out:
1126 	return ret;
1127 }
1128 
1129 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1130 				       struct kvm_device_attr *attr)
1131 {
1132 	struct kvm_s390_vm_cpu_feat data;
1133 
1134 	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1135 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1136 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1137 		return -EFAULT;
1138 	return 0;
1139 }
1140 
1141 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1142 				     struct kvm_device_attr *attr)
1143 {
1144 	struct kvm_s390_vm_cpu_feat data;
1145 
1146 	bitmap_copy((unsigned long *) data.feat,
1147 		    kvm_s390_available_cpu_feat,
1148 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1149 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1150 		return -EFAULT;
1151 	return 0;
1152 }
1153 
1154 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1155 					  struct kvm_device_attr *attr)
1156 {
1157 	/*
1158 	 * Once we can actually configure subfunctions (kernel + hw support),
1159 	 * we have to check if they were already set by user space, if so copy
1160 	 * them from kvm->arch.
1161 	 */
1162 	return -ENXIO;
1163 }
1164 
1165 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1166 					struct kvm_device_attr *attr)
1167 {
1168 	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1169 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1170 		return -EFAULT;
1171 	return 0;
1172 }
1173 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1174 {
1175 	int ret = -ENXIO;
1176 
1177 	switch (attr->attr) {
1178 	case KVM_S390_VM_CPU_PROCESSOR:
1179 		ret = kvm_s390_get_processor(kvm, attr);
1180 		break;
1181 	case KVM_S390_VM_CPU_MACHINE:
1182 		ret = kvm_s390_get_machine(kvm, attr);
1183 		break;
1184 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1185 		ret = kvm_s390_get_processor_feat(kvm, attr);
1186 		break;
1187 	case KVM_S390_VM_CPU_MACHINE_FEAT:
1188 		ret = kvm_s390_get_machine_feat(kvm, attr);
1189 		break;
1190 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1191 		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1192 		break;
1193 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1194 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1195 		break;
1196 	}
1197 	return ret;
1198 }
1199 
1200 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1201 {
1202 	int ret;
1203 
1204 	switch (attr->group) {
1205 	case KVM_S390_VM_MEM_CTRL:
1206 		ret = kvm_s390_set_mem_control(kvm, attr);
1207 		break;
1208 	case KVM_S390_VM_TOD:
1209 		ret = kvm_s390_set_tod(kvm, attr);
1210 		break;
1211 	case KVM_S390_VM_CPU_MODEL:
1212 		ret = kvm_s390_set_cpu_model(kvm, attr);
1213 		break;
1214 	case KVM_S390_VM_CRYPTO:
1215 		ret = kvm_s390_vm_set_crypto(kvm, attr);
1216 		break;
1217 	case KVM_S390_VM_MIGRATION:
1218 		ret = kvm_s390_vm_set_migration(kvm, attr);
1219 		break;
1220 	default:
1221 		ret = -ENXIO;
1222 		break;
1223 	}
1224 
1225 	return ret;
1226 }
1227 
1228 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1229 {
1230 	int ret;
1231 
1232 	switch (attr->group) {
1233 	case KVM_S390_VM_MEM_CTRL:
1234 		ret = kvm_s390_get_mem_control(kvm, attr);
1235 		break;
1236 	case KVM_S390_VM_TOD:
1237 		ret = kvm_s390_get_tod(kvm, attr);
1238 		break;
1239 	case KVM_S390_VM_CPU_MODEL:
1240 		ret = kvm_s390_get_cpu_model(kvm, attr);
1241 		break;
1242 	case KVM_S390_VM_MIGRATION:
1243 		ret = kvm_s390_vm_get_migration(kvm, attr);
1244 		break;
1245 	default:
1246 		ret = -ENXIO;
1247 		break;
1248 	}
1249 
1250 	return ret;
1251 }
1252 
1253 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1254 {
1255 	int ret;
1256 
1257 	switch (attr->group) {
1258 	case KVM_S390_VM_MEM_CTRL:
1259 		switch (attr->attr) {
1260 		case KVM_S390_VM_MEM_ENABLE_CMMA:
1261 		case KVM_S390_VM_MEM_CLR_CMMA:
1262 			ret = sclp.has_cmma ? 0 : -ENXIO;
1263 			break;
1264 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1265 			ret = 0;
1266 			break;
1267 		default:
1268 			ret = -ENXIO;
1269 			break;
1270 		}
1271 		break;
1272 	case KVM_S390_VM_TOD:
1273 		switch (attr->attr) {
1274 		case KVM_S390_VM_TOD_LOW:
1275 		case KVM_S390_VM_TOD_HIGH:
1276 			ret = 0;
1277 			break;
1278 		default:
1279 			ret = -ENXIO;
1280 			break;
1281 		}
1282 		break;
1283 	case KVM_S390_VM_CPU_MODEL:
1284 		switch (attr->attr) {
1285 		case KVM_S390_VM_CPU_PROCESSOR:
1286 		case KVM_S390_VM_CPU_MACHINE:
1287 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1288 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1289 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1290 			ret = 0;
1291 			break;
1292 		/* configuring subfunctions is not supported yet */
1293 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1294 		default:
1295 			ret = -ENXIO;
1296 			break;
1297 		}
1298 		break;
1299 	case KVM_S390_VM_CRYPTO:
1300 		switch (attr->attr) {
1301 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1302 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1303 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1304 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1305 			ret = 0;
1306 			break;
1307 		default:
1308 			ret = -ENXIO;
1309 			break;
1310 		}
1311 		break;
1312 	case KVM_S390_VM_MIGRATION:
1313 		ret = 0;
1314 		break;
1315 	default:
1316 		ret = -ENXIO;
1317 		break;
1318 	}
1319 
1320 	return ret;
1321 }
1322 
1323 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1324 {
1325 	uint8_t *keys;
1326 	uint64_t hva;
1327 	int i, r = 0;
1328 
1329 	if (args->flags != 0)
1330 		return -EINVAL;
1331 
1332 	/* Is this guest using storage keys? */
1333 	if (!mm_use_skey(current->mm))
1334 		return KVM_S390_GET_SKEYS_NONE;
1335 
1336 	/* Enforce sane limit on memory allocation */
1337 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1338 		return -EINVAL;
1339 
1340 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1341 	if (!keys)
1342 		return -ENOMEM;
1343 
1344 	down_read(&current->mm->mmap_sem);
1345 	for (i = 0; i < args->count; i++) {
1346 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1347 		if (kvm_is_error_hva(hva)) {
1348 			r = -EFAULT;
1349 			break;
1350 		}
1351 
1352 		r = get_guest_storage_key(current->mm, hva, &keys[i]);
1353 		if (r)
1354 			break;
1355 	}
1356 	up_read(&current->mm->mmap_sem);
1357 
1358 	if (!r) {
1359 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1360 				 sizeof(uint8_t) * args->count);
1361 		if (r)
1362 			r = -EFAULT;
1363 	}
1364 
1365 	kvfree(keys);
1366 	return r;
1367 }
1368 
1369 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1370 {
1371 	uint8_t *keys;
1372 	uint64_t hva;
1373 	int i, r = 0;
1374 
1375 	if (args->flags != 0)
1376 		return -EINVAL;
1377 
1378 	/* Enforce sane limit on memory allocation */
1379 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1380 		return -EINVAL;
1381 
1382 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1383 	if (!keys)
1384 		return -ENOMEM;
1385 
1386 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1387 			   sizeof(uint8_t) * args->count);
1388 	if (r) {
1389 		r = -EFAULT;
1390 		goto out;
1391 	}
1392 
1393 	/* Enable storage key handling for the guest */
1394 	r = s390_enable_skey();
1395 	if (r)
1396 		goto out;
1397 
1398 	down_read(&current->mm->mmap_sem);
1399 	for (i = 0; i < args->count; i++) {
1400 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1401 		if (kvm_is_error_hva(hva)) {
1402 			r = -EFAULT;
1403 			break;
1404 		}
1405 
1406 		/* Lowest order bit is reserved */
1407 		if (keys[i] & 0x01) {
1408 			r = -EINVAL;
1409 			break;
1410 		}
1411 
1412 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1413 		if (r)
1414 			break;
1415 	}
1416 	up_read(&current->mm->mmap_sem);
1417 out:
1418 	kvfree(keys);
1419 	return r;
1420 }
1421 
/*
 * Base address and length must be sent at the start of each block, therefore
 * it's cheaper to send some clean data, as long as it's less than the size of
 * two longs.
 *
 * kvm_s390_get_cmma_bits() stops filling a block once the next dirty page
 * is further away than this distance.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/*
 * Upper bound on the number of CMMA values handled per request; it reuses
 * the storage key limit for consistency.
 */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1430 
1431 /*
1432  * This function searches for the next page with dirty CMMA attributes, and
1433  * saves the attributes in the buffer up to either the end of the buffer or
1434  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1435  * no trailing clean bytes are saved.
1436  * In case no dirty bits were found, or if CMMA was not enabled or used, the
1437  * output buffer will indicate 0 as length.
1438  */
1439 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1440 				  struct kvm_s390_cmma_log *args)
1441 {
1442 	struct kvm_s390_migration_state *s = kvm->arch.migration_state;
1443 	unsigned long bufsize, hva, pgstev, i, next, cur;
1444 	int srcu_idx, peek, r = 0, rr;
1445 	u8 *res;
1446 
1447 	cur = args->start_gfn;
1448 	i = next = pgstev = 0;
1449 
1450 	if (unlikely(!kvm->arch.use_cmma))
1451 		return -ENXIO;
1452 	/* Invalid/unsupported flags were specified */
1453 	if (args->flags & ~KVM_S390_CMMA_PEEK)
1454 		return -EINVAL;
1455 	/* Migration mode query, and we are not doing a migration */
1456 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1457 	if (!peek && !s)
1458 		return -EINVAL;
1459 	/* CMMA is disabled or was not used, or the buffer has length zero */
1460 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1461 	if (!bufsize || !kvm->mm->context.use_cmma) {
1462 		memset(args, 0, sizeof(*args));
1463 		return 0;
1464 	}
1465 
1466 	if (!peek) {
1467 		/* We are not peeking, and there are no dirty pages */
1468 		if (!atomic64_read(&s->dirty_pages)) {
1469 			memset(args, 0, sizeof(*args));
1470 			return 0;
1471 		}
1472 		cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
1473 				    args->start_gfn);
1474 		if (cur >= s->bitmap_size)	/* nothing found, loop back */
1475 			cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
1476 		if (cur >= s->bitmap_size) {	/* again! (very unlikely) */
1477 			memset(args, 0, sizeof(*args));
1478 			return 0;
1479 		}
1480 		next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
1481 	}
1482 
1483 	res = vmalloc(bufsize);
1484 	if (!res)
1485 		return -ENOMEM;
1486 
1487 	args->start_gfn = cur;
1488 
1489 	down_read(&kvm->mm->mmap_sem);
1490 	srcu_idx = srcu_read_lock(&kvm->srcu);
1491 	while (i < bufsize) {
1492 		hva = gfn_to_hva(kvm, cur);
1493 		if (kvm_is_error_hva(hva)) {
1494 			r = -EFAULT;
1495 			break;
1496 		}
1497 		/* decrement only if we actually flipped the bit to 0 */
1498 		if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
1499 			atomic64_dec(&s->dirty_pages);
1500 		r = get_pgste(kvm->mm, hva, &pgstev);
1501 		if (r < 0)
1502 			pgstev = 0;
1503 		/* save the value */
1504 		res[i++] = (pgstev >> 24) & 0x3;
1505 		/*
1506 		 * if the next bit is too far away, stop.
1507 		 * if we reached the previous "next", find the next one
1508 		 */
1509 		if (!peek) {
1510 			if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
1511 				break;
1512 			if (cur == next)
1513 				next = find_next_bit(s->pgste_bitmap,
1514 						     s->bitmap_size, cur + 1);
1515 		/* reached the end of the bitmap or of the buffer, stop */
1516 			if ((next >= s->bitmap_size) ||
1517 			    (next >= args->start_gfn + bufsize))
1518 				break;
1519 		}
1520 		cur++;
1521 	}
1522 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1523 	up_read(&kvm->mm->mmap_sem);
1524 	args->count = i;
1525 	args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
1526 
1527 	rr = copy_to_user((void __user *)args->values, res, args->count);
1528 	if (rr)
1529 		r = -EFAULT;
1530 
1531 	vfree(res);
1532 	return r;
1533 }
1534 
1535 /*
1536  * This function sets the CMMA attributes for the given pages. If the input
1537  * buffer has zero length, no action is taken, otherwise the attributes are
1538  * set and the mm->context.use_cmma flag is set.
1539  */
1540 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1541 				  const struct kvm_s390_cmma_log *args)
1542 {
1543 	unsigned long hva, mask, pgstev, i;
1544 	uint8_t *bits;
1545 	int srcu_idx, r = 0;
1546 
1547 	mask = args->mask;
1548 
1549 	if (!kvm->arch.use_cmma)
1550 		return -ENXIO;
1551 	/* invalid/unsupported flags */
1552 	if (args->flags != 0)
1553 		return -EINVAL;
1554 	/* Enforce sane limit on memory allocation */
1555 	if (args->count > KVM_S390_CMMA_SIZE_MAX)
1556 		return -EINVAL;
1557 	/* Nothing to do */
1558 	if (args->count == 0)
1559 		return 0;
1560 
1561 	bits = vmalloc(sizeof(*bits) * args->count);
1562 	if (!bits)
1563 		return -ENOMEM;
1564 
1565 	r = copy_from_user(bits, (void __user *)args->values, args->count);
1566 	if (r) {
1567 		r = -EFAULT;
1568 		goto out;
1569 	}
1570 
1571 	down_read(&kvm->mm->mmap_sem);
1572 	srcu_idx = srcu_read_lock(&kvm->srcu);
1573 	for (i = 0; i < args->count; i++) {
1574 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1575 		if (kvm_is_error_hva(hva)) {
1576 			r = -EFAULT;
1577 			break;
1578 		}
1579 
1580 		pgstev = bits[i];
1581 		pgstev = pgstev << 24;
1582 		mask &= _PGSTE_GPS_USAGE_MASK;
1583 		set_pgste_bits(kvm->mm, hva, mask, pgstev);
1584 	}
1585 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1586 	up_read(&kvm->mm->mmap_sem);
1587 
1588 	if (!kvm->mm->context.use_cmma) {
1589 		down_write(&kvm->mm->mmap_sem);
1590 		kvm->mm->context.use_cmma = 1;
1591 		up_write(&kvm->mm->mmap_sem);
1592 	}
1593 out:
1594 	vfree(bits);
1595 	return r;
1596 }
1597 
1598 long kvm_arch_vm_ioctl(struct file *filp,
1599 		       unsigned int ioctl, unsigned long arg)
1600 {
1601 	struct kvm *kvm = filp->private_data;
1602 	void __user *argp = (void __user *)arg;
1603 	struct kvm_device_attr attr;
1604 	int r;
1605 
1606 	switch (ioctl) {
1607 	case KVM_S390_INTERRUPT: {
1608 		struct kvm_s390_interrupt s390int;
1609 
1610 		r = -EFAULT;
1611 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
1612 			break;
1613 		r = kvm_s390_inject_vm(kvm, &s390int);
1614 		break;
1615 	}
1616 	case KVM_ENABLE_CAP: {
1617 		struct kvm_enable_cap cap;
1618 		r = -EFAULT;
1619 		if (copy_from_user(&cap, argp, sizeof(cap)))
1620 			break;
1621 		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1622 		break;
1623 	}
1624 	case KVM_CREATE_IRQCHIP: {
1625 		struct kvm_irq_routing_entry routing;
1626 
1627 		r = -EINVAL;
1628 		if (kvm->arch.use_irqchip) {
1629 			/* Set up dummy routing. */
1630 			memset(&routing, 0, sizeof(routing));
1631 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1632 		}
1633 		break;
1634 	}
1635 	case KVM_SET_DEVICE_ATTR: {
1636 		r = -EFAULT;
1637 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1638 			break;
1639 		r = kvm_s390_vm_set_attr(kvm, &attr);
1640 		break;
1641 	}
1642 	case KVM_GET_DEVICE_ATTR: {
1643 		r = -EFAULT;
1644 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1645 			break;
1646 		r = kvm_s390_vm_get_attr(kvm, &attr);
1647 		break;
1648 	}
1649 	case KVM_HAS_DEVICE_ATTR: {
1650 		r = -EFAULT;
1651 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1652 			break;
1653 		r = kvm_s390_vm_has_attr(kvm, &attr);
1654 		break;
1655 	}
1656 	case KVM_S390_GET_SKEYS: {
1657 		struct kvm_s390_skeys args;
1658 
1659 		r = -EFAULT;
1660 		if (copy_from_user(&args, argp,
1661 				   sizeof(struct kvm_s390_skeys)))
1662 			break;
1663 		r = kvm_s390_get_skeys(kvm, &args);
1664 		break;
1665 	}
1666 	case KVM_S390_SET_SKEYS: {
1667 		struct kvm_s390_skeys args;
1668 
1669 		r = -EFAULT;
1670 		if (copy_from_user(&args, argp,
1671 				   sizeof(struct kvm_s390_skeys)))
1672 			break;
1673 		r = kvm_s390_set_skeys(kvm, &args);
1674 		break;
1675 	}
1676 	case KVM_S390_GET_CMMA_BITS: {
1677 		struct kvm_s390_cmma_log args;
1678 
1679 		r = -EFAULT;
1680 		if (copy_from_user(&args, argp, sizeof(args)))
1681 			break;
1682 		r = kvm_s390_get_cmma_bits(kvm, &args);
1683 		if (!r) {
1684 			r = copy_to_user(argp, &args, sizeof(args));
1685 			if (r)
1686 				r = -EFAULT;
1687 		}
1688 		break;
1689 	}
1690 	case KVM_S390_SET_CMMA_BITS: {
1691 		struct kvm_s390_cmma_log args;
1692 
1693 		r = -EFAULT;
1694 		if (copy_from_user(&args, argp, sizeof(args)))
1695 			break;
1696 		r = kvm_s390_set_cmma_bits(kvm, &args);
1697 		break;
1698 	}
1699 	default:
1700 		r = -ENOTTY;
1701 	}
1702 
1703 	return r;
1704 }
1705 
1706 static int kvm_s390_query_ap_config(u8 *config)
1707 {
1708 	u32 fcn_code = 0x04000000UL;
1709 	u32 cc = 0;
1710 
1711 	memset(config, 0, 128);
1712 	asm volatile(
1713 		"lgr 0,%1\n"
1714 		"lgr 2,%2\n"
1715 		".long 0xb2af0000\n"		/* PQAP(QCI) */
1716 		"0: ipm %0\n"
1717 		"srl %0,28\n"
1718 		"1:\n"
1719 		EX_TABLE(0b, 1b)
1720 		: "+r" (cc)
1721 		: "r" (fcn_code), "r" (config)
1722 		: "cc", "0", "2", "memory"
1723 	);
1724 
1725 	return cc;
1726 }
1727 
1728 static int kvm_s390_apxa_installed(void)
1729 {
1730 	u8 config[128];
1731 	int cc;
1732 
1733 	if (test_facility(12)) {
1734 		cc = kvm_s390_query_ap_config(config);
1735 
1736 		if (cc)
1737 			pr_err("PQAP(QCI) failed with cc=%d", cc);
1738 		else
1739 			return config[0] & 0x40;
1740 	}
1741 
1742 	return 0;
1743 }
1744 
1745 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1746 {
1747 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1748 
1749 	if (kvm_s390_apxa_installed())
1750 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1751 	else
1752 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1753 }
1754 
1755 static u64 kvm_s390_get_initial_cpuid(void)
1756 {
1757 	struct cpuid cpuid;
1758 
1759 	get_cpu_id(&cpuid);
1760 	cpuid.version = 0xff;
1761 	return *((u64 *) &cpuid);
1762 }
1763 
1764 static void kvm_s390_crypto_init(struct kvm *kvm)
1765 {
1766 	if (!test_kvm_facility(kvm, 76))
1767 		return;
1768 
1769 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1770 	kvm_s390_set_crycb_format(kvm);
1771 
1772 	/* Enable AES/DEA protected key functions by default */
1773 	kvm->arch.crypto.aes_kw = 1;
1774 	kvm->arch.crypto.dea_kw = 1;
1775 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1776 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1777 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1778 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1779 }
1780 
1781 static void sca_dispose(struct kvm *kvm)
1782 {
1783 	if (kvm->arch.use_esca)
1784 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1785 	else
1786 		free_page((unsigned long)(kvm->arch.sca));
1787 	kvm->arch.sca = NULL;
1788 }
1789 
1790 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1791 {
1792 	gfp_t alloc_flags = GFP_KERNEL;
1793 	int i, rc;
1794 	char debug_name[16];
1795 	static unsigned long sca_offset;
1796 
1797 	rc = -EINVAL;
1798 #ifdef CONFIG_KVM_S390_UCONTROL
1799 	if (type & ~KVM_VM_S390_UCONTROL)
1800 		goto out_err;
1801 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1802 		goto out_err;
1803 #else
1804 	if (type)
1805 		goto out_err;
1806 #endif
1807 
1808 	rc = s390_enable_sie();
1809 	if (rc)
1810 		goto out_err;
1811 
1812 	rc = -ENOMEM;
1813 
1814 	ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1815 
1816 	kvm->arch.use_esca = 0; /* start with basic SCA */
1817 	if (!sclp.has_64bscao)
1818 		alloc_flags |= GFP_DMA;
1819 	rwlock_init(&kvm->arch.sca_lock);
1820 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1821 	if (!kvm->arch.sca)
1822 		goto out_err;
1823 	spin_lock(&kvm_lock);
1824 	sca_offset += 16;
1825 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1826 		sca_offset = 0;
1827 	kvm->arch.sca = (struct bsca_block *)
1828 			((char *) kvm->arch.sca + sca_offset);
1829 	spin_unlock(&kvm_lock);
1830 
1831 	sprintf(debug_name, "kvm-%u", current->pid);
1832 
1833 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1834 	if (!kvm->arch.dbf)
1835 		goto out_err;
1836 
1837 	kvm->arch.sie_page2 =
1838 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1839 	if (!kvm->arch.sie_page2)
1840 		goto out_err;
1841 
1842 	/* Populate the facility mask initially. */
1843 	memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1844 	       sizeof(S390_lowcore.stfle_fac_list));
1845 	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1846 		if (i < kvm_s390_fac_list_mask_size())
1847 			kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1848 		else
1849 			kvm->arch.model.fac_mask[i] = 0UL;
1850 	}
1851 
1852 	/* Populate the facility list initially. */
1853 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1854 	memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1855 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1856 
1857 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
1858 	set_kvm_facility(kvm->arch.model.fac_list, 74);
1859 
1860 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1861 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1862 
1863 	kvm_s390_crypto_init(kvm);
1864 
1865 	mutex_init(&kvm->arch.float_int.ais_lock);
1866 	kvm->arch.float_int.simm = 0;
1867 	kvm->arch.float_int.nimm = 0;
1868 	spin_lock_init(&kvm->arch.float_int.lock);
1869 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
1870 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1871 	init_waitqueue_head(&kvm->arch.ipte_wq);
1872 	mutex_init(&kvm->arch.ipte_mutex);
1873 
1874 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1875 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
1876 
1877 	if (type & KVM_VM_S390_UCONTROL) {
1878 		kvm->arch.gmap = NULL;
1879 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1880 	} else {
1881 		if (sclp.hamax == U64_MAX)
1882 			kvm->arch.mem_limit = TASK_SIZE_MAX;
1883 		else
1884 			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
1885 						    sclp.hamax + 1);
1886 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1887 		if (!kvm->arch.gmap)
1888 			goto out_err;
1889 		kvm->arch.gmap->private = kvm;
1890 		kvm->arch.gmap->pfault_enabled = 0;
1891 	}
1892 
1893 	kvm->arch.css_support = 0;
1894 	kvm->arch.use_irqchip = 0;
1895 	kvm->arch.epoch = 0;
1896 
1897 	spin_lock_init(&kvm->arch.start_stop_lock);
1898 	kvm_s390_vsie_init(kvm);
1899 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1900 
1901 	return 0;
1902 out_err:
1903 	free_page((unsigned long)kvm->arch.sie_page2);
1904 	debug_unregister(kvm->arch.dbf);
1905 	sca_dispose(kvm);
1906 	KVM_EVENT(3, "creation of vm failed: %d", rc);
1907 	return rc;
1908 }
1909 
/* s390 provides no per-VCPU debugfs entries, so this always returns false. */
bool kvm_arch_has_vcpu_debugfs(void)
{
	return false;
}
1914 
/* Nothing to create for @vcpu on s390; always returns 0. */
int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
	return 0;
}
1919 
1920 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1921 {
1922 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1923 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1924 	kvm_s390_clear_local_irqs(vcpu);
1925 	kvm_clear_async_pf_completion_queue(vcpu);
1926 	if (!kvm_is_ucontrol(vcpu->kvm))
1927 		sca_del_vcpu(vcpu);
1928 
1929 	if (kvm_is_ucontrol(vcpu->kvm))
1930 		gmap_remove(vcpu->arch.gmap);
1931 
1932 	if (vcpu->kvm->arch.use_cmma)
1933 		kvm_s390_vcpu_unsetup_cmma(vcpu);
1934 	free_page((unsigned long)(vcpu->arch.sie_block));
1935 
1936 	kvm_vcpu_uninit(vcpu);
1937 	kmem_cache_free(kvm_vcpu_cache, vcpu);
1938 }
1939 
1940 static void kvm_free_vcpus(struct kvm *kvm)
1941 {
1942 	unsigned int i;
1943 	struct kvm_vcpu *vcpu;
1944 
1945 	kvm_for_each_vcpu(i, vcpu, kvm)
1946 		kvm_arch_vcpu_destroy(vcpu);
1947 
1948 	mutex_lock(&kvm->lock);
1949 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1950 		kvm->vcpus[i] = NULL;
1951 
1952 	atomic_set(&kvm->online_vcpus, 0);
1953 	mutex_unlock(&kvm->lock);
1954 }
1955 
1956 void kvm_arch_destroy_vm(struct kvm *kvm)
1957 {
1958 	kvm_free_vcpus(kvm);
1959 	sca_dispose(kvm);
1960 	debug_unregister(kvm->arch.dbf);
1961 	free_page((unsigned long)kvm->arch.sie_page2);
1962 	if (!kvm_is_ucontrol(kvm))
1963 		gmap_remove(kvm->arch.gmap);
1964 	kvm_s390_destroy_adapters(kvm);
1965 	kvm_s390_clear_float_irqs(kvm);
1966 	kvm_s390_vsie_destroy(kvm);
1967 	if (kvm->arch.migration_state) {
1968 		vfree(kvm->arch.migration_state->pgste_bitmap);
1969 		kfree(kvm->arch.migration_state);
1970 	}
1971 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1972 }
1973 
1974 /* Section: vcpu related */
1975 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1976 {
1977 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
1978 	if (!vcpu->arch.gmap)
1979 		return -ENOMEM;
1980 	vcpu->arch.gmap->private = vcpu->kvm;
1981 
1982 	return 0;
1983 }
1984 
1985 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1986 {
1987 	if (!kvm_s390_use_sca_entries())
1988 		return;
1989 	read_lock(&vcpu->kvm->arch.sca_lock);
1990 	if (vcpu->kvm->arch.use_esca) {
1991 		struct esca_block *sca = vcpu->kvm->arch.sca;
1992 
1993 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1994 		sca->cpu[vcpu->vcpu_id].sda = 0;
1995 	} else {
1996 		struct bsca_block *sca = vcpu->kvm->arch.sca;
1997 
1998 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1999 		sca->cpu[vcpu->vcpu_id].sda = 0;
2000 	}
2001 	read_unlock(&vcpu->kvm->arch.sca_lock);
2002 }
2003 
2004 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2005 {
2006 	if (!kvm_s390_use_sca_entries()) {
2007 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2008 
2009 		/* we still need the basic sca for the ipte control */
2010 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2011 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2012 	}
2013 	read_lock(&vcpu->kvm->arch.sca_lock);
2014 	if (vcpu->kvm->arch.use_esca) {
2015 		struct esca_block *sca = vcpu->kvm->arch.sca;
2016 
2017 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2018 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2019 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2020 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2021 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2022 	} else {
2023 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2024 
2025 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2026 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2027 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2028 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2029 	}
2030 	read_unlock(&vcpu->kvm->arch.sca_lock);
2031 }
2032 
2033 /* Basic SCA to Extended SCA data copy routines */
2034 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2035 {
2036 	d->sda = s->sda;
2037 	d->sigp_ctrl.c = s->sigp_ctrl.c;
2038 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2039 }
2040 
2041 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2042 {
2043 	int i;
2044 
2045 	d->ipte_control = s->ipte_control;
2046 	d->mcn[0] = s->mcn;
2047 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2048 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2049 }
2050 
2051 static int sca_switch_to_extended(struct kvm *kvm)
2052 {
2053 	struct bsca_block *old_sca = kvm->arch.sca;
2054 	struct esca_block *new_sca;
2055 	struct kvm_vcpu *vcpu;
2056 	unsigned int vcpu_idx;
2057 	u32 scaol, scaoh;
2058 
2059 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2060 	if (!new_sca)
2061 		return -ENOMEM;
2062 
2063 	scaoh = (u32)((u64)(new_sca) >> 32);
2064 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
2065 
2066 	kvm_s390_vcpu_block_all(kvm);
2067 	write_lock(&kvm->arch.sca_lock);
2068 
2069 	sca_copy_b_to_e(new_sca, old_sca);
2070 
2071 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2072 		vcpu->arch.sie_block->scaoh = scaoh;
2073 		vcpu->arch.sie_block->scaol = scaol;
2074 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2075 	}
2076 	kvm->arch.sca = new_sca;
2077 	kvm->arch.use_esca = 1;
2078 
2079 	write_unlock(&kvm->arch.sca_lock);
2080 	kvm_s390_vcpu_unblock_all(kvm);
2081 
2082 	free_page((unsigned long)old_sca);
2083 
2084 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2085 		 old_sca, kvm->arch.sca);
2086 	return 0;
2087 }
2088 
2089 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2090 {
2091 	int rc;
2092 
2093 	if (!kvm_s390_use_sca_entries()) {
2094 		if (id < KVM_MAX_VCPUS)
2095 			return true;
2096 		return false;
2097 	}
2098 	if (id < KVM_S390_BSCA_CPU_SLOTS)
2099 		return true;
2100 	if (!sclp.has_esca || !sclp.has_64bscao)
2101 		return false;
2102 
2103 	mutex_lock(&kvm->lock);
2104 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2105 	mutex_unlock(&kvm->lock);
2106 
2107 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2108 }
2109 
2110 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2111 {
2112 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2113 	kvm_clear_async_pf_completion_queue(vcpu);
2114 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2115 				    KVM_SYNC_GPRS |
2116 				    KVM_SYNC_ACRS |
2117 				    KVM_SYNC_CRS |
2118 				    KVM_SYNC_ARCH0 |
2119 				    KVM_SYNC_PFAULT;
2120 	kvm_s390_set_prefix(vcpu, 0);
2121 	if (test_kvm_facility(vcpu->kvm, 64))
2122 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2123 	if (test_kvm_facility(vcpu->kvm, 133))
2124 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2125 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
2126 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2127 	 */
2128 	if (MACHINE_HAS_VX)
2129 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2130 	else
2131 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2132 
2133 	if (kvm_is_ucontrol(vcpu->kvm))
2134 		return __kvm_ucontrol_vcpu_init(vcpu);
2135 
2136 	return 0;
2137 }
2138 
2139 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2140 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2141 {
2142 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2143 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2144 	vcpu->arch.cputm_start = get_tod_clock_fast();
2145 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2146 }
2147 
2148 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2149 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2150 {
2151 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2152 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2153 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2154 	vcpu->arch.cputm_start = 0;
2155 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2156 }
2157 
2158 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2159 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2160 {
2161 	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2162 	vcpu->arch.cputm_enabled = true;
2163 	__start_cpu_timer_accounting(vcpu);
2164 }
2165 
2166 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2167 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2168 {
2169 	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2170 	__stop_cpu_timer_accounting(vcpu);
2171 	vcpu->arch.cputm_enabled = false;
2172 }
2173 
/* Preemption-safe wrapper around __enable_cpu_timer_accounting(). */
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* keep TOD sync and vcpu_load/put away while accounting changes */
	preempt_disable();
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
2180 
/* Preemption-safe wrapper around __disable_cpu_timer_accounting(). */
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* keep TOD sync and vcpu_load/put away while accounting changes */
	preempt_disable();
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
2187 
2188 /* set the cpu timer - may only be called from the VCPU thread itself */
2189 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2190 {
2191 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2192 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2193 	if (vcpu->arch.cputm_enabled)
2194 		vcpu->arch.cputm_start = get_tod_clock_fast();
2195 	vcpu->arch.sie_block->cputm = cputm;
2196 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2197 	preempt_enable();
2198 }
2199 
2200 /* update and get the cpu timer - can also be called from other VCPU threads */
2201 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2202 {
2203 	unsigned int seq;
2204 	__u64 value;
2205 
2206 	if (unlikely(!vcpu->arch.cputm_enabled))
2207 		return vcpu->arch.sie_block->cputm;
2208 
2209 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2210 	do {
2211 		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2212 		/*
2213 		 * If the writer would ever execute a read in the critical
2214 		 * section, e.g. in irq context, we have a deadlock.
2215 		 */
2216 		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2217 		value = vcpu->arch.sie_block->cputm;
2218 		/* if cputm_start is 0, accounting is being started/stopped */
2219 		if (likely(vcpu->arch.cputm_start))
2220 			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2221 	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2222 	preempt_enable();
2223 	return value;
2224 }
2225 
2226 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2227 {
2228 
2229 	gmap_enable(vcpu->arch.enabled_gmap);
2230 	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2231 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2232 		__start_cpu_timer_accounting(vcpu);
2233 	vcpu->cpu = cpu;
2234 }
2235 
2236 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2237 {
2238 	vcpu->cpu = -1;
2239 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2240 		__stop_cpu_timer_accounting(vcpu);
2241 	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2242 	vcpu->arch.enabled_gmap = gmap_get_enabled();
2243 	gmap_disable(vcpu->arch.enabled_gmap);
2244 
2245 }
2246 
2247 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2248 {
2249 	/* this equals initial cpu reset in pop, but we don't switch to ESA */
2250 	vcpu->arch.sie_block->gpsw.mask = 0UL;
2251 	vcpu->arch.sie_block->gpsw.addr = 0UL;
2252 	kvm_s390_set_prefix(vcpu, 0);
2253 	kvm_s390_set_cpu_timer(vcpu, 0);
2254 	vcpu->arch.sie_block->ckc       = 0UL;
2255 	vcpu->arch.sie_block->todpr     = 0;
2256 	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2257 	vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
2258 	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2259 	/* make sure the new fpc will be lazily loaded */
2260 	save_fpu_regs();
2261 	current->thread.fpu.fpc = 0;
2262 	vcpu->arch.sie_block->gbea = 1;
2263 	vcpu->arch.sie_block->pp = 0;
2264 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2265 	kvm_clear_async_pf_completion_queue(vcpu);
2266 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2267 		kvm_s390_vcpu_stop(vcpu);
2268 	kvm_s390_clear_local_irqs(vcpu);
2269 }
2270 
2271 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2272 {
2273 	mutex_lock(&vcpu->kvm->lock);
2274 	preempt_disable();
2275 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2276 	preempt_enable();
2277 	mutex_unlock(&vcpu->kvm->lock);
2278 	if (!kvm_is_ucontrol(vcpu->kvm)) {
2279 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2280 		sca_add_vcpu(vcpu);
2281 	}
2282 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2283 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2284 	/* make vcpu_load load the right gmap on the first trigger */
2285 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2286 }
2287 
2288 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2289 {
2290 	if (!test_kvm_facility(vcpu->kvm, 76))
2291 		return;
2292 
2293 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2294 
2295 	if (vcpu->kvm->arch.crypto.aes_kw)
2296 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2297 	if (vcpu->kvm->arch.crypto.dea_kw)
2298 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2299 
2300 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2301 }
2302 
2303 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2304 {
2305 	free_page(vcpu->arch.sie_block->cbrlo);
2306 	vcpu->arch.sie_block->cbrlo = 0;
2307 }
2308 
2309 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2310 {
2311 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2312 	if (!vcpu->arch.sie_block->cbrlo)
2313 		return -ENOMEM;
2314 
2315 	vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
2316 	return 0;
2317 }
2318 
2319 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2320 {
2321 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2322 
2323 	vcpu->arch.sie_block->ibc = model->ibc;
2324 	if (test_kvm_facility(vcpu->kvm, 7))
2325 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2326 }
2327 
2328 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2329 {
2330 	int rc = 0;
2331 
2332 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2333 						    CPUSTAT_SM |
2334 						    CPUSTAT_STOPPED);
2335 
2336 	if (test_kvm_facility(vcpu->kvm, 78))
2337 		atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
2338 	else if (test_kvm_facility(vcpu->kvm, 8))
2339 		atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
2340 
2341 	kvm_s390_vcpu_setup_model(vcpu);
2342 
2343 	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2344 	if (MACHINE_HAS_ESOP)
2345 		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2346 	if (test_kvm_facility(vcpu->kvm, 9))
2347 		vcpu->arch.sie_block->ecb |= ECB_SRSI;
2348 	if (test_kvm_facility(vcpu->kvm, 73))
2349 		vcpu->arch.sie_block->ecb |= ECB_TE;
2350 
2351 	if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
2352 		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2353 	if (test_kvm_facility(vcpu->kvm, 130))
2354 		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2355 	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2356 	if (sclp.has_cei)
2357 		vcpu->arch.sie_block->eca |= ECA_CEI;
2358 	if (sclp.has_ib)
2359 		vcpu->arch.sie_block->eca |= ECA_IB;
2360 	if (sclp.has_siif)
2361 		vcpu->arch.sie_block->eca |= ECA_SII;
2362 	if (sclp.has_sigpif)
2363 		vcpu->arch.sie_block->eca |= ECA_SIGPI;
2364 	if (test_kvm_facility(vcpu->kvm, 129)) {
2365 		vcpu->arch.sie_block->eca |= ECA_VX;
2366 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2367 	}
2368 	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2369 					| SDNXC;
2370 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2371 
2372 	if (sclp.has_kss)
2373 		atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
2374 	else
2375 		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2376 
2377 	if (vcpu->kvm->arch.use_cmma) {
2378 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
2379 		if (rc)
2380 			return rc;
2381 	}
2382 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2383 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2384 
2385 	kvm_s390_vcpu_crypto_setup(vcpu);
2386 
2387 	return rc;
2388 }
2389 
2390 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2391 				      unsigned int id)
2392 {
2393 	struct kvm_vcpu *vcpu;
2394 	struct sie_page *sie_page;
2395 	int rc = -EINVAL;
2396 
2397 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2398 		goto out;
2399 
2400 	rc = -ENOMEM;
2401 
2402 	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2403 	if (!vcpu)
2404 		goto out;
2405 
2406 	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2407 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2408 	if (!sie_page)
2409 		goto out_free_cpu;
2410 
2411 	vcpu->arch.sie_block = &sie_page->sie_block;
2412 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2413 
2414 	/* the real guest size will always be smaller than msl */
2415 	vcpu->arch.sie_block->mso = 0;
2416 	vcpu->arch.sie_block->msl = sclp.hamax;
2417 
2418 	vcpu->arch.sie_block->icpua = id;
2419 	spin_lock_init(&vcpu->arch.local_int.lock);
2420 	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2421 	vcpu->arch.local_int.wq = &vcpu->wq;
2422 	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2423 	seqcount_init(&vcpu->arch.cputm_seqcount);
2424 
2425 	rc = kvm_vcpu_init(vcpu, kvm, id);
2426 	if (rc)
2427 		goto out_free_sie_block;
2428 	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2429 		 vcpu->arch.sie_block);
2430 	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2431 
2432 	return vcpu;
2433 out_free_sie_block:
2434 	free_page((unsigned long)(vcpu->arch.sie_block));
2435 out_free_cpu:
2436 	kmem_cache_free(kvm_vcpu_cache, vcpu);
2437 out:
2438 	return ERR_PTR(rc);
2439 }
2440 
/* A VCPU counts as runnable when kvm_s390_vcpu_has_irq() reports an irq. */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	int has_irq = kvm_s390_vcpu_has_irq(vcpu, 0);

	return has_irq;
}
2445 
2446 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2447 {
2448 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2449 	exit_sie(vcpu);
2450 }
2451 
2452 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2453 {
2454 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2455 }
2456 
2457 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2458 {
2459 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2460 	exit_sie(vcpu);
2461 }
2462 
2463 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2464 {
2465 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2466 }
2467 
2468 /*
2469  * Kick a guest cpu out of SIE and wait until SIE is not running.
2470  * If the CPU is not running (e.g. waiting as idle) the function will
2471  * return immediately. */
2472 void exit_sie(struct kvm_vcpu *vcpu)
2473 {
2474 	atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2475 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2476 		cpu_relax();
2477 }
2478 
/*
 * Post request req for the VCPU and kick it out of SIE so that the
 * request is processed synchronously.
 */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	/* the request must be visible before the VCPU is kicked */
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
2485 
2486 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2487 			      unsigned long end)
2488 {
2489 	struct kvm *kvm = gmap->private;
2490 	struct kvm_vcpu *vcpu;
2491 	unsigned long prefix;
2492 	int i;
2493 
2494 	if (gmap_is_shadow(gmap))
2495 		return;
2496 	if (start >= 1UL << 31)
2497 		/* We are only interested in prefix pages */
2498 		return;
2499 	kvm_for_each_vcpu(i, vcpu, kvm) {
2500 		/* match against both prefix pages */
2501 		prefix = kvm_s390_get_prefix(vcpu);
2502 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2503 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2504 				   start, end);
2505 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2506 		}
2507 	}
2508 }
2509 
/*
 * Only exists because kvm common code refers to it; it is never expected
 * to be called, hence the BUG().
 */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	BUG();

	return 0;
}
2516 
2517 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2518 					   struct kvm_one_reg *reg)
2519 {
2520 	int r = -EINVAL;
2521 
2522 	switch (reg->id) {
2523 	case KVM_REG_S390_TODPR:
2524 		r = put_user(vcpu->arch.sie_block->todpr,
2525 			     (u32 __user *)reg->addr);
2526 		break;
2527 	case KVM_REG_S390_EPOCHDIFF:
2528 		r = put_user(vcpu->arch.sie_block->epoch,
2529 			     (u64 __user *)reg->addr);
2530 		break;
2531 	case KVM_REG_S390_CPU_TIMER:
2532 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
2533 			     (u64 __user *)reg->addr);
2534 		break;
2535 	case KVM_REG_S390_CLOCK_COMP:
2536 		r = put_user(vcpu->arch.sie_block->ckc,
2537 			     (u64 __user *)reg->addr);
2538 		break;
2539 	case KVM_REG_S390_PFTOKEN:
2540 		r = put_user(vcpu->arch.pfault_token,
2541 			     (u64 __user *)reg->addr);
2542 		break;
2543 	case KVM_REG_S390_PFCOMPARE:
2544 		r = put_user(vcpu->arch.pfault_compare,
2545 			     (u64 __user *)reg->addr);
2546 		break;
2547 	case KVM_REG_S390_PFSELECT:
2548 		r = put_user(vcpu->arch.pfault_select,
2549 			     (u64 __user *)reg->addr);
2550 		break;
2551 	case KVM_REG_S390_PP:
2552 		r = put_user(vcpu->arch.sie_block->pp,
2553 			     (u64 __user *)reg->addr);
2554 		break;
2555 	case KVM_REG_S390_GBEA:
2556 		r = put_user(vcpu->arch.sie_block->gbea,
2557 			     (u64 __user *)reg->addr);
2558 		break;
2559 	default:
2560 		break;
2561 	}
2562 
2563 	return r;
2564 }
2565 
2566 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2567 					   struct kvm_one_reg *reg)
2568 {
2569 	int r = -EINVAL;
2570 	__u64 val;
2571 
2572 	switch (reg->id) {
2573 	case KVM_REG_S390_TODPR:
2574 		r = get_user(vcpu->arch.sie_block->todpr,
2575 			     (u32 __user *)reg->addr);
2576 		break;
2577 	case KVM_REG_S390_EPOCHDIFF:
2578 		r = get_user(vcpu->arch.sie_block->epoch,
2579 			     (u64 __user *)reg->addr);
2580 		break;
2581 	case KVM_REG_S390_CPU_TIMER:
2582 		r = get_user(val, (u64 __user *)reg->addr);
2583 		if (!r)
2584 			kvm_s390_set_cpu_timer(vcpu, val);
2585 		break;
2586 	case KVM_REG_S390_CLOCK_COMP:
2587 		r = get_user(vcpu->arch.sie_block->ckc,
2588 			     (u64 __user *)reg->addr);
2589 		break;
2590 	case KVM_REG_S390_PFTOKEN:
2591 		r = get_user(vcpu->arch.pfault_token,
2592 			     (u64 __user *)reg->addr);
2593 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2594 			kvm_clear_async_pf_completion_queue(vcpu);
2595 		break;
2596 	case KVM_REG_S390_PFCOMPARE:
2597 		r = get_user(vcpu->arch.pfault_compare,
2598 			     (u64 __user *)reg->addr);
2599 		break;
2600 	case KVM_REG_S390_PFSELECT:
2601 		r = get_user(vcpu->arch.pfault_select,
2602 			     (u64 __user *)reg->addr);
2603 		break;
2604 	case KVM_REG_S390_PP:
2605 		r = get_user(vcpu->arch.sie_block->pp,
2606 			     (u64 __user *)reg->addr);
2607 		break;
2608 	case KVM_REG_S390_GBEA:
2609 		r = get_user(vcpu->arch.sie_block->gbea,
2610 			     (u64 __user *)reg->addr);
2611 		break;
2612 	default:
2613 		break;
2614 	}
2615 
2616 	return r;
2617 }
2618 
/* ioctl wrapper around kvm_s390_vcpu_initial_reset(); always returns 0. */
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);

	return 0;
}
2624 
2625 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2626 {
2627 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2628 	return 0;
2629 }
2630 
2631 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2632 {
2633 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2634 	return 0;
2635 }
2636 
2637 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2638 				  struct kvm_sregs *sregs)
2639 {
2640 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2641 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2642 	return 0;
2643 }
2644 
2645 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2646 				  struct kvm_sregs *sregs)
2647 {
2648 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2649 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2650 	return 0;
2651 }
2652 
2653 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2654 {
2655 	if (test_fp_ctl(fpu->fpc))
2656 		return -EINVAL;
2657 	vcpu->run->s.regs.fpc = fpu->fpc;
2658 	if (MACHINE_HAS_VX)
2659 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2660 				 (freg_t *) fpu->fprs);
2661 	else
2662 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2663 	return 0;
2664 }
2665 
2666 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2667 {
2668 	/* make sure we have the latest values */
2669 	save_fpu_regs();
2670 	if (MACHINE_HAS_VX)
2671 		convert_vx_to_fp((freg_t *) fpu->fprs,
2672 				 (__vector128 *) vcpu->run->s.regs.vrs);
2673 	else
2674 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2675 	fpu->fpc = vcpu->run->s.regs.fpc;
2676 	return 0;
2677 }
2678 
2679 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2680 {
2681 	int rc = 0;
2682 
2683 	if (!is_vcpu_stopped(vcpu))
2684 		rc = -EBUSY;
2685 	else {
2686 		vcpu->run->psw_mask = psw.mask;
2687 		vcpu->run->psw_addr = psw.addr;
2688 	}
2689 	return rc;
2690 }
2691 
2692 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2693 				  struct kvm_translation *tr)
2694 {
2695 	return -EINVAL; /* not implemented yet */
2696 }
2697 
2698 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2699 			      KVM_GUESTDBG_USE_HW_BP | \
2700 			      KVM_GUESTDBG_ENABLE)
2701 
2702 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2703 					struct kvm_guest_debug *dbg)
2704 {
2705 	int rc = 0;
2706 
2707 	vcpu->guest_debug = 0;
2708 	kvm_s390_clear_bp_data(vcpu);
2709 
2710 	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2711 		return -EINVAL;
2712 	if (!sclp.has_gpere)
2713 		return -EINVAL;
2714 
2715 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
2716 		vcpu->guest_debug = dbg->control;
2717 		/* enforce guest PER */
2718 		atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2719 
2720 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2721 			rc = kvm_s390_import_bp_data(vcpu, dbg);
2722 	} else {
2723 		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2724 		vcpu->arch.guestdbg.last_bp = 0;
2725 	}
2726 
2727 	if (rc) {
2728 		vcpu->guest_debug = 0;
2729 		kvm_s390_clear_bp_data(vcpu);
2730 		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2731 	}
2732 
2733 	return rc;
2734 }
2735 
2736 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2737 				    struct kvm_mp_state *mp_state)
2738 {
2739 	/* CHECK_STOP and LOAD are not supported yet */
2740 	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2741 				       KVM_MP_STATE_OPERATING;
2742 }
2743 
2744 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2745 				    struct kvm_mp_state *mp_state)
2746 {
2747 	int rc = 0;
2748 
2749 	/* user space knows about this interface - let it control the state */
2750 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2751 
2752 	switch (mp_state->mp_state) {
2753 	case KVM_MP_STATE_STOPPED:
2754 		kvm_s390_vcpu_stop(vcpu);
2755 		break;
2756 	case KVM_MP_STATE_OPERATING:
2757 		kvm_s390_vcpu_start(vcpu);
2758 		break;
2759 	case KVM_MP_STATE_LOAD:
2760 	case KVM_MP_STATE_CHECK_STOP:
2761 		/* fall through - CHECK_STOP and LOAD are not supported yet */
2762 	default:
2763 		rc = -ENXIO;
2764 	}
2765 
2766 	return rc;
2767 }
2768 
2769 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2770 {
2771 	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2772 }
2773 
2774 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2775 {
2776 retry:
2777 	kvm_s390_vcpu_request_handled(vcpu);
2778 	if (!kvm_request_pending(vcpu))
2779 		return 0;
2780 	/*
2781 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2782 	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2783 	 * This ensures that the ipte instruction for this request has
2784 	 * already finished. We might race against a second unmapper that
2785 	 * wants to set the blocking bit. Lets just retry the request loop.
2786 	 */
2787 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2788 		int rc;
2789 		rc = gmap_mprotect_notify(vcpu->arch.gmap,
2790 					  kvm_s390_get_prefix(vcpu),
2791 					  PAGE_SIZE * 2, PROT_WRITE);
2792 		if (rc) {
2793 			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2794 			return rc;
2795 		}
2796 		goto retry;
2797 	}
2798 
2799 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2800 		vcpu->arch.sie_block->ihcpu = 0xffff;
2801 		goto retry;
2802 	}
2803 
2804 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2805 		if (!ibs_enabled(vcpu)) {
2806 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2807 			atomic_or(CPUSTAT_IBS,
2808 					&vcpu->arch.sie_block->cpuflags);
2809 		}
2810 		goto retry;
2811 	}
2812 
2813 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2814 		if (ibs_enabled(vcpu)) {
2815 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2816 			atomic_andnot(CPUSTAT_IBS,
2817 					  &vcpu->arch.sie_block->cpuflags);
2818 		}
2819 		goto retry;
2820 	}
2821 
2822 	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2823 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2824 		goto retry;
2825 	}
2826 
2827 	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
2828 		/*
2829 		 * Disable CMMA virtualization; we will emulate the ESSA
2830 		 * instruction manually, in order to provide additional
2831 		 * functionalities needed for live migration.
2832 		 */
2833 		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
2834 		goto retry;
2835 	}
2836 
2837 	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
2838 		/*
2839 		 * Re-enable CMMA virtualization if CMMA is available and
2840 		 * was used.
2841 		 */
2842 		if ((vcpu->kvm->arch.use_cmma) &&
2843 		    (vcpu->kvm->mm->context.use_cmma))
2844 			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
2845 		goto retry;
2846 	}
2847 
2848 	/* nothing to do, just clear the request */
2849 	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
2850 
2851 	return 0;
2852 }
2853 
2854 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2855 {
2856 	struct kvm_vcpu *vcpu;
2857 	int i;
2858 
2859 	mutex_lock(&kvm->lock);
2860 	preempt_disable();
2861 	kvm->arch.epoch = tod - get_tod_clock();
2862 	kvm_s390_vcpu_block_all(kvm);
2863 	kvm_for_each_vcpu(i, vcpu, kvm)
2864 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2865 	kvm_s390_vcpu_unblock_all(kvm);
2866 	preempt_enable();
2867 	mutex_unlock(&kvm->lock);
2868 }
2869 
2870 /**
2871  * kvm_arch_fault_in_page - fault-in guest page if necessary
2872  * @vcpu: The corresponding virtual cpu
2873  * @gpa: Guest physical address
2874  * @writable: Whether the page should be writable or not
2875  *
2876  * Make sure that a guest page has been faulted-in on the host.
2877  *
2878  * Return: Zero on success, negative error code otherwise.
2879  */
2880 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2881 {
2882 	return gmap_fault(vcpu->arch.gmap, gpa,
2883 			  writable ? FAULT_FLAG_WRITE : 0);
2884 }
2885 
2886 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2887 				      unsigned long token)
2888 {
2889 	struct kvm_s390_interrupt inti;
2890 	struct kvm_s390_irq irq;
2891 
2892 	if (start_token) {
2893 		irq.u.ext.ext_params2 = token;
2894 		irq.type = KVM_S390_INT_PFAULT_INIT;
2895 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2896 	} else {
2897 		inti.type = KVM_S390_INT_PFAULT_DONE;
2898 		inti.parm64 = token;
2899 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2900 	}
2901 }
2902 
2903 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2904 				     struct kvm_async_pf *work)
2905 {
2906 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2907 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2908 }
2909 
2910 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2911 				 struct kvm_async_pf *work)
2912 {
2913 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2914 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2915 }
2916 
/*
 * Intentionally empty: s390 will always inject the page directly, so
 * there is nothing left to do here.
 */
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
}
2922 
2923 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2924 {
2925 	/*
2926 	 * s390 will always inject the page directly,
2927 	 * but we still want check_async_completion to cleanup
2928 	 */
2929 	return true;
2930 }
2931 
2932 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2933 {
2934 	hva_t hva;
2935 	struct kvm_arch_async_pf arch;
2936 	int rc;
2937 
2938 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2939 		return 0;
2940 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2941 	    vcpu->arch.pfault_compare)
2942 		return 0;
2943 	if (psw_extint_disabled(vcpu))
2944 		return 0;
2945 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
2946 		return 0;
2947 	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2948 		return 0;
2949 	if (!vcpu->arch.gmap->pfault_enabled)
2950 		return 0;
2951 
2952 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2953 	hva += current->thread.gmap_addr & ~PAGE_MASK;
2954 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2955 		return 0;
2956 
2957 	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2958 	return rc;
2959 }
2960 
2961 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2962 {
2963 	int rc, cpuflags;
2964 
2965 	/*
2966 	 * On s390 notifications for arriving pages will be delivered directly
2967 	 * to the guest but the house keeping for completed pfaults is
2968 	 * handled outside the worker.
2969 	 */
2970 	kvm_check_async_pf_completion(vcpu);
2971 
2972 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2973 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2974 
2975 	if (need_resched())
2976 		schedule();
2977 
2978 	if (test_cpu_flag(CIF_MCCK_PENDING))
2979 		s390_handle_mcck();
2980 
2981 	if (!kvm_is_ucontrol(vcpu->kvm)) {
2982 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
2983 		if (rc)
2984 			return rc;
2985 	}
2986 
2987 	rc = kvm_s390_handle_requests(vcpu);
2988 	if (rc)
2989 		return rc;
2990 
2991 	if (guestdbg_enabled(vcpu)) {
2992 		kvm_s390_backup_guest_per_regs(vcpu);
2993 		kvm_s390_patch_guest_per_regs(vcpu);
2994 	}
2995 
2996 	vcpu->arch.sie_block->icptcode = 0;
2997 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2998 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2999 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
3000 
3001 	return 0;
3002 }
3003 
3004 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3005 {
3006 	struct kvm_s390_pgm_info pgm_info = {
3007 		.code = PGM_ADDRESSING,
3008 	};
3009 	u8 opcode, ilen;
3010 	int rc;
3011 
3012 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3013 	trace_kvm_s390_sie_fault(vcpu);
3014 
3015 	/*
3016 	 * We want to inject an addressing exception, which is defined as a
3017 	 * suppressing or terminating exception. However, since we came here
3018 	 * by a DAT access exception, the PSW still points to the faulting
3019 	 * instruction since DAT exceptions are nullifying. So we've got
3020 	 * to look up the current opcode to get the length of the instruction
3021 	 * to be able to forward the PSW.
3022 	 */
3023 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3024 	ilen = insn_length(opcode);
3025 	if (rc < 0) {
3026 		return rc;
3027 	} else if (rc) {
3028 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
3029 		 * Forward by arbitrary ilc, injection will take care of
3030 		 * nullification if necessary.
3031 		 */
3032 		pgm_info = vcpu->arch.pgm;
3033 		ilen = 4;
3034 	}
3035 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3036 	kvm_s390_forward_psw(vcpu, ilen);
3037 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3038 }
3039 
3040 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3041 {
3042 	struct mcck_volatile_info *mcck_info;
3043 	struct sie_page *sie_page;
3044 
3045 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3046 		   vcpu->arch.sie_block->icptcode);
3047 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3048 
3049 	if (guestdbg_enabled(vcpu))
3050 		kvm_s390_restore_guest_per_regs(vcpu);
3051 
3052 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3053 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3054 
3055 	if (exit_reason == -EINTR) {
3056 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
3057 		sie_page = container_of(vcpu->arch.sie_block,
3058 					struct sie_page, sie_block);
3059 		mcck_info = &sie_page->mcck_info;
3060 		kvm_s390_reinject_machine_check(vcpu, mcck_info);
3061 		return 0;
3062 	}
3063 
3064 	if (vcpu->arch.sie_block->icptcode > 0) {
3065 		int rc = kvm_handle_sie_intercept(vcpu);
3066 
3067 		if (rc != -EOPNOTSUPP)
3068 			return rc;
3069 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3070 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3071 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3072 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3073 		return -EREMOTE;
3074 	} else if (exit_reason != -EFAULT) {
3075 		vcpu->stat.exit_null++;
3076 		return 0;
3077 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
3078 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3079 		vcpu->run->s390_ucontrol.trans_exc_code =
3080 						current->thread.gmap_addr;
3081 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
3082 		return -EREMOTE;
3083 	} else if (current->thread.gmap_pfault) {
3084 		trace_kvm_s390_major_guest_pfault(vcpu);
3085 		current->thread.gmap_pfault = 0;
3086 		if (kvm_arch_setup_async_pf(vcpu))
3087 			return 0;
3088 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3089 	}
3090 	return vcpu_post_run_fault_in_sie(vcpu);
3091 }
3092 
3093 static int __vcpu_run(struct kvm_vcpu *vcpu)
3094 {
3095 	int rc, exit_reason;
3096 
3097 	/*
3098 	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
3099 	 * ning the guest), so that memslots (and other stuff) are protected
3100 	 */
3101 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3102 
3103 	do {
3104 		rc = vcpu_pre_run(vcpu);
3105 		if (rc)
3106 			break;
3107 
3108 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3109 		/*
3110 		 * As PF_VCPU will be used in fault handler, between
3111 		 * guest_enter and guest_exit should be no uaccess.
3112 		 */
3113 		local_irq_disable();
3114 		guest_enter_irqoff();
3115 		__disable_cpu_timer_accounting(vcpu);
3116 		local_irq_enable();
3117 		exit_reason = sie64a(vcpu->arch.sie_block,
3118 				     vcpu->run->s.regs.gprs);
3119 		local_irq_disable();
3120 		__enable_cpu_timer_accounting(vcpu);
3121 		guest_exit_irqoff();
3122 		local_irq_enable();
3123 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3124 
3125 		rc = vcpu_post_run(vcpu, exit_reason);
3126 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3127 
3128 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3129 	return rc;
3130 }
3131 
3132 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3133 {
3134 	struct runtime_instr_cb *riccb;
3135 	struct gs_cb *gscb;
3136 
3137 	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3138 	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3139 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3140 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3141 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3142 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3143 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3144 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3145 		/* some control register changes require a tlb flush */
3146 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3147 	}
3148 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3149 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3150 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3151 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3152 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3153 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3154 	}
3155 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3156 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3157 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3158 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3159 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3160 			kvm_clear_async_pf_completion_queue(vcpu);
3161 	}
3162 	/*
3163 	 * If userspace sets the riccb (e.g. after migration) to a valid state,
3164 	 * we should enable RI here instead of doing the lazy enablement.
3165 	 */
3166 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3167 	    test_kvm_facility(vcpu->kvm, 64) &&
3168 	    riccb->valid &&
3169 	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3170 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3171 		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3172 	}
3173 	/*
3174 	 * If userspace sets the gscb (e.g. after migration) to non-zero,
3175 	 * we should enable GS here instead of doing the lazy enablement.
3176 	 */
3177 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3178 	    test_kvm_facility(vcpu->kvm, 133) &&
3179 	    gscb->gssm &&
3180 	    !vcpu->arch.gs_enabled) {
3181 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3182 		vcpu->arch.sie_block->ecb |= ECB_GS;
3183 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3184 		vcpu->arch.gs_enabled = 1;
3185 	}
3186 	save_access_regs(vcpu->arch.host_acrs);
3187 	restore_access_regs(vcpu->run->s.regs.acrs);
3188 	/* save host (userspace) fprs/vrs */
3189 	save_fpu_regs();
3190 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3191 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3192 	if (MACHINE_HAS_VX)
3193 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3194 	else
3195 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3196 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3197 	if (test_fp_ctl(current->thread.fpu.fpc))
3198 		/* User space provided an invalid FPC, let's clear it */
3199 		current->thread.fpu.fpc = 0;
3200 	if (MACHINE_HAS_GS) {
3201 		preempt_disable();
3202 		__ctl_set_bit(2, 4);
3203 		if (current->thread.gs_cb) {
3204 			vcpu->arch.host_gscb = current->thread.gs_cb;
3205 			save_gs_cb(vcpu->arch.host_gscb);
3206 		}
3207 		if (vcpu->arch.gs_enabled) {
3208 			current->thread.gs_cb = (struct gs_cb *)
3209 						&vcpu->run->s.regs.gscb;
3210 			restore_gs_cb(current->thread.gs_cb);
3211 		}
3212 		preempt_enable();
3213 	}
3214 
3215 	kvm_run->kvm_dirty_regs = 0;
3216 }
3217 
3218 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3219 {
3220 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3221 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3222 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3223 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3224 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3225 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3226 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3227 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3228 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3229 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3230 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3231 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3232 	save_access_regs(vcpu->run->s.regs.acrs);
3233 	restore_access_regs(vcpu->arch.host_acrs);
3234 	/* Save guest register state */
3235 	save_fpu_regs();
3236 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3237 	/* Restore will be done lazily at return */
3238 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3239 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3240 	if (MACHINE_HAS_GS) {
3241 		__ctl_set_bit(2, 4);
3242 		if (vcpu->arch.gs_enabled)
3243 			save_gs_cb(current->thread.gs_cb);
3244 		preempt_disable();
3245 		current->thread.gs_cb = vcpu->arch.host_gscb;
3246 		restore_gs_cb(vcpu->arch.host_gscb);
3247 		preempt_enable();
3248 		if (!vcpu->arch.host_gscb)
3249 			__ctl_clear_bit(2, 4);
3250 		vcpu->arch.host_gscb = NULL;
3251 	}
3252 
3253 }
3254 
3255 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3256 {
3257 	int rc;
3258 	sigset_t sigsaved;
3259 
3260 	if (kvm_run->immediate_exit)
3261 		return -EINTR;
3262 
3263 	if (guestdbg_exit_pending(vcpu)) {
3264 		kvm_s390_prepare_debug_exit(vcpu);
3265 		return 0;
3266 	}
3267 
3268 	if (vcpu->sigset_active)
3269 		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
3270 
3271 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3272 		kvm_s390_vcpu_start(vcpu);
3273 	} else if (is_vcpu_stopped(vcpu)) {
3274 		pr_err_ratelimited("can't run stopped vcpu %d\n",
3275 				   vcpu->vcpu_id);
3276 		return -EINVAL;
3277 	}
3278 
3279 	sync_regs(vcpu, kvm_run);
3280 	enable_cpu_timer_accounting(vcpu);
3281 
3282 	might_fault();
3283 	rc = __vcpu_run(vcpu);
3284 
3285 	if (signal_pending(current) && !rc) {
3286 		kvm_run->exit_reason = KVM_EXIT_INTR;
3287 		rc = -EINTR;
3288 	}
3289 
3290 	if (guestdbg_exit_pending(vcpu) && !rc)  {
3291 		kvm_s390_prepare_debug_exit(vcpu);
3292 		rc = 0;
3293 	}
3294 
3295 	if (rc == -EREMOTE) {
3296 		/* userspace support is needed, kvm_run has been prepared */
3297 		rc = 0;
3298 	}
3299 
3300 	disable_cpu_timer_accounting(vcpu);
3301 	store_regs(vcpu, kvm_run);
3302 
3303 	if (vcpu->sigset_active)
3304 		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
3305 
3306 	vcpu->stat.exit_userspace++;
3307 	return rc;
3308 }
3309 
3310 /*
3311  * store status at address
3312  * we use have two special cases:
3313  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3314  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3315  */
3316 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3317 {
3318 	unsigned char archmode = 1;
3319 	freg_t fprs[NUM_FPRS];
3320 	unsigned int px;
3321 	u64 clkcomp, cputm;
3322 	int rc;
3323 
3324 	px = kvm_s390_get_prefix(vcpu);
3325 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3326 		if (write_guest_abs(vcpu, 163, &archmode, 1))
3327 			return -EFAULT;
3328 		gpa = 0;
3329 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3330 		if (write_guest_real(vcpu, 163, &archmode, 1))
3331 			return -EFAULT;
3332 		gpa = px;
3333 	} else
3334 		gpa -= __LC_FPREGS_SAVE_AREA;
3335 
3336 	/* manually convert vector registers if necessary */
3337 	if (MACHINE_HAS_VX) {
3338 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3339 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3340 				     fprs, 128);
3341 	} else {
3342 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3343 				     vcpu->run->s.regs.fprs, 128);
3344 	}
3345 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3346 			      vcpu->run->s.regs.gprs, 128);
3347 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3348 			      &vcpu->arch.sie_block->gpsw, 16);
3349 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3350 			      &px, 4);
3351 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3352 			      &vcpu->run->s.regs.fpc, 4);
3353 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3354 			      &vcpu->arch.sie_block->todpr, 4);
3355 	cputm = kvm_s390_get_cpu_timer(vcpu);
3356 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3357 			      &cputm, 8);
3358 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
3359 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3360 			      &clkcomp, 8);
3361 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3362 			      &vcpu->run->s.regs.acrs, 64);
3363 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3364 			      &vcpu->arch.sie_block->gcr, 128);
3365 	return rc ? -EFAULT : 0;
3366 }
3367 
3368 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3369 {
3370 	/*
3371 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3372 	 * switch in the run ioctl. Let's update our copies before we save
3373 	 * it into the save area
3374 	 */
3375 	save_fpu_regs();
3376 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3377 	save_access_regs(vcpu->run->s.regs.acrs);
3378 
3379 	return kvm_s390_store_status_unloaded(vcpu, addr);
3380 }
3381 
3382 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3383 {
3384 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3385 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3386 }
3387 
3388 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3389 {
3390 	unsigned int i;
3391 	struct kvm_vcpu *vcpu;
3392 
3393 	kvm_for_each_vcpu(i, vcpu, kvm) {
3394 		__disable_ibs_on_vcpu(vcpu);
3395 	}
3396 }
3397 
3398 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3399 {
3400 	if (!sclp.has_ibs)
3401 		return;
3402 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3403 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3404 }
3405 
3406 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3407 {
3408 	int i, online_vcpus, started_vcpus = 0;
3409 
3410 	if (!is_vcpu_stopped(vcpu))
3411 		return;
3412 
3413 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3414 	/* Only one cpu at a time may enter/leave the STOPPED state. */
3415 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
3416 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3417 
3418 	for (i = 0; i < online_vcpus; i++) {
3419 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3420 			started_vcpus++;
3421 	}
3422 
3423 	if (started_vcpus == 0) {
3424 		/* we're the only active VCPU -> speed it up */
3425 		__enable_ibs_on_vcpu(vcpu);
3426 	} else if (started_vcpus == 1) {
3427 		/*
3428 		 * As we are starting a second VCPU, we have to disable
3429 		 * the IBS facility on all VCPUs to remove potentially
3430 		 * oustanding ENABLE requests.
3431 		 */
3432 		__disable_ibs_on_all_vcpus(vcpu->kvm);
3433 	}
3434 
3435 	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3436 	/*
3437 	 * Another VCPU might have used IBS while we were offline.
3438 	 * Let's play safe and flush the VCPU at startup.
3439 	 */
3440 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3441 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3442 	return;
3443 }
3444 
3445 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3446 {
3447 	int i, online_vcpus, started_vcpus = 0;
3448 	struct kvm_vcpu *started_vcpu = NULL;
3449 
3450 	if (is_vcpu_stopped(vcpu))
3451 		return;
3452 
3453 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3454 	/* Only one cpu at a time may enter/leave the STOPPED state. */
3455 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
3456 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3457 
3458 	/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
3459 	kvm_s390_clear_stop_irq(vcpu);
3460 
3461 	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3462 	__disable_ibs_on_vcpu(vcpu);
3463 
3464 	for (i = 0; i < online_vcpus; i++) {
3465 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3466 			started_vcpus++;
3467 			started_vcpu = vcpu->kvm->vcpus[i];
3468 		}
3469 	}
3470 
3471 	if (started_vcpus == 1) {
3472 		/*
3473 		 * As we only have one VCPU left, we want to enable the
3474 		 * IBS facility for that VCPU to speed it up.
3475 		 */
3476 		__enable_ibs_on_vcpu(started_vcpu);
3477 	}
3478 
3479 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3480 	return;
3481 }
3482 
3483 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3484 				     struct kvm_enable_cap *cap)
3485 {
3486 	int r;
3487 
3488 	if (cap->flags)
3489 		return -EINVAL;
3490 
3491 	switch (cap->cap) {
3492 	case KVM_CAP_S390_CSS_SUPPORT:
3493 		if (!vcpu->kvm->arch.css_support) {
3494 			vcpu->kvm->arch.css_support = 1;
3495 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3496 			trace_kvm_s390_enable_css(vcpu->kvm);
3497 		}
3498 		r = 0;
3499 		break;
3500 	default:
3501 		r = -EINVAL;
3502 		break;
3503 	}
3504 	return r;
3505 }
3506 
3507 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3508 				  struct kvm_s390_mem_op *mop)
3509 {
3510 	void __user *uaddr = (void __user *)mop->buf;
3511 	void *tmpbuf = NULL;
3512 	int r, srcu_idx;
3513 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3514 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
3515 
3516 	if (mop->flags & ~supported_flags)
3517 		return -EINVAL;
3518 
3519 	if (mop->size > MEM_OP_MAX_SIZE)
3520 		return -E2BIG;
3521 
3522 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3523 		tmpbuf = vmalloc(mop->size);
3524 		if (!tmpbuf)
3525 			return -ENOMEM;
3526 	}
3527 
3528 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3529 
3530 	switch (mop->op) {
3531 	case KVM_S390_MEMOP_LOGICAL_READ:
3532 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3533 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3534 					    mop->size, GACC_FETCH);
3535 			break;
3536 		}
3537 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3538 		if (r == 0) {
3539 			if (copy_to_user(uaddr, tmpbuf, mop->size))
3540 				r = -EFAULT;
3541 		}
3542 		break;
3543 	case KVM_S390_MEMOP_LOGICAL_WRITE:
3544 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3545 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3546 					    mop->size, GACC_STORE);
3547 			break;
3548 		}
3549 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3550 			r = -EFAULT;
3551 			break;
3552 		}
3553 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3554 		break;
3555 	default:
3556 		r = -EINVAL;
3557 	}
3558 
3559 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3560 
3561 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3562 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3563 
3564 	vfree(tmpbuf);
3565 	return r;
3566 }
3567 
3568 long kvm_arch_vcpu_ioctl(struct file *filp,
3569 			 unsigned int ioctl, unsigned long arg)
3570 {
3571 	struct kvm_vcpu *vcpu = filp->private_data;
3572 	void __user *argp = (void __user *)arg;
3573 	int idx;
3574 	long r;
3575 
3576 	switch (ioctl) {
3577 	case KVM_S390_IRQ: {
3578 		struct kvm_s390_irq s390irq;
3579 
3580 		r = -EFAULT;
3581 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3582 			break;
3583 		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3584 		break;
3585 	}
3586 	case KVM_S390_INTERRUPT: {
3587 		struct kvm_s390_interrupt s390int;
3588 		struct kvm_s390_irq s390irq;
3589 
3590 		r = -EFAULT;
3591 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
3592 			break;
3593 		if (s390int_to_s390irq(&s390int, &s390irq))
3594 			return -EINVAL;
3595 		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3596 		break;
3597 	}
3598 	case KVM_S390_STORE_STATUS:
3599 		idx = srcu_read_lock(&vcpu->kvm->srcu);
3600 		r = kvm_s390_vcpu_store_status(vcpu, arg);
3601 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
3602 		break;
3603 	case KVM_S390_SET_INITIAL_PSW: {
3604 		psw_t psw;
3605 
3606 		r = -EFAULT;
3607 		if (copy_from_user(&psw, argp, sizeof(psw)))
3608 			break;
3609 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3610 		break;
3611 	}
3612 	case KVM_S390_INITIAL_RESET:
3613 		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3614 		break;
3615 	case KVM_SET_ONE_REG:
3616 	case KVM_GET_ONE_REG: {
3617 		struct kvm_one_reg reg;
3618 		r = -EFAULT;
3619 		if (copy_from_user(&reg, argp, sizeof(reg)))
3620 			break;
3621 		if (ioctl == KVM_SET_ONE_REG)
3622 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3623 		else
3624 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3625 		break;
3626 	}
3627 #ifdef CONFIG_KVM_S390_UCONTROL
3628 	case KVM_S390_UCAS_MAP: {
3629 		struct kvm_s390_ucas_mapping ucasmap;
3630 
3631 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3632 			r = -EFAULT;
3633 			break;
3634 		}
3635 
3636 		if (!kvm_is_ucontrol(vcpu->kvm)) {
3637 			r = -EINVAL;
3638 			break;
3639 		}
3640 
3641 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3642 				     ucasmap.vcpu_addr, ucasmap.length);
3643 		break;
3644 	}
3645 	case KVM_S390_UCAS_UNMAP: {
3646 		struct kvm_s390_ucas_mapping ucasmap;
3647 
3648 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3649 			r = -EFAULT;
3650 			break;
3651 		}
3652 
3653 		if (!kvm_is_ucontrol(vcpu->kvm)) {
3654 			r = -EINVAL;
3655 			break;
3656 		}
3657 
3658 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3659 			ucasmap.length);
3660 		break;
3661 	}
3662 #endif
3663 	case KVM_S390_VCPU_FAULT: {
3664 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
3665 		break;
3666 	}
3667 	case KVM_ENABLE_CAP:
3668 	{
3669 		struct kvm_enable_cap cap;
3670 		r = -EFAULT;
3671 		if (copy_from_user(&cap, argp, sizeof(cap)))
3672 			break;
3673 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3674 		break;
3675 	}
3676 	case KVM_S390_MEM_OP: {
3677 		struct kvm_s390_mem_op mem_op;
3678 
3679 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3680 			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3681 		else
3682 			r = -EFAULT;
3683 		break;
3684 	}
3685 	case KVM_S390_SET_IRQ_STATE: {
3686 		struct kvm_s390_irq_state irq_state;
3687 
3688 		r = -EFAULT;
3689 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3690 			break;
3691 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3692 		    irq_state.len == 0 ||
3693 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3694 			r = -EINVAL;
3695 			break;
3696 		}
3697 		r = kvm_s390_set_irq_state(vcpu,
3698 					   (void __user *) irq_state.buf,
3699 					   irq_state.len);
3700 		break;
3701 	}
3702 	case KVM_S390_GET_IRQ_STATE: {
3703 		struct kvm_s390_irq_state irq_state;
3704 
3705 		r = -EFAULT;
3706 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3707 			break;
3708 		if (irq_state.len == 0) {
3709 			r = -EINVAL;
3710 			break;
3711 		}
3712 		r = kvm_s390_get_irq_state(vcpu,
3713 					   (__u8 __user *)  irq_state.buf,
3714 					   irq_state.len);
3715 		break;
3716 	}
3717 	default:
3718 		r = -ENOTTY;
3719 	}
3720 	return r;
3721 }
3722 
3723 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3724 {
3725 #ifdef CONFIG_KVM_S390_UCONTROL
3726 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3727 		 && (kvm_is_ucontrol(vcpu->kvm))) {
3728 		vmf->page = virt_to_page(vcpu->arch.sie_block);
3729 		get_page(vmf->page);
3730 		return 0;
3731 	}
3732 #endif
3733 	return VM_FAULT_SIGBUS;
3734 }
3735 
/*
 * Architecture hook for memslot creation.
 * Nothing is done with the arguments here; always returns 0.
 */
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}
3741 
3742 /* Section: memory related */
3743 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3744 				   struct kvm_memory_slot *memslot,
3745 				   const struct kvm_userspace_memory_region *mem,
3746 				   enum kvm_mr_change change)
3747 {
3748 	/* A few sanity checks. We can have memory slots which have to be
3749 	   located/ended at a segment boundary (1MB). The memory in userland is
3750 	   ok to be fragmented into various different vmas. It is okay to mmap()
3751 	   and munmap() stuff in this slot after doing this call at any time */
3752 
3753 	if (mem->userspace_addr & 0xffffful)
3754 		return -EINVAL;
3755 
3756 	if (mem->memory_size & 0xffffful)
3757 		return -EINVAL;
3758 
3759 	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3760 		return -EINVAL;
3761 
3762 	return 0;
3763 }
3764 
3765 void kvm_arch_commit_memory_region(struct kvm *kvm,
3766 				const struct kvm_userspace_memory_region *mem,
3767 				const struct kvm_memory_slot *old,
3768 				const struct kvm_memory_slot *new,
3769 				enum kvm_mr_change change)
3770 {
3771 	int rc;
3772 
3773 	/* If the basics of the memslot do not change, we do not want
3774 	 * to update the gmap. Every update causes several unnecessary
3775 	 * segment translation exceptions. This is usually handled just
3776 	 * fine by the normal fault handler + gmap, but it will also
3777 	 * cause faults on the prefix page of running guest CPUs.
3778 	 */
3779 	if (old->userspace_addr == mem->userspace_addr &&
3780 	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3781 	    old->npages * PAGE_SIZE == mem->memory_size)
3782 		return;
3783 
3784 	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3785 		mem->guest_phys_addr, mem->memory_size);
3786 	if (rc)
3787 		pr_warn("failed to commit memory region\n");
3788 	return;
3789 }
3790 
3791 static inline unsigned long nonhyp_mask(int i)
3792 {
3793 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3794 
3795 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3796 }
3797 
3798 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3799 {
3800 	vcpu->valid_wakeup = false;
3801 }
3802 
3803 static int __init kvm_s390_init(void)
3804 {
3805 	int i;
3806 
3807 	if (!sclp.has_sief2) {
3808 		pr_info("SIE not available\n");
3809 		return -ENODEV;
3810 	}
3811 
3812 	for (i = 0; i < 16; i++)
3813 		kvm_s390_fac_list_mask[i] |=
3814 			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3815 
3816 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3817 }
3818 
3819 static void __exit kvm_s390_exit(void)
3820 {
3821 	kvm_exit();
3822 }
3823 
3824 module_init(kvm_s390_init);
3825 module_exit(kvm_s390_exit);
3826 
3827 /*
3828  * Enable autoloading of the kvm module.
3829  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3830  * since x86 takes a different approach.
3831  */
3832 #include <linux/miscdevice.h>
3833 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3834 MODULE_ALIAS("devname:kvm");
3835