xref: /openbmc/linux/arch/s390/kvm/kvm-s390.c (revision 13ee3f678b1117d7511a2c5e10549f7c37f4cadf)
1 /*
2  * hosting zSeries kernel virtual machines
3  *
4  * Copyright IBM Corp. 2008, 2009
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License (version 2 only)
8  * as published by the Free Software Foundation.
9  *
10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
11  *               Christian Borntraeger <borntraeger@de.ibm.com>
12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
14  *               Jason J. Herne <jjherne@us.ibm.com>
15  */
16 
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/random.h>
27 #include <linux/slab.h>
28 #include <linux/timer.h>
29 #include <linux/vmalloc.h>
30 #include <linux/bitmap.h>
31 #include <asm/asm-offsets.h>
32 #include <asm/lowcore.h>
33 #include <asm/etr.h>
34 #include <asm/pgtable.h>
35 #include <asm/gmap.h>
36 #include <asm/nmi.h>
37 #include <asm/switch_to.h>
38 #include <asm/isc.h>
39 #include <asm/sclp.h>
40 #include <asm/cpacf.h>
42 #include "kvm-s390.h"
43 #include "gaccess.h"
44 
45 #define KMSG_COMPONENT "kvm-s390"
46 #undef pr_fmt
47 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
48 
49 #define CREATE_TRACE_POINTS
50 #include "trace.h"
51 #include "trace-s390.h"
52 
53 #define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
54 #define LOCAL_IRQS 32
55 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
56 			   (KVM_MAX_VCPUS + LOCAL_IRQS))
57 
58 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
59 
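/*
 * Every entry below becomes a counter file in KVM's debugfs directory.
 * VCPU_STAT() merely records where the counter lives inside struct
 * kvm_vcpu; as a rough sketch, the first entry expands to
 *
 *	{ "userspace_handled",
 *	  offsetof(struct kvm_vcpu, stat.exit_userspace), KVM_STAT_VCPU },
 *
 * and the generic KVM code sums that field over all VCPUs whenever the
 * file is read.
 */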
60 struct kvm_stats_debugfs_item debugfs_entries[] = {
61 	{ "userspace_handled", VCPU_STAT(exit_userspace) },
62 	{ "exit_null", VCPU_STAT(exit_null) },
63 	{ "exit_validity", VCPU_STAT(exit_validity) },
64 	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
65 	{ "exit_external_request", VCPU_STAT(exit_external_request) },
66 	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
67 	{ "exit_instruction", VCPU_STAT(exit_instruction) },
68 	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
69 	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
70 	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
71 	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
72 	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
73 	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
74 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
75 	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
76 	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
77 	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
78 	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
79 	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
80 	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
81 	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
82 	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
83 	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
84 	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
85 	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
86 	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
87 	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
88 	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
89 	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
90 	{ "instruction_spx", VCPU_STAT(instruction_spx) },
91 	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
92 	{ "instruction_stap", VCPU_STAT(instruction_stap) },
93 	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
94 	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
95 	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
96 	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
97 	{ "instruction_essa", VCPU_STAT(instruction_essa) },
98 	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
99 	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
100 	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
101 	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
102 	{ "instruction_sie", VCPU_STAT(instruction_sie) },
103 	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
104 	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
105 	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
106 	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
107 	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
108 	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
109 	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
110 	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
111 	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
112 	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
113 	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
114 	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
115 	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
116 	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
117 	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
118 	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
119 	{ "diagnose_10", VCPU_STAT(diagnose_10) },
120 	{ "diagnose_44", VCPU_STAT(diagnose_44) },
121 	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
122 	{ "diagnose_258", VCPU_STAT(diagnose_258) },
123 	{ "diagnose_308", VCPU_STAT(diagnose_308) },
124 	{ "diagnose_500", VCPU_STAT(diagnose_500) },
125 	{ NULL }
126 };
127 
128 /* upper facilities limit for kvm */
129 unsigned long kvm_s390_fac_list_mask[16] = {
130 	0xffe6000000000000UL,
131 	0x005e000000000000UL,
132 };
133 
134 unsigned long kvm_s390_fac_list_mask_size(void)
135 {
136 	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
137 	return ARRAY_SIZE(kvm_s390_fac_list_mask);
138 }
139 
140 /* available cpu features supported by kvm */
141 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
142 /* available subfunctions indicated via query / "test bit" */
143 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
144 
145 static struct gmap_notifier gmap_notifier;
146 static struct gmap_notifier vsie_gmap_notifier;
147 debug_info_t *kvm_s390_dbf;
148 
149 /* Section: not file related */
150 int kvm_arch_hardware_enable(void)
151 {
152 	/* every s390 is virtualization enabled ;-) */
153 	return 0;
154 }
155 
156 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
157 			      unsigned long end);
158 
159 /*
160  * This callback is executed during stop_machine(). All CPUs are therefore
161  * temporarily stopped. In order not to change guest behavior, we have to
162  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
163  * so a CPU won't be stopped while calculating with the epoch.
164  */
165 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
166 			  void *v)
167 {
168 	struct kvm *kvm;
169 	struct kvm_vcpu *vcpu;
170 	int i;
171 	unsigned long long *delta = v;
172 
173 	list_for_each_entry(kvm, &vm_list, vm_list) {
174 		kvm->arch.epoch -= *delta;
175 		kvm_for_each_vcpu(i, vcpu, kvm) {
176 			vcpu->arch.sie_block->epoch -= *delta;
177 			if (vcpu->arch.cputm_enabled)
178 				vcpu->arch.cputm_start += *delta;
179 		}
180 	}
181 	return NOTIFY_OK;
182 }
183 
184 static struct notifier_block kvm_clock_notifier = {
185 	.notifier_call = kvm_clock_sync,
186 };
187 
188 int kvm_arch_hardware_setup(void)
189 {
190 	gmap_notifier.notifier_call = kvm_gmap_notifier;
191 	gmap_register_pte_notifier(&gmap_notifier);
192 	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
193 	gmap_register_pte_notifier(&vsie_gmap_notifier);
194 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
195 				       &kvm_clock_notifier);
196 	return 0;
197 }
198 
199 void kvm_arch_hardware_unsetup(void)
200 {
201 	gmap_unregister_pte_notifier(&gmap_notifier);
202 	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
203 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
204 					 &kvm_clock_notifier);
205 }
206 
207 static void allow_cpu_feat(unsigned long nr)
208 {
209 	set_bit_inv(nr, kvm_s390_available_cpu_feat);
210 }
211 
212 static inline int plo_test_bit(unsigned char nr)
213 {
214 	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
215 	int cc = 3; /* subfunction not available */
216 
217 	asm volatile(
218 		/* Parameter registers are ignored for "test bit" */
219 		"	plo	0,0,0,0(0)\n"
220 		"	ipm	%0\n"
221 		"	srl	%0,28\n"
222 		: "=d" (cc)
223 		: "d" (r0)
224 		: "cc");
225 	return cc == 0;
226 }
227 
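/*
 * PLO "test bit" convention used above (sketch): setting bit 0x100 in the
 * function code turns PERFORM LOCKED OPERATION into an availability query
 * for function number nr, and condition code 0 means the subfunction is
 * installed.  kvm_s390_cpu_feat_init() records the answers MSB-first, so
 * function 0 maps to the top bit of plo[0].
 */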
228 static void kvm_s390_cpu_feat_init(void)
229 {
230 	int i;
231 
232 	for (i = 0; i < 256; ++i) {
233 		if (plo_test_bit(i))
234 			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
235 	}
236 
237 	if (test_facility(28)) /* TOD-clock steering */
238 		etr_ptff(kvm_s390_available_subfunc.ptff, ETR_PTFF_QAF);
239 
240 	if (test_facility(17)) { /* MSA */
241 		__cpacf_query(CPACF_KMAC, kvm_s390_available_subfunc.kmac);
242 		__cpacf_query(CPACF_KMC, kvm_s390_available_subfunc.kmc);
243 		__cpacf_query(CPACF_KM, kvm_s390_available_subfunc.km);
244 		__cpacf_query(CPACF_KIMD, kvm_s390_available_subfunc.kimd);
245 		__cpacf_query(CPACF_KLMD, kvm_s390_available_subfunc.klmd);
246 	}
247 	if (test_facility(76)) /* MSA3 */
248 		__cpacf_query(CPACF_PCKMO, kvm_s390_available_subfunc.pckmo);
249 	if (test_facility(77)) { /* MSA4 */
250 		__cpacf_query(CPACF_KMCTR, kvm_s390_available_subfunc.kmctr);
251 		__cpacf_query(CPACF_KMF, kvm_s390_available_subfunc.kmf);
252 		__cpacf_query(CPACF_KMO, kvm_s390_available_subfunc.kmo);
253 		__cpacf_query(CPACF_PCC, kvm_s390_available_subfunc.pcc);
254 	}
255 	if (test_facility(57)) /* MSA5 */
256 		__cpacf_query(CPACF_PPNO, kvm_s390_available_subfunc.ppno);
257 
258 	if (MACHINE_HAS_ESOP)
259 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
260 	/*
261 	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
262 	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
263 	 */
264 	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
265 	    !test_facility(3))
266 		return;
267 	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
268 	if (sclp.has_64bscao)
269 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
270 	if (sclp.has_siif)
271 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
272 	if (sclp.has_gpere)
273 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
274 	if (sclp.has_gsls)
275 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
276 	if (sclp.has_ib)
277 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
278 	if (sclp.has_cei)
279 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
280 }
281 
282 int kvm_arch_init(void *opaque)
283 {
284 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
285 	if (!kvm_s390_dbf)
286 		return -ENOMEM;
287 
288 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
289 		debug_unregister(kvm_s390_dbf);
290 		return -ENOMEM;
291 	}
292 
293 	kvm_s390_cpu_feat_init();
294 
295 	/* Register floating interrupt controller interface. */
296 	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
297 }
298 
299 void kvm_arch_exit(void)
300 {
301 	debug_unregister(kvm_s390_dbf);
302 }
303 
304 /* Section: device related */
305 long kvm_arch_dev_ioctl(struct file *filp,
306 			unsigned int ioctl, unsigned long arg)
307 {
308 	if (ioctl == KVM_S390_ENABLE_SIE)
309 		return s390_enable_sie();
310 	return -EINVAL;
311 }
312 
313 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
314 {
315 	int r;
316 
317 	switch (ext) {
318 	case KVM_CAP_S390_PSW:
319 	case KVM_CAP_S390_GMAP:
320 	case KVM_CAP_SYNC_MMU:
321 #ifdef CONFIG_KVM_S390_UCONTROL
322 	case KVM_CAP_S390_UCONTROL:
323 #endif
324 	case KVM_CAP_ASYNC_PF:
325 	case KVM_CAP_SYNC_REGS:
326 	case KVM_CAP_ONE_REG:
327 	case KVM_CAP_ENABLE_CAP:
328 	case KVM_CAP_S390_CSS_SUPPORT:
329 	case KVM_CAP_IOEVENTFD:
330 	case KVM_CAP_DEVICE_CTRL:
331 	case KVM_CAP_ENABLE_CAP_VM:
332 	case KVM_CAP_S390_IRQCHIP:
333 	case KVM_CAP_VM_ATTRIBUTES:
334 	case KVM_CAP_MP_STATE:
335 	case KVM_CAP_S390_INJECT_IRQ:
336 	case KVM_CAP_S390_USER_SIGP:
337 	case KVM_CAP_S390_USER_STSI:
338 	case KVM_CAP_S390_SKEYS:
339 	case KVM_CAP_S390_IRQ_STATE:
340 		r = 1;
341 		break;
342 	case KVM_CAP_S390_MEM_OP:
343 		r = MEM_OP_MAX_SIZE;
344 		break;
345 	case KVM_CAP_NR_VCPUS:
346 	case KVM_CAP_MAX_VCPUS:
347 		r = KVM_S390_BSCA_CPU_SLOTS;
348 		if (sclp.has_esca && sclp.has_64bscao)
349 			r = KVM_S390_ESCA_CPU_SLOTS;
350 		break;
351 	case KVM_CAP_NR_MEMSLOTS:
352 		r = KVM_USER_MEM_SLOTS;
353 		break;
354 	case KVM_CAP_S390_COW:
355 		r = MACHINE_HAS_ESOP;
356 		break;
357 	case KVM_CAP_S390_VECTOR_REGISTERS:
358 		r = MACHINE_HAS_VX;
359 		break;
360 	case KVM_CAP_S390_RI:
361 		r = test_facility(64);
362 		break;
363 	default:
364 		r = 0;
365 	}
366 	return r;
367 }
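
/*
 * User space typically probes these capabilities with KVM_CHECK_EXTENSION,
 * e.g. (illustrative sketch, error handling omitted, vm_fd being the file
 * descriptor returned by KVM_CREATE_VM):
 *
 *	int max_vcpus = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPUS);
 *
 * which reports KVM_S390_ESCA_CPU_SLOTS when the extended SCA can be used
 * and KVM_S390_BSCA_CPU_SLOTS otherwise.
 */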
368 
369 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
370 					struct kvm_memory_slot *memslot)
371 {
372 	gfn_t cur_gfn, last_gfn;
373 	unsigned long address;
374 	struct gmap *gmap = kvm->arch.gmap;
375 
376 	/* Loop over all guest pages */
377 	last_gfn = memslot->base_gfn + memslot->npages;
378 	for (cur_gfn = memslot->base_gfn; cur_gfn < last_gfn; cur_gfn++) {
379 		address = gfn_to_hva_memslot(memslot, cur_gfn);
380 
381 		if (test_and_clear_guest_dirty(gmap->mm, address))
382 			mark_page_dirty(kvm, cur_gfn);
383 		if (fatal_signal_pending(current))
384 			return;
385 		cond_resched();
386 	}
387 }
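
/*
 * Dirty tracking is a pull model here: the dirty state is harvested from
 * the host page tables only when user space requests the log.  The walk
 * above stays preemptible via cond_resched() and gives up early if the
 * calling task has a fatal signal pending.
 */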
388 
389 /* Section: vm related */
390 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
391 
392 /*
393  * Get (and clear) the dirty memory log for a memory slot.
394  */
395 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
396 			       struct kvm_dirty_log *log)
397 {
398 	int r;
399 	unsigned long n;
400 	struct kvm_memslots *slots;
401 	struct kvm_memory_slot *memslot;
402 	int is_dirty = 0;
403 
404 	mutex_lock(&kvm->slots_lock);
405 
406 	r = -EINVAL;
407 	if (log->slot >= KVM_USER_MEM_SLOTS)
408 		goto out;
409 
410 	slots = kvm_memslots(kvm);
411 	memslot = id_to_memslot(slots, log->slot);
412 	r = -ENOENT;
413 	if (!memslot->dirty_bitmap)
414 		goto out;
415 
416 	kvm_s390_sync_dirty_log(kvm, memslot);
417 	r = kvm_get_dirty_log(kvm, log, &is_dirty);
418 	if (r)
419 		goto out;
420 
421 	/* Clear the dirty log */
422 	if (is_dirty) {
423 		n = kvm_dirty_bitmap_bytes(memslot);
424 		memset(memslot->dirty_bitmap, 0, n);
425 	}
426 	r = 0;
427 out:
428 	mutex_unlock(&kvm->slots_lock);
429 	return r;
430 }
431 
432 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
433 {
434 	int r;
435 
436 	if (cap->flags)
437 		return -EINVAL;
438 
439 	switch (cap->cap) {
440 	case KVM_CAP_S390_IRQCHIP:
441 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
442 		kvm->arch.use_irqchip = 1;
443 		r = 0;
444 		break;
445 	case KVM_CAP_S390_USER_SIGP:
446 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
447 		kvm->arch.user_sigp = 1;
448 		r = 0;
449 		break;
450 	case KVM_CAP_S390_VECTOR_REGISTERS:
451 		mutex_lock(&kvm->lock);
452 		if (kvm->created_vcpus) {
453 			r = -EBUSY;
454 		} else if (MACHINE_HAS_VX) {
455 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
456 			set_kvm_facility(kvm->arch.model.fac_list, 129);
457 			r = 0;
458 		} else
459 			r = -EINVAL;
460 		mutex_unlock(&kvm->lock);
461 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
462 			 r ? "(not available)" : "(success)");
463 		break;
464 	case KVM_CAP_S390_RI:
465 		r = -EINVAL;
466 		mutex_lock(&kvm->lock);
467 		if (kvm->created_vcpus) {
468 			r = -EBUSY;
469 		} else if (test_facility(64)) {
470 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
471 			set_kvm_facility(kvm->arch.model.fac_list, 64);
472 			r = 0;
473 		}
474 		mutex_unlock(&kvm->lock);
475 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
476 			 r ? "(not available)" : "(success)");
477 		break;
478 	case KVM_CAP_S390_USER_STSI:
479 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
480 		kvm->arch.user_stsi = 1;
481 		r = 0;
482 		break;
483 	default:
484 		r = -EINVAL;
485 		break;
486 	}
487 	return r;
488 }
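
/*
 * The capabilities above are switched on through the KVM_ENABLE_CAP vm
 * ioctl (dispatched from kvm_arch_vm_ioctl() below).  A minimal sketch:
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *
 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 *
 * Capabilities that change the CPU model, such as
 * KVM_CAP_S390_VECTOR_REGISTERS and KVM_CAP_S390_RI, must be enabled
 * before the first VCPU is created, otherwise -EBUSY is returned.
 */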
489 
490 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
491 {
492 	int ret;
493 
494 	switch (attr->attr) {
495 	case KVM_S390_VM_MEM_LIMIT_SIZE:
496 		ret = 0;
497 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
498 			 kvm->arch.mem_limit);
499 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
500 			ret = -EFAULT;
501 		break;
502 	default:
503 		ret = -ENXIO;
504 		break;
505 	}
506 	return ret;
507 }
508 
509 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
510 {
511 	int ret;
512 	unsigned int idx;
513 	switch (attr->attr) {
514 	case KVM_S390_VM_MEM_ENABLE_CMMA:
515 		ret = -ENXIO;
516 		if (!sclp.has_cmma)
517 			break;
518 
519 		ret = -EBUSY;
520 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
521 		mutex_lock(&kvm->lock);
522 		if (!kvm->created_vcpus) {
523 			kvm->arch.use_cmma = 1;
524 			ret = 0;
525 		}
526 		mutex_unlock(&kvm->lock);
527 		break;
528 	case KVM_S390_VM_MEM_CLR_CMMA:
529 		ret = -ENXIO;
530 		if (!sclp.has_cmma)
531 			break;
532 		ret = -EINVAL;
533 		if (!kvm->arch.use_cmma)
534 			break;
535 
536 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
537 		mutex_lock(&kvm->lock);
538 		idx = srcu_read_lock(&kvm->srcu);
539 		s390_reset_cmma(kvm->arch.gmap->mm);
540 		srcu_read_unlock(&kvm->srcu, idx);
541 		mutex_unlock(&kvm->lock);
542 		ret = 0;
543 		break;
544 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
545 		unsigned long new_limit;
546 
547 		if (kvm_is_ucontrol(kvm))
548 			return -EINVAL;
549 
550 		if (get_user(new_limit, (u64 __user *)attr->addr))
551 			return -EFAULT;
552 
553 		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
554 		    new_limit > kvm->arch.mem_limit)
555 			return -E2BIG;
556 
557 		if (!new_limit)
558 			return -EINVAL;
559 
560 		/* gmap_create takes last usable address */
561 		if (new_limit != KVM_S390_NO_MEM_LIMIT)
562 			new_limit -= 1;
563 
564 		ret = -EBUSY;
565 		mutex_lock(&kvm->lock);
566 		if (!kvm->created_vcpus) {
567 			/* gmap_create will round the limit up */
568 			struct gmap *new = gmap_create(current->mm, new_limit);
569 
570 			if (!new) {
571 				ret = -ENOMEM;
572 			} else {
573 				gmap_remove(kvm->arch.gmap);
574 				new->private = kvm;
575 				kvm->arch.gmap = new;
576 				ret = 0;
577 			}
578 		}
579 		mutex_unlock(&kvm->lock);
580 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
581 		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
582 			 (void *) kvm->arch.gmap->asce);
583 		break;
584 	}
585 	default:
586 		ret = -ENXIO;
587 		break;
588 	}
589 	return ret;
590 }
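
/*
 * All KVM_S390_VM_MEM_CTRL attributes are driven through the
 * KVM_SET_DEVICE_ATTR / KVM_GET_DEVICE_ATTR vm ioctls.  A minimal sketch
 * for capping guest memory at 16 GiB before any VCPU exists (variable
 * names are purely illustrative):
 *
 *	__u64 limit = 16ULL << 30;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MEM_CTRL,
 *		.attr  = KVM_S390_VM_MEM_LIMIT_SIZE,
 *		.addr  = (__u64) &limit,
 *	};
 *
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */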
591 
592 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
593 
594 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
595 {
596 	struct kvm_vcpu *vcpu;
597 	int i;
598 
599 	if (!test_kvm_facility(kvm, 76))
600 		return -EINVAL;
601 
602 	mutex_lock(&kvm->lock);
603 	switch (attr->attr) {
604 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
605 		get_random_bytes(
606 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
607 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
608 		kvm->arch.crypto.aes_kw = 1;
609 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
610 		break;
611 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
612 		get_random_bytes(
613 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
614 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
615 		kvm->arch.crypto.dea_kw = 1;
616 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
617 		break;
618 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
619 		kvm->arch.crypto.aes_kw = 0;
620 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
621 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
622 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
623 		break;
624 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
625 		kvm->arch.crypto.dea_kw = 0;
626 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
627 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
628 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
629 		break;
630 	default:
631 		mutex_unlock(&kvm->lock);
632 		return -ENXIO;
633 	}
634 
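	/*
	 * Propagate the new wrapping key state to every VCPU and kick it
	 * out of SIE so the updated controls are picked up on reentry.
	 */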
635 	kvm_for_each_vcpu(i, vcpu, kvm) {
636 		kvm_s390_vcpu_crypto_setup(vcpu);
637 		exit_sie(vcpu);
638 	}
639 	mutex_unlock(&kvm->lock);
640 	return 0;
641 }
642 
643 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
644 {
645 	u8 gtod_high;
646 
647 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
648 					   sizeof(gtod_high)))
649 		return -EFAULT;
650 
651 	if (gtod_high != 0)
652 		return -EINVAL;
653 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
654 
655 	return 0;
656 }
657 
658 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
659 {
660 	u64 gtod;
661 
662 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
663 		return -EFAULT;
664 
665 	kvm_s390_set_tod_clock(kvm, gtod);
666 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
667 	return 0;
668 }
669 
670 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
671 {
672 	int ret;
673 
674 	if (attr->flags)
675 		return -EINVAL;
676 
677 	switch (attr->attr) {
678 	case KVM_S390_VM_TOD_HIGH:
679 		ret = kvm_s390_set_tod_high(kvm, attr);
680 		break;
681 	case KVM_S390_VM_TOD_LOW:
682 		ret = kvm_s390_set_tod_low(kvm, attr);
683 		break;
684 	default:
685 		ret = -ENXIO;
686 		break;
687 	}
688 	return ret;
689 }
690 
691 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
692 {
693 	u8 gtod_high = 0;
694 
695 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
696 					 sizeof(gtod_high)))
697 		return -EFAULT;
698 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
699 
700 	return 0;
701 }
702 
703 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
704 {
705 	u64 gtod;
706 
707 	gtod = kvm_s390_get_tod_clock_fast(kvm);
708 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
709 		return -EFAULT;
710 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
711 
712 	return 0;
713 }
714 
715 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
716 {
717 	int ret;
718 
719 	if (attr->flags)
720 		return -EINVAL;
721 
722 	switch (attr->attr) {
723 	case KVM_S390_VM_TOD_HIGH:
724 		ret = kvm_s390_get_tod_high(kvm, attr);
725 		break;
726 	case KVM_S390_VM_TOD_LOW:
727 		ret = kvm_s390_get_tod_low(kvm, attr);
728 		break;
729 	default:
730 		ret = -ENXIO;
731 		break;
732 	}
733 	return ret;
734 }
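
/*
 * The guest TOD is not stored as an absolute value: the SIE block carries
 * an epoch difference that the hardware adds to the host TOD clock, so
 * setting KVM_S390_VM_TOD_LOW boils down to adjusting kvm->arch.epoch and
 * every VCPU's epoch field (see kvm_s390_set_tod_clock()).  User space
 * drives this through the same device-attribute interface as the other
 * groups, e.g. (sketch):
 *
 *	__u64 tod = 0;			(set to the desired guest TOD)
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_TOD,
 *		.attr  = KVM_S390_VM_TOD_LOW,
 *		.addr  = (__u64) &tod,
 *	};
 *
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */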
735 
736 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
737 {
738 	struct kvm_s390_vm_cpu_processor *proc;
739 	u16 lowest_ibc, unblocked_ibc;
740 	int ret = 0;
741 
742 	mutex_lock(&kvm->lock);
743 	if (kvm->created_vcpus) {
744 		ret = -EBUSY;
745 		goto out;
746 	}
747 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
748 	if (!proc) {
749 		ret = -ENOMEM;
750 		goto out;
751 	}
752 	if (!copy_from_user(proc, (void __user *)attr->addr,
753 			    sizeof(*proc))) {
754 		kvm->arch.model.cpuid = proc->cpuid;
755 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
756 		unblocked_ibc = sclp.ibc & 0xfff;
757 		if (lowest_ibc) {
758 			if (proc->ibc > unblocked_ibc)
759 				kvm->arch.model.ibc = unblocked_ibc;
760 			else if (proc->ibc < lowest_ibc)
761 				kvm->arch.model.ibc = lowest_ibc;
762 			else
763 				kvm->arch.model.ibc = proc->ibc;
764 		}
765 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
766 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
767 	} else
768 		ret = -EFAULT;
769 	kfree(proc);
770 out:
771 	mutex_unlock(&kvm->lock);
772 	return ret;
773 }
774 
775 static int kvm_s390_set_processor_feat(struct kvm *kvm,
776 				       struct kvm_device_attr *attr)
777 {
778 	struct kvm_s390_vm_cpu_feat data;
779 	int ret = -EBUSY;
780 
781 	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
782 		return -EFAULT;
783 	if (!bitmap_subset((unsigned long *) data.feat,
784 			   kvm_s390_available_cpu_feat,
785 			   KVM_S390_VM_CPU_FEAT_NR_BITS))
786 		return -EINVAL;
787 
788 	mutex_lock(&kvm->lock);
789 	if (!atomic_read(&kvm->online_vcpus)) {
790 		bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
791 			    KVM_S390_VM_CPU_FEAT_NR_BITS);
792 		ret = 0;
793 	}
794 	mutex_unlock(&kvm->lock);
795 	return ret;
796 }
797 
798 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
799 					  struct kvm_device_attr *attr)
800 {
801 	/*
802 	 * Once supported by kernel + hw, we have to store the subfunctions
803 	 * in kvm->arch and remember that user space configured them.
804 	 */
805 	return -ENXIO;
806 }
807 
808 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
809 {
810 	int ret = -ENXIO;
811 
812 	switch (attr->attr) {
813 	case KVM_S390_VM_CPU_PROCESSOR:
814 		ret = kvm_s390_set_processor(kvm, attr);
815 		break;
816 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
817 		ret = kvm_s390_set_processor_feat(kvm, attr);
818 		break;
819 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
820 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
821 		break;
822 	}
823 	return ret;
824 }
825 
826 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
827 {
828 	struct kvm_s390_vm_cpu_processor *proc;
829 	int ret = 0;
830 
831 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
832 	if (!proc) {
833 		ret = -ENOMEM;
834 		goto out;
835 	}
836 	proc->cpuid = kvm->arch.model.cpuid;
837 	proc->ibc = kvm->arch.model.ibc;
838 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
839 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
840 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
841 		ret = -EFAULT;
842 	kfree(proc);
843 out:
844 	return ret;
845 }
846 
847 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
848 {
849 	struct kvm_s390_vm_cpu_machine *mach;
850 	int ret = 0;
851 
852 	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
853 	if (!mach) {
854 		ret = -ENOMEM;
855 		goto out;
856 	}
857 	get_cpu_id((struct cpuid *) &mach->cpuid);
858 	mach->ibc = sclp.ibc;
859 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
860 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
861 	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
862 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
863 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
864 		ret = -EFAULT;
865 	kfree(mach);
866 out:
867 	return ret;
868 }
869 
870 static int kvm_s390_get_processor_feat(struct kvm *kvm,
871 				       struct kvm_device_attr *attr)
872 {
873 	struct kvm_s390_vm_cpu_feat data;
874 
875 	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
876 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
877 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
878 		return -EFAULT;
879 	return 0;
880 }
881 
882 static int kvm_s390_get_machine_feat(struct kvm *kvm,
883 				     struct kvm_device_attr *attr)
884 {
885 	struct kvm_s390_vm_cpu_feat data;
886 
887 	bitmap_copy((unsigned long *) data.feat,
888 		    kvm_s390_available_cpu_feat,
889 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
890 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
891 		return -EFAULT;
892 	return 0;
893 }
894 
895 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
896 					  struct kvm_device_attr *attr)
897 {
898 	/*
899 	 * Once we can actually configure subfunctions (kernel + hw support),
900 	 * we have to check if they were already set by user space, if so copy
901 	 * them from kvm->arch.
902 	 */
903 	return -ENXIO;
904 }
905 
906 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
907 					struct kvm_device_attr *attr)
908 {
909 	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
910 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
911 		return -EFAULT;
912 	return 0;
913 }
914 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
915 {
916 	int ret = -ENXIO;
917 
918 	switch (attr->attr) {
919 	case KVM_S390_VM_CPU_PROCESSOR:
920 		ret = kvm_s390_get_processor(kvm, attr);
921 		break;
922 	case KVM_S390_VM_CPU_MACHINE:
923 		ret = kvm_s390_get_machine(kvm, attr);
924 		break;
925 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
926 		ret = kvm_s390_get_processor_feat(kvm, attr);
927 		break;
928 	case KVM_S390_VM_CPU_MACHINE_FEAT:
929 		ret = kvm_s390_get_machine_feat(kvm, attr);
930 		break;
931 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
932 		ret = kvm_s390_get_processor_subfunc(kvm, attr);
933 		break;
934 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
935 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
936 		break;
937 	}
938 	return ret;
939 }
940 
941 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
942 {
943 	int ret;
944 
945 	switch (attr->group) {
946 	case KVM_S390_VM_MEM_CTRL:
947 		ret = kvm_s390_set_mem_control(kvm, attr);
948 		break;
949 	case KVM_S390_VM_TOD:
950 		ret = kvm_s390_set_tod(kvm, attr);
951 		break;
952 	case KVM_S390_VM_CPU_MODEL:
953 		ret = kvm_s390_set_cpu_model(kvm, attr);
954 		break;
955 	case KVM_S390_VM_CRYPTO:
956 		ret = kvm_s390_vm_set_crypto(kvm, attr);
957 		break;
958 	default:
959 		ret = -ENXIO;
960 		break;
961 	}
962 
963 	return ret;
964 }
965 
966 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
967 {
968 	int ret;
969 
970 	switch (attr->group) {
971 	case KVM_S390_VM_MEM_CTRL:
972 		ret = kvm_s390_get_mem_control(kvm, attr);
973 		break;
974 	case KVM_S390_VM_TOD:
975 		ret = kvm_s390_get_tod(kvm, attr);
976 		break;
977 	case KVM_S390_VM_CPU_MODEL:
978 		ret = kvm_s390_get_cpu_model(kvm, attr);
979 		break;
980 	default:
981 		ret = -ENXIO;
982 		break;
983 	}
984 
985 	return ret;
986 }
987 
988 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
989 {
990 	int ret;
991 
992 	switch (attr->group) {
993 	case KVM_S390_VM_MEM_CTRL:
994 		switch (attr->attr) {
995 		case KVM_S390_VM_MEM_ENABLE_CMMA:
996 		case KVM_S390_VM_MEM_CLR_CMMA:
997 			ret = sclp.has_cmma ? 0 : -ENXIO;
998 			break;
999 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1000 			ret = 0;
1001 			break;
1002 		default:
1003 			ret = -ENXIO;
1004 			break;
1005 		}
1006 		break;
1007 	case KVM_S390_VM_TOD:
1008 		switch (attr->attr) {
1009 		case KVM_S390_VM_TOD_LOW:
1010 		case KVM_S390_VM_TOD_HIGH:
1011 			ret = 0;
1012 			break;
1013 		default:
1014 			ret = -ENXIO;
1015 			break;
1016 		}
1017 		break;
1018 	case KVM_S390_VM_CPU_MODEL:
1019 		switch (attr->attr) {
1020 		case KVM_S390_VM_CPU_PROCESSOR:
1021 		case KVM_S390_VM_CPU_MACHINE:
1022 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1023 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1024 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1025 			ret = 0;
1026 			break;
1027 		/* configuring subfunctions is not supported yet */
1028 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1029 		default:
1030 			ret = -ENXIO;
1031 			break;
1032 		}
1033 		break;
1034 	case KVM_S390_VM_CRYPTO:
1035 		switch (attr->attr) {
1036 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1037 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1038 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1039 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1040 			ret = 0;
1041 			break;
1042 		default:
1043 			ret = -ENXIO;
1044 			break;
1045 		}
1046 		break;
1047 	default:
1048 		ret = -ENXIO;
1049 		break;
1050 	}
1051 
1052 	return ret;
1053 }
1054 
1055 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1056 {
1057 	uint8_t *keys;
1058 	uint64_t hva;
1059 	int i, r = 0;
1060 
1061 	if (args->flags != 0)
1062 		return -EINVAL;
1063 
1064 	/* Is this guest using storage keys? */
1065 	if (!mm_use_skey(current->mm))
1066 		return KVM_S390_GET_SKEYS_NONE;
1067 
1068 	/* Enforce sane limit on memory allocation */
1069 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1070 		return -EINVAL;
1071 
1072 	keys = kmalloc_array(args->count, sizeof(uint8_t),
1073 			     GFP_KERNEL | __GFP_NOWARN);
1074 	if (!keys)
1075 		keys = vmalloc(sizeof(uint8_t) * args->count);
1076 	if (!keys)
1077 		return -ENOMEM;
1078 
1079 	down_read(&current->mm->mmap_sem);
1080 	for (i = 0; i < args->count; i++) {
1081 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1082 		if (kvm_is_error_hva(hva)) {
1083 			r = -EFAULT;
1084 			break;
1085 		}
1086 
1087 		r = get_guest_storage_key(current->mm, hva, &keys[i]);
1088 		if (r)
1089 			break;
1090 	}
1091 	up_read(&current->mm->mmap_sem);
1092 
1093 	if (!r) {
1094 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1095 				 sizeof(uint8_t) * args->count);
1096 		if (r)
1097 			r = -EFAULT;
1098 	}
1099 
1100 	kvfree(keys);
1101 	return r;
1102 }
1103 
1104 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1105 {
1106 	uint8_t *keys;
1107 	uint64_t hva;
1108 	int i, r = 0;
1109 
1110 	if (args->flags != 0)
1111 		return -EINVAL;
1112 
1113 	/* Enforce sane limit on memory allocation */
1114 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1115 		return -EINVAL;
1116 
1117 	keys = kmalloc_array(args->count, sizeof(uint8_t),
1118 			     GFP_KERNEL | __GFP_NOWARN);
1119 	if (!keys)
1120 		keys = vmalloc(sizeof(uint8_t) * args->count);
1121 	if (!keys)
1122 		return -ENOMEM;
1123 
1124 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1125 			   sizeof(uint8_t) * args->count);
1126 	if (r) {
1127 		r = -EFAULT;
1128 		goto out;
1129 	}
1130 
1131 	/* Enable storage key handling for the guest */
1132 	r = s390_enable_skey();
1133 	if (r)
1134 		goto out;
1135 
1136 	down_read(&current->mm->mmap_sem);
1137 	for (i = 0; i < args->count; i++) {
1138 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1139 		if (kvm_is_error_hva(hva)) {
1140 			r = -EFAULT;
1141 			break;
1142 		}
1143 
1144 		/* Lowest order bit is reserved */
1145 		if (keys[i] & 0x01) {
1146 			r = -EINVAL;
1147 			break;
1148 		}
1149 
1150 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1151 		if (r)
1152 			break;
1153 	}
1154 	up_read(&current->mm->mmap_sem);
1155 out:
1156 	kvfree(keys);
1157 	return r;
1158 }
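
/*
 * The two ioctls above exist so that storage keys can be migrated.  A
 * sketch of the reader side (error handling omitted, buffer size chosen
 * arbitrarily):
 *
 *	uint8_t keys[128];
 *	struct kvm_s390_skeys args = {
 *		.start_gfn = 0,
 *		.count = sizeof(keys),
 *		.skeydata_addr = (__u64) keys,
 *	};
 *
 *	long r = ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 *
 * A return value of KVM_S390_GET_SKEYS_NONE means the guest never used
 * storage keys and nothing needs to be transferred.
 */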
1159 
1160 long kvm_arch_vm_ioctl(struct file *filp,
1161 		       unsigned int ioctl, unsigned long arg)
1162 {
1163 	struct kvm *kvm = filp->private_data;
1164 	void __user *argp = (void __user *)arg;
1165 	struct kvm_device_attr attr;
1166 	int r;
1167 
1168 	switch (ioctl) {
1169 	case KVM_S390_INTERRUPT: {
1170 		struct kvm_s390_interrupt s390int;
1171 
1172 		r = -EFAULT;
1173 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
1174 			break;
1175 		r = kvm_s390_inject_vm(kvm, &s390int);
1176 		break;
1177 	}
1178 	case KVM_ENABLE_CAP: {
1179 		struct kvm_enable_cap cap;
1180 		r = -EFAULT;
1181 		if (copy_from_user(&cap, argp, sizeof(cap)))
1182 			break;
1183 		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1184 		break;
1185 	}
1186 	case KVM_CREATE_IRQCHIP: {
1187 		struct kvm_irq_routing_entry routing;
1188 
1189 		r = -EINVAL;
1190 		if (kvm->arch.use_irqchip) {
1191 			/* Set up dummy routing. */
1192 			memset(&routing, 0, sizeof(routing));
1193 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1194 		}
1195 		break;
1196 	}
1197 	case KVM_SET_DEVICE_ATTR: {
1198 		r = -EFAULT;
1199 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1200 			break;
1201 		r = kvm_s390_vm_set_attr(kvm, &attr);
1202 		break;
1203 	}
1204 	case KVM_GET_DEVICE_ATTR: {
1205 		r = -EFAULT;
1206 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1207 			break;
1208 		r = kvm_s390_vm_get_attr(kvm, &attr);
1209 		break;
1210 	}
1211 	case KVM_HAS_DEVICE_ATTR: {
1212 		r = -EFAULT;
1213 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1214 			break;
1215 		r = kvm_s390_vm_has_attr(kvm, &attr);
1216 		break;
1217 	}
1218 	case KVM_S390_GET_SKEYS: {
1219 		struct kvm_s390_skeys args;
1220 
1221 		r = -EFAULT;
1222 		if (copy_from_user(&args, argp,
1223 				   sizeof(struct kvm_s390_skeys)))
1224 			break;
1225 		r = kvm_s390_get_skeys(kvm, &args);
1226 		break;
1227 	}
1228 	case KVM_S390_SET_SKEYS: {
1229 		struct kvm_s390_skeys args;
1230 
1231 		r = -EFAULT;
1232 		if (copy_from_user(&args, argp,
1233 				   sizeof(struct kvm_s390_skeys)))
1234 			break;
1235 		r = kvm_s390_set_skeys(kvm, &args);
1236 		break;
1237 	}
1238 	default:
1239 		r = -ENOTTY;
1240 	}
1241 
1242 	return r;
1243 }
1244 
1245 static int kvm_s390_query_ap_config(u8 *config)
1246 {
1247 	u32 fcn_code = 0x04000000UL;
1248 	u32 cc = 0;
1249 
1250 	memset(config, 0, 128);
1251 	asm volatile(
1252 		"lgr 0,%1\n"
1253 		"lgr 2,%2\n"
1254 		".long 0xb2af0000\n"		/* PQAP(QCI) */
1255 		"0: ipm %0\n"
1256 		"srl %0,28\n"
1257 		"1:\n"
1258 		EX_TABLE(0b, 1b)
1259 		: "+r" (cc)
1260 		: "r" (fcn_code), "r" (config)
1261 		: "cc", "0", "2", "memory"
1262 	);
1263 
1264 	return cc;
1265 }
1266 
1267 static int kvm_s390_apxa_installed(void)
1268 {
1269 	u8 config[128];
1270 	int cc;
1271 
1272 	if (test_facility(12)) {
1273 		cc = kvm_s390_query_ap_config(config);
1274 
1275 		if (cc)
1276 			pr_err("PQAP(QCI) failed with cc=%d", cc);
1277 		else
1278 			return config[0] & 0x40;
1279 	}
1280 
1281 	return 0;
1282 }
1283 
1284 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1285 {
1286 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1287 
1288 	if (kvm_s390_apxa_installed())
1289 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1290 	else
1291 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1292 }
1293 
1294 static u64 kvm_s390_get_initial_cpuid(void)
1295 {
1296 	struct cpuid cpuid;
1297 
1298 	get_cpu_id(&cpuid);
1299 	cpuid.version = 0xff;
1300 	return *((u64 *) &cpuid);
1301 }
1302 
1303 static void kvm_s390_crypto_init(struct kvm *kvm)
1304 {
1305 	if (!test_kvm_facility(kvm, 76))
1306 		return;
1307 
1308 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1309 	kvm_s390_set_crycb_format(kvm);
1310 
1311 	/* Enable AES/DEA protected key functions by default */
1312 	kvm->arch.crypto.aes_kw = 1;
1313 	kvm->arch.crypto.dea_kw = 1;
1314 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1315 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1316 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1317 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1318 }
1319 
1320 static void sca_dispose(struct kvm *kvm)
1321 {
1322 	if (kvm->arch.use_esca)
1323 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1324 	else
1325 		free_page((unsigned long)(kvm->arch.sca));
1326 	kvm->arch.sca = NULL;
1327 }
1328 
1329 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1330 {
1331 	gfp_t alloc_flags = GFP_KERNEL;
1332 	int i, rc;
1333 	char debug_name[16];
1334 	static unsigned long sca_offset;
1335 
1336 	rc = -EINVAL;
1337 #ifdef CONFIG_KVM_S390_UCONTROL
1338 	if (type & ~KVM_VM_S390_UCONTROL)
1339 		goto out_err;
1340 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1341 		goto out_err;
1342 #else
1343 	if (type)
1344 		goto out_err;
1345 #endif
1346 
1347 	rc = s390_enable_sie();
1348 	if (rc)
1349 		goto out_err;
1350 
1351 	rc = -ENOMEM;
1352 
1353 	ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1354 
1355 	kvm->arch.use_esca = 0; /* start with basic SCA */
1356 	if (!sclp.has_64bscao)
1357 		alloc_flags |= GFP_DMA;
1358 	rwlock_init(&kvm->arch.sca_lock);
1359 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1360 	if (!kvm->arch.sca)
1361 		goto out_err;
1362 	spin_lock(&kvm_lock);
1363 	sca_offset += 16;
1364 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1365 		sca_offset = 0;
1366 	kvm->arch.sca = (struct bsca_block *)
1367 			((char *) kvm->arch.sca + sca_offset);
1368 	spin_unlock(&kvm_lock);
1369 
1370 	sprintf(debug_name, "kvm-%u", current->pid);
1371 
1372 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1373 	if (!kvm->arch.dbf)
1374 		goto out_err;
1375 
1376 	kvm->arch.sie_page2 =
1377 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1378 	if (!kvm->arch.sie_page2)
1379 		goto out_err;
1380 
1381 	/* Populate the facility mask initially. */
1382 	memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1383 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1384 	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1385 		if (i < kvm_s390_fac_list_mask_size())
1386 			kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1387 		else
1388 			kvm->arch.model.fac_mask[i] = 0UL;
1389 	}
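
	/*
	 * fac_mask is the upper bound of what this host/KVM can virtualize,
	 * whereas fac_list (set up below) describes the current guest CPU
	 * model; test_kvm_facility() requires a bit to be set in both.
	 * User space can later replace fac_list through the
	 * KVM_S390_VM_CPU_PROCESSOR attribute.
	 */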
1390 
1391 	/* Populate the facility list initially. */
1392 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1393 	memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1394 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1395 
1396 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
1397 	set_kvm_facility(kvm->arch.model.fac_list, 74);
1398 
1399 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1400 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1401 
1402 	kvm_s390_crypto_init(kvm);
1403 
1404 	spin_lock_init(&kvm->arch.float_int.lock);
1405 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
1406 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1407 	init_waitqueue_head(&kvm->arch.ipte_wq);
1408 	mutex_init(&kvm->arch.ipte_mutex);
1409 
1410 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1411 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
1412 
1413 	if (type & KVM_VM_S390_UCONTROL) {
1414 		kvm->arch.gmap = NULL;
1415 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1416 	} else {
1417 		if (sclp.hamax == U64_MAX)
1418 			kvm->arch.mem_limit = TASK_MAX_SIZE;
1419 		else
1420 			kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
1421 						    sclp.hamax + 1);
1422 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1423 		if (!kvm->arch.gmap)
1424 			goto out_err;
1425 		kvm->arch.gmap->private = kvm;
1426 		kvm->arch.gmap->pfault_enabled = 0;
1427 	}
1428 
1429 	kvm->arch.css_support = 0;
1430 	kvm->arch.use_irqchip = 0;
1431 	kvm->arch.epoch = 0;
1432 
1433 	spin_lock_init(&kvm->arch.start_stop_lock);
1434 	kvm_s390_vsie_init(kvm);
1435 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1436 
1437 	return 0;
1438 out_err:
1439 	free_page((unsigned long)kvm->arch.sie_page2);
1440 	debug_unregister(kvm->arch.dbf);
1441 	sca_dispose(kvm);
1442 	KVM_EVENT(3, "creation of vm failed: %d", rc);
1443 	return rc;
1444 }
1445 
1446 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1447 {
1448 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1449 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1450 	kvm_s390_clear_local_irqs(vcpu);
1451 	kvm_clear_async_pf_completion_queue(vcpu);
1452 	if (!kvm_is_ucontrol(vcpu->kvm))
1453 		sca_del_vcpu(vcpu);
1454 
1455 	if (kvm_is_ucontrol(vcpu->kvm))
1456 		gmap_remove(vcpu->arch.gmap);
1457 
1458 	if (vcpu->kvm->arch.use_cmma)
1459 		kvm_s390_vcpu_unsetup_cmma(vcpu);
1460 	free_page((unsigned long)(vcpu->arch.sie_block));
1461 
1462 	kvm_vcpu_uninit(vcpu);
1463 	kmem_cache_free(kvm_vcpu_cache, vcpu);
1464 }
1465 
1466 static void kvm_free_vcpus(struct kvm *kvm)
1467 {
1468 	unsigned int i;
1469 	struct kvm_vcpu *vcpu;
1470 
1471 	kvm_for_each_vcpu(i, vcpu, kvm)
1472 		kvm_arch_vcpu_destroy(vcpu);
1473 
1474 	mutex_lock(&kvm->lock);
1475 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1476 		kvm->vcpus[i] = NULL;
1477 
1478 	atomic_set(&kvm->online_vcpus, 0);
1479 	mutex_unlock(&kvm->lock);
1480 }
1481 
1482 void kvm_arch_destroy_vm(struct kvm *kvm)
1483 {
1484 	kvm_free_vcpus(kvm);
1485 	sca_dispose(kvm);
1486 	debug_unregister(kvm->arch.dbf);
1487 	free_page((unsigned long)kvm->arch.sie_page2);
1488 	if (!kvm_is_ucontrol(kvm))
1489 		gmap_remove(kvm->arch.gmap);
1490 	kvm_s390_destroy_adapters(kvm);
1491 	kvm_s390_clear_float_irqs(kvm);
1492 	kvm_s390_vsie_destroy(kvm);
1493 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1494 }
1495 
1496 /* Section: vcpu related */
1497 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1498 {
1499 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
1500 	if (!vcpu->arch.gmap)
1501 		return -ENOMEM;
1502 	vcpu->arch.gmap->private = vcpu->kvm;
1503 
1504 	return 0;
1505 }
1506 
1507 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1508 {
1509 	read_lock(&vcpu->kvm->arch.sca_lock);
1510 	if (vcpu->kvm->arch.use_esca) {
1511 		struct esca_block *sca = vcpu->kvm->arch.sca;
1512 
1513 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1514 		sca->cpu[vcpu->vcpu_id].sda = 0;
1515 	} else {
1516 		struct bsca_block *sca = vcpu->kvm->arch.sca;
1517 
1518 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1519 		sca->cpu[vcpu->vcpu_id].sda = 0;
1520 	}
1521 	read_unlock(&vcpu->kvm->arch.sca_lock);
1522 }
1523 
1524 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1525 {
1526 	read_lock(&vcpu->kvm->arch.sca_lock);
1527 	if (vcpu->kvm->arch.use_esca) {
1528 		struct esca_block *sca = vcpu->kvm->arch.sca;
1529 
1530 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1531 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1532 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1533 		vcpu->arch.sie_block->ecb2 |= 0x04U;
1534 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1535 	} else {
1536 		struct bsca_block *sca = vcpu->kvm->arch.sca;
1537 
1538 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1539 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1540 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1541 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1542 	}
1543 	read_unlock(&vcpu->kvm->arch.sca_lock);
1544 }
1545 
1546 /* Basic SCA to Extended SCA data copy routines */
1547 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1548 {
1549 	d->sda = s->sda;
1550 	d->sigp_ctrl.c = s->sigp_ctrl.c;
1551 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1552 }
1553 
1554 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1555 {
1556 	int i;
1557 
1558 	d->ipte_control = s->ipte_control;
1559 	d->mcn[0] = s->mcn;
1560 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1561 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1562 }
1563 
1564 static int sca_switch_to_extended(struct kvm *kvm)
1565 {
1566 	struct bsca_block *old_sca = kvm->arch.sca;
1567 	struct esca_block *new_sca;
1568 	struct kvm_vcpu *vcpu;
1569 	unsigned int vcpu_idx;
1570 	u32 scaol, scaoh;
1571 
1572 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1573 	if (!new_sca)
1574 		return -ENOMEM;
1575 
1576 	scaoh = (u32)((u64)(new_sca) >> 32);
1577 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
1578 
1579 	kvm_s390_vcpu_block_all(kvm);
1580 	write_lock(&kvm->arch.sca_lock);
1581 
1582 	sca_copy_b_to_e(new_sca, old_sca);
1583 
1584 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1585 		vcpu->arch.sie_block->scaoh = scaoh;
1586 		vcpu->arch.sie_block->scaol = scaol;
1587 		vcpu->arch.sie_block->ecb2 |= 0x04U;
1588 	}
1589 	kvm->arch.sca = new_sca;
1590 	kvm->arch.use_esca = 1;
1591 
1592 	write_unlock(&kvm->arch.sca_lock);
1593 	kvm_s390_vcpu_unblock_all(kvm);
1594 
1595 	free_page((unsigned long)old_sca);
1596 
1597 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
1598 		 old_sca, kvm->arch.sca);
1599 	return 0;
1600 }
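
/*
 * A VM starts out with a basic SCA, which is limited to
 * KVM_S390_BSCA_CPU_SLOTS VCPUs.  Only when a VCPU id beyond that range is
 * requested (see sca_can_add_vcpu() below) is the SCA converted, with all
 * VCPUs blocked and the sca_lock held for writing, to the extended format
 * providing KVM_S390_ESCA_CPU_SLOTS entries.
 */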
1601 
1602 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1603 {
1604 	int rc;
1605 
1606 	if (id < KVM_S390_BSCA_CPU_SLOTS)
1607 		return true;
1608 	if (!sclp.has_esca || !sclp.has_64bscao)
1609 		return false;
1610 
1611 	mutex_lock(&kvm->lock);
1612 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1613 	mutex_unlock(&kvm->lock);
1614 
1615 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1616 }
1617 
1618 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1619 {
1620 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1621 	kvm_clear_async_pf_completion_queue(vcpu);
1622 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1623 				    KVM_SYNC_GPRS |
1624 				    KVM_SYNC_ACRS |
1625 				    KVM_SYNC_CRS |
1626 				    KVM_SYNC_ARCH0 |
1627 				    KVM_SYNC_PFAULT;
1628 	if (test_kvm_facility(vcpu->kvm, 64))
1629 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1630 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
1631 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1632 	 */
1633 	if (MACHINE_HAS_VX)
1634 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1635 	else
1636 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
1637 
1638 	if (kvm_is_ucontrol(vcpu->kvm))
1639 		return __kvm_ucontrol_vcpu_init(vcpu);
1640 
1641 	return 0;
1642 }
1643 
1644 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1645 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1646 {
1647 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
1648 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1649 	vcpu->arch.cputm_start = get_tod_clock_fast();
1650 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1651 }
1652 
1653 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1654 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1655 {
1656 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
1657 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1658 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1659 	vcpu->arch.cputm_start = 0;
1660 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1661 }
1662 
1663 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1664 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1665 {
1666 	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
1667 	vcpu->arch.cputm_enabled = true;
1668 	__start_cpu_timer_accounting(vcpu);
1669 }
1670 
1671 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1672 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1673 {
1674 	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
1675 	__stop_cpu_timer_accounting(vcpu);
1676 	vcpu->arch.cputm_enabled = false;
1677 }
1678 
1679 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1680 {
1681 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1682 	__enable_cpu_timer_accounting(vcpu);
1683 	preempt_enable();
1684 }
1685 
1686 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1687 {
1688 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1689 	__disable_cpu_timer_accounting(vcpu);
1690 	preempt_enable();
1691 }
1692 
1693 /* set the cpu timer - may only be called from the VCPU thread itself */
1694 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
1695 {
1696 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1697 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1698 	if (vcpu->arch.cputm_enabled)
1699 		vcpu->arch.cputm_start = get_tod_clock_fast();
1700 	vcpu->arch.sie_block->cputm = cputm;
1701 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1702 	preempt_enable();
1703 }
1704 
1705 /* update and get the cpu timer - can also be called from other VCPU threads */
1706 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
1707 {
1708 	unsigned int seq;
1709 	__u64 value;
1710 
1711 	if (unlikely(!vcpu->arch.cputm_enabled))
1712 		return vcpu->arch.sie_block->cputm;
1713 
1714 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1715 	do {
1716 		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
1717 		/*
1718 		 * If the writer would ever execute a read in the critical
1719 		 * section, e.g. in irq context, we have a deadlock.
1720 		 */
1721 		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
1722 		value = vcpu->arch.sie_block->cputm;
1723 		/* if cputm_start is 0, accounting is being started/stopped */
1724 		if (likely(vcpu->arch.cputm_start))
1725 			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1726 	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
1727 	preempt_enable();
1728 	return value;
1729 }
1730 
1731 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1732 {
1733 	/* Save host register state */
1734 	save_fpu_regs();
1735 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
1736 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
1737 
1738 	if (MACHINE_HAS_VX)
1739 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
1740 	else
1741 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
1742 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
1743 	if (test_fp_ctl(current->thread.fpu.fpc))
1744 		/* User space provided an invalid FPC, let's clear it */
1745 		current->thread.fpu.fpc = 0;
1746 
1747 	save_access_regs(vcpu->arch.host_acrs);
1748 	restore_access_regs(vcpu->run->s.regs.acrs);
1749 	gmap_enable(vcpu->arch.enabled_gmap);
1750 	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1751 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1752 		__start_cpu_timer_accounting(vcpu);
1753 	vcpu->cpu = cpu;
1754 }
1755 
1756 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1757 {
1758 	vcpu->cpu = -1;
1759 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1760 		__stop_cpu_timer_accounting(vcpu);
1761 	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1762 	vcpu->arch.enabled_gmap = gmap_get_enabled();
1763 	gmap_disable(vcpu->arch.enabled_gmap);
1764 
1765 	/* Save guest register state */
1766 	save_fpu_regs();
1767 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
1768 
1769 	/* Restore host register state */
1770 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
1771 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
1772 
1773 	save_access_regs(vcpu->run->s.regs.acrs);
1774 	restore_access_regs(vcpu->arch.host_acrs);
1775 }
1776 
1777 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1778 {
1779 	/* this equals the initial CPU reset in the POP, but we don't switch to ESA */
1780 	vcpu->arch.sie_block->gpsw.mask = 0UL;
1781 	vcpu->arch.sie_block->gpsw.addr = 0UL;
1782 	kvm_s390_set_prefix(vcpu, 0);
1783 	kvm_s390_set_cpu_timer(vcpu, 0);
1784 	vcpu->arch.sie_block->ckc       = 0UL;
1785 	vcpu->arch.sie_block->todpr     = 0;
1786 	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1787 	vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
1788 	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1789 	/* make sure the new fpc will be lazily loaded */
1790 	save_fpu_regs();
1791 	current->thread.fpu.fpc = 0;
1792 	vcpu->arch.sie_block->gbea = 1;
1793 	vcpu->arch.sie_block->pp = 0;
1794 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1795 	kvm_clear_async_pf_completion_queue(vcpu);
1796 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1797 		kvm_s390_vcpu_stop(vcpu);
1798 	kvm_s390_clear_local_irqs(vcpu);
1799 }
1800 
1801 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1802 {
1803 	mutex_lock(&vcpu->kvm->lock);
1804 	preempt_disable();
1805 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1806 	preempt_enable();
1807 	mutex_unlock(&vcpu->kvm->lock);
1808 	if (!kvm_is_ucontrol(vcpu->kvm)) {
1809 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1810 		sca_add_vcpu(vcpu);
1811 	}
1812 	/* make vcpu_load load the right gmap on the first trigger */
1813 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
1814 }
1815 
1816 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1817 {
1818 	if (!test_kvm_facility(vcpu->kvm, 76))
1819 		return;
1820 
1821 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1822 
1823 	if (vcpu->kvm->arch.crypto.aes_kw)
1824 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1825 	if (vcpu->kvm->arch.crypto.dea_kw)
1826 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1827 
1828 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1829 }
1830 
1831 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1832 {
1833 	free_page(vcpu->arch.sie_block->cbrlo);
1834 	vcpu->arch.sie_block->cbrlo = 0;
1835 }
1836 
1837 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1838 {
1839 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1840 	if (!vcpu->arch.sie_block->cbrlo)
1841 		return -ENOMEM;
1842 
1843 	vcpu->arch.sie_block->ecb2 |= 0x80;
1844 	vcpu->arch.sie_block->ecb2 &= ~0x08;
1845 	return 0;
1846 }
1847 
1848 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1849 {
1850 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1851 
1852 	vcpu->arch.sie_block->ibc = model->ibc;
1853 	if (test_kvm_facility(vcpu->kvm, 7))
1854 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
1855 }
1856 
1857 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1858 {
1859 	int rc = 0;
1860 
1861 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1862 						    CPUSTAT_SM |
1863 						    CPUSTAT_STOPPED);
1864 
1865 	if (test_kvm_facility(vcpu->kvm, 78))
1866 		atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1867 	else if (test_kvm_facility(vcpu->kvm, 8))
1868 		atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1869 
1870 	kvm_s390_vcpu_setup_model(vcpu);
1871 
1872 	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
1873 	if (MACHINE_HAS_ESOP)
1874 		vcpu->arch.sie_block->ecb |= 0x02;
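	/* facility 9: sense-running-status, facility 73: transactional execution */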
1875 	if (test_kvm_facility(vcpu->kvm, 9))
1876 		vcpu->arch.sie_block->ecb |= 0x04;
1877 	if (test_kvm_facility(vcpu->kvm, 73))
1878 		vcpu->arch.sie_block->ecb |= 0x10;
1879 
1880 	if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
1881 		vcpu->arch.sie_block->ecb2 |= 0x08;
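	/* base eca value: MVPG interpretation and protection-exception interpretation */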
1882 	vcpu->arch.sie_block->eca = 0x1002000U;
1883 	if (sclp.has_cei)
1884 		vcpu->arch.sie_block->eca |= 0x80000000U;
1885 	if (sclp.has_ib)
1886 		vcpu->arch.sie_block->eca |= 0x40000000U;
1887 	if (sclp.has_siif)
1888 		vcpu->arch.sie_block->eca |= 1;
1889 	if (sclp.has_sigpif)
1890 		vcpu->arch.sie_block->eca |= 0x10000000U;
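	/* facility 64: runtime instrumentation, facility 129: vector facility */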
1891 	if (test_kvm_facility(vcpu->kvm, 64))
1892 		vcpu->arch.sie_block->ecb3 |= 0x01;
1893 	if (test_kvm_facility(vcpu->kvm, 129)) {
1894 		vcpu->arch.sie_block->eca |= 0x00020000;
1895 		vcpu->arch.sie_block->ecd |= 0x20000000;
1896 	}
1897 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
1898 	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1899 	if (test_kvm_facility(vcpu->kvm, 74))
1900 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
1901 
1902 	if (vcpu->kvm->arch.use_cmma) {
1903 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
1904 		if (rc)
1905 			return rc;
1906 	}
1907 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1908 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1909 
1910 	kvm_s390_vcpu_crypto_setup(vcpu);
1911 
1912 	return rc;
1913 }
1914 
1915 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1916 				      unsigned int id)
1917 {
1918 	struct kvm_vcpu *vcpu;
1919 	struct sie_page *sie_page;
1920 	int rc = -EINVAL;
1921 
1922 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
1923 		goto out;
1924 
1925 	rc = -ENOMEM;
1926 
1927 	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1928 	if (!vcpu)
1929 		goto out;
1930 
1931 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1932 	if (!sie_page)
1933 		goto out_free_cpu;
1934 
1935 	vcpu->arch.sie_block = &sie_page->sie_block;
1936 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1937 
1938 	/* the real guest size will always be smaller than msl */
1939 	vcpu->arch.sie_block->mso = 0;
1940 	vcpu->arch.sie_block->msl = sclp.hamax;
1941 
1942 	vcpu->arch.sie_block->icpua = id;
1943 	spin_lock_init(&vcpu->arch.local_int.lock);
1944 	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
1945 	vcpu->arch.local_int.wq = &vcpu->wq;
1946 	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
1947 	seqcount_init(&vcpu->arch.cputm_seqcount);
1948 
1949 	rc = kvm_vcpu_init(vcpu, kvm, id);
1950 	if (rc)
1951 		goto out_free_sie_block;
1952 	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
1953 		 vcpu->arch.sie_block);
1954 	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
1955 
1956 	return vcpu;
1957 out_free_sie_block:
1958 	free_page((unsigned long)(vcpu->arch.sie_block));
1959 out_free_cpu:
1960 	kmem_cache_free(kvm_vcpu_cache, vcpu);
1961 out:
1962 	return ERR_PTR(rc);
1963 }
1964 
1965 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
1966 {
1967 	return kvm_s390_vcpu_has_irq(vcpu, 0);
1968 }
1969 
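/*
 * Setting PROG_BLOCK_SIE in prog20 keeps the VCPU from (re-)entering SIE;
 * the SIE entry path checks prog20 until kvm_s390_vcpu_unblock() clears it.
 */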
1970 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
1971 {
1972 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1973 	exit_sie(vcpu);
1974 }
1975 
1976 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
1977 {
1978 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1979 }
1980 
1981 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
1982 {
1983 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1984 	exit_sie(vcpu);
1985 }
1986 
1987 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
1988 {
1989 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1990 }
1991 
1992 /*
1993  * Kick a guest cpu out of SIE and wait until SIE is not running.
1994  * If the CPU is not running (e.g. waiting as idle) the function will
1995  * return immediately. */
1996 void exit_sie(struct kvm_vcpu *vcpu)
1997 {
1998 	atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
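	/* prog0c has PROG_IN_SIE set while the cpu executes SIE; wait until it left */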
1999 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2000 		cpu_relax();
2001 }
2002 
2003 /* Kick a guest cpu out of SIE to process a request synchronously */
2004 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2005 {
2006 	kvm_make_request(req, vcpu);
2007 	kvm_s390_vcpu_request(vcpu);
2008 }
2009 
2010 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2011 			      unsigned long end)
2012 {
2013 	struct kvm *kvm = gmap->private;
2014 	struct kvm_vcpu *vcpu;
2015 	unsigned long prefix;
2016 	int i;
2017 
2018 	if (gmap_is_shadow(gmap))
2019 		return;
2020 	if (start >= 1UL << 31)
2021 		/* We are only interested in prefix pages */
2022 		return;
2023 	kvm_for_each_vcpu(i, vcpu, kvm) {
2024 		/* match against both prefix pages */
2025 		prefix = kvm_s390_get_prefix(vcpu);
2026 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2027 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2028 				   start, end);
2029 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2030 		}
2031 	}
2032 }
2033 
2034 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2035 {
2036 	/* kvm common code refers to this, but never calls it */
2037 	BUG();
2038 	return 0;
2039 }
2040 
2041 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2042 					   struct kvm_one_reg *reg)
2043 {
2044 	int r = -EINVAL;
2045 
2046 	switch (reg->id) {
2047 	case KVM_REG_S390_TODPR:
2048 		r = put_user(vcpu->arch.sie_block->todpr,
2049 			     (u32 __user *)reg->addr);
2050 		break;
2051 	case KVM_REG_S390_EPOCHDIFF:
2052 		r = put_user(vcpu->arch.sie_block->epoch,
2053 			     (u64 __user *)reg->addr);
2054 		break;
2055 	case KVM_REG_S390_CPU_TIMER:
2056 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
2057 			     (u64 __user *)reg->addr);
2058 		break;
2059 	case KVM_REG_S390_CLOCK_COMP:
2060 		r = put_user(vcpu->arch.sie_block->ckc,
2061 			     (u64 __user *)reg->addr);
2062 		break;
2063 	case KVM_REG_S390_PFTOKEN:
2064 		r = put_user(vcpu->arch.pfault_token,
2065 			     (u64 __user *)reg->addr);
2066 		break;
2067 	case KVM_REG_S390_PFCOMPARE:
2068 		r = put_user(vcpu->arch.pfault_compare,
2069 			     (u64 __user *)reg->addr);
2070 		break;
2071 	case KVM_REG_S390_PFSELECT:
2072 		r = put_user(vcpu->arch.pfault_select,
2073 			     (u64 __user *)reg->addr);
2074 		break;
2075 	case KVM_REG_S390_PP:
2076 		r = put_user(vcpu->arch.sie_block->pp,
2077 			     (u64 __user *)reg->addr);
2078 		break;
2079 	case KVM_REG_S390_GBEA:
2080 		r = put_user(vcpu->arch.sie_block->gbea,
2081 			     (u64 __user *)reg->addr);
2082 		break;
2083 	default:
2084 		break;
2085 	}
2086 
2087 	return r;
2088 }
2089 
2090 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2091 					   struct kvm_one_reg *reg)
2092 {
2093 	int r = -EINVAL;
2094 	__u64 val;
2095 
2096 	switch (reg->id) {
2097 	case KVM_REG_S390_TODPR:
2098 		r = get_user(vcpu->arch.sie_block->todpr,
2099 			     (u32 __user *)reg->addr);
2100 		break;
2101 	case KVM_REG_S390_EPOCHDIFF:
2102 		r = get_user(vcpu->arch.sie_block->epoch,
2103 			     (u64 __user *)reg->addr);
2104 		break;
2105 	case KVM_REG_S390_CPU_TIMER:
2106 		r = get_user(val, (u64 __user *)reg->addr);
2107 		if (!r)
2108 			kvm_s390_set_cpu_timer(vcpu, val);
2109 		break;
2110 	case KVM_REG_S390_CLOCK_COMP:
2111 		r = get_user(vcpu->arch.sie_block->ckc,
2112 			     (u64 __user *)reg->addr);
2113 		break;
2114 	case KVM_REG_S390_PFTOKEN:
2115 		r = get_user(vcpu->arch.pfault_token,
2116 			     (u64 __user *)reg->addr);
2117 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2118 			kvm_clear_async_pf_completion_queue(vcpu);
2119 		break;
2120 	case KVM_REG_S390_PFCOMPARE:
2121 		r = get_user(vcpu->arch.pfault_compare,
2122 			     (u64 __user *)reg->addr);
2123 		break;
2124 	case KVM_REG_S390_PFSELECT:
2125 		r = get_user(vcpu->arch.pfault_select,
2126 			     (u64 __user *)reg->addr);
2127 		break;
2128 	case KVM_REG_S390_PP:
2129 		r = get_user(vcpu->arch.sie_block->pp,
2130 			     (u64 __user *)reg->addr);
2131 		break;
2132 	case KVM_REG_S390_GBEA:
2133 		r = get_user(vcpu->arch.sie_block->gbea,
2134 			     (u64 __user *)reg->addr);
2135 		break;
2136 	default:
2137 		break;
2138 	}
2139 
2140 	return r;
2141 }
2142 
2143 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2144 {
2145 	kvm_s390_vcpu_initial_reset(vcpu);
2146 	return 0;
2147 }
2148 
2149 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2150 {
2151 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2152 	return 0;
2153 }
2154 
2155 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2156 {
2157 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2158 	return 0;
2159 }
2160 
2161 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2162 				  struct kvm_sregs *sregs)
2163 {
2164 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2165 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2166 	restore_access_regs(vcpu->run->s.regs.acrs);
2167 	return 0;
2168 }
2169 
2170 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2171 				  struct kvm_sregs *sregs)
2172 {
2173 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2174 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2175 	return 0;
2176 }
2177 
2178 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2179 {
2180 	/* make sure the new values will be lazily loaded */
2181 	save_fpu_regs();
2182 	if (test_fp_ctl(fpu->fpc))
2183 		return -EINVAL;
2184 	current->thread.fpu.fpc = fpu->fpc;
2185 	if (MACHINE_HAS_VX)
2186 		convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
2187 	else
2188 		memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));
2189 	return 0;
2190 }
2191 
2192 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2193 {
2194 	/* make sure we have the latest values */
2195 	save_fpu_regs();
2196 	if (MACHINE_HAS_VX)
2197 		convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
2198 	else
2199 		memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
2200 	fpu->fpc = current->thread.fpu.fpc;
2201 	return 0;
2202 }
2203 
2204 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2205 {
2206 	int rc = 0;
2207 
2208 	if (!is_vcpu_stopped(vcpu))
2209 		rc = -EBUSY;
2210 	else {
2211 		vcpu->run->psw_mask = psw.mask;
2212 		vcpu->run->psw_addr = psw.addr;
2213 	}
2214 	return rc;
2215 }
2216 
2217 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2218 				  struct kvm_translation *tr)
2219 {
2220 	return -EINVAL; /* not implemented yet */
2221 }
2222 
2223 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2224 			      KVM_GUESTDBG_USE_HW_BP | \
2225 			      KVM_GUESTDBG_ENABLE)
2226 
2227 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2228 					struct kvm_guest_debug *dbg)
2229 {
2230 	int rc = 0;
2231 
2232 	vcpu->guest_debug = 0;
2233 	kvm_s390_clear_bp_data(vcpu);
2234 
2235 	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2236 		return -EINVAL;
2237 	if (!sclp.has_gpere)
2238 		return -EINVAL;
2239 
2240 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
2241 		vcpu->guest_debug = dbg->control;
2242 		/* enforce guest PER */
2243 		atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2244 
2245 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2246 			rc = kvm_s390_import_bp_data(vcpu, dbg);
2247 	} else {
2248 		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2249 		vcpu->arch.guestdbg.last_bp = 0;
2250 	}
2251 
2252 	if (rc) {
2253 		vcpu->guest_debug = 0;
2254 		kvm_s390_clear_bp_data(vcpu);
2255 		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2256 	}
2257 
2258 	return rc;
2259 }
2260 
2261 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2262 				    struct kvm_mp_state *mp_state)
2263 {
2264 	/* CHECK_STOP and LOAD are not supported yet */
2265 	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2266 				       KVM_MP_STATE_OPERATING;
2267 }
2268 
2269 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2270 				    struct kvm_mp_state *mp_state)
2271 {
2272 	int rc = 0;
2273 
2274 	/* user space knows about this interface - let it control the state */
2275 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2276 
2277 	switch (mp_state->mp_state) {
2278 	case KVM_MP_STATE_STOPPED:
2279 		kvm_s390_vcpu_stop(vcpu);
2280 		break;
2281 	case KVM_MP_STATE_OPERATING:
2282 		kvm_s390_vcpu_start(vcpu);
2283 		break;
2284 	case KVM_MP_STATE_LOAD:
2285 	case KVM_MP_STATE_CHECK_STOP:
2286 		/* fall through - CHECK_STOP and LOAD are not supported yet */
2287 	default:
2288 		rc = -ENXIO;
2289 	}
2290 
2291 	return rc;
2292 }
2293 
2294 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2295 {
2296 	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2297 }
2298 
2299 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2300 {
2301 retry:
2302 	kvm_s390_vcpu_request_handled(vcpu);
2303 	if (!vcpu->requests)
2304 		return 0;
2305 	/*
2306 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2307 	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2308 	 * This ensures that the ipte instruction for this request has
2309 	 * already finished. We might race against a second unmapper that
2310 	 * wants to set the blocking bit. Let's just retry the request loop.
2311 	 */
2312 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2313 		int rc;
2314 		rc = gmap_mprotect_notify(vcpu->arch.gmap,
2315 					  kvm_s390_get_prefix(vcpu),
2316 					  PAGE_SIZE * 2, PROT_WRITE);
2317 		if (rc)
2318 			return rc;
2319 		goto retry;
2320 	}
2321 
2322 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2323 		vcpu->arch.sie_block->ihcpu = 0xffff;
2324 		goto retry;
2325 	}
2326 
2327 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2328 		if (!ibs_enabled(vcpu)) {
2329 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2330 			atomic_or(CPUSTAT_IBS,
2331 					&vcpu->arch.sie_block->cpuflags);
2332 		}
2333 		goto retry;
2334 	}
2335 
2336 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2337 		if (ibs_enabled(vcpu)) {
2338 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2339 			atomic_andnot(CPUSTAT_IBS,
2340 					  &vcpu->arch.sie_block->cpuflags);
2341 		}
2342 		goto retry;
2343 	}
2344 
2345 	/* nothing to do, just clear the request */
2346 	clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
2347 
2348 	return 0;
2349 }
2350 
2351 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2352 {
2353 	struct kvm_vcpu *vcpu;
2354 	int i;
2355 
2356 	mutex_lock(&kvm->lock);
2357 	preempt_disable();
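	/* the epoch is the guest TOD offset: guest TOD = host TOD + epoch */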
2358 	kvm->arch.epoch = tod - get_tod_clock();
2359 	kvm_s390_vcpu_block_all(kvm);
2360 	kvm_for_each_vcpu(i, vcpu, kvm)
2361 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2362 	kvm_s390_vcpu_unblock_all(kvm);
2363 	preempt_enable();
2364 	mutex_unlock(&kvm->lock);
2365 }
2366 
2367 /**
2368  * kvm_arch_fault_in_page - fault-in guest page if necessary
2369  * @vcpu: The corresponding virtual cpu
2370  * @gpa: Guest physical address
2371  * @writable: Whether the page should be writable or not
2372  *
2373  * Make sure that a guest page has been faulted-in on the host.
2374  *
2375  * Return: Zero on success, negative error code otherwise.
2376  */
2377 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2378 {
2379 	return gmap_fault(vcpu->arch.gmap, gpa,
2380 			  writable ? FAULT_FLAG_WRITE : 0);
2381 }
2382 
2383 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2384 				      unsigned long token)
2385 {
2386 	struct kvm_s390_interrupt inti;
2387 	struct kvm_s390_irq irq;
2388 
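	/*
	 * The INIT notification goes to the vcpu as a local interrupt, the
	 * DONE notification is injected as a floating interrupt for the VM.
	 */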
2389 	if (start_token) {
2390 		irq.u.ext.ext_params2 = token;
2391 		irq.type = KVM_S390_INT_PFAULT_INIT;
2392 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2393 	} else {
2394 		inti.type = KVM_S390_INT_PFAULT_DONE;
2395 		inti.parm64 = token;
2396 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2397 	}
2398 }
2399 
2400 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2401 				     struct kvm_async_pf *work)
2402 {
2403 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2404 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2405 }
2406 
2407 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2408 				 struct kvm_async_pf *work)
2409 {
2410 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2411 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2412 }
2413 
2414 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
2415 			       struct kvm_async_pf *work)
2416 {
2417 	/* s390 will always inject the page directly */
2418 }
2419 
2420 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2421 {
2422 	/*
2423 	 * s390 will always inject the page directly,
2424 	 * but we still want kvm_check_async_pf_completion to clean up
2425 	 */
2426 	return true;
2427 }
2428 
2429 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2430 {
2431 	hva_t hva;
2432 	struct kvm_arch_async_pf arch;
2433 	int rc;
2434 
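	/*
	 * Only set up an async pfault if the guest enabled the pfault
	 * mechanism and can currently take the notification interrupt.
	 */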
2435 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2436 		return 0;
2437 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2438 	    vcpu->arch.pfault_compare)
2439 		return 0;
2440 	if (psw_extint_disabled(vcpu))
2441 		return 0;
2442 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
2443 		return 0;
2444 	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2445 		return 0;
2446 	if (!vcpu->arch.gmap->pfault_enabled)
2447 		return 0;
2448 
2449 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2450 	hva += current->thread.gmap_addr & ~PAGE_MASK;
2451 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2452 		return 0;
2453 
2454 	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2455 	return rc;
2456 }
2457 
2458 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2459 {
2460 	int rc, cpuflags;
2461 
2462 	/*
2463 	 * On s390 notifications for arriving pages will be delivered directly
2464 	 * to the guest, but the housekeeping for completed pfaults is
2465 	 * handled outside the worker.
2466 	 */
2467 	kvm_check_async_pf_completion(vcpu);
2468 
2469 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2470 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2471 
2472 	if (need_resched())
2473 		schedule();
2474 
2475 	if (test_cpu_flag(CIF_MCCK_PENDING))
2476 		s390_handle_mcck();
2477 
2478 	if (!kvm_is_ucontrol(vcpu->kvm)) {
2479 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
2480 		if (rc)
2481 			return rc;
2482 	}
2483 
2484 	rc = kvm_s390_handle_requests(vcpu);
2485 	if (rc)
2486 		return rc;
2487 
2488 	if (guestdbg_enabled(vcpu)) {
2489 		kvm_s390_backup_guest_per_regs(vcpu);
2490 		kvm_s390_patch_guest_per_regs(vcpu);
2491 	}
2492 
2493 	vcpu->arch.sie_block->icptcode = 0;
2494 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2495 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2496 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
2497 
2498 	return 0;
2499 }
2500 
2501 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2502 {
2503 	struct kvm_s390_pgm_info pgm_info = {
2504 		.code = PGM_ADDRESSING,
2505 	};
2506 	u8 opcode, ilen;
2507 	int rc;
2508 
2509 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2510 	trace_kvm_s390_sie_fault(vcpu);
2511 
2512 	/*
2513 	 * We want to inject an addressing exception, which is defined as a
2514 	 * suppressing or terminating exception. However, since we came here
2515 	 * by a DAT access exception, the PSW still points to the faulting
2516 	 * instruction since DAT exceptions are nullifying. So we've got
2517 	 * to look up the current opcode to get the length of the instruction
2518 	 * to be able to forward the PSW.
2519 	 */
2520 	rc = read_guest_instr(vcpu, &opcode, 1);
2521 	ilen = insn_length(opcode);
2522 	if (rc < 0) {
2523 		return rc;
2524 	} else if (rc) {
2525 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
2526 		 * Forward by arbitrary ilc, injection will take care of
2527 		 * nullification if necessary.
2528 		 */
2529 		pgm_info = vcpu->arch.pgm;
2530 		ilen = 4;
2531 	}
2532 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
2533 	kvm_s390_forward_psw(vcpu, ilen);
2534 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
2535 }
2536 
2537 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2538 {
2539 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2540 		   vcpu->arch.sie_block->icptcode);
2541 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2542 
2543 	if (guestdbg_enabled(vcpu))
2544 		kvm_s390_restore_guest_per_regs(vcpu);
2545 
2546 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2547 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2548 
2549 	if (vcpu->arch.sie_block->icptcode > 0) {
2550 		int rc = kvm_handle_sie_intercept(vcpu);
2551 
2552 		if (rc != -EOPNOTSUPP)
2553 			return rc;
2554 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2555 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2556 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2557 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2558 		return -EREMOTE;
2559 	} else if (exit_reason != -EFAULT) {
2560 		vcpu->stat.exit_null++;
2561 		return 0;
2562 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
2563 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2564 		vcpu->run->s390_ucontrol.trans_exc_code =
2565 						current->thread.gmap_addr;
2566 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
2567 		return -EREMOTE;
2568 	} else if (current->thread.gmap_pfault) {
2569 		trace_kvm_s390_major_guest_pfault(vcpu);
2570 		current->thread.gmap_pfault = 0;
2571 		if (kvm_arch_setup_async_pf(vcpu))
2572 			return 0;
2573 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2574 	}
2575 	return vcpu_post_run_fault_in_sie(vcpu);
2576 }
2577 
2578 static int __vcpu_run(struct kvm_vcpu *vcpu)
2579 {
2580 	int rc, exit_reason;
2581 
2582 	/*
2583 	 * We try to hold kvm->srcu during most of vcpu_run (except when
2584 	 * running the guest), so that memslots (and other stuff) are protected
2585 	 */
2586 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2587 
2588 	do {
2589 		rc = vcpu_pre_run(vcpu);
2590 		if (rc)
2591 			break;
2592 
2593 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2594 		/*
2595 		 * As PF_VCPU will be used in the fault handler, there must be
2596 		 * no uaccess between guest_enter and guest_exit.
2597 		 */
2598 		local_irq_disable();
2599 		__kvm_guest_enter();
2600 		__disable_cpu_timer_accounting(vcpu);
2601 		local_irq_enable();
2602 		exit_reason = sie64a(vcpu->arch.sie_block,
2603 				     vcpu->run->s.regs.gprs);
2604 		local_irq_disable();
2605 		__enable_cpu_timer_accounting(vcpu);
2606 		__kvm_guest_exit();
2607 		local_irq_enable();
2608 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2609 
2610 		rc = vcpu_post_run(vcpu, exit_reason);
2611 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2612 
2613 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2614 	return rc;
2615 }
2616 
2617 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2618 {
2619 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2620 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2621 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2622 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2623 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2624 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2625 		/* some control register changes require a tlb flush */
2626 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2627 	}
2628 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2629 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
2630 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2631 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2632 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2633 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2634 	}
2635 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2636 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2637 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2638 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2639 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2640 			kvm_clear_async_pf_completion_queue(vcpu);
2641 	}
2642 	kvm_run->kvm_dirty_regs = 0;
2643 }
2644 
2645 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2646 {
2647 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2648 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2649 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2650 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2651 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
2652 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2653 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2654 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2655 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2656 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2657 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2658 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2659 }
2660 
2661 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2662 {
2663 	int rc;
2664 	sigset_t sigsaved;
2665 
2666 	if (guestdbg_exit_pending(vcpu)) {
2667 		kvm_s390_prepare_debug_exit(vcpu);
2668 		return 0;
2669 	}
2670 
2671 	if (vcpu->sigset_active)
2672 		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2673 
2674 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2675 		kvm_s390_vcpu_start(vcpu);
2676 	} else if (is_vcpu_stopped(vcpu)) {
2677 		pr_err_ratelimited("can't run stopped vcpu %d\n",
2678 				   vcpu->vcpu_id);
2679 		return -EINVAL;
2680 	}
2681 
2682 	sync_regs(vcpu, kvm_run);
2683 	enable_cpu_timer_accounting(vcpu);
2684 
2685 	might_fault();
2686 	rc = __vcpu_run(vcpu);
2687 
2688 	if (signal_pending(current) && !rc) {
2689 		kvm_run->exit_reason = KVM_EXIT_INTR;
2690 		rc = -EINTR;
2691 	}
2692 
2693 	if (guestdbg_exit_pending(vcpu) && !rc)  {
2694 		kvm_s390_prepare_debug_exit(vcpu);
2695 		rc = 0;
2696 	}
2697 
2698 	if (rc == -EREMOTE) {
2699 		/* userspace support is needed, kvm_run has been prepared */
2700 		rc = 0;
2701 	}
2702 
2703 	disable_cpu_timer_accounting(vcpu);
2704 	store_regs(vcpu, kvm_run);
2705 
2706 	if (vcpu->sigset_active)
2707 		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2708 
2709 	vcpu->stat.exit_userspace++;
2710 	return rc;
2711 }
2712 
2713 /*
2714  * store status at address
2715  * we have two special cases:
2716  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2717  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2718  */
2719 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2720 {
2721 	unsigned char archmode = 1;
2722 	freg_t fprs[NUM_FPRS];
2723 	unsigned int px;
2724 	u64 clkcomp, cputm;
2725 	int rc;
2726 
2727 	px = kvm_s390_get_prefix(vcpu);
2728 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2729 		if (write_guest_abs(vcpu, 163, &archmode, 1))
2730 			return -EFAULT;
2731 		gpa = 0;
2732 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2733 		if (write_guest_real(vcpu, 163, &archmode, 1))
2734 			return -EFAULT;
2735 		gpa = px;
2736 	} else
2737 		gpa -= __LC_FPREGS_SAVE_AREA;
2738 
2739 	/* manually convert vector registers if necessary */
2740 	if (MACHINE_HAS_VX) {
2741 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
2742 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2743 				     fprs, 128);
2744 	} else {
2745 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2746 				     vcpu->run->s.regs.fprs, 128);
2747 	}
2748 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2749 			      vcpu->run->s.regs.gprs, 128);
2750 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2751 			      &vcpu->arch.sie_block->gpsw, 16);
2752 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2753 			      &px, 4);
2754 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2755 			      &vcpu->run->s.regs.fpc, 4);
2756 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2757 			      &vcpu->arch.sie_block->todpr, 4);
2758 	cputm = kvm_s390_get_cpu_timer(vcpu);
2759 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2760 			      &cputm, 8);
2761 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
2762 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2763 			      &clkcomp, 8);
2764 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
2765 			      &vcpu->run->s.regs.acrs, 64);
2766 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
2767 			      &vcpu->arch.sie_block->gcr, 128);
2768 	return rc ? -EFAULT : 0;
2769 }
2770 
2771 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2772 {
2773 	/*
2774 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2775 	 * copying in vcpu load/put. Let's update our copies before we save
2776 	 * them into the save area.
2777 	 */
2778 	save_fpu_regs();
2779 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2780 	save_access_regs(vcpu->run->s.regs.acrs);
2781 
2782 	return kvm_s390_store_status_unloaded(vcpu, addr);
2783 }
2784 
2785 /*
2786  * store additional status at address
2787  */
2788 int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
2789 					unsigned long gpa)
2790 {
2791 	/* Only bits 0-53 are used for address formation */
2792 	if (!(gpa & ~0x3ff))
2793 		return 0;
2794 
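	/* the additional status (here the vector registers) is stored 1K aligned */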
2795 	return write_guest_abs(vcpu, gpa & ~0x3ff,
2796 			       (void *)&vcpu->run->s.regs.vrs, 512);
2797 }
2798 
2799 int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
2800 {
2801 	if (!test_kvm_facility(vcpu->kvm, 129))
2802 		return 0;
2803 
2804 	/*
2805 	 * The guest VXRS are in the host VXRS due to the lazy
2806 	 * copying in vcpu load/put. We can simply call save_fpu_regs()
2807 	 * to save the current register state because we are in the
2808 	 * middle of a load/put cycle.
2809 	 *
2810 	 * Let's update our copies before we save it into the save area.
2811 	 */
2812 	save_fpu_regs();
2813 
2814 	return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
2815 }
2816 
2817 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2818 {
2819 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2820 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2821 }
2822 
2823 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2824 {
2825 	unsigned int i;
2826 	struct kvm_vcpu *vcpu;
2827 
2828 	kvm_for_each_vcpu(i, vcpu, kvm) {
2829 		__disable_ibs_on_vcpu(vcpu);
2830 	}
2831 }
2832 
2833 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2834 {
2835 	if (!sclp.has_ibs)
2836 		return;
2837 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2838 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2839 }
2840 
2841 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2842 {
2843 	int i, online_vcpus, started_vcpus = 0;
2844 
2845 	if (!is_vcpu_stopped(vcpu))
2846 		return;
2847 
2848 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2849 	/* Only one cpu at a time may enter/leave the STOPPED state. */
2850 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
2851 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2852 
2853 	for (i = 0; i < online_vcpus; i++) {
2854 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2855 			started_vcpus++;
2856 	}
2857 
2858 	if (started_vcpus == 0) {
2859 		/* we're the only active VCPU -> speed it up */
2860 		__enable_ibs_on_vcpu(vcpu);
2861 	} else if (started_vcpus == 1) {
2862 		/*
2863 		 * As we are starting a second VCPU, we have to disable
2864 		 * the IBS facility on all VCPUs to remove potentially
2865 		 * outstanding ENABLE requests.
2866 		 */
2867 		__disable_ibs_on_all_vcpus(vcpu->kvm);
2868 	}
2869 
2870 	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2871 	/*
2872 	 * Another VCPU might have used IBS while we were offline.
2873 	 * Let's play safe and flush the VCPU at startup.
2874 	 */
2875 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2876 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2877 	return;
2878 }
2879 
2880 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2881 {
2882 	int i, online_vcpus, started_vcpus = 0;
2883 	struct kvm_vcpu *started_vcpu = NULL;
2884 
2885 	if (is_vcpu_stopped(vcpu))
2886 		return;
2887 
2888 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2889 	/* Only one cpu at a time may enter/leave the STOPPED state. */
2890 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
2891 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2892 
2893 	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
2894 	kvm_s390_clear_stop_irq(vcpu);
2895 
2896 	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2897 	__disable_ibs_on_vcpu(vcpu);
2898 
2899 	for (i = 0; i < online_vcpus; i++) {
2900 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2901 			started_vcpus++;
2902 			started_vcpu = vcpu->kvm->vcpus[i];
2903 		}
2904 	}
2905 
2906 	if (started_vcpus == 1) {
2907 		/*
2908 		 * As we only have one VCPU left, we want to enable the
2909 		 * IBS facility for that VCPU to speed it up.
2910 		 */
2911 		__enable_ibs_on_vcpu(started_vcpu);
2912 	}
2913 
2914 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2915 	return;
2916 }
2917 
2918 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2919 				     struct kvm_enable_cap *cap)
2920 {
2921 	int r;
2922 
2923 	if (cap->flags)
2924 		return -EINVAL;
2925 
2926 	switch (cap->cap) {
2927 	case KVM_CAP_S390_CSS_SUPPORT:
2928 		if (!vcpu->kvm->arch.css_support) {
2929 			vcpu->kvm->arch.css_support = 1;
2930 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
2931 			trace_kvm_s390_enable_css(vcpu->kvm);
2932 		}
2933 		r = 0;
2934 		break;
2935 	default:
2936 		r = -EINVAL;
2937 		break;
2938 	}
2939 	return r;
2940 }
2941 
2942 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
2943 				  struct kvm_s390_mem_op *mop)
2944 {
2945 	void __user *uaddr = (void __user *)mop->buf;
2946 	void *tmpbuf = NULL;
2947 	int r, srcu_idx;
2948 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
2949 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
2950 
2951 	if (mop->flags & ~supported_flags)
2952 		return -EINVAL;
2953 
2954 	if (mop->size > MEM_OP_MAX_SIZE)
2955 		return -E2BIG;
2956 
2957 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2958 		tmpbuf = vmalloc(mop->size);
2959 		if (!tmpbuf)
2960 			return -ENOMEM;
2961 	}
2962 
2963 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2964 
2965 	switch (mop->op) {
2966 	case KVM_S390_MEMOP_LOGICAL_READ:
2967 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2968 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
2969 					    mop->size, GACC_FETCH);
2970 			break;
2971 		}
2972 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2973 		if (r == 0) {
2974 			if (copy_to_user(uaddr, tmpbuf, mop->size))
2975 				r = -EFAULT;
2976 		}
2977 		break;
2978 	case KVM_S390_MEMOP_LOGICAL_WRITE:
2979 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2980 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
2981 					    mop->size, GACC_STORE);
2982 			break;
2983 		}
2984 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2985 			r = -EFAULT;
2986 			break;
2987 		}
2988 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2989 		break;
2990 	default:
2991 		r = -EINVAL;
2992 	}
2993 
2994 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
2995 
2996 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
2997 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
2998 
2999 	vfree(tmpbuf);
3000 	return r;
3001 }
3002 
3003 long kvm_arch_vcpu_ioctl(struct file *filp,
3004 			 unsigned int ioctl, unsigned long arg)
3005 {
3006 	struct kvm_vcpu *vcpu = filp->private_data;
3007 	void __user *argp = (void __user *)arg;
3008 	int idx;
3009 	long r;
3010 
3011 	switch (ioctl) {
3012 	case KVM_S390_IRQ: {
3013 		struct kvm_s390_irq s390irq;
3014 
3015 		r = -EFAULT;
3016 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3017 			break;
3018 		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3019 		break;
3020 	}
3021 	case KVM_S390_INTERRUPT: {
3022 		struct kvm_s390_interrupt s390int;
3023 		struct kvm_s390_irq s390irq;
3024 
3025 		r = -EFAULT;
3026 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
3027 			break;
3028 		if (s390int_to_s390irq(&s390int, &s390irq))
3029 			return -EINVAL;
3030 		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3031 		break;
3032 	}
3033 	case KVM_S390_STORE_STATUS:
3034 		idx = srcu_read_lock(&vcpu->kvm->srcu);
3035 		r = kvm_s390_vcpu_store_status(vcpu, arg);
3036 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
3037 		break;
3038 	case KVM_S390_SET_INITIAL_PSW: {
3039 		psw_t psw;
3040 
3041 		r = -EFAULT;
3042 		if (copy_from_user(&psw, argp, sizeof(psw)))
3043 			break;
3044 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3045 		break;
3046 	}
3047 	case KVM_S390_INITIAL_RESET:
3048 		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3049 		break;
3050 	case KVM_SET_ONE_REG:
3051 	case KVM_GET_ONE_REG: {
3052 		struct kvm_one_reg reg;
3053 		r = -EFAULT;
3054 		if (copy_from_user(&reg, argp, sizeof(reg)))
3055 			break;
3056 		if (ioctl == KVM_SET_ONE_REG)
3057 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3058 		else
3059 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3060 		break;
3061 	}
3062 #ifdef CONFIG_KVM_S390_UCONTROL
3063 	case KVM_S390_UCAS_MAP: {
3064 		struct kvm_s390_ucas_mapping ucasmap;
3065 
3066 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3067 			r = -EFAULT;
3068 			break;
3069 		}
3070 
3071 		if (!kvm_is_ucontrol(vcpu->kvm)) {
3072 			r = -EINVAL;
3073 			break;
3074 		}
3075 
3076 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3077 				     ucasmap.vcpu_addr, ucasmap.length);
3078 		break;
3079 	}
3080 	case KVM_S390_UCAS_UNMAP: {
3081 		struct kvm_s390_ucas_mapping ucasmap;
3082 
3083 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3084 			r = -EFAULT;
3085 			break;
3086 		}
3087 
3088 		if (!kvm_is_ucontrol(vcpu->kvm)) {
3089 			r = -EINVAL;
3090 			break;
3091 		}
3092 
3093 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3094 			ucasmap.length);
3095 		break;
3096 	}
3097 #endif
3098 	case KVM_S390_VCPU_FAULT: {
3099 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
3100 		break;
3101 	}
3102 	case KVM_ENABLE_CAP:
3103 	{
3104 		struct kvm_enable_cap cap;
3105 		r = -EFAULT;
3106 		if (copy_from_user(&cap, argp, sizeof(cap)))
3107 			break;
3108 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3109 		break;
3110 	}
3111 	case KVM_S390_MEM_OP: {
3112 		struct kvm_s390_mem_op mem_op;
3113 
3114 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3115 			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3116 		else
3117 			r = -EFAULT;
3118 		break;
3119 	}
3120 	case KVM_S390_SET_IRQ_STATE: {
3121 		struct kvm_s390_irq_state irq_state;
3122 
3123 		r = -EFAULT;
3124 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3125 			break;
3126 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3127 		    irq_state.len == 0 ||
3128 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3129 			r = -EINVAL;
3130 			break;
3131 		}
3132 		r = kvm_s390_set_irq_state(vcpu,
3133 					   (void __user *) irq_state.buf,
3134 					   irq_state.len);
3135 		break;
3136 	}
3137 	case KVM_S390_GET_IRQ_STATE: {
3138 		struct kvm_s390_irq_state irq_state;
3139 
3140 		r = -EFAULT;
3141 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3142 			break;
3143 		if (irq_state.len == 0) {
3144 			r = -EINVAL;
3145 			break;
3146 		}
3147 		r = kvm_s390_get_irq_state(vcpu,
3148 					   (__u8 __user *)  irq_state.buf,
3149 					   irq_state.len);
3150 		break;
3151 	}
3152 	default:
3153 		r = -ENOTTY;
3154 	}
3155 	return r;
3156 }
3157 
3158 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3159 {
3160 #ifdef CONFIG_KVM_S390_UCONTROL
3161 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3162 		 && (kvm_is_ucontrol(vcpu->kvm))) {
3163 		vmf->page = virt_to_page(vcpu->arch.sie_block);
3164 		get_page(vmf->page);
3165 		return 0;
3166 	}
3167 #endif
3168 	return VM_FAULT_SIGBUS;
3169 }
3170 
3171 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3172 			    unsigned long npages)
3173 {
3174 	return 0;
3175 }
3176 
3177 /* Section: memory related */
3178 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3179 				   struct kvm_memory_slot *memslot,
3180 				   const struct kvm_userspace_memory_region *mem,
3181 				   enum kvm_mr_change change)
3182 {
3183 	/* A few sanity checks. Memory slots have to start and end on a
3184 	   segment boundary (1 MB). The memory in userland may be fragmented
3185 	   into multiple vmas. It is fine to mmap() and munmap() parts of this
3186 	   slot at any time after this call. */
3187 
3188 	if (mem->userspace_addr & 0xffffful)
3189 		return -EINVAL;
3190 
3191 	if (mem->memory_size & 0xffffful)
3192 		return -EINVAL;
3193 
3194 	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3195 		return -EINVAL;
3196 
3197 	return 0;
3198 }
3199 
3200 void kvm_arch_commit_memory_region(struct kvm *kvm,
3201 				const struct kvm_userspace_memory_region *mem,
3202 				const struct kvm_memory_slot *old,
3203 				const struct kvm_memory_slot *new,
3204 				enum kvm_mr_change change)
3205 {
3206 	int rc;
3207 
3208 	/* If the basics of the memslot do not change, we do not want
3209 	 * to update the gmap. Every update causes several unnecessary
3210 	 * segment translation exceptions. This is usually handled just
3211 	 * fine by the normal fault handler + gmap, but it will also
3212 	 * cause faults on the prefix page of running guest CPUs.
3213 	 */
3214 	if (old->userspace_addr == mem->userspace_addr &&
3215 	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3216 	    old->npages * PAGE_SIZE == mem->memory_size)
3217 		return;
3218 
3219 	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3220 		mem->guest_phys_addr, mem->memory_size);
3221 	if (rc)
3222 		pr_warn("failed to commit memory region\n");
3223 	return;
3224 }
3225 
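/*
 * sclp.hmfai encodes, in two bits per facility doubleword, how many of the
 * leading 16-bit facility blocks are hypervisor managed; nonhyp_mask()
 * builds the mask of the remaining bits that may be passed to guests.
 */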
3226 static inline unsigned long nonhyp_mask(int i)
3227 {
3228 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3229 
3230 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3231 }
3232 
3233 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3234 {
3235 	vcpu->valid_wakeup = false;
3236 }
3237 
3238 static int __init kvm_s390_init(void)
3239 {
3240 	int i;
3241 
3242 	if (!sclp.has_sief2) {
3243 		pr_info("SIE not available\n");
3244 		return -ENODEV;
3245 	}
3246 
3247 	for (i = 0; i < 16; i++)
3248 		kvm_s390_fac_list_mask[i] |=
3249 			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3250 
3251 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3252 }
3253 
3254 static void __exit kvm_s390_exit(void)
3255 {
3256 	kvm_exit();
3257 }
3258 
3259 module_init(kvm_s390_init);
3260 module_exit(kvm_s390_exit);
3261 
3262 /*
3263  * Enable autoloading of the kvm module.
3264  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3265  * since x86 takes a different approach.
3266  */
3267 #include <linux/miscdevice.h>
3268 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3269 MODULE_ALIAS("devname:kvm");
3270