xref: /openbmc/linux/arch/s390/kvm/kvm-s390.c (revision 873b425e4c2fd0ba6617d67a45fbf119b65575b4)
1 /*
2  * hosting zSeries kernel virtual machines
3  *
4  * Copyright IBM Corp. 2008, 2009
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License (version 2 only)
8  * as published by the Free Software Foundation.
9  *
10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
11  *               Christian Borntraeger <borntraeger@de.ibm.com>
12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
14  *               Jason J. Herne <jjherne@us.ibm.com>
15  */
16 
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/module.h>
25 #include <linux/random.h>
26 #include <linux/slab.h>
27 #include <linux/timer.h>
28 #include <linux/vmalloc.h>
29 #include <linux/bitmap.h>
30 #include <asm/asm-offsets.h>
31 #include <asm/lowcore.h>
32 #include <asm/etr.h>
33 #include <asm/pgtable.h>
34 #include <asm/gmap.h>
35 #include <asm/nmi.h>
36 #include <asm/switch_to.h>
37 #include <asm/isc.h>
38 #include <asm/sclp.h>
39 #include <asm/cpacf.h>
41 #include "kvm-s390.h"
42 #include "gaccess.h"
43 
44 #define KMSG_COMPONENT "kvm-s390"
45 #undef pr_fmt
46 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
47 
48 #define CREATE_TRACE_POINTS
49 #include "trace.h"
50 #include "trace-s390.h"
51 
52 #define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
53 #define LOCAL_IRQS 32
54 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
55 			   (KVM_MAX_VCPUS + LOCAL_IRQS))
56 
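/*
 * Each debugfs entry below names a per-VCPU counter; VCPU_STAT() records the
 * offset of that counter inside struct kvm_vcpu so the generic statistics
 * code can pick it up for every VCPU.
 */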
57 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
58 
59 struct kvm_stats_debugfs_item debugfs_entries[] = {
60 	{ "userspace_handled", VCPU_STAT(exit_userspace) },
61 	{ "exit_null", VCPU_STAT(exit_null) },
62 	{ "exit_validity", VCPU_STAT(exit_validity) },
63 	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
64 	{ "exit_external_request", VCPU_STAT(exit_external_request) },
65 	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
66 	{ "exit_instruction", VCPU_STAT(exit_instruction) },
67 	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
68 	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
69 	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
70 	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
71 	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
72 	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
73 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
74 	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
75 	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
76 	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
77 	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
78 	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
79 	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
80 	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
81 	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
82 	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
83 	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
84 	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
85 	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
86 	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
87 	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
88 	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
89 	{ "instruction_spx", VCPU_STAT(instruction_spx) },
90 	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
91 	{ "instruction_stap", VCPU_STAT(instruction_stap) },
92 	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
93 	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
94 	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
95 	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
96 	{ "instruction_essa", VCPU_STAT(instruction_essa) },
97 	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
98 	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
99 	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
100 	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
101 	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
102 	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
103 	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
104 	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
105 	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
106 	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
107 	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
108 	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
109 	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
110 	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
111 	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
112 	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
113 	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
114 	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
115 	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
116 	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
117 	{ "diagnose_10", VCPU_STAT(diagnose_10) },
118 	{ "diagnose_44", VCPU_STAT(diagnose_44) },
119 	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
120 	{ "diagnose_258", VCPU_STAT(diagnose_258) },
121 	{ "diagnose_308", VCPU_STAT(diagnose_308) },
122 	{ "diagnose_500", VCPU_STAT(diagnose_500) },
123 	{ NULL }
124 };
125 
126 /* upper limit of facility bits (STFLE format) that KVM may expose to guests */
127 unsigned long kvm_s390_fac_list_mask[16] = {
128 	0xffe6000000000000UL,
129 	0x005e000000000000UL,
130 };
131 
132 unsigned long kvm_s390_fac_list_mask_size(void)
133 {
134 	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
135 	return ARRAY_SIZE(kvm_s390_fac_list_mask);
136 }
137 
138 /* available cpu features supported by kvm */
139 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
140 /* available subfunctions indicated via query / "test bit" */
141 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
142 
143 static struct gmap_notifier gmap_notifier;
144 debug_info_t *kvm_s390_dbf;
145 
146 /* Section: not file related */
147 int kvm_arch_hardware_enable(void)
148 {
149 	/* every s390 is virtualization enabled ;-) */
150 	return 0;
151 }
152 
153 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
154 
155 /*
156  * This callback is executed during stop_machine(). All CPUs are therefore
157  * temporarily stopped. In order not to change guest behavior, we have to
158  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
159  * so a CPU won't be stopped while calculating with the epoch.
160  */
161 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
162 			  void *v)
163 {
164 	struct kvm *kvm;
165 	struct kvm_vcpu *vcpu;
166 	int i;
167 	unsigned long long *delta = v;
168 
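	/*
	 * The guest TOD is host TOD + epoch. When the host clock is stepped
	 * by *delta, subtracting it from every epoch keeps the guest time
	 * unchanged; the CPU timer snapshot moves by the same amount so the
	 * accounted guest CPU time stays constant as well.
	 */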
169 	list_for_each_entry(kvm, &vm_list, vm_list) {
170 		kvm->arch.epoch -= *delta;
171 		kvm_for_each_vcpu(i, vcpu, kvm) {
172 			vcpu->arch.sie_block->epoch -= *delta;
173 			if (vcpu->arch.cputm_enabled)
174 				vcpu->arch.cputm_start += *delta;
175 		}
176 	}
177 	return NOTIFY_OK;
178 }
179 
180 static struct notifier_block kvm_clock_notifier = {
181 	.notifier_call = kvm_clock_sync,
182 };
183 
184 int kvm_arch_hardware_setup(void)
185 {
186 	gmap_notifier.notifier_call = kvm_gmap_notifier;
187 	gmap_register_ipte_notifier(&gmap_notifier);
188 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
189 				       &kvm_clock_notifier);
190 	return 0;
191 }
192 
193 void kvm_arch_hardware_unsetup(void)
194 {
195 	gmap_unregister_ipte_notifier(&gmap_notifier);
196 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
197 					 &kvm_clock_notifier);
198 }
199 
200 static void allow_cpu_feat(unsigned long nr)
201 {
202 	set_bit_inv(nr, kvm_s390_available_cpu_feat);
203 }
204 
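/*
 * PERFORM LOCKED OPERATION with the test bit (0x100) set in GR0 only queries
 * whether function code "nr" is implemented: condition code 0 means the
 * subfunction is available.
 */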
205 static inline int plo_test_bit(unsigned char nr)
206 {
207 	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
208 	int cc = 3; /* subfunction not available */
209 
210 	asm volatile(
211 		/* Parameter registers are ignored for "test bit" */
212 		"	plo	0,0,0,0(0)\n"
213 		"	ipm	%0\n"
214 		"	srl	%0,28\n"
215 		: "=d" (cc)
216 		: "d" (r0)
217 		: "cc");
218 	return cc == 0;
219 }
220 
221 static void kvm_s390_cpu_feat_init(void)
222 {
223 	int i;
224 
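	/* record the available PLO function codes as an MSB-first bitmap */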
225 	for (i = 0; i < 256; ++i) {
226 		if (plo_test_bit(i))
227 			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
228 	}
229 
230 	if (test_facility(28)) /* TOD-clock steering */
231 		etr_ptff(kvm_s390_available_subfunc.ptff, ETR_PTFF_QAF);
232 
233 	if (test_facility(17)) { /* MSA */
234 		__cpacf_query(CPACF_KMAC, kvm_s390_available_subfunc.kmac);
235 		__cpacf_query(CPACF_KMC, kvm_s390_available_subfunc.kmc);
236 		__cpacf_query(CPACF_KM, kvm_s390_available_subfunc.km);
237 		__cpacf_query(CPACF_KIMD, kvm_s390_available_subfunc.kimd);
238 		__cpacf_query(CPACF_KLMD, kvm_s390_available_subfunc.klmd);
239 	}
240 	if (test_facility(76)) /* MSA3 */
241 		__cpacf_query(CPACF_PCKMO, kvm_s390_available_subfunc.pckmo);
242 	if (test_facility(77)) { /* MSA4 */
243 		__cpacf_query(CPACF_KMCTR, kvm_s390_available_subfunc.kmctr);
244 		__cpacf_query(CPACF_KMF, kvm_s390_available_subfunc.kmf);
245 		__cpacf_query(CPACF_KMO, kvm_s390_available_subfunc.kmo);
246 		__cpacf_query(CPACF_PCC, kvm_s390_available_subfunc.pcc);
247 	}
248 	if (test_facility(57)) /* MSA5 */
249 		__cpacf_query(CPACF_PPNO, kvm_s390_available_subfunc.ppno);
250 
251 	if (MACHINE_HAS_ESOP)
252 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
253 }
254 
255 int kvm_arch_init(void *opaque)
256 {
257 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
258 	if (!kvm_s390_dbf)
259 		return -ENOMEM;
260 
261 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
262 		debug_unregister(kvm_s390_dbf);
263 		return -ENOMEM;
264 	}
265 
266 	kvm_s390_cpu_feat_init();
267 
268 	/* Register floating interrupt controller interface. */
269 	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
270 }
271 
272 void kvm_arch_exit(void)
273 {
274 	debug_unregister(kvm_s390_dbf);
275 }
276 
277 /* Section: device related */
278 long kvm_arch_dev_ioctl(struct file *filp,
279 			unsigned int ioctl, unsigned long arg)
280 {
281 	if (ioctl == KVM_S390_ENABLE_SIE)
282 		return s390_enable_sie();
283 	return -EINVAL;
284 }
285 
286 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
287 {
288 	int r;
289 
290 	switch (ext) {
291 	case KVM_CAP_S390_PSW:
292 	case KVM_CAP_S390_GMAP:
293 	case KVM_CAP_SYNC_MMU:
294 #ifdef CONFIG_KVM_S390_UCONTROL
295 	case KVM_CAP_S390_UCONTROL:
296 #endif
297 	case KVM_CAP_ASYNC_PF:
298 	case KVM_CAP_SYNC_REGS:
299 	case KVM_CAP_ONE_REG:
300 	case KVM_CAP_ENABLE_CAP:
301 	case KVM_CAP_S390_CSS_SUPPORT:
302 	case KVM_CAP_IOEVENTFD:
303 	case KVM_CAP_DEVICE_CTRL:
304 	case KVM_CAP_ENABLE_CAP_VM:
305 	case KVM_CAP_S390_IRQCHIP:
306 	case KVM_CAP_VM_ATTRIBUTES:
307 	case KVM_CAP_MP_STATE:
308 	case KVM_CAP_S390_INJECT_IRQ:
309 	case KVM_CAP_S390_USER_SIGP:
310 	case KVM_CAP_S390_USER_STSI:
311 	case KVM_CAP_S390_SKEYS:
312 	case KVM_CAP_S390_IRQ_STATE:
313 		r = 1;
314 		break;
315 	case KVM_CAP_S390_MEM_OP:
316 		r = MEM_OP_MAX_SIZE;
317 		break;
318 	case KVM_CAP_NR_VCPUS:
319 	case KVM_CAP_MAX_VCPUS:
320 		r = KVM_S390_BSCA_CPU_SLOTS;
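		/*
		 * The extended SCA allows more VCPU slots than the basic SCA,
		 * but requires both ESCA and 64-bit SCA origin support.
		 */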
321 		if (sclp.has_esca && sclp.has_64bscao)
322 			r = KVM_S390_ESCA_CPU_SLOTS;
323 		break;
324 	case KVM_CAP_NR_MEMSLOTS:
325 		r = KVM_USER_MEM_SLOTS;
326 		break;
327 	case KVM_CAP_S390_COW:
328 		r = MACHINE_HAS_ESOP;
329 		break;
330 	case KVM_CAP_S390_VECTOR_REGISTERS:
331 		r = MACHINE_HAS_VX;
332 		break;
333 	case KVM_CAP_S390_RI:
334 		r = test_facility(64);
335 		break;
336 	default:
337 		r = 0;
338 	}
339 	return r;
340 }
341 
342 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
343 					struct kvm_memory_slot *memslot)
344 {
345 	gfn_t cur_gfn, last_gfn;
346 	unsigned long address;
347 	struct gmap *gmap = kvm->arch.gmap;
348 
349 	/* Loop over all guest pages */
350 	last_gfn = memslot->base_gfn + memslot->npages;
351 	last_gfn = memslot->base_gfn + memslot->npages - 1;
352 		address = gfn_to_hva_memslot(memslot, cur_gfn);
353 
354 		if (test_and_clear_guest_dirty(gmap->mm, address))
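		/* fold the collected dirty state into the memslot's dirty bitmap */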
355 			mark_page_dirty(kvm, cur_gfn);
356 		if (fatal_signal_pending(current))
357 			return;
358 		cond_resched();
359 	}
360 }
361 
362 /* Section: vm related */
363 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
364 
365 /*
366  * Get (and clear) the dirty memory log for a memory slot.
367  */
368 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
369 			       struct kvm_dirty_log *log)
370 {
371 	int r;
372 	unsigned long n;
373 	struct kvm_memslots *slots;
374 	struct kvm_memory_slot *memslot;
375 	int is_dirty = 0;
376 
377 	mutex_lock(&kvm->slots_lock);
378 
379 	r = -EINVAL;
380 	if (log->slot >= KVM_USER_MEM_SLOTS)
381 		goto out;
382 
383 	slots = kvm_memslots(kvm);
384 	memslot = id_to_memslot(slots, log->slot);
385 	r = -ENOENT;
386 	if (!memslot->dirty_bitmap)
387 		goto out;
388 
389 	kvm_s390_sync_dirty_log(kvm, memslot);
390 	r = kvm_get_dirty_log(kvm, log, &is_dirty);
391 	if (r)
392 		goto out;
393 
394 	/* Clear the dirty log */
395 	if (is_dirty) {
396 		n = kvm_dirty_bitmap_bytes(memslot);
397 		memset(memslot->dirty_bitmap, 0, n);
398 	}
399 	r = 0;
400 out:
401 	mutex_unlock(&kvm->slots_lock);
402 	return r;
403 }
404 
405 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
406 {
407 	int r;
408 
409 	if (cap->flags)
410 		return -EINVAL;
411 
412 	switch (cap->cap) {
413 	case KVM_CAP_S390_IRQCHIP:
414 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
415 		kvm->arch.use_irqchip = 1;
416 		r = 0;
417 		break;
418 	case KVM_CAP_S390_USER_SIGP:
419 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
420 		kvm->arch.user_sigp = 1;
421 		r = 0;
422 		break;
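	/*
	 * Facility 129 (vector) may only be enabled while no VCPUs exist,
	 * because each SIE block is configured from the facility list when
	 * the VCPU is created.
	 */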
423 	case KVM_CAP_S390_VECTOR_REGISTERS:
424 		mutex_lock(&kvm->lock);
425 		if (atomic_read(&kvm->online_vcpus)) {
426 			r = -EBUSY;
427 		} else if (MACHINE_HAS_VX) {
428 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
429 			set_kvm_facility(kvm->arch.model.fac_list, 129);
430 			r = 0;
431 		} else
432 			r = -EINVAL;
433 		mutex_unlock(&kvm->lock);
434 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
435 			 r ? "(not available)" : "(success)");
436 		break;
437 	case KVM_CAP_S390_RI:
438 		r = -EINVAL;
439 		mutex_lock(&kvm->lock);
440 		if (atomic_read(&kvm->online_vcpus)) {
441 			r = -EBUSY;
442 		} else if (test_facility(64)) {
443 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
444 			set_kvm_facility(kvm->arch.model.fac_list, 64);
445 			r = 0;
446 		}
447 		mutex_unlock(&kvm->lock);
448 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
449 			 r ? "(not available)" : "(success)");
450 		break;
451 	case KVM_CAP_S390_USER_STSI:
452 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
453 		kvm->arch.user_stsi = 1;
454 		r = 0;
455 		break;
456 	default:
457 		r = -EINVAL;
458 		break;
459 	}
460 	return r;
461 }
462 
463 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
464 {
465 	int ret;
466 
467 	switch (attr->attr) {
468 	case KVM_S390_VM_MEM_LIMIT_SIZE:
469 		ret = 0;
470 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
471 			 kvm->arch.mem_limit);
472 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
473 			ret = -EFAULT;
474 		break;
475 	default:
476 		ret = -ENXIO;
477 		break;
478 	}
479 	return ret;
480 }
481 
482 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
483 {
484 	int ret;
485 	unsigned int idx;

486 	switch (attr->attr) {
487 	case KVM_S390_VM_MEM_ENABLE_CMMA:
488 		ret = -ENXIO;
489 		if (!sclp.has_cmma)
490 			break;
491 
492 		ret = -EBUSY;
493 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
494 		mutex_lock(&kvm->lock);
495 		if (atomic_read(&kvm->online_vcpus) == 0) {
496 			kvm->arch.use_cmma = 1;
497 			ret = 0;
498 		}
499 		mutex_unlock(&kvm->lock);
500 		break;
501 	case KVM_S390_VM_MEM_CLR_CMMA:
502 		ret = -ENXIO;
503 		if (!sclp.has_cmma)
504 			break;
505 		ret = -EINVAL;
506 		if (!kvm->arch.use_cmma)
507 			break;
508 
509 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
510 		mutex_lock(&kvm->lock);
511 		idx = srcu_read_lock(&kvm->srcu);
512 		s390_reset_cmma(kvm->arch.gmap->mm);
513 		srcu_read_unlock(&kvm->srcu, idx);
514 		mutex_unlock(&kvm->lock);
515 		ret = 0;
516 		break;
517 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
518 		unsigned long new_limit;
519 
520 		if (kvm_is_ucontrol(kvm))
521 			return -EINVAL;
522 
523 		if (get_user(new_limit, (u64 __user *)attr->addr))
524 			return -EFAULT;
525 
526 		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
527 		    new_limit > kvm->arch.mem_limit)
528 			return -E2BIG;
529 
530 		if (!new_limit)
531 			return -EINVAL;
532 
533 		/* gmap_alloc takes last usable address */
534 		if (new_limit != KVM_S390_NO_MEM_LIMIT)
535 			new_limit -= 1;
536 
537 		ret = -EBUSY;
538 		mutex_lock(&kvm->lock);
539 		if (atomic_read(&kvm->online_vcpus) == 0) {
540 			/* gmap_alloc will round the limit up */
541 			struct gmap *new = gmap_alloc(current->mm, new_limit);
542 
543 			if (!new) {
544 				ret = -ENOMEM;
545 			} else {
546 				gmap_free(kvm->arch.gmap);
547 				new->private = kvm;
548 				kvm->arch.gmap = new;
549 				ret = 0;
550 			}
551 		}
552 		mutex_unlock(&kvm->lock);
553 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
554 		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
555 			 (void *) kvm->arch.gmap->asce);
556 		break;
557 	}
558 	default:
559 		ret = -ENXIO;
560 		break;
561 	}
562 	return ret;
563 }
564 
565 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
566 
567 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
568 {
569 	struct kvm_vcpu *vcpu;
570 	int i;
571 
572 	if (!test_kvm_facility(kvm, 76))
573 		return -EINVAL;
574 
575 	mutex_lock(&kvm->lock);
576 	switch (attr->attr) {
577 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
578 		get_random_bytes(
579 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
580 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
581 		kvm->arch.crypto.aes_kw = 1;
582 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
583 		break;
584 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
585 		get_random_bytes(
586 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
587 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
588 		kvm->arch.crypto.dea_kw = 1;
589 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
590 		break;
591 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
592 		kvm->arch.crypto.aes_kw = 0;
593 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
594 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
595 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
596 		break;
597 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
598 		kvm->arch.crypto.dea_kw = 0;
599 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
600 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
601 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
602 		break;
603 	default:
604 		mutex_unlock(&kvm->lock);
605 		return -ENXIO;
606 	}
607 
608 	kvm_for_each_vcpu(i, vcpu, kvm) {
609 		kvm_s390_vcpu_crypto_setup(vcpu);
610 		exit_sie(vcpu);
611 	}
612 	mutex_unlock(&kvm->lock);
613 	return 0;
614 }
615 
616 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
617 {
618 	u8 gtod_high;
619 
620 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
621 					   sizeof(gtod_high)))
622 		return -EFAULT;
623 
624 	if (gtod_high != 0)
625 		return -EINVAL;
626 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
627 
628 	return 0;
629 }
630 
631 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
632 {
633 	u64 gtod;
634 
635 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
636 		return -EFAULT;
637 
638 	kvm_s390_set_tod_clock(kvm, gtod);
639 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
640 	return 0;
641 }
642 
643 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
644 {
645 	int ret;
646 
647 	if (attr->flags)
648 		return -EINVAL;
649 
650 	switch (attr->attr) {
651 	case KVM_S390_VM_TOD_HIGH:
652 		ret = kvm_s390_set_tod_high(kvm, attr);
653 		break;
654 	case KVM_S390_VM_TOD_LOW:
655 		ret = kvm_s390_set_tod_low(kvm, attr);
656 		break;
657 	default:
658 		ret = -ENXIO;
659 		break;
660 	}
661 	return ret;
662 }
663 
664 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
665 {
666 	u8 gtod_high = 0;
667 
668 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
669 					 sizeof(gtod_high)))
670 		return -EFAULT;
671 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
672 
673 	return 0;
674 }
675 
676 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
677 {
678 	u64 gtod;
679 
680 	gtod = kvm_s390_get_tod_clock_fast(kvm);
681 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
682 		return -EFAULT;
683 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
684 
685 	return 0;
686 }
687 
688 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
689 {
690 	int ret;
691 
692 	if (attr->flags)
693 		return -EINVAL;
694 
695 	switch (attr->attr) {
696 	case KVM_S390_VM_TOD_HIGH:
697 		ret = kvm_s390_get_tod_high(kvm, attr);
698 		break;
699 	case KVM_S390_VM_TOD_LOW:
700 		ret = kvm_s390_get_tod_low(kvm, attr);
701 		break;
702 	default:
703 		ret = -ENXIO;
704 		break;
705 	}
706 	return ret;
707 }
708 
709 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
710 {
711 	struct kvm_s390_vm_cpu_processor *proc;
712 	u16 lowest_ibc, unblocked_ibc;
713 	int ret = 0;
714 
715 	mutex_lock(&kvm->lock);
716 	if (atomic_read(&kvm->online_vcpus)) {
717 		ret = -EBUSY;
718 		goto out;
719 	}
720 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
721 	if (!proc) {
722 		ret = -ENOMEM;
723 		goto out;
724 	}
725 	if (!copy_from_user(proc, (void __user *)attr->addr,
726 			    sizeof(*proc))) {
727 		kvm->arch.model.cpuid = proc->cpuid;
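		/*
		 * Clamp the requested IBC (instruction blocking control)
		 * value into the range supported by the machine; a
		 * lowest_ibc of 0 means IBC is not supported at all.
		 */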
728 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
729 		unblocked_ibc = sclp.ibc & 0xfff;
730 		if (lowest_ibc) {
731 			if (proc->ibc > unblocked_ibc)
732 				kvm->arch.model.ibc = unblocked_ibc;
733 			else if (proc->ibc < lowest_ibc)
734 				kvm->arch.model.ibc = lowest_ibc;
735 			else
736 				kvm->arch.model.ibc = proc->ibc;
737 		}
738 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
739 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
740 	} else
741 		ret = -EFAULT;
742 	kfree(proc);
743 out:
744 	mutex_unlock(&kvm->lock);
745 	return ret;
746 }
747 
748 static int kvm_s390_set_processor_feat(struct kvm *kvm,
749 				       struct kvm_device_attr *attr)
750 {
751 	struct kvm_s390_vm_cpu_feat data;
752 	int ret = -EBUSY;
753 
754 	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
755 		return -EFAULT;
756 	if (!bitmap_subset((unsigned long *) data.feat,
757 			   kvm_s390_available_cpu_feat,
758 			   KVM_S390_VM_CPU_FEAT_NR_BITS))
759 		return -EINVAL;
760 
761 	mutex_lock(&kvm->lock);
762 	if (!atomic_read(&kvm->online_vcpus)) {
763 		bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
764 			    KVM_S390_VM_CPU_FEAT_NR_BITS);
765 		ret = 0;
766 	}
767 	mutex_unlock(&kvm->lock);
768 	return ret;
769 }
770 
771 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
772 					  struct kvm_device_attr *attr)
773 {
774 	/*
775 	 * Once supported by kernel + hw, we have to store the subfunctions
776 	 * in kvm->arch and remember that user space configured them.
777 	 */
778 	return -ENXIO;
779 }
780 
781 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
782 {
783 	int ret = -ENXIO;
784 
785 	switch (attr->attr) {
786 	case KVM_S390_VM_CPU_PROCESSOR:
787 		ret = kvm_s390_set_processor(kvm, attr);
788 		break;
789 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
790 		ret = kvm_s390_set_processor_feat(kvm, attr);
791 		break;
792 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
793 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
794 		break;
795 	}
796 	return ret;
797 }
798 
799 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
800 {
801 	struct kvm_s390_vm_cpu_processor *proc;
802 	int ret = 0;
803 
804 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
805 	if (!proc) {
806 		ret = -ENOMEM;
807 		goto out;
808 	}
809 	proc->cpuid = kvm->arch.model.cpuid;
810 	proc->ibc = kvm->arch.model.ibc;
811 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
812 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
813 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
814 		ret = -EFAULT;
815 	kfree(proc);
816 out:
817 	return ret;
818 }
819 
820 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
821 {
822 	struct kvm_s390_vm_cpu_machine *mach;
823 	int ret = 0;
824 
825 	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
826 	if (!mach) {
827 		ret = -ENOMEM;
828 		goto out;
829 	}
830 	get_cpu_id((struct cpuid *) &mach->cpuid);
831 	mach->ibc = sclp.ibc;
832 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
833 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
834 	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
835 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
836 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
837 		ret = -EFAULT;
838 	kfree(mach);
839 out:
840 	return ret;
841 }
842 
843 static int kvm_s390_get_processor_feat(struct kvm *kvm,
844 				       struct kvm_device_attr *attr)
845 {
846 	struct kvm_s390_vm_cpu_feat data;
847 
848 	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
849 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
850 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
851 		return -EFAULT;
852 	return 0;
853 }
854 
855 static int kvm_s390_get_machine_feat(struct kvm *kvm,
856 				     struct kvm_device_attr *attr)
857 {
858 	struct kvm_s390_vm_cpu_feat data;
859 
860 	bitmap_copy((unsigned long *) data.feat,
861 		    kvm_s390_available_cpu_feat,
862 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
863 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
864 		return -EFAULT;
865 	return 0;
866 }
867 
868 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
869 					  struct kvm_device_attr *attr)
870 {
871 	/*
872 	 * Once we can actually configure subfunctions (kernel + hw support),
873 	 * we have to check if they were already set by user space, if so copy
874 	 * them from kvm->arch.
875 	 */
876 	return -ENXIO;
877 }
878 
879 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
880 					struct kvm_device_attr *attr)
881 {
882 	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
883 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
884 		return -EFAULT;
885 	return 0;
886 }

887 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
888 {
889 	int ret = -ENXIO;
890 
891 	switch (attr->attr) {
892 	case KVM_S390_VM_CPU_PROCESSOR:
893 		ret = kvm_s390_get_processor(kvm, attr);
894 		break;
895 	case KVM_S390_VM_CPU_MACHINE:
896 		ret = kvm_s390_get_machine(kvm, attr);
897 		break;
898 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
899 		ret = kvm_s390_get_processor_feat(kvm, attr);
900 		break;
901 	case KVM_S390_VM_CPU_MACHINE_FEAT:
902 		ret = kvm_s390_get_machine_feat(kvm, attr);
903 		break;
904 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
905 		ret = kvm_s390_get_processor_subfunc(kvm, attr);
906 		break;
907 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
908 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
909 		break;
910 	}
911 	return ret;
912 }
913 
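/*
 * The attribute groups below are reached via the KVM_SET_DEVICE_ATTR ioctl
 * on the VM file descriptor. A hypothetical user space caller (vm_fd and
 * tod are illustrative names only) would look like:
 *
 *	__u64 tod = ...;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_TOD,
 *		.attr  = KVM_S390_VM_TOD_LOW,
 *		.addr  = (__u64) &tod,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */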
914 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
915 {
916 	int ret;
917 
918 	switch (attr->group) {
919 	case KVM_S390_VM_MEM_CTRL:
920 		ret = kvm_s390_set_mem_control(kvm, attr);
921 		break;
922 	case KVM_S390_VM_TOD:
923 		ret = kvm_s390_set_tod(kvm, attr);
924 		break;
925 	case KVM_S390_VM_CPU_MODEL:
926 		ret = kvm_s390_set_cpu_model(kvm, attr);
927 		break;
928 	case KVM_S390_VM_CRYPTO:
929 		ret = kvm_s390_vm_set_crypto(kvm, attr);
930 		break;
931 	default:
932 		ret = -ENXIO;
933 		break;
934 	}
935 
936 	return ret;
937 }
938 
939 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
940 {
941 	int ret;
942 
943 	switch (attr->group) {
944 	case KVM_S390_VM_MEM_CTRL:
945 		ret = kvm_s390_get_mem_control(kvm, attr);
946 		break;
947 	case KVM_S390_VM_TOD:
948 		ret = kvm_s390_get_tod(kvm, attr);
949 		break;
950 	case KVM_S390_VM_CPU_MODEL:
951 		ret = kvm_s390_get_cpu_model(kvm, attr);
952 		break;
953 	default:
954 		ret = -ENXIO;
955 		break;
956 	}
957 
958 	return ret;
959 }
960 
961 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
962 {
963 	int ret;
964 
965 	switch (attr->group) {
966 	case KVM_S390_VM_MEM_CTRL:
967 		switch (attr->attr) {
968 		case KVM_S390_VM_MEM_ENABLE_CMMA:
969 		case KVM_S390_VM_MEM_CLR_CMMA:
970 			ret = sclp.has_cmma ? 0 : -ENXIO;
971 			break;
972 		case KVM_S390_VM_MEM_LIMIT_SIZE:
973 			ret = 0;
974 			break;
975 		default:
976 			ret = -ENXIO;
977 			break;
978 		}
979 		break;
980 	case KVM_S390_VM_TOD:
981 		switch (attr->attr) {
982 		case KVM_S390_VM_TOD_LOW:
983 		case KVM_S390_VM_TOD_HIGH:
984 			ret = 0;
985 			break;
986 		default:
987 			ret = -ENXIO;
988 			break;
989 		}
990 		break;
991 	case KVM_S390_VM_CPU_MODEL:
992 		switch (attr->attr) {
993 		case KVM_S390_VM_CPU_PROCESSOR:
994 		case KVM_S390_VM_CPU_MACHINE:
995 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
996 		case KVM_S390_VM_CPU_MACHINE_FEAT:
997 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
998 			ret = 0;
999 			break;
1000 		/* configuring subfunctions is not supported yet */
1001 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1002 		default:
1003 			ret = -ENXIO;
1004 			break;
1005 		}
1006 		break;
1007 	case KVM_S390_VM_CRYPTO:
1008 		switch (attr->attr) {
1009 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1010 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1011 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1012 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1013 			ret = 0;
1014 			break;
1015 		default:
1016 			ret = -ENXIO;
1017 			break;
1018 		}
1019 		break;
1020 	default:
1021 		ret = -ENXIO;
1022 		break;
1023 	}
1024 
1025 	return ret;
1026 }
1027 
1028 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1029 {
1030 	uint8_t *keys;
1031 	uint64_t hva;
1032 	unsigned long curkey;
1033 	int i, r = 0;
1034 
1035 	if (args->flags != 0)
1036 		return -EINVAL;
1037 
1038 	/* Is this guest using storage keys? */
1039 	if (!mm_use_skey(current->mm))
1040 		return KVM_S390_GET_SKEYS_NONE;
1041 
1042 	/* Enforce sane limit on memory allocation */
1043 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1044 		return -EINVAL;
1045 
1046 	keys = kmalloc_array(args->count, sizeof(uint8_t),
1047 			     GFP_KERNEL | __GFP_NOWARN);
1048 	if (!keys)
1049 		keys = vmalloc(sizeof(uint8_t) * args->count);
1050 	if (!keys)
1051 		return -ENOMEM;
1052 
1053 	for (i = 0; i < args->count; i++) {
1054 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1055 		if (kvm_is_error_hva(hva)) {
1056 			r = -EFAULT;
1057 			goto out;
1058 		}
1059 
1060 		curkey = get_guest_storage_key(current->mm, hva);
1061 		if (IS_ERR_VALUE(curkey)) {
1062 			r = curkey;
1063 			goto out;
1064 		}
1065 		keys[i] = curkey;
1066 	}
1067 
1068 	r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1069 			 sizeof(uint8_t) * args->count);
1070 	if (r)
1071 		r = -EFAULT;
1072 out:
1073 	kvfree(keys);
1074 	return r;
1075 }
1076 
1077 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1078 {
1079 	uint8_t *keys;
1080 	uint64_t hva;
1081 	int i, r = 0;
1082 
1083 	if (args->flags != 0)
1084 		return -EINVAL;
1085 
1086 	/* Enforce sane limit on memory allocation */
1087 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1088 		return -EINVAL;
1089 
1090 	keys = kmalloc_array(args->count, sizeof(uint8_t),
1091 			     GFP_KERNEL | __GFP_NOWARN);
1092 	if (!keys)
1093 		keys = vmalloc(sizeof(uint8_t) * args->count);
1094 	if (!keys)
1095 		return -ENOMEM;
1096 
1097 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1098 			   sizeof(uint8_t) * args->count);
1099 	if (r) {
1100 		r = -EFAULT;
1101 		goto out;
1102 	}
1103 
1104 	/* Enable storage key handling for the guest */
1105 	r = s390_enable_skey();
1106 	if (r)
1107 		goto out;
1108 
1109 	for (i = 0; i < args->count; i++) {
1110 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1111 		if (kvm_is_error_hva(hva)) {
1112 			r = -EFAULT;
1113 			goto out;
1114 		}
1115 
1116 		/* Lowest order bit is reserved */
1117 		if (keys[i] & 0x01) {
1118 			r = -EINVAL;
1119 			goto out;
1120 		}
1121 
1122 		r = set_guest_storage_key(current->mm, hva,
1123 					  (unsigned long)keys[i], 0);
1124 		if (r)
1125 			goto out;
1126 	}
1127 out:
1128 	kvfree(keys);
1129 	return r;
1130 }
1131 
1132 long kvm_arch_vm_ioctl(struct file *filp,
1133 		       unsigned int ioctl, unsigned long arg)
1134 {
1135 	struct kvm *kvm = filp->private_data;
1136 	void __user *argp = (void __user *)arg;
1137 	struct kvm_device_attr attr;
1138 	int r;
1139 
1140 	switch (ioctl) {
1141 	case KVM_S390_INTERRUPT: {
1142 		struct kvm_s390_interrupt s390int;
1143 
1144 		r = -EFAULT;
1145 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
1146 			break;
1147 		r = kvm_s390_inject_vm(kvm, &s390int);
1148 		break;
1149 	}
1150 	case KVM_ENABLE_CAP: {
1151 		struct kvm_enable_cap cap;

1152 		r = -EFAULT;
1153 		if (copy_from_user(&cap, argp, sizeof(cap)))
1154 			break;
1155 		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1156 		break;
1157 	}
1158 	case KVM_CREATE_IRQCHIP: {
1159 		struct kvm_irq_routing_entry routing;
1160 
1161 		r = -EINVAL;
1162 		if (kvm->arch.use_irqchip) {
1163 			/* Set up dummy routing. */
1164 			memset(&routing, 0, sizeof(routing));
1165 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1166 		}
1167 		break;
1168 	}
1169 	case KVM_SET_DEVICE_ATTR: {
1170 		r = -EFAULT;
1171 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1172 			break;
1173 		r = kvm_s390_vm_set_attr(kvm, &attr);
1174 		break;
1175 	}
1176 	case KVM_GET_DEVICE_ATTR: {
1177 		r = -EFAULT;
1178 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1179 			break;
1180 		r = kvm_s390_vm_get_attr(kvm, &attr);
1181 		break;
1182 	}
1183 	case KVM_HAS_DEVICE_ATTR: {
1184 		r = -EFAULT;
1185 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1186 			break;
1187 		r = kvm_s390_vm_has_attr(kvm, &attr);
1188 		break;
1189 	}
1190 	case KVM_S390_GET_SKEYS: {
1191 		struct kvm_s390_skeys args;
1192 
1193 		r = -EFAULT;
1194 		if (copy_from_user(&args, argp,
1195 				   sizeof(struct kvm_s390_skeys)))
1196 			break;
1197 		r = kvm_s390_get_skeys(kvm, &args);
1198 		break;
1199 	}
1200 	case KVM_S390_SET_SKEYS: {
1201 		struct kvm_s390_skeys args;
1202 
1203 		r = -EFAULT;
1204 		if (copy_from_user(&args, argp,
1205 				   sizeof(struct kvm_s390_skeys)))
1206 			break;
1207 		r = kvm_s390_set_skeys(kvm, &args);
1208 		break;
1209 	}
1210 	default:
1211 		r = -ENOTTY;
1212 	}
1213 
1214 	return r;
1215 }
1216 
1217 static int kvm_s390_query_ap_config(u8 *config)
1218 {
1219 	u32 fcn_code = 0x04000000UL;
1220 	u32 cc = 0;
1221 
1222 	memset(config, 0, 128);
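	/*
	 * PQAP(QCI) is hand-coded as .long so no assembler support for the
	 * mnemonic is needed. If the instruction is not available, the
	 * exception table fixup simply continues with the zeroed buffer.
	 */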
1223 	asm volatile(
1224 		"lgr 0,%1\n"
1225 		"lgr 2,%2\n"
1226 		".long 0xb2af0000\n"		/* PQAP(QCI) */
1227 		"0: ipm %0\n"
1228 		"srl %0,28\n"
1229 		"1:\n"
1230 		EX_TABLE(0b, 1b)
1231 		: "+r" (cc)
1232 		: "r" (fcn_code), "r" (config)
1233 		: "cc", "0", "2", "memory"
1234 	);
1235 
1236 	return cc;
1237 }
1238 
1239 static int kvm_s390_apxa_installed(void)
1240 {
1241 	u8 config[128];
1242 	int cc;
1243 
1244 	if (test_facility(12)) {
1245 		cc = kvm_s390_query_ap_config(config);
1246 
1247 		if (cc)
1248 			pr_err("PQAP(QCI) failed with cc=%d", cc);
1249 		else
1250 			return config[0] & 0x40;
1251 	}
1252 
1253 	return 0;
1254 }
1255 
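/*
 * The CRYCB format depends on whether the AP extended addressing (APXA)
 * facility is installed: format 2 with APXA, format 1 otherwise.
 */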
1256 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1257 {
1258 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1259 
1260 	if (kvm_s390_apxa_installed())
1261 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1262 	else
1263 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1264 }
1265 
1266 static u64 kvm_s390_get_initial_cpuid(void)
1267 {
1268 	struct cpuid cpuid;
1269 
1270 	get_cpu_id(&cpuid);
1271 	cpuid.version = 0xff;
1272 	return *((u64 *) &cpuid);
1273 }
1274 
1275 static void kvm_s390_crypto_init(struct kvm *kvm)
1276 {
1277 	if (!test_kvm_facility(kvm, 76))
1278 		return;
1279 
1280 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1281 	kvm_s390_set_crycb_format(kvm);
1282 
1283 	/* Enable AES/DEA protected key functions by default */
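	/*
	 * The wrapping key masks are randomized per VM so that CPACF
	 * protected keys generated inside one guest cannot be used in any
	 * other guest or on the host.
	 */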
1284 	kvm->arch.crypto.aes_kw = 1;
1285 	kvm->arch.crypto.dea_kw = 1;
1286 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1287 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1288 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1289 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1290 }
1291 
1292 static void sca_dispose(struct kvm *kvm)
1293 {
1294 	if (kvm->arch.use_esca)
1295 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1296 	else
1297 		free_page((unsigned long)(kvm->arch.sca));
1298 	kvm->arch.sca = NULL;
1299 }
1300 
1301 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1302 {
1303 	gfp_t alloc_flags = GFP_KERNEL;
1304 	int i, rc;
1305 	char debug_name[16];
1306 	static unsigned long sca_offset;
1307 
1308 	rc = -EINVAL;
1309 #ifdef CONFIG_KVM_S390_UCONTROL
1310 	if (type & ~KVM_VM_S390_UCONTROL)
1311 		goto out_err;
1312 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1313 		goto out_err;
1314 #else
1315 	if (type)
1316 		goto out_err;
1317 #endif
1318 
1319 	rc = s390_enable_sie();
1320 	if (rc)
1321 		goto out_err;
1322 
1323 	rc = -ENOMEM;
1324 
1325 	ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1326 
1327 	kvm->arch.use_esca = 0; /* start with basic SCA */
1328 	if (!sclp.has_64bscao)
1329 		alloc_flags |= GFP_DMA;
1330 	rwlock_init(&kvm->arch.sca_lock);
1331 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1332 	if (!kvm->arch.sca)
1333 		goto out_err;
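	/*
	 * The basic SCA is much smaller than a page. Stagger each VM's SCA
	 * by 16 bytes within its page so the SCAs of different guests do not
	 * all start at the same offset.
	 */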
1334 	spin_lock(&kvm_lock);
1335 	sca_offset += 16;
1336 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1337 		sca_offset = 0;
1338 	kvm->arch.sca = (struct bsca_block *)
1339 			((char *) kvm->arch.sca + sca_offset);
1340 	spin_unlock(&kvm_lock);
1341 
1342 	sprintf(debug_name, "kvm-%u", current->pid);
1343 
1344 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1345 	if (!kvm->arch.dbf)
1346 		goto out_err;
1347 
1348 	kvm->arch.sie_page2 =
1349 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1350 	if (!kvm->arch.sie_page2)
1351 		goto out_err;
1352 
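	/*
	 * The facility mask describes what the host and KVM can offer to
	 * guests; the facility list starts out as a copy of it and describes
	 * what this VM's CPU model actually provides. User space may change
	 * the list later via the CPU model attributes.
	 */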
1353 	/* Populate the facility mask initially. */
1354 	memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1355 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1356 	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1357 		if (i < kvm_s390_fac_list_mask_size())
1358 			kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1359 		else
1360 			kvm->arch.model.fac_mask[i] = 0UL;
1361 	}
1362 
1363 	/* Populate the facility list initially. */
1364 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1365 	memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1366 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1367 
1368 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
1369 	set_kvm_facility(kvm->arch.model.fac_list, 74);
1370 
1371 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1372 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1373 
1374 	kvm_s390_crypto_init(kvm);
1375 
1376 	spin_lock_init(&kvm->arch.float_int.lock);
1377 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
1378 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1379 	init_waitqueue_head(&kvm->arch.ipte_wq);
1380 	mutex_init(&kvm->arch.ipte_mutex);
1381 
1382 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1383 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
1384 
1385 	if (type & KVM_VM_S390_UCONTROL) {
1386 		kvm->arch.gmap = NULL;
1387 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1388 	} else {
1389 		if (sclp.hamax == U64_MAX)
1390 			kvm->arch.mem_limit = TASK_MAX_SIZE;
1391 		else
1392 			kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
1393 						    sclp.hamax + 1);
1394 		kvm->arch.gmap = gmap_alloc(current->mm, kvm->arch.mem_limit - 1);
1395 		if (!kvm->arch.gmap)
1396 			goto out_err;
1397 		kvm->arch.gmap->private = kvm;
1398 		kvm->arch.gmap->pfault_enabled = 0;
1399 	}
1400 
1401 	kvm->arch.css_support = 0;
1402 	kvm->arch.use_irqchip = 0;
1403 	kvm->arch.epoch = 0;
1404 
1405 	spin_lock_init(&kvm->arch.start_stop_lock);
1406 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1407 
1408 	return 0;
1409 out_err:
1410 	free_page((unsigned long)kvm->arch.sie_page2);
1411 	debug_unregister(kvm->arch.dbf);
1412 	sca_dispose(kvm);
1413 	KVM_EVENT(3, "creation of vm failed: %d", rc);
1414 	return rc;
1415 }
1416 
1417 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1418 {
1419 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1420 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1421 	kvm_s390_clear_local_irqs(vcpu);
1422 	kvm_clear_async_pf_completion_queue(vcpu);
1423 	if (!kvm_is_ucontrol(vcpu->kvm))
1424 		sca_del_vcpu(vcpu);
1425 
1426 	if (kvm_is_ucontrol(vcpu->kvm))
1427 		gmap_free(vcpu->arch.gmap);
1428 
1429 	if (vcpu->kvm->arch.use_cmma)
1430 		kvm_s390_vcpu_unsetup_cmma(vcpu);
1431 	free_page((unsigned long)(vcpu->arch.sie_block));
1432 
1433 	kvm_vcpu_uninit(vcpu);
1434 	kmem_cache_free(kvm_vcpu_cache, vcpu);
1435 }
1436 
1437 static void kvm_free_vcpus(struct kvm *kvm)
1438 {
1439 	unsigned int i;
1440 	struct kvm_vcpu *vcpu;
1441 
1442 	kvm_for_each_vcpu(i, vcpu, kvm)
1443 		kvm_arch_vcpu_destroy(vcpu);
1444 
1445 	mutex_lock(&kvm->lock);
1446 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1447 		kvm->vcpus[i] = NULL;
1448 
1449 	atomic_set(&kvm->online_vcpus, 0);
1450 	mutex_unlock(&kvm->lock);
1451 }
1452 
1453 void kvm_arch_destroy_vm(struct kvm *kvm)
1454 {
1455 	kvm_free_vcpus(kvm);
1456 	sca_dispose(kvm);
1457 	debug_unregister(kvm->arch.dbf);
1458 	free_page((unsigned long)kvm->arch.sie_page2);
1459 	if (!kvm_is_ucontrol(kvm))
1460 		gmap_free(kvm->arch.gmap);
1461 	kvm_s390_destroy_adapters(kvm);
1462 	kvm_s390_clear_float_irqs(kvm);
1463 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1464 }
1465 
1466 /* Section: vcpu related */
1467 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1468 {
1469 	vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
1470 	if (!vcpu->arch.gmap)
1471 		return -ENOMEM;
1472 	vcpu->arch.gmap->private = vcpu->kvm;
1473 
1474 	return 0;
1475 }
1476 
1477 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1478 {
1479 	read_lock(&vcpu->kvm->arch.sca_lock);
1480 	if (vcpu->kvm->arch.use_esca) {
1481 		struct esca_block *sca = vcpu->kvm->arch.sca;
1482 
1483 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1484 		sca->cpu[vcpu->vcpu_id].sda = 0;
1485 	} else {
1486 		struct bsca_block *sca = vcpu->kvm->arch.sca;
1487 
1488 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1489 		sca->cpu[vcpu->vcpu_id].sda = 0;
1490 	}
1491 	read_unlock(&vcpu->kvm->arch.sca_lock);
1492 }
1493 
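/*
 * Register the VCPU's SIE block in the (E)SCA. The 64-bit SCA origin is
 * split into the scaoh/scaol fields; for the extended SCA, ecb2 bit 0x04
 * tells SIE to interpret the SCA in ESCA format.
 */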
1494 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1495 {
1496 	read_lock(&vcpu->kvm->arch.sca_lock);
1497 	if (vcpu->kvm->arch.use_esca) {
1498 		struct esca_block *sca = vcpu->kvm->arch.sca;
1499 
1500 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1501 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1502 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1503 		vcpu->arch.sie_block->ecb2 |= 0x04U;
1504 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1505 	} else {
1506 		struct bsca_block *sca = vcpu->kvm->arch.sca;
1507 
1508 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1509 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1510 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1511 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1512 	}
1513 	read_unlock(&vcpu->kvm->arch.sca_lock);
1514 }
1515 
1516 /* Basic SCA to Extended SCA data copy routines */
1517 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1518 {
1519 	d->sda = s->sda;
1520 	d->sigp_ctrl.c = s->sigp_ctrl.c;
1521 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1522 }
1523 
1524 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1525 {
1526 	int i;
1527 
1528 	d->ipte_control = s->ipte_control;
1529 	d->mcn[0] = s->mcn;
1530 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1531 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1532 }
1533 
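/*
 * Replace the basic SCA with an extended SCA. All VCPUs are blocked and
 * kicked out of SIE while the SCA pointers in every SIE block are rewritten,
 * so no CPU can run with a stale SCA reference.
 */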
1534 static int sca_switch_to_extended(struct kvm *kvm)
1535 {
1536 	struct bsca_block *old_sca = kvm->arch.sca;
1537 	struct esca_block *new_sca;
1538 	struct kvm_vcpu *vcpu;
1539 	unsigned int vcpu_idx;
1540 	u32 scaol, scaoh;
1541 
1542 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1543 	if (!new_sca)
1544 		return -ENOMEM;
1545 
1546 	scaoh = (u32)((u64)(new_sca) >> 32);
1547 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
1548 
1549 	kvm_s390_vcpu_block_all(kvm);
1550 	write_lock(&kvm->arch.sca_lock);
1551 
1552 	sca_copy_b_to_e(new_sca, old_sca);
1553 
1554 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1555 		vcpu->arch.sie_block->scaoh = scaoh;
1556 		vcpu->arch.sie_block->scaol = scaol;
1557 		vcpu->arch.sie_block->ecb2 |= 0x04U;
1558 	}
1559 	kvm->arch.sca = new_sca;
1560 	kvm->arch.use_esca = 1;
1561 
1562 	write_unlock(&kvm->arch.sca_lock);
1563 	kvm_s390_vcpu_unblock_all(kvm);
1564 
1565 	free_page((unsigned long)old_sca);
1566 
1567 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
1568 		 old_sca, kvm->arch.sca);
1569 	return 0;
1570 }
1571 
1572 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1573 {
1574 	int rc;
1575 
1576 	if (id < KVM_S390_BSCA_CPU_SLOTS)
1577 		return true;
1578 	if (!sclp.has_esca || !sclp.has_64bscao)
1579 		return false;
1580 
1581 	mutex_lock(&kvm->lock);
1582 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1583 	mutex_unlock(&kvm->lock);
1584 
1585 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1586 }
1587 
1588 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1589 {
1590 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1591 	kvm_clear_async_pf_completion_queue(vcpu);
1592 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1593 				    KVM_SYNC_GPRS |
1594 				    KVM_SYNC_ACRS |
1595 				    KVM_SYNC_CRS |
1596 				    KVM_SYNC_ARCH0 |
1597 				    KVM_SYNC_PFAULT;
1598 	if (test_kvm_facility(vcpu->kvm, 64))
1599 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1600 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
1601 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1602 	 */
1603 	if (MACHINE_HAS_VX)
1604 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1605 	else
1606 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
1607 
1608 	if (kvm_is_ucontrol(vcpu->kvm))
1609 		return __kvm_ucontrol_vcpu_init(vcpu);
1610 
1611 	return 0;
1612 }
1613 
1614 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1615 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1616 {
1617 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
1618 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1619 	vcpu->arch.cputm_start = get_tod_clock_fast();
1620 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1621 }
1622 
1623 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1624 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1625 {
1626 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
1627 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1628 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1629 	vcpu->arch.cputm_start = 0;
1630 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1631 }
1632 
1633 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1634 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1635 {
1636 	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
1637 	vcpu->arch.cputm_enabled = true;
1638 	__start_cpu_timer_accounting(vcpu);
1639 }
1640 
1641 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1642 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1643 {
1644 	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
1645 	__stop_cpu_timer_accounting(vcpu);
1646 	vcpu->arch.cputm_enabled = false;
1647 }
1648 
1649 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1650 {
1651 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1652 	__enable_cpu_timer_accounting(vcpu);
1653 	preempt_enable();
1654 }
1655 
1656 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1657 {
1658 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1659 	__disable_cpu_timer_accounting(vcpu);
1660 	preempt_enable();
1661 }
1662 
1663 /* set the cpu timer - may only be called from the VCPU thread itself */
1664 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
1665 {
1666 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1667 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1668 	if (vcpu->arch.cputm_enabled)
1669 		vcpu->arch.cputm_start = get_tod_clock_fast();
1670 	vcpu->arch.sie_block->cputm = cputm;
1671 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1672 	preempt_enable();
1673 }
1674 
1675 /* update and get the cpu timer - can also be called from other VCPU threads */
1676 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
1677 {
1678 	unsigned int seq;
1679 	__u64 value;
1680 
1681 	if (unlikely(!vcpu->arch.cputm_enabled))
1682 		return vcpu->arch.sie_block->cputm;
1683 
1684 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1685 	do {
1686 		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
1687 		/*
1688 		 * If the writer would ever execute a read in the critical
1689 		 * section, e.g. in irq context, we have a deadlock.
1690 		 */
1691 		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
1692 		value = vcpu->arch.sie_block->cputm;
1693 		/* if cputm_start is 0, accounting is being started/stopped */
1694 		if (likely(vcpu->arch.cputm_start))
1695 			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1696 	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
1697 	preempt_enable();
1698 	return value;
1699 }
1700 
1701 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
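/*
 * While a VCPU is loaded, current->thread.fpu.regs points at the guest
 * register save area in kvm_run, so the lazy FPU/vector handling of the
 * host kernel saves and restores guest state directly. The host pointer
 * and FPC are stashed in arch.host_fpregs and restored on vcpu_put.
 */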
1702 {
1703 	/* Save host register state */
1704 	save_fpu_regs();
1705 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
1706 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
1707 
1708 	if (MACHINE_HAS_VX)
1709 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
1710 	else
1711 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
1712 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
1713 	if (test_fp_ctl(current->thread.fpu.fpc))
1714 		/* User space provided an invalid FPC, let's clear it */
1715 		current->thread.fpu.fpc = 0;
1716 
1717 	save_access_regs(vcpu->arch.host_acrs);
1718 	restore_access_regs(vcpu->run->s.regs.acrs);
1719 	gmap_enable(vcpu->arch.gmap);
1720 	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1721 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1722 		__start_cpu_timer_accounting(vcpu);
1723 	vcpu->cpu = cpu;
1724 }
1725 
1726 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1727 {
1728 	vcpu->cpu = -1;
1729 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1730 		__stop_cpu_timer_accounting(vcpu);
1731 	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1732 	gmap_disable(vcpu->arch.gmap);
1733 
1734 	/* Save guest register state */
1735 	save_fpu_regs();
1736 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
1737 
1738 	/* Restore host register state */
1739 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
1740 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
1741 
1742 	save_access_regs(vcpu->run->s.regs.acrs);
1743 	restore_access_regs(vcpu->arch.host_acrs);
1744 }
1745 
1746 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1747 {
1748 	/* this equals initial cpu reset in the POP (Principles of Operation), but we don't switch to ESA */
1749 	vcpu->arch.sie_block->gpsw.mask = 0UL;
1750 	vcpu->arch.sie_block->gpsw.addr = 0UL;
1751 	kvm_s390_set_prefix(vcpu, 0);
1752 	kvm_s390_set_cpu_timer(vcpu, 0);
1753 	vcpu->arch.sie_block->ckc       = 0UL;
1754 	vcpu->arch.sie_block->todpr     = 0;
1755 	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1756 	vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
1757 	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1758 	/* make sure the new fpc will be lazily loaded */
1759 	save_fpu_regs();
1760 	current->thread.fpu.fpc = 0;
1761 	vcpu->arch.sie_block->gbea = 1;
1762 	vcpu->arch.sie_block->pp = 0;
1763 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1764 	kvm_clear_async_pf_completion_queue(vcpu);
1765 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1766 		kvm_s390_vcpu_stop(vcpu);
1767 	kvm_s390_clear_local_irqs(vcpu);
1768 }
1769 
1770 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1771 {
1772 	mutex_lock(&vcpu->kvm->lock);
1773 	preempt_disable();
1774 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1775 	preempt_enable();
1776 	mutex_unlock(&vcpu->kvm->lock);
1777 	if (!kvm_is_ucontrol(vcpu->kvm)) {
1778 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1779 		sca_add_vcpu(vcpu);
1780 	}
1781 
1782 }
1783 
1784 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1785 {
1786 	if (!test_kvm_facility(vcpu->kvm, 76))
1787 		return;
1788 
1789 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1790 
1791 	if (vcpu->kvm->arch.crypto.aes_kw)
1792 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1793 	if (vcpu->kvm->arch.crypto.dea_kw)
1794 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1795 
1796 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1797 }
1798 
1799 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1800 {
1801 	free_page(vcpu->arch.sie_block->cbrlo);
1802 	vcpu->arch.sie_block->cbrlo = 0;
1803 }
1804 
1805 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1806 {
1807 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1808 	if (!vcpu->arch.sie_block->cbrlo)
1809 		return -ENOMEM;
1810 
1811 	vcpu->arch.sie_block->ecb2 |= 0x80;
1812 	vcpu->arch.sie_block->ecb2 &= ~0x08;
1813 	return 0;
1814 }
1815 
1816 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1817 {
1818 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1819 
1820 	vcpu->arch.sie_block->ibc = model->ibc;
1821 	if (test_kvm_facility(vcpu->kvm, 7))
1822 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
1823 }
1824 
1825 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1826 {
1827 	int rc = 0;
1828 
1829 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1830 						    CPUSTAT_SM |
1831 						    CPUSTAT_STOPPED);
1832 
1833 	if (test_kvm_facility(vcpu->kvm, 78))
1834 		atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1835 	else if (test_kvm_facility(vcpu->kvm, 8))
1836 		atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1837 
1838 	kvm_s390_vcpu_setup_model(vcpu);
1839 
1840 	vcpu->arch.sie_block->ecb = 0x02;
1841 	if (test_kvm_facility(vcpu->kvm, 9))
1842 		vcpu->arch.sie_block->ecb |= 0x04;
1843 	if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
1844 		vcpu->arch.sie_block->ecb |= 0x10;
1845 
1846 	if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
1847 		vcpu->arch.sie_block->ecb2 |= 0x08;
1848 	vcpu->arch.sie_block->eca = 0x1002000U;
1849 	if (sclp.has_cei)
1850 		vcpu->arch.sie_block->eca |= 0x80000000U;
1851 	if (sclp.has_ib)
1852 		vcpu->arch.sie_block->eca |= 0x40000000U;
1853 	if (sclp.has_siif)
1854 		vcpu->arch.sie_block->eca |= 1;
1855 	if (sclp.has_sigpif)
1856 		vcpu->arch.sie_block->eca |= 0x10000000U;
1857 	if (test_kvm_facility(vcpu->kvm, 64))
1858 		vcpu->arch.sie_block->ecb3 |= 0x01;
1859 	if (test_kvm_facility(vcpu->kvm, 129)) {
1860 		vcpu->arch.sie_block->eca |= 0x00020000;
1861 		vcpu->arch.sie_block->ecd |= 0x20000000;
1862 	}
1863 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
1864 	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1865 	if (test_kvm_facility(vcpu->kvm, 74))
1866 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
1867 
1868 	if (vcpu->kvm->arch.use_cmma) {
1869 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
1870 		if (rc)
1871 			return rc;
1872 	}
1873 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1874 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1875 
1876 	kvm_s390_vcpu_crypto_setup(vcpu);
1877 
1878 	return rc;
1879 }
1880 
1881 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1882 				      unsigned int id)
1883 {
1884 	struct kvm_vcpu *vcpu;
1885 	struct sie_page *sie_page;
1886 	int rc = -EINVAL;
1887 
1888 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
1889 		goto out;
1890 
1891 	rc = -ENOMEM;
1892 
1893 	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1894 	if (!vcpu)
1895 		goto out;
1896 
1897 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1898 	if (!sie_page)
1899 		goto out_free_cpu;
1900 
1901 	vcpu->arch.sie_block = &sie_page->sie_block;
1902 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1903 
1904 	/* the real guest size will always be smaller than msl */
1905 	vcpu->arch.sie_block->mso = 0;
1906 	vcpu->arch.sie_block->msl = sclp.hamax;
1907 
1908 	vcpu->arch.sie_block->icpua = id;
1909 	spin_lock_init(&vcpu->arch.local_int.lock);
1910 	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
1911 	vcpu->arch.local_int.wq = &vcpu->wq;
1912 	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
1913 	seqcount_init(&vcpu->arch.cputm_seqcount);
1914 
1915 	rc = kvm_vcpu_init(vcpu, kvm, id);
1916 	if (rc)
1917 		goto out_free_sie_block;
1918 	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
1919 		 vcpu->arch.sie_block);
1920 	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
1921 
1922 	return vcpu;
1923 out_free_sie_block:
1924 	free_page((unsigned long)(vcpu->arch.sie_block));
1925 out_free_cpu:
1926 	kmem_cache_free(kvm_vcpu_cache, vcpu);
1927 out:
1928 	return ERR_PTR(rc);
1929 }
1930 
1931 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
1932 {
1933 	return kvm_s390_vcpu_has_irq(vcpu, 0);
1934 }
1935 
1936 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
1937 {
1938 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1939 	exit_sie(vcpu);
1940 }
1941 
1942 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
1943 {
1944 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1945 }
1946 
1947 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
1948 {
1949 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1950 	exit_sie(vcpu);
1951 }
1952 
1953 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
1954 {
1955 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1956 }
1957 
1958 /*
1959  * Kick a guest cpu out of SIE and wait until SIE is not running.
1960  * If the CPU is not running (e.g. waiting as idle) the function will
1961  * return immediately. */
1962 void exit_sie(struct kvm_vcpu *vcpu)
1963 {
1964 	atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
1965 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
1966 		cpu_relax();
1967 }
1968 
1969 /* Kick a guest cpu out of SIE to process a request synchronously */
1970 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
1971 {
1972 	kvm_make_request(req, vcpu);
1973 	kvm_s390_vcpu_request(vcpu);
1974 }
1975 
1976 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
1977 {
1978 	int i;
1979 	struct kvm *kvm = gmap->private;
1980 	struct kvm_vcpu *vcpu;
1981 
1982 	kvm_for_each_vcpu(i, vcpu, kvm) {
1983 		/* match against both prefix pages */
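		/* the prefix area spans two consecutive pages, so bit 2^12 of
		 * the notified address is ignored in the comparison */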
1984 		if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
1985 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
1986 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
1987 		}
1988 	}
1989 }
1990 
1991 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
1992 {
1993 	/* kvm common code refers to this, but never calls it */
1994 	BUG();
1995 	return 0;
1996 }
1997 
1998 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
1999 					   struct kvm_one_reg *reg)
2000 {
2001 	int r = -EINVAL;
2002 
2003 	switch (reg->id) {
2004 	case KVM_REG_S390_TODPR:
2005 		r = put_user(vcpu->arch.sie_block->todpr,
2006 			     (u32 __user *)reg->addr);
2007 		break;
2008 	case KVM_REG_S390_EPOCHDIFF:
2009 		r = put_user(vcpu->arch.sie_block->epoch,
2010 			     (u64 __user *)reg->addr);
2011 		break;
2012 	case KVM_REG_S390_CPU_TIMER:
2013 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
2014 			     (u64 __user *)reg->addr);
2015 		break;
2016 	case KVM_REG_S390_CLOCK_COMP:
2017 		r = put_user(vcpu->arch.sie_block->ckc,
2018 			     (u64 __user *)reg->addr);
2019 		break;
2020 	case KVM_REG_S390_PFTOKEN:
2021 		r = put_user(vcpu->arch.pfault_token,
2022 			     (u64 __user *)reg->addr);
2023 		break;
2024 	case KVM_REG_S390_PFCOMPARE:
2025 		r = put_user(vcpu->arch.pfault_compare,
2026 			     (u64 __user *)reg->addr);
2027 		break;
2028 	case KVM_REG_S390_PFSELECT:
2029 		r = put_user(vcpu->arch.pfault_select,
2030 			     (u64 __user *)reg->addr);
2031 		break;
2032 	case KVM_REG_S390_PP:
2033 		r = put_user(vcpu->arch.sie_block->pp,
2034 			     (u64 __user *)reg->addr);
2035 		break;
2036 	case KVM_REG_S390_GBEA:
2037 		r = put_user(vcpu->arch.sie_block->gbea,
2038 			     (u64 __user *)reg->addr);
2039 		break;
2040 	default:
2041 		break;
2042 	}
2043 
2044 	return r;
2045 }
2046 
2047 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2048 					   struct kvm_one_reg *reg)
2049 {
2050 	int r = -EINVAL;
2051 	__u64 val;
2052 
2053 	switch (reg->id) {
2054 	case KVM_REG_S390_TODPR:
2055 		r = get_user(vcpu->arch.sie_block->todpr,
2056 			     (u32 __user *)reg->addr);
2057 		break;
2058 	case KVM_REG_S390_EPOCHDIFF:
2059 		r = get_user(vcpu->arch.sie_block->epoch,
2060 			     (u64 __user *)reg->addr);
2061 		break;
2062 	case KVM_REG_S390_CPU_TIMER:
2063 		r = get_user(val, (u64 __user *)reg->addr);
2064 		if (!r)
2065 			kvm_s390_set_cpu_timer(vcpu, val);
2066 		break;
2067 	case KVM_REG_S390_CLOCK_COMP:
2068 		r = get_user(vcpu->arch.sie_block->ckc,
2069 			     (u64 __user *)reg->addr);
2070 		break;
2071 	case KVM_REG_S390_PFTOKEN:
2072 		r = get_user(vcpu->arch.pfault_token,
2073 			     (u64 __user *)reg->addr);
2074 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2075 			kvm_clear_async_pf_completion_queue(vcpu);
2076 		break;
2077 	case KVM_REG_S390_PFCOMPARE:
2078 		r = get_user(vcpu->arch.pfault_compare,
2079 			     (u64 __user *)reg->addr);
2080 		break;
2081 	case KVM_REG_S390_PFSELECT:
2082 		r = get_user(vcpu->arch.pfault_select,
2083 			     (u64 __user *)reg->addr);
2084 		break;
2085 	case KVM_REG_S390_PP:
2086 		r = get_user(vcpu->arch.sie_block->pp,
2087 			     (u64 __user *)reg->addr);
2088 		break;
2089 	case KVM_REG_S390_GBEA:
2090 		r = get_user(vcpu->arch.sie_block->gbea,
2091 			     (u64 __user *)reg->addr);
2092 		break;
2093 	default:
2094 		break;
2095 	}
2096 
2097 	return r;
2098 }
2099 
2100 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2101 {
2102 	kvm_s390_vcpu_initial_reset(vcpu);
2103 	return 0;
2104 }
2105 
2106 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2107 {
2108 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2109 	return 0;
2110 }
2111 
2112 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2113 {
2114 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2115 	return 0;
2116 }
2117 
2118 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2119 				  struct kvm_sregs *sregs)
2120 {
2121 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2122 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2123 	restore_access_regs(vcpu->run->s.regs.acrs);
2124 	return 0;
2125 }
2126 
2127 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2128 				  struct kvm_sregs *sregs)
2129 {
2130 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2131 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2132 	return 0;
2133 }
2134 
2135 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2136 {
2137 	/* make sure the new values will be lazily loaded */
2138 	save_fpu_regs();
2139 	if (test_fp_ctl(fpu->fpc))
2140 		return -EINVAL;
2141 	current->thread.fpu.fpc = fpu->fpc;
2142 	if (MACHINE_HAS_VX)
2143 		convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
2144 	else
2145 		memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));
2146 	return 0;
2147 }
2148 
2149 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2150 {
2151 	/* make sure we have the latest values */
2152 	save_fpu_regs();
2153 	if (MACHINE_HAS_VX)
2154 		convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
2155 	else
2156 		memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
2157 	fpu->fpc = current->thread.fpu.fpc;
2158 	return 0;
2159 }
2160 
2161 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2162 {
2163 	int rc = 0;
2164 
2165 	if (!is_vcpu_stopped(vcpu))
2166 		rc = -EBUSY;
2167 	else {
2168 		vcpu->run->psw_mask = psw.mask;
2169 		vcpu->run->psw_addr = psw.addr;
2170 	}
2171 	return rc;
2172 }
2173 
2174 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2175 				  struct kvm_translation *tr)
2176 {
2177 	return -EINVAL; /* not implemented yet */
2178 }
2179 
2180 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2181 			      KVM_GUESTDBG_USE_HW_BP | \
2182 			      KVM_GUESTDBG_ENABLE)
2183 
2184 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2185 					struct kvm_guest_debug *dbg)
2186 {
2187 	int rc = 0;
2188 
2189 	vcpu->guest_debug = 0;
2190 	kvm_s390_clear_bp_data(vcpu);
2191 
2192 	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2193 		return -EINVAL;
2194 	if (!sclp.has_gpere)
2195 		return -EINVAL;
2196 
2197 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
2198 		vcpu->guest_debug = dbg->control;
2199 		/* enforce guest PER */
2200 		atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2201 
2202 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2203 			rc = kvm_s390_import_bp_data(vcpu, dbg);
2204 	} else {
2205 		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2206 		vcpu->arch.guestdbg.last_bp = 0;
2207 	}
2208 
2209 	if (rc) {
2210 		vcpu->guest_debug = 0;
2211 		kvm_s390_clear_bp_data(vcpu);
2212 		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2213 	}
2214 
2215 	return rc;
2216 }
2217 
2218 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2219 				    struct kvm_mp_state *mp_state)
2220 {
2221 	/* CHECK_STOP and LOAD are not supported yet */
2222 	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2223 				       KVM_MP_STATE_OPERATING;
2224 }
2225 
2226 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2227 				    struct kvm_mp_state *mp_state)
2228 {
2229 	int rc = 0;
2230 
2231 	/* user space knows about this interface - let it control the state */
2232 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2233 
2234 	switch (mp_state->mp_state) {
2235 	case KVM_MP_STATE_STOPPED:
2236 		kvm_s390_vcpu_stop(vcpu);
2237 		break;
2238 	case KVM_MP_STATE_OPERATING:
2239 		kvm_s390_vcpu_start(vcpu);
2240 		break;
2241 	case KVM_MP_STATE_LOAD:
2242 	case KVM_MP_STATE_CHECK_STOP:
2243 		/* fall through - CHECK_STOP and LOAD are not supported yet */
2244 	default:
2245 		rc = -ENXIO;
2246 	}
2247 
2248 	return rc;
2249 }
2250 
2251 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2252 {
2253 	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2254 }
2255 
2256 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2257 {
2258 retry:
2259 	kvm_s390_vcpu_request_handled(vcpu);
2260 	if (!vcpu->requests)
2261 		return 0;
2262 	/*
2263 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2264 	 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
2265 	 * This ensures that the ipte instruction for this request has
2266 	 * already finished. We might race against a second unmapper that
2267 	 * wants to set the blocking bit. Let's just retry the request loop.
2268 	 */
2269 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2270 		int rc;
2271 		rc = gmap_ipte_notify(vcpu->arch.gmap,
2272 				      kvm_s390_get_prefix(vcpu),
2273 				      PAGE_SIZE * 2);
2274 		if (rc)
2275 			return rc;
2276 		goto retry;
2277 	}
2278 
2279 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
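		/* presumably: an invalid cached host cpu id makes SIE purge
		 * the guest TLB on the next entry */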
2280 		vcpu->arch.sie_block->ihcpu = 0xffff;
2281 		goto retry;
2282 	}
2283 
2284 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2285 		if (!ibs_enabled(vcpu)) {
2286 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2287 			atomic_or(CPUSTAT_IBS,
2288 					&vcpu->arch.sie_block->cpuflags);
2289 		}
2290 		goto retry;
2291 	}
2292 
2293 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2294 		if (ibs_enabled(vcpu)) {
2295 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2296 			atomic_andnot(CPUSTAT_IBS,
2297 					  &vcpu->arch.sie_block->cpuflags);
2298 		}
2299 		goto retry;
2300 	}
2301 
2302 	/* nothing to do, just clear the request */
2303 	clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
2304 
2305 	return 0;
2306 }
2307 
2308 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2309 {
2310 	struct kvm_vcpu *vcpu;
2311 	int i;
2312 
2313 	mutex_lock(&kvm->lock);
2314 	preempt_disable();
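	/* the epoch is the guest-to-host TOD difference that SIE adds to the
	 * guest's view of the TOD clock while the vcpu runs */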
2315 	kvm->arch.epoch = tod - get_tod_clock();
2316 	kvm_s390_vcpu_block_all(kvm);
2317 	kvm_for_each_vcpu(i, vcpu, kvm)
2318 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2319 	kvm_s390_vcpu_unblock_all(kvm);
2320 	preempt_enable();
2321 	mutex_unlock(&kvm->lock);
2322 }
2323 
2324 /**
2325  * kvm_arch_fault_in_page - fault-in guest page if necessary
2326  * @vcpu: The corresponding virtual cpu
2327  * @gpa: Guest physical address
2328  * @writable: Whether the page should be writable or not
2329  *
2330  * Make sure that a guest page has been faulted-in on the host.
2331  *
2332  * Return: Zero on success, negative error code otherwise.
2333  */
2334 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2335 {
2336 	return gmap_fault(vcpu->arch.gmap, gpa,
2337 			  writable ? FAULT_FLAG_WRITE : 0);
2338 }
2339 
2340 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2341 				      unsigned long token)
2342 {
2343 	struct kvm_s390_interrupt inti;
2344 	struct kvm_s390_irq irq;
2345 
2346 	if (start_token) {
2347 		irq.u.ext.ext_params2 = token;
2348 		irq.type = KVM_S390_INT_PFAULT_INIT;
2349 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2350 	} else {
2351 		inti.type = KVM_S390_INT_PFAULT_DONE;
2352 		inti.parm64 = token;
2353 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2354 	}
2355 }
2356 
2357 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2358 				     struct kvm_async_pf *work)
2359 {
2360 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2361 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2362 }
2363 
2364 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2365 				 struct kvm_async_pf *work)
2366 {
2367 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2368 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2369 }
2370 
2371 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
2372 			       struct kvm_async_pf *work)
2373 {
2374 	/* s390 will always inject the page directly */
2375 }
2376 
2377 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2378 {
2379 	/*
2380 	 * s390 will always inject the page directly,
2381 	 * but we still want check_async_completion to clean up
2382 	 */
2383 	return true;
2384 }
2385 
2386 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2387 {
2388 	hva_t hva;
2389 	struct kvm_arch_async_pf arch;
2390 	int rc;
2391 
2392 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2393 		return 0;
2394 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2395 	    vcpu->arch.pfault_compare)
2396 		return 0;
2397 	if (psw_extint_disabled(vcpu))
2398 		return 0;
2399 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
2400 		return 0;
2401 	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2402 		return 0;
2403 	if (!vcpu->arch.gmap->pfault_enabled)
2404 		return 0;
2405 
2406 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2407 	hva += current->thread.gmap_addr & ~PAGE_MASK;
2408 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2409 		return 0;
2410 
2411 	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2412 	return rc;
2413 }
2414 
2415 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2416 {
2417 	int rc, cpuflags;
2418 
2419 	/*
2420 	 * On s390 notifications for arriving pages will be delivered directly
2421 	 * to the guest, but the housekeeping for completed pfaults is
2422 	 * handled outside the worker.
2423 	 */
2424 	kvm_check_async_pf_completion(vcpu);
2425 
2426 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2427 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2428 
2429 	if (need_resched())
2430 		schedule();
2431 
2432 	if (test_cpu_flag(CIF_MCCK_PENDING))
2433 		s390_handle_mcck();
2434 
2435 	if (!kvm_is_ucontrol(vcpu->kvm)) {
2436 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
2437 		if (rc)
2438 			return rc;
2439 	}
2440 
2441 	rc = kvm_s390_handle_requests(vcpu);
2442 	if (rc)
2443 		return rc;
2444 
2445 	if (guestdbg_enabled(vcpu)) {
2446 		kvm_s390_backup_guest_per_regs(vcpu);
2447 		kvm_s390_patch_guest_per_regs(vcpu);
2448 	}
2449 
2450 	vcpu->arch.sie_block->icptcode = 0;
2451 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2452 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2453 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
2454 
2455 	return 0;
2456 }
2457 
2458 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2459 {
2460 	struct kvm_s390_pgm_info pgm_info = {
2461 		.code = PGM_ADDRESSING,
2462 	};
2463 	u8 opcode, ilen;
2464 	int rc;
2465 
2466 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2467 	trace_kvm_s390_sie_fault(vcpu);
2468 
2469 	/*
2470 	 * We want to inject an addressing exception, which is defined as a
2471 	 * suppressing or terminating exception. However, since we came here
2472 	 * by a DAT access exception, the PSW still points to the faulting
2473 	 * instruction since DAT exceptions are nullifying. So we've got
2474 	 * to look up the current opcode to get the length of the instruction
2475 	 * to be able to forward the PSW.
2476 	 */
2477 	rc = read_guest_instr(vcpu, &opcode, 1);
2478 	ilen = insn_length(opcode);
2479 	if (rc < 0) {
2480 		return rc;
2481 	} else if (rc) {
2482 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
2483 		 * Forward by arbitrary ilc, injection will take care of
2484 		 * nullification if necessary.
2485 		 */
2486 		pgm_info = vcpu->arch.pgm;
2487 		ilen = 4;
2488 	}
2489 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
2490 	kvm_s390_forward_psw(vcpu, ilen);
2491 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
2492 }
2493 
2494 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2495 {
2496 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2497 		   vcpu->arch.sie_block->icptcode);
2498 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2499 
2500 	if (guestdbg_enabled(vcpu))
2501 		kvm_s390_restore_guest_per_regs(vcpu);
2502 
2503 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2504 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2505 
2506 	if (vcpu->arch.sie_block->icptcode > 0) {
2507 		int rc = kvm_handle_sie_intercept(vcpu);
2508 
2509 		if (rc != -EOPNOTSUPP)
2510 			return rc;
2511 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2512 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2513 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2514 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2515 		return -EREMOTE;
2516 	} else if (exit_reason != -EFAULT) {
2517 		vcpu->stat.exit_null++;
2518 		return 0;
2519 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
2520 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2521 		vcpu->run->s390_ucontrol.trans_exc_code =
2522 						current->thread.gmap_addr;
2523 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
2524 		return -EREMOTE;
2525 	} else if (current->thread.gmap_pfault) {
2526 		trace_kvm_s390_major_guest_pfault(vcpu);
2527 		current->thread.gmap_pfault = 0;
2528 		if (kvm_arch_setup_async_pf(vcpu))
2529 			return 0;
2530 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2531 	}
2532 	return vcpu_post_run_fault_in_sie(vcpu);
2533 }
2534 
2535 static int __vcpu_run(struct kvm_vcpu *vcpu)
2536 {
2537 	int rc, exit_reason;
2538 
2539 	/*
2540 	 * We try to hold kvm->srcu during most of vcpu_run (except when
2541 	 * running the guest), so that memslots (and other stuff) are protected
2542 	 */
2543 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2544 
2545 	do {
2546 		rc = vcpu_pre_run(vcpu);
2547 		if (rc)
2548 			break;
2549 
2550 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2551 		/*
2552 		 * As PF_VCPU will be used in the fault handler, there
2553 		 * should be no uaccess between guest_enter and guest_exit.
2554 		 */
2555 		local_irq_disable();
2556 		__kvm_guest_enter();
2557 		__disable_cpu_timer_accounting(vcpu);
2558 		local_irq_enable();
2559 		exit_reason = sie64a(vcpu->arch.sie_block,
2560 				     vcpu->run->s.regs.gprs);
2561 		local_irq_disable();
2562 		__enable_cpu_timer_accounting(vcpu);
2563 		__kvm_guest_exit();
2564 		local_irq_enable();
2565 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2566 
2567 		rc = vcpu_post_run(vcpu, exit_reason);
2568 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2569 
2570 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2571 	return rc;
2572 }
2573 
2574 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2575 {
2576 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2577 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2578 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2579 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2580 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2581 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2582 		/* some control register changes require a tlb flush */
2583 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2584 	}
2585 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2586 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
2587 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2588 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2589 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2590 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2591 	}
2592 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2593 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2594 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2595 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2596 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2597 			kvm_clear_async_pf_completion_queue(vcpu);
2598 	}
2599 	kvm_run->kvm_dirty_regs = 0;
2600 }
2601 
2602 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2603 {
2604 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2605 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2606 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2607 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2608 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
2609 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2610 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2611 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2612 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2613 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2614 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2615 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2616 }
2617 
2618 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2619 {
2620 	int rc;
2621 	sigset_t sigsaved;
2622 
2623 	if (guestdbg_exit_pending(vcpu)) {
2624 		kvm_s390_prepare_debug_exit(vcpu);
2625 		return 0;
2626 	}
2627 
2628 	if (vcpu->sigset_active)
2629 		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2630 
2631 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2632 		kvm_s390_vcpu_start(vcpu);
2633 	} else if (is_vcpu_stopped(vcpu)) {
2634 		pr_err_ratelimited("can't run stopped vcpu %d\n",
2635 				   vcpu->vcpu_id);
2636 		return -EINVAL;
2637 	}
2638 
2639 	sync_regs(vcpu, kvm_run);
2640 	enable_cpu_timer_accounting(vcpu);
2641 
2642 	might_fault();
2643 	rc = __vcpu_run(vcpu);
2644 
2645 	if (signal_pending(current) && !rc) {
2646 		kvm_run->exit_reason = KVM_EXIT_INTR;
2647 		rc = -EINTR;
2648 	}
2649 
2650 	if (guestdbg_exit_pending(vcpu) && !rc)  {
2651 		kvm_s390_prepare_debug_exit(vcpu);
2652 		rc = 0;
2653 	}
2654 
2655 	if (rc == -EREMOTE) {
2656 		/* userspace support is needed, kvm_run has been prepared */
2657 		rc = 0;
2658 	}
2659 
2660 	disable_cpu_timer_accounting(vcpu);
2661 	store_regs(vcpu, kvm_run);
2662 
2663 	if (vcpu->sigset_active)
2664 		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2665 
2666 	vcpu->stat.exit_userspace++;
2667 	return rc;
2668 }
2669 
2670 /*
2671  * store status at address
2672  * we have two special cases:
2673  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2674  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2675  */
2676 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2677 {
2678 	unsigned char archmode = 1;
2679 	freg_t fprs[NUM_FPRS];
2680 	unsigned int px;
2681 	u64 clkcomp, cputm;
2682 	int rc;
2683 
2684 	px = kvm_s390_get_prefix(vcpu);
2685 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2686 		if (write_guest_abs(vcpu, 163, &archmode, 1))
2687 			return -EFAULT;
2688 		gpa = 0;
2689 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2690 		if (write_guest_real(vcpu, 163, &archmode, 1))
2691 			return -EFAULT;
2692 		gpa = px;
2693 	} else
2694 		gpa -= __LC_FPREGS_SAVE_AREA;
2695 
2696 	/* manually convert vector registers if necessary */
2697 	if (MACHINE_HAS_VX) {
2698 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
2699 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2700 				     fprs, 128);
2701 	} else {
2702 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2703 				     vcpu->run->s.regs.fprs, 128);
2704 	}
2705 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2706 			      vcpu->run->s.regs.gprs, 128);
2707 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2708 			      &vcpu->arch.sie_block->gpsw, 16);
2709 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2710 			      &px, 4);
2711 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2712 			      &vcpu->run->s.regs.fpc, 4);
2713 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2714 			      &vcpu->arch.sie_block->todpr, 4);
2715 	cputm = kvm_s390_get_cpu_timer(vcpu);
2716 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2717 			      &cputm, 8);
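	/* the save area holds bits 0-55 of the clock comparator in bytes 1-7,
	 * hence the shift by 8 (store-status layout) */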
2718 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
2719 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2720 			      &clkcomp, 8);
2721 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
2722 			      &vcpu->run->s.regs.acrs, 64);
2723 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
2724 			      &vcpu->arch.sie_block->gcr, 128);
2725 	return rc ? -EFAULT : 0;
2726 }
2727 
2728 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2729 {
2730 	/*
2731 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2732 	 * copying in vcpu load/put. Let's update our copies before we save
2733 	 * them into the save area.
2734 	 */
2735 	save_fpu_regs();
2736 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2737 	save_access_regs(vcpu->run->s.regs.acrs);
2738 
2739 	return kvm_s390_store_status_unloaded(vcpu, addr);
2740 }
2741 
2742 /*
2743  * store additional status at address
2744  */
2745 int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
2746 					unsigned long gpa)
2747 {
2748 	/* Only bits 0-53 are used for address formation */
2749 	if (!(gpa & ~0x3ff))
2750 		return 0;
2751 
2752 	return write_guest_abs(vcpu, gpa & ~0x3ff,
2753 			       (void *)&vcpu->run->s.regs.vrs, 512);
2754 }
2755 
2756 int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
2757 {
2758 	if (!test_kvm_facility(vcpu->kvm, 129))
2759 		return 0;
2760 
2761 	/*
2762 	 * The guest VXRS are in the host VXRS due to the lazy
2763 	 * copying in vcpu load/put. We can simply call save_fpu_regs()
2764 	 * to save the current register state because we are in the
2765 	 * middle of a load/put cycle.
2766 	 *
2767 	 * Let's update our copies before we save them into the save area.
2768 	 */
2769 	save_fpu_regs();
2770 
2771 	return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
2772 }
2773 
2774 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2775 {
2776 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2777 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2778 }
2779 
2780 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2781 {
2782 	unsigned int i;
2783 	struct kvm_vcpu *vcpu;
2784 
2785 	kvm_for_each_vcpu(i, vcpu, kvm) {
2786 		__disable_ibs_on_vcpu(vcpu);
2787 	}
2788 }
2789 
2790 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2791 {
2792 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2793 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2794 }
2795 
2796 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2797 {
2798 	int i, online_vcpus, started_vcpus = 0;
2799 
2800 	if (!is_vcpu_stopped(vcpu))
2801 		return;
2802 
2803 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2804 	/* Only one cpu at a time may enter/leave the STOPPED state. */
2805 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
2806 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2807 
2808 	for (i = 0; i < online_vcpus; i++) {
2809 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2810 			started_vcpus++;
2811 	}
2812 
2813 	if (started_vcpus == 0) {
2814 		/* we're the only active VCPU -> speed it up */
2815 		__enable_ibs_on_vcpu(vcpu);
2816 	} else if (started_vcpus == 1) {
2817 		/*
2818 		 * As we are starting a second VCPU, we have to disable
2819 		 * the IBS facility on all VCPUs to remove potentially
2820 	 * outstanding ENABLE requests.
2821 		 */
2822 		__disable_ibs_on_all_vcpus(vcpu->kvm);
2823 	}
2824 
2825 	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2826 	/*
2827 	 * Another VCPU might have used IBS while we were offline.
2828 	 * Let's play safe and flush the VCPU at startup.
2829 	 */
2830 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2831 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2832 	return;
2833 }
2834 
2835 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2836 {
2837 	int i, online_vcpus, started_vcpus = 0;
2838 	struct kvm_vcpu *started_vcpu = NULL;
2839 
2840 	if (is_vcpu_stopped(vcpu))
2841 		return;
2842 
2843 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2844 	/* Only one cpu at a time may enter/leave the STOPPED state. */
2845 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
2846 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2847 
2848 	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
2849 	kvm_s390_clear_stop_irq(vcpu);
2850 
2851 	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2852 	__disable_ibs_on_vcpu(vcpu);
2853 
2854 	for (i = 0; i < online_vcpus; i++) {
2855 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2856 			started_vcpus++;
2857 			started_vcpu = vcpu->kvm->vcpus[i];
2858 		}
2859 	}
2860 
2861 	if (started_vcpus == 1) {
2862 		/*
2863 		 * As we only have one VCPU left, we want to enable the
2864 		 * IBS facility for that VCPU to speed it up.
2865 		 */
2866 		__enable_ibs_on_vcpu(started_vcpu);
2867 	}
2868 
2869 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2870 	return;
2871 }
2872 
2873 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2874 				     struct kvm_enable_cap *cap)
2875 {
2876 	int r;
2877 
2878 	if (cap->flags)
2879 		return -EINVAL;
2880 
2881 	switch (cap->cap) {
2882 	case KVM_CAP_S390_CSS_SUPPORT:
2883 		if (!vcpu->kvm->arch.css_support) {
2884 			vcpu->kvm->arch.css_support = 1;
2885 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
2886 			trace_kvm_s390_enable_css(vcpu->kvm);
2887 		}
2888 		r = 0;
2889 		break;
2890 	default:
2891 		r = -EINVAL;
2892 		break;
2893 	}
2894 	return r;
2895 }
2896 
2897 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
2898 				  struct kvm_s390_mem_op *mop)
2899 {
2900 	void __user *uaddr = (void __user *)mop->buf;
2901 	void *tmpbuf = NULL;
2902 	int r, srcu_idx;
2903 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
2904 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
2905 
2906 	if (mop->flags & ~supported_flags)
2907 		return -EINVAL;
2908 
2909 	if (mop->size > MEM_OP_MAX_SIZE)
2910 		return -E2BIG;
2911 
2912 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2913 		tmpbuf = vmalloc(mop->size);
2914 		if (!tmpbuf)
2915 			return -ENOMEM;
2916 	}
2917 
2918 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2919 
2920 	switch (mop->op) {
2921 	case KVM_S390_MEMOP_LOGICAL_READ:
2922 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2923 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
2924 					    mop->size, GACC_FETCH);
2925 			break;
2926 		}
2927 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2928 		if (r == 0) {
2929 			if (copy_to_user(uaddr, tmpbuf, mop->size))
2930 				r = -EFAULT;
2931 		}
2932 		break;
2933 	case KVM_S390_MEMOP_LOGICAL_WRITE:
2934 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2935 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
2936 					    mop->size, GACC_STORE);
2937 			break;
2938 		}
2939 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2940 			r = -EFAULT;
2941 			break;
2942 		}
2943 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2944 		break;
2945 	default:
2946 		r = -EINVAL;
2947 	}
2948 
2949 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
2950 
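	/* a positive return value from the guest access functions is a
	 * program interruption code; inject it if userspace asked for that */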
2951 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
2952 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
2953 
2954 	vfree(tmpbuf);
2955 	return r;
2956 }
2957 
2958 long kvm_arch_vcpu_ioctl(struct file *filp,
2959 			 unsigned int ioctl, unsigned long arg)
2960 {
2961 	struct kvm_vcpu *vcpu = filp->private_data;
2962 	void __user *argp = (void __user *)arg;
2963 	int idx;
2964 	long r;
2965 
2966 	switch (ioctl) {
2967 	case KVM_S390_IRQ: {
2968 		struct kvm_s390_irq s390irq;
2969 
2970 		r = -EFAULT;
2971 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
2972 			break;
2973 		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2974 		break;
2975 	}
2976 	case KVM_S390_INTERRUPT: {
2977 		struct kvm_s390_interrupt s390int;
2978 		struct kvm_s390_irq s390irq;
2979 
2980 		r = -EFAULT;
2981 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
2982 			break;
2983 		if (s390int_to_s390irq(&s390int, &s390irq))
2984 			return -EINVAL;
2985 		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2986 		break;
2987 	}
2988 	case KVM_S390_STORE_STATUS:
2989 		idx = srcu_read_lock(&vcpu->kvm->srcu);
2990 		r = kvm_s390_vcpu_store_status(vcpu, arg);
2991 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
2992 		break;
2993 	case KVM_S390_SET_INITIAL_PSW: {
2994 		psw_t psw;
2995 
2996 		r = -EFAULT;
2997 		if (copy_from_user(&psw, argp, sizeof(psw)))
2998 			break;
2999 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3000 		break;
3001 	}
3002 	case KVM_S390_INITIAL_RESET:
3003 		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3004 		break;
3005 	case KVM_SET_ONE_REG:
3006 	case KVM_GET_ONE_REG: {
3007 		struct kvm_one_reg reg;
3008 		r = -EFAULT;
3009 		if (copy_from_user(&reg, argp, sizeof(reg)))
3010 			break;
3011 		if (ioctl == KVM_SET_ONE_REG)
3012 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3013 		else
3014 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3015 		break;
3016 	}
3017 #ifdef CONFIG_KVM_S390_UCONTROL
3018 	case KVM_S390_UCAS_MAP: {
3019 		struct kvm_s390_ucas_mapping ucasmap;
3020 
3021 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3022 			r = -EFAULT;
3023 			break;
3024 		}
3025 
3026 		if (!kvm_is_ucontrol(vcpu->kvm)) {
3027 			r = -EINVAL;
3028 			break;
3029 		}
3030 
3031 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3032 				     ucasmap.vcpu_addr, ucasmap.length);
3033 		break;
3034 	}
3035 	case KVM_S390_UCAS_UNMAP: {
3036 		struct kvm_s390_ucas_mapping ucasmap;
3037 
3038 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3039 			r = -EFAULT;
3040 			break;
3041 		}
3042 
3043 		if (!kvm_is_ucontrol(vcpu->kvm)) {
3044 			r = -EINVAL;
3045 			break;
3046 		}
3047 
3048 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3049 			ucasmap.length);
3050 		break;
3051 	}
3052 #endif
3053 	case KVM_S390_VCPU_FAULT: {
3054 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
3055 		break;
3056 	}
3057 	case KVM_ENABLE_CAP:
3058 	{
3059 		struct kvm_enable_cap cap;
3060 		r = -EFAULT;
3061 		if (copy_from_user(&cap, argp, sizeof(cap)))
3062 			break;
3063 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3064 		break;
3065 	}
3066 	case KVM_S390_MEM_OP: {
3067 		struct kvm_s390_mem_op mem_op;
3068 
3069 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3070 			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3071 		else
3072 			r = -EFAULT;
3073 		break;
3074 	}
3075 	case KVM_S390_SET_IRQ_STATE: {
3076 		struct kvm_s390_irq_state irq_state;
3077 
3078 		r = -EFAULT;
3079 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3080 			break;
3081 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3082 		    irq_state.len == 0 ||
3083 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3084 			r = -EINVAL;
3085 			break;
3086 		}
3087 		r = kvm_s390_set_irq_state(vcpu,
3088 					   (void __user *) irq_state.buf,
3089 					   irq_state.len);
3090 		break;
3091 	}
3092 	case KVM_S390_GET_IRQ_STATE: {
3093 		struct kvm_s390_irq_state irq_state;
3094 
3095 		r = -EFAULT;
3096 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3097 			break;
3098 		if (irq_state.len == 0) {
3099 			r = -EINVAL;
3100 			break;
3101 		}
3102 		r = kvm_s390_get_irq_state(vcpu,
3103 					   (__u8 __user *)  irq_state.buf,
3104 					   irq_state.len);
3105 		break;
3106 	}
3107 	default:
3108 		r = -ENOTTY;
3109 	}
3110 	return r;
3111 }
3112 
3113 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3114 {
3115 #ifdef CONFIG_KVM_S390_UCONTROL
3116 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3117 		 && (kvm_is_ucontrol(vcpu->kvm))) {
3118 		vmf->page = virt_to_page(vcpu->arch.sie_block);
3119 		get_page(vmf->page);
3120 		return 0;
3121 	}
3122 #endif
3123 	return VM_FAULT_SIGBUS;
3124 }
3125 
3126 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3127 			    unsigned long npages)
3128 {
3129 	return 0;
3130 }
3131 
3132 /* Section: memory related */
3133 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3134 				   struct kvm_memory_slot *memslot,
3135 				   const struct kvm_userspace_memory_region *mem,
3136 				   enum kvm_mr_change change)
3137 {
3138 	/* A few sanity checks. We only allow memory slots that start and end
3139 	   on a segment boundary (1 MB). The memory in userland may be
3140 	   fragmented into various different vmas. It is okay to mmap()
3141 	   and munmap() stuff in this slot after doing this call at any time */
3142 
3143 	if (mem->userspace_addr & 0xffffful)
3144 		return -EINVAL;
3145 
3146 	if (mem->memory_size & 0xffffful)
3147 		return -EINVAL;
3148 
3149 	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3150 		return -EINVAL;
3151 
3152 	return 0;
3153 }
3154 
3155 void kvm_arch_commit_memory_region(struct kvm *kvm,
3156 				const struct kvm_userspace_memory_region *mem,
3157 				const struct kvm_memory_slot *old,
3158 				const struct kvm_memory_slot *new,
3159 				enum kvm_mr_change change)
3160 {
3161 	int rc;
3162 
3163 	/* If the basics of the memslot do not change, we do not want
3164 	 * to update the gmap. Every update causes several unnecessary
3165 	 * segment translation exceptions. This is usually handled just
3166 	 * fine by the normal fault handler + gmap, but it will also
3167 	 * cause faults on the prefix page of running guest CPUs.
3168 	 */
3169 	if (old->userspace_addr == mem->userspace_addr &&
3170 	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3171 	    old->npages * PAGE_SIZE == mem->memory_size)
3172 		return;
3173 
3174 	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3175 		mem->guest_phys_addr, mem->memory_size);
3176 	if (rc)
3177 		pr_warn("failed to commit memory region\n");
3178 	return;
3179 }
3180 
3181 static inline unsigned long nonhyp_mask(int i)
3182 {
3183 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3184 
3185 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3186 }
3187 
3188 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3189 {
3190 	vcpu->valid_wakeup = false;
3191 }
3192 
3193 static int __init kvm_s390_init(void)
3194 {
3195 	int i;
3196 
3197 	if (!sclp.has_sief2) {
3198 		pr_info("SIE not available\n");
3199 		return -ENODEV;
3200 	}
3201 
3202 	for (i = 0; i < 16; i++)
3203 		kvm_s390_fac_list_mask[i] |=
3204 			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3205 
3206 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3207 }
3208 
3209 static void __exit kvm_s390_exit(void)
3210 {
3211 	kvm_exit();
3212 }
3213 
3214 module_init(kvm_s390_init);
3215 module_exit(kvm_s390_exit);
3216 
3217 /*
3218  * Enable autoloading of the kvm module.
3219  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3220  * since x86 takes a different approach.
3221  */
3222 #include <linux/miscdevice.h>
3223 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3224 MODULE_ALIAS("devname:kvm");
3225