xref: /openbmc/linux/arch/s390/kvm/kvm-s390.c (revision a8da474e)
1 /*
2  * hosting zSeries kernel virtual machines
3  *
4  * Copyright IBM Corp. 2008, 2009
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License (version 2 only)
8  * as published by the Free Software Foundation.
9  *
10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
11  *               Christian Borntraeger <borntraeger@de.ibm.com>
12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
14  *               Jason J. Herne <jjherne@us.ibm.com>
15  */
16 
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/module.h>
25 #include <linux/random.h>
26 #include <linux/slab.h>
27 #include <linux/timer.h>
28 #include <linux/vmalloc.h>
29 #include <asm/asm-offsets.h>
30 #include <asm/lowcore.h>
31 #include <asm/etr.h>
32 #include <asm/pgtable.h>
33 #include <asm/nmi.h>
34 #include <asm/switch_to.h>
35 #include <asm/isc.h>
36 #include <asm/sclp.h>
37 #include "kvm-s390.h"
38 #include "gaccess.h"
39 
40 #define KMSG_COMPONENT "kvm-s390"
41 #undef pr_fmt
42 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
43 
44 #define CREATE_TRACE_POINTS
45 #include "trace.h"
46 #include "trace-s390.h"
47 
48 #define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
49 #define LOCAL_IRQS 32
50 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
51 			   (KVM_MAX_VCPUS + LOCAL_IRQS))
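/*
 * Worst-case buffer for the KVM_S390_{GET,SET}_IRQ_STATE vcpu ioctls: the
 * sizing above allows for up to one pending interrupt per possible sending
 * VCPU (e.g. SIGP emergency signals) on top of LOCAL_IRQS other local
 * interrupt types.
 */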
52 
53 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
54 
55 struct kvm_stats_debugfs_item debugfs_entries[] = {
56 	{ "userspace_handled", VCPU_STAT(exit_userspace) },
57 	{ "exit_null", VCPU_STAT(exit_null) },
58 	{ "exit_validity", VCPU_STAT(exit_validity) },
59 	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
60 	{ "exit_external_request", VCPU_STAT(exit_external_request) },
61 	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
62 	{ "exit_instruction", VCPU_STAT(exit_instruction) },
63 	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
64 	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
65 	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
66 	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
67 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
68 	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
69 	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
70 	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
71 	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
72 	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
73 	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
74 	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
75 	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
76 	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
77 	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
78 	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
79 	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
80 	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
81 	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
82 	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
83 	{ "instruction_spx", VCPU_STAT(instruction_spx) },
84 	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
85 	{ "instruction_stap", VCPU_STAT(instruction_stap) },
86 	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
87 	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
88 	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
89 	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
90 	{ "instruction_essa", VCPU_STAT(instruction_essa) },
91 	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
92 	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
93 	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
94 	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
95 	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
96 	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
97 	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
98 	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
99 	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
100 	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
101 	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
102 	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
103 	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
104 	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
105 	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
106 	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
107 	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
108 	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
109 	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
110 	{ "diagnose_10", VCPU_STAT(diagnose_10) },
111 	{ "diagnose_44", VCPU_STAT(diagnose_44) },
112 	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
113 	{ "diagnose_258", VCPU_STAT(diagnose_258) },
114 	{ "diagnose_308", VCPU_STAT(diagnose_308) },
115 	{ "diagnose_500", VCPU_STAT(diagnose_500) },
116 	{ NULL }
117 };
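/*
 * The entries above are consumed by common KVM code; with debugfs mounted
 * they typically appear as one counter file each under the kvm debugfs
 * directory (e.g. /sys/kernel/debug/kvm/).
 */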
118 
119 /* upper facilities limit for kvm */
120 unsigned long kvm_s390_fac_list_mask[] = {
121 	0xffe6fffbfcfdfc40UL,
122 	0x005e800000000000UL,
123 };
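/*
 * STFLE numbers facility bits from the leftmost bit, i.e. facility n is bit
 * (63 - (n % 64)) of word (n / 64) above. As a worked example, facility 76
 * (message-security-assist extension 3, needed by the crypto code below) maps
 * to 1UL << (63 - 12) = 0x0008000000000000UL in the second word, which is
 * indeed part of the mask.
 */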
124 
125 unsigned long kvm_s390_fac_list_mask_size(void)
126 {
127 	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
128 	return ARRAY_SIZE(kvm_s390_fac_list_mask);
129 }
130 
131 static struct gmap_notifier gmap_notifier;
132 debug_info_t *kvm_s390_dbf;
133 
134 /* Section: not file related */
135 int kvm_arch_hardware_enable(void)
136 {
137 	/* every s390 is virtualization enabled ;-) */
138 	return 0;
139 }
140 
141 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
142 
143 /*
144  * This callback is executed during stop_machine(). All CPUs are therefore
145  * temporarily stopped. In order not to change guest behavior, we have to
146  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
147  * so a CPU won't be stopped while calculating with the epoch.
148  */
149 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
150 			  void *v)
151 {
152 	struct kvm *kvm;
153 	struct kvm_vcpu *vcpu;
154 	int i;
155 	unsigned long long *delta = v;
156 
157 	list_for_each_entry(kvm, &vm_list, vm_list) {
158 		kvm->arch.epoch -= *delta;
159 		kvm_for_each_vcpu(i, vcpu, kvm) {
160 			vcpu->arch.sie_block->epoch -= *delta;
161 		}
162 	}
163 	return NOTIFY_OK;
164 }
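/*
 * A quick arithmetic check of the above: the guest-visible TOD is
 * host TOD + epoch (see kvm_s390_set_tod_clock()). If the host TOD is stepped
 * forward by *delta, subtracting the same *delta from every epoch keeps
 * "host TOD + epoch", and thus the guest clock, unchanged.
 */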
165 
166 static struct notifier_block kvm_clock_notifier = {
167 	.notifier_call = kvm_clock_sync,
168 };
169 
170 int kvm_arch_hardware_setup(void)
171 {
172 	gmap_notifier.notifier_call = kvm_gmap_notifier;
173 	gmap_register_ipte_notifier(&gmap_notifier);
174 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
175 				       &kvm_clock_notifier);
176 	return 0;
177 }
178 
179 void kvm_arch_hardware_unsetup(void)
180 {
181 	gmap_unregister_ipte_notifier(&gmap_notifier);
182 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
183 					 &kvm_clock_notifier);
184 }
185 
186 int kvm_arch_init(void *opaque)
187 {
188 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
189 	if (!kvm_s390_dbf)
190 		return -ENOMEM;
191 
192 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
193 		debug_unregister(kvm_s390_dbf);
194 		return -ENOMEM;
195 	}
196 
197 	/* Register floating interrupt controller interface. */
198 	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
199 }
200 
201 void kvm_arch_exit(void)
202 {
203 	debug_unregister(kvm_s390_dbf);
204 }
205 
206 /* Section: device related */
207 long kvm_arch_dev_ioctl(struct file *filp,
208 			unsigned int ioctl, unsigned long arg)
209 {
210 	if (ioctl == KVM_S390_ENABLE_SIE)
211 		return s390_enable_sie();
212 	return -EINVAL;
213 }
214 
215 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
216 {
217 	int r;
218 
219 	switch (ext) {
220 	case KVM_CAP_S390_PSW:
221 	case KVM_CAP_S390_GMAP:
222 	case KVM_CAP_SYNC_MMU:
223 #ifdef CONFIG_KVM_S390_UCONTROL
224 	case KVM_CAP_S390_UCONTROL:
225 #endif
226 	case KVM_CAP_ASYNC_PF:
227 	case KVM_CAP_SYNC_REGS:
228 	case KVM_CAP_ONE_REG:
229 	case KVM_CAP_ENABLE_CAP:
230 	case KVM_CAP_S390_CSS_SUPPORT:
231 	case KVM_CAP_IOEVENTFD:
232 	case KVM_CAP_DEVICE_CTRL:
233 	case KVM_CAP_ENABLE_CAP_VM:
234 	case KVM_CAP_S390_IRQCHIP:
235 	case KVM_CAP_VM_ATTRIBUTES:
236 	case KVM_CAP_MP_STATE:
237 	case KVM_CAP_S390_INJECT_IRQ:
238 	case KVM_CAP_S390_USER_SIGP:
239 	case KVM_CAP_S390_USER_STSI:
240 	case KVM_CAP_S390_SKEYS:
241 	case KVM_CAP_S390_IRQ_STATE:
242 		r = 1;
243 		break;
244 	case KVM_CAP_S390_MEM_OP:
245 		r = MEM_OP_MAX_SIZE;
246 		break;
247 	case KVM_CAP_NR_VCPUS:
248 	case KVM_CAP_MAX_VCPUS:
249 		r = KVM_MAX_VCPUS;
250 		break;
251 	case KVM_CAP_NR_MEMSLOTS:
252 		r = KVM_USER_MEM_SLOTS;
253 		break;
254 	case KVM_CAP_S390_COW:
255 		r = MACHINE_HAS_ESOP;
256 		break;
257 	case KVM_CAP_S390_VECTOR_REGISTERS:
258 		r = MACHINE_HAS_VX;
259 		break;
260 	default:
261 		r = 0;
262 	}
263 	return r;
264 }
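/*
 * Userspace view (a minimal sketch; "vm_fd" is assumed to come from a
 * KVM_CREATE_VM ioctl on /dev/kvm): the values returned above are queried
 * with KVM_CHECK_EXTENSION, e.g.
 *
 *	int max_memop = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *
 * which reports MEM_OP_MAX_SIZE on this implementation.
 */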
265 
266 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
267 					struct kvm_memory_slot *memslot)
268 {
269 	gfn_t cur_gfn, last_gfn;
270 	unsigned long address;
271 	struct gmap *gmap = kvm->arch.gmap;
272 
273 	down_read(&gmap->mm->mmap_sem);
274 	/* Loop over all guest pages */
275 	last_gfn = memslot->base_gfn + memslot->npages;
276 	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
277 		address = gfn_to_hva_memslot(memslot, cur_gfn);
278 
279 		if (gmap_test_and_clear_dirty(address, gmap))
280 			mark_page_dirty(kvm, cur_gfn);
281 	}
282 	up_read(&gmap->mm->mmap_sem);
283 }
284 
285 /* Section: vm related */
286 /*
287  * Get (and clear) the dirty memory log for a memory slot.
288  */
289 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
290 			       struct kvm_dirty_log *log)
291 {
292 	int r;
293 	unsigned long n;
294 	struct kvm_memslots *slots;
295 	struct kvm_memory_slot *memslot;
296 	int is_dirty = 0;
297 
298 	mutex_lock(&kvm->slots_lock);
299 
300 	r = -EINVAL;
301 	if (log->slot >= KVM_USER_MEM_SLOTS)
302 		goto out;
303 
304 	slots = kvm_memslots(kvm);
305 	memslot = id_to_memslot(slots, log->slot);
306 	r = -ENOENT;
307 	if (!memslot->dirty_bitmap)
308 		goto out;
309 
310 	kvm_s390_sync_dirty_log(kvm, memslot);
311 	r = kvm_get_dirty_log(kvm, log, &is_dirty);
312 	if (r)
313 		goto out;
314 
315 	/* Clear the dirty log */
316 	if (is_dirty) {
317 		n = kvm_dirty_bitmap_bytes(memslot);
318 		memset(memslot->dirty_bitmap, 0, n);
319 	}
320 	r = 0;
321 out:
322 	mutex_unlock(&kvm->slots_lock);
323 	return r;
324 }
325 
326 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
327 {
328 	int r;
329 
330 	if (cap->flags)
331 		return -EINVAL;
332 
333 	switch (cap->cap) {
334 	case KVM_CAP_S390_IRQCHIP:
335 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
336 		kvm->arch.use_irqchip = 1;
337 		r = 0;
338 		break;
339 	case KVM_CAP_S390_USER_SIGP:
340 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
341 		kvm->arch.user_sigp = 1;
342 		r = 0;
343 		break;
344 	case KVM_CAP_S390_VECTOR_REGISTERS:
345 		if (MACHINE_HAS_VX) {
346 			set_kvm_facility(kvm->arch.model.fac->mask, 129);
347 			set_kvm_facility(kvm->arch.model.fac->list, 129);
348 			r = 0;
349 		} else
350 			r = -EINVAL;
351 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
352 			 r ? "(not available)" : "(success)");
353 		break;
354 	case KVM_CAP_S390_USER_STSI:
355 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
356 		kvm->arch.user_stsi = 1;
357 		r = 0;
358 		break;
359 	default:
360 		r = -EINVAL;
361 		break;
362 	}
363 	return r;
364 }
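/*
 * Userspace sketch (vm_fd again assumed to come from KVM_CREATE_VM): the VM
 * capabilities handled above are switched on with KVM_ENABLE_CAP, e.g.
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *
 *	if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap))
 *		perror("KVM_ENABLE_CAP");
 *
 * after which certain SIGP orders are forwarded to userspace instead of being
 * handled in the kernel.
 */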
365 
366 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
367 {
368 	int ret;
369 
370 	switch (attr->attr) {
371 	case KVM_S390_VM_MEM_LIMIT_SIZE:
372 		ret = 0;
373 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
374 			 kvm->arch.gmap->asce_end);
375 		if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr))
376 			ret = -EFAULT;
377 		break;
378 	default:
379 		ret = -ENXIO;
380 		break;
381 	}
382 	return ret;
383 }
384 
385 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
386 {
387 	int ret;
388 	unsigned int idx;
389 	switch (attr->attr) {
390 	case KVM_S390_VM_MEM_ENABLE_CMMA:
391 		/* enable CMMA only for z10 and later (EDAT_1) */
392 		ret = -EINVAL;
393 		if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)
394 			break;
395 
396 		ret = -EBUSY;
397 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
398 		mutex_lock(&kvm->lock);
399 		if (atomic_read(&kvm->online_vcpus) == 0) {
400 			kvm->arch.use_cmma = 1;
401 			ret = 0;
402 		}
403 		mutex_unlock(&kvm->lock);
404 		break;
405 	case KVM_S390_VM_MEM_CLR_CMMA:
406 		ret = -EINVAL;
407 		if (!kvm->arch.use_cmma)
408 			break;
409 
410 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
411 		mutex_lock(&kvm->lock);
412 		idx = srcu_read_lock(&kvm->srcu);
413 		s390_reset_cmma(kvm->arch.gmap->mm);
414 		srcu_read_unlock(&kvm->srcu, idx);
415 		mutex_unlock(&kvm->lock);
416 		ret = 0;
417 		break;
418 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
419 		unsigned long new_limit;
420 
421 		if (kvm_is_ucontrol(kvm))
422 			return -EINVAL;
423 
424 		if (get_user(new_limit, (u64 __user *)attr->addr))
425 			return -EFAULT;
426 
427 		if (new_limit > kvm->arch.gmap->asce_end)
428 			return -E2BIG;
429 
430 		ret = -EBUSY;
431 		mutex_lock(&kvm->lock);
432 		if (atomic_read(&kvm->online_vcpus) == 0) {
433 			/* gmap_alloc will round the limit up */
434 			struct gmap *new = gmap_alloc(current->mm, new_limit);
435 
436 			if (!new) {
437 				ret = -ENOMEM;
438 			} else {
439 				gmap_free(kvm->arch.gmap);
440 				new->private = kvm;
441 				kvm->arch.gmap = new;
442 				ret = 0;
443 			}
444 		}
445 		mutex_unlock(&kvm->lock);
446 		VM_EVENT(kvm, 3, "SET: max guest memory: %lu bytes", new_limit);
447 		break;
448 	}
449 	default:
450 		ret = -ENXIO;
451 		break;
452 	}
453 	return ret;
454 }
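/*
 * Userspace sketch (vm_fd from KVM_CREATE_VM; must run before the first VCPU
 * is created, as enforced above): shrinking the guest memory limit, e.g. to
 * 2 GB, goes through the KVM_S390_VM_MEM_CTRL attribute group:
 *
 *	__u64 limit = 1ULL << 31;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MEM_CTRL,
 *		.attr  = KVM_S390_VM_MEM_LIMIT_SIZE,
 *		.addr  = (__u64)&limit,
 *	};
 *
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */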
455 
456 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
457 
458 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
459 {
460 	struct kvm_vcpu *vcpu;
461 	int i;
462 
463 	if (!test_kvm_facility(kvm, 76))
464 		return -EINVAL;
465 
466 	mutex_lock(&kvm->lock);
467 	switch (attr->attr) {
468 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
469 		get_random_bytes(
470 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
471 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
472 		kvm->arch.crypto.aes_kw = 1;
473 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
474 		break;
475 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
476 		get_random_bytes(
477 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
478 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
479 		kvm->arch.crypto.dea_kw = 1;
480 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
481 		break;
482 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
483 		kvm->arch.crypto.aes_kw = 0;
484 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
485 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
486 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
487 		break;
488 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
489 		kvm->arch.crypto.dea_kw = 0;
490 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
491 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
492 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
493 		break;
494 	default:
495 		mutex_unlock(&kvm->lock);
496 		return -ENXIO;
497 	}
498 
499 	kvm_for_each_vcpu(i, vcpu, kvm) {
500 		kvm_s390_vcpu_crypto_setup(vcpu);
501 		exit_sie(vcpu);
502 	}
503 	mutex_unlock(&kvm->lock);
504 	return 0;
505 }
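/*
 * Userspace sketch: the key-wrapping switches above carry no payload, only
 * the group/attr pair matters (attr->addr is never read here), e.g.
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CRYPTO,
 *		.attr  = KVM_S390_VM_CRYPTO_ENABLE_AES_KW,
 *	};
 *
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 *
 * Each call replaces (or, for the DISABLE attributes, clears) the relevant
 * wrapping key mask and kicks all VCPUs out of SIE so the updated CRYCB
 * settings take effect.
 */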
506 
507 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
508 {
509 	u8 gtod_high;
510 
511 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
512 					   sizeof(gtod_high)))
513 		return -EFAULT;
514 
515 	if (gtod_high != 0)
516 		return -EINVAL;
517 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
518 
519 	return 0;
520 }
521 
522 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
523 {
524 	u64 gtod;
525 
526 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
527 		return -EFAULT;
528 
529 	kvm_s390_set_tod_clock(kvm, gtod);
530 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
531 	return 0;
532 }
533 
534 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
535 {
536 	int ret;
537 
538 	if (attr->flags)
539 		return -EINVAL;
540 
541 	switch (attr->attr) {
542 	case KVM_S390_VM_TOD_HIGH:
543 		ret = kvm_s390_set_tod_high(kvm, attr);
544 		break;
545 	case KVM_S390_VM_TOD_LOW:
546 		ret = kvm_s390_set_tod_low(kvm, attr);
547 		break;
548 	default:
549 		ret = -ENXIO;
550 		break;
551 	}
552 	return ret;
553 }
554 
555 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
556 {
557 	u8 gtod_high = 0;
558 
559 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
560 					 sizeof(gtod_high)))
561 		return -EFAULT;
562 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
563 
564 	return 0;
565 }
566 
567 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
568 {
569 	u64 gtod;
570 
571 	gtod = kvm_s390_get_tod_clock_fast(kvm);
572 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
573 		return -EFAULT;
574 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
575 
576 	return 0;
577 }
578 
579 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
580 {
581 	int ret;
582 
583 	if (attr->flags)
584 		return -EINVAL;
585 
586 	switch (attr->attr) {
587 	case KVM_S390_VM_TOD_HIGH:
588 		ret = kvm_s390_get_tod_high(kvm, attr);
589 		break;
590 	case KVM_S390_VM_TOD_LOW:
591 		ret = kvm_s390_get_tod_low(kvm, attr);
592 		break;
593 	default:
594 		ret = -ENXIO;
595 		break;
596 	}
597 	return ret;
598 }
599 
600 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
601 {
602 	struct kvm_s390_vm_cpu_processor *proc;
603 	int ret = 0;
604 
605 	mutex_lock(&kvm->lock);
606 	if (atomic_read(&kvm->online_vcpus)) {
607 		ret = -EBUSY;
608 		goto out;
609 	}
610 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
611 	if (!proc) {
612 		ret = -ENOMEM;
613 		goto out;
614 	}
615 	if (!copy_from_user(proc, (void __user *)attr->addr,
616 			    sizeof(*proc))) {
617 		memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
618 		       sizeof(struct cpuid));
619 		kvm->arch.model.ibc = proc->ibc;
620 		memcpy(kvm->arch.model.fac->list, proc->fac_list,
621 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
622 	} else
623 		ret = -EFAULT;
624 	kfree(proc);
625 out:
626 	mutex_unlock(&kvm->lock);
627 	return ret;
628 }
629 
630 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
631 {
632 	int ret = -ENXIO;
633 
634 	switch (attr->attr) {
635 	case KVM_S390_VM_CPU_PROCESSOR:
636 		ret = kvm_s390_set_processor(kvm, attr);
637 		break;
638 	}
639 	return ret;
640 }
641 
642 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
643 {
644 	struct kvm_s390_vm_cpu_processor *proc;
645 	int ret = 0;
646 
647 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
648 	if (!proc) {
649 		ret = -ENOMEM;
650 		goto out;
651 	}
652 	memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
653 	proc->ibc = kvm->arch.model.ibc;
654 	memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE);
655 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
656 		ret = -EFAULT;
657 	kfree(proc);
658 out:
659 	return ret;
660 }
661 
662 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
663 {
664 	struct kvm_s390_vm_cpu_machine *mach;
665 	int ret = 0;
666 
667 	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
668 	if (!mach) {
669 		ret = -ENOMEM;
670 		goto out;
671 	}
672 	get_cpu_id((struct cpuid *) &mach->cpuid);
673 	mach->ibc = sclp.ibc;
674 	memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
675 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
676 	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
677 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
678 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
679 		ret = -EFAULT;
680 	kfree(mach);
681 out:
682 	return ret;
683 }
684 
685 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
686 {
687 	int ret = -ENXIO;
688 
689 	switch (attr->attr) {
690 	case KVM_S390_VM_CPU_PROCESSOR:
691 		ret = kvm_s390_get_processor(kvm, attr);
692 		break;
693 	case KVM_S390_VM_CPU_MACHINE:
694 		ret = kvm_s390_get_machine(kvm, attr);
695 		break;
696 	}
697 	return ret;
698 }
699 
700 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
701 {
702 	int ret;
703 
704 	switch (attr->group) {
705 	case KVM_S390_VM_MEM_CTRL:
706 		ret = kvm_s390_set_mem_control(kvm, attr);
707 		break;
708 	case KVM_S390_VM_TOD:
709 		ret = kvm_s390_set_tod(kvm, attr);
710 		break;
711 	case KVM_S390_VM_CPU_MODEL:
712 		ret = kvm_s390_set_cpu_model(kvm, attr);
713 		break;
714 	case KVM_S390_VM_CRYPTO:
715 		ret = kvm_s390_vm_set_crypto(kvm, attr);
716 		break;
717 	default:
718 		ret = -ENXIO;
719 		break;
720 	}
721 
722 	return ret;
723 }
724 
725 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
726 {
727 	int ret;
728 
729 	switch (attr->group) {
730 	case KVM_S390_VM_MEM_CTRL:
731 		ret = kvm_s390_get_mem_control(kvm, attr);
732 		break;
733 	case KVM_S390_VM_TOD:
734 		ret = kvm_s390_get_tod(kvm, attr);
735 		break;
736 	case KVM_S390_VM_CPU_MODEL:
737 		ret = kvm_s390_get_cpu_model(kvm, attr);
738 		break;
739 	default:
740 		ret = -ENXIO;
741 		break;
742 	}
743 
744 	return ret;
745 }
746 
747 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
748 {
749 	int ret;
750 
751 	switch (attr->group) {
752 	case KVM_S390_VM_MEM_CTRL:
753 		switch (attr->attr) {
754 		case KVM_S390_VM_MEM_ENABLE_CMMA:
755 		case KVM_S390_VM_MEM_CLR_CMMA:
756 		case KVM_S390_VM_MEM_LIMIT_SIZE:
757 			ret = 0;
758 			break;
759 		default:
760 			ret = -ENXIO;
761 			break;
762 		}
763 		break;
764 	case KVM_S390_VM_TOD:
765 		switch (attr->attr) {
766 		case KVM_S390_VM_TOD_LOW:
767 		case KVM_S390_VM_TOD_HIGH:
768 			ret = 0;
769 			break;
770 		default:
771 			ret = -ENXIO;
772 			break;
773 		}
774 		break;
775 	case KVM_S390_VM_CPU_MODEL:
776 		switch (attr->attr) {
777 		case KVM_S390_VM_CPU_PROCESSOR:
778 		case KVM_S390_VM_CPU_MACHINE:
779 			ret = 0;
780 			break;
781 		default:
782 			ret = -ENXIO;
783 			break;
784 		}
785 		break;
786 	case KVM_S390_VM_CRYPTO:
787 		switch (attr->attr) {
788 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
789 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
790 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
791 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
792 			ret = 0;
793 			break;
794 		default:
795 			ret = -ENXIO;
796 			break;
797 		}
798 		break;
799 	default:
800 		ret = -ENXIO;
801 		break;
802 	}
803 
804 	return ret;
805 }
806 
807 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
808 {
809 	uint8_t *keys;
810 	uint64_t hva;
811 	unsigned long curkey;
812 	int i, r = 0;
813 
814 	if (args->flags != 0)
815 		return -EINVAL;
816 
817 	/* Is this guest using storage keys? */
818 	if (!mm_use_skey(current->mm))
819 		return KVM_S390_GET_SKEYS_NONE;
820 
821 	/* Enforce sane limit on memory allocation */
822 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
823 		return -EINVAL;
824 
825 	keys = kmalloc_array(args->count, sizeof(uint8_t),
826 			     GFP_KERNEL | __GFP_NOWARN);
827 	if (!keys)
828 		keys = vmalloc(sizeof(uint8_t) * args->count);
829 	if (!keys)
830 		return -ENOMEM;
831 
832 	for (i = 0; i < args->count; i++) {
833 		hva = gfn_to_hva(kvm, args->start_gfn + i);
834 		if (kvm_is_error_hva(hva)) {
835 			r = -EFAULT;
836 			goto out;
837 		}
838 
839 		curkey = get_guest_storage_key(current->mm, hva);
840 		if (IS_ERR_VALUE(curkey)) {
841 			r = curkey;
842 			goto out;
843 		}
844 		keys[i] = curkey;
845 	}
846 
847 	r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
848 			 sizeof(uint8_t) * args->count);
849 	if (r)
850 		r = -EFAULT;
851 out:
852 	kvfree(keys);
853 	return r;
854 }
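/*
 * Userspace sketch for the storage-key ioctls (vm_fd from KVM_CREATE_VM,
 * "buf" an array of at least "count" key bytes):
 *
 *	struct kvm_s390_skeys skeys = {
 *		.start_gfn     = 0,
 *		.count         = count,
 *		.skeydata_addr = (__u64)buf,
 *	};
 *
 *	ioctl(vm_fd, KVM_S390_GET_SKEYS, &skeys);
 *
 * A positive return of KVM_S390_GET_SKEYS_NONE means the guest does not use
 * storage keys and nothing was copied; KVM_S390_SET_SKEYS takes the same
 * structure in the other direction.
 */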
855 
856 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
857 {
858 	uint8_t *keys;
859 	uint64_t hva;
860 	int i, r = 0;
861 
862 	if (args->flags != 0)
863 		return -EINVAL;
864 
865 	/* Enforce sane limit on memory allocation */
866 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
867 		return -EINVAL;
868 
869 	keys = kmalloc_array(args->count, sizeof(uint8_t),
870 			     GFP_KERNEL | __GFP_NOWARN);
871 	if (!keys)
872 		keys = vmalloc(sizeof(uint8_t) * args->count);
873 	if (!keys)
874 		return -ENOMEM;
875 
876 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
877 			   sizeof(uint8_t) * args->count);
878 	if (r) {
879 		r = -EFAULT;
880 		goto out;
881 	}
882 
883 	/* Enable storage key handling for the guest */
884 	r = s390_enable_skey();
885 	if (r)
886 		goto out;
887 
888 	for (i = 0; i < args->count; i++) {
889 		hva = gfn_to_hva(kvm, args->start_gfn + i);
890 		if (kvm_is_error_hva(hva)) {
891 			r = -EFAULT;
892 			goto out;
893 		}
894 
895 		/* Lowest order bit is reserved */
896 		if (keys[i] & 0x01) {
897 			r = -EINVAL;
898 			goto out;
899 		}
900 
901 		r = set_guest_storage_key(current->mm, hva,
902 					  (unsigned long)keys[i], 0);
903 		if (r)
904 			goto out;
905 	}
906 out:
907 	kvfree(keys);
908 	return r;
909 }
910 
911 long kvm_arch_vm_ioctl(struct file *filp,
912 		       unsigned int ioctl, unsigned long arg)
913 {
914 	struct kvm *kvm = filp->private_data;
915 	void __user *argp = (void __user *)arg;
916 	struct kvm_device_attr attr;
917 	int r;
918 
919 	switch (ioctl) {
920 	case KVM_S390_INTERRUPT: {
921 		struct kvm_s390_interrupt s390int;
922 
923 		r = -EFAULT;
924 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
925 			break;
926 		r = kvm_s390_inject_vm(kvm, &s390int);
927 		break;
928 	}
929 	case KVM_ENABLE_CAP: {
930 		struct kvm_enable_cap cap;
931 		r = -EFAULT;
932 		if (copy_from_user(&cap, argp, sizeof(cap)))
933 			break;
934 		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
935 		break;
936 	}
937 	case KVM_CREATE_IRQCHIP: {
938 		struct kvm_irq_routing_entry routing;
939 
940 		r = -EINVAL;
941 		if (kvm->arch.use_irqchip) {
942 			/* Set up dummy routing. */
943 			memset(&routing, 0, sizeof(routing));
944 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
945 		}
946 		break;
947 	}
948 	case KVM_SET_DEVICE_ATTR: {
949 		r = -EFAULT;
950 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
951 			break;
952 		r = kvm_s390_vm_set_attr(kvm, &attr);
953 		break;
954 	}
955 	case KVM_GET_DEVICE_ATTR: {
956 		r = -EFAULT;
957 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
958 			break;
959 		r = kvm_s390_vm_get_attr(kvm, &attr);
960 		break;
961 	}
962 	case KVM_HAS_DEVICE_ATTR: {
963 		r = -EFAULT;
964 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
965 			break;
966 		r = kvm_s390_vm_has_attr(kvm, &attr);
967 		break;
968 	}
969 	case KVM_S390_GET_SKEYS: {
970 		struct kvm_s390_skeys args;
971 
972 		r = -EFAULT;
973 		if (copy_from_user(&args, argp,
974 				   sizeof(struct kvm_s390_skeys)))
975 			break;
976 		r = kvm_s390_get_skeys(kvm, &args);
977 		break;
978 	}
979 	case KVM_S390_SET_SKEYS: {
980 		struct kvm_s390_skeys args;
981 
982 		r = -EFAULT;
983 		if (copy_from_user(&args, argp,
984 				   sizeof(struct kvm_s390_skeys)))
985 			break;
986 		r = kvm_s390_set_skeys(kvm, &args);
987 		break;
988 	}
989 	default:
990 		r = -ENOTTY;
991 	}
992 
993 	return r;
994 }
995 
996 static int kvm_s390_query_ap_config(u8 *config)
997 {
998 	u32 fcn_code = 0x04000000UL;
999 	u32 cc = 0;
1000 
1001 	memset(config, 0, 128);
1002 	asm volatile(
1003 		"lgr 0,%1\n"
1004 		"lgr 2,%2\n"
1005 		".long 0xb2af0000\n"		/* PQAP(QCI) */
1006 		"0: ipm %0\n"
1007 		"srl %0,28\n"
1008 		"1:\n"
1009 		EX_TABLE(0b, 1b)
1010 		: "+r" (cc)
1011 		: "r" (fcn_code), "r" (config)
1012 		: "cc", "0", "2", "memory"
1013 	);
1014 
1015 	return cc;
1016 }
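/*
 * The inline assembly above: general register 0 gets the PQAP function code
 * 0x04000000 (QCI, query configuration information), general register 2 the
 * address of the 128-byte config block, and ".long 0xb2af0000" encodes the
 * PQAP instruction itself. IPM/SRL extract the resulting condition code into
 * "cc", and the EX_TABLE entry lets a program exception from PQAP (e.g. when
 * the AP instructions are not installed) skip straight to label 1 instead of
 * oopsing.
 */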
1017 
1018 static int kvm_s390_apxa_installed(void)
1019 {
1020 	u8 config[128];
1021 	int cc;
1022 
1023 	if (test_facility(2) && test_facility(12)) {
1024 		cc = kvm_s390_query_ap_config(config);
1025 
1026 		if (cc)
1027 			pr_err("PQAP(QCI) failed with cc=%d", cc);
1028 		else
1029 			return config[0] & 0x40;
1030 	}
1031 
1032 	return 0;
1033 }
1034 
1035 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1036 {
1037 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1038 
1039 	if (kvm_s390_apxa_installed())
1040 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1041 	else
1042 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1043 }
1044 
1045 static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
1046 {
1047 	get_cpu_id(cpu_id);
1048 	cpu_id->version = 0xff;
1049 }
1050 
1051 static int kvm_s390_crypto_init(struct kvm *kvm)
1052 {
1053 	if (!test_kvm_facility(kvm, 76))
1054 		return 0;
1055 
1056 	kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
1057 					 GFP_KERNEL | GFP_DMA);
1058 	if (!kvm->arch.crypto.crycb)
1059 		return -ENOMEM;
1060 
1061 	kvm_s390_set_crycb_format(kvm);
1062 
1063 	/* Enable AES/DEA protected key functions by default */
1064 	kvm->arch.crypto.aes_kw = 1;
1065 	kvm->arch.crypto.dea_kw = 1;
1066 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1067 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1068 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1069 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1070 
1071 	return 0;
1072 }
1073 
1074 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1075 {
1076 	int i, rc;
1077 	char debug_name[16];
1078 	static unsigned long sca_offset;
1079 
1080 	rc = -EINVAL;
1081 #ifdef CONFIG_KVM_S390_UCONTROL
1082 	if (type & ~KVM_VM_S390_UCONTROL)
1083 		goto out_err;
1084 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1085 		goto out_err;
1086 #else
1087 	if (type)
1088 		goto out_err;
1089 #endif
1090 
1091 	rc = s390_enable_sie();
1092 	if (rc)
1093 		goto out_err;
1094 
1095 	rc = -ENOMEM;
1096 
1097 	kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
1098 	if (!kvm->arch.sca)
1099 		goto out_err;
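	/*
	 * Stagger each VM's SCA at a different 16-byte offset within its
	 * page, presumably so the SCA blocks of separate VMs do not all
	 * compete for the same cache lines:
	 */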
1100 	spin_lock(&kvm_lock);
1101 	sca_offset += 16;
1102 	if (sca_offset + sizeof(struct sca_block) > PAGE_SIZE)
1103 		sca_offset = 0;
1104 	kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
1105 	spin_unlock(&kvm_lock);
1106 
1107 	sprintf(debug_name, "kvm-%u", current->pid);
1108 
1109 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1110 	if (!kvm->arch.dbf)
1111 		goto out_err;
1112 
1113 	/*
1114 	 * The architectural maximum number of facilities is 16 kbit. Storing
1115 	 * that many bits takes 2 kbyte of memory, so a full page is needed to
1116 	 * hold the guest facility list (arch.model.fac->list) and the
1117 	 * facility mask (arch.model.fac->mask). The page address must fit
1118 	 * into 31 bits and be word aligned.
1119 	 */
1120 	kvm->arch.model.fac =
1121 		(struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1122 	if (!kvm->arch.model.fac)
1123 		goto out_err;
1124 
1125 	/* Populate the facility mask initially. */
1126 	memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
1127 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1128 	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1129 		if (i < kvm_s390_fac_list_mask_size())
1130 			kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i];
1131 		else
1132 			kvm->arch.model.fac->mask[i] = 0UL;
1133 	}
1134 
1135 	/* Populate the facility list initially. */
1136 	memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask,
1137 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1138 
1139 	kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
1140 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1141 
1142 	if (kvm_s390_crypto_init(kvm) < 0)
1143 		goto out_err;
1144 
1145 	spin_lock_init(&kvm->arch.float_int.lock);
1146 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
1147 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1148 	init_waitqueue_head(&kvm->arch.ipte_wq);
1149 	mutex_init(&kvm->arch.ipte_mutex);
1150 
1151 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1152 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
1153 
1154 	if (type & KVM_VM_S390_UCONTROL) {
1155 		kvm->arch.gmap = NULL;
1156 	} else {
1157 		kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1);
1158 		if (!kvm->arch.gmap)
1159 			goto out_err;
1160 		kvm->arch.gmap->private = kvm;
1161 		kvm->arch.gmap->pfault_enabled = 0;
1162 	}
1163 
1164 	kvm->arch.css_support = 0;
1165 	kvm->arch.use_irqchip = 0;
1166 	kvm->arch.epoch = 0;
1167 
1168 	spin_lock_init(&kvm->arch.start_stop_lock);
1169 	KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid);
1170 
1171 	return 0;
1172 out_err:
1173 	kfree(kvm->arch.crypto.crycb);
1174 	free_page((unsigned long)kvm->arch.model.fac);
1175 	debug_unregister(kvm->arch.dbf);
1176 	free_page((unsigned long)(kvm->arch.sca));
1177 	KVM_EVENT(3, "creation of vm failed: %d", rc);
1178 	return rc;
1179 }
1180 
1181 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1182 {
1183 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1184 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1185 	kvm_s390_clear_local_irqs(vcpu);
1186 	kvm_clear_async_pf_completion_queue(vcpu);
1187 	if (!kvm_is_ucontrol(vcpu->kvm)) {
1188 		clear_bit(63 - vcpu->vcpu_id,
1189 			  (unsigned long *) &vcpu->kvm->arch.sca->mcn);
1190 		if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
1191 		    (__u64) vcpu->arch.sie_block)
1192 			vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
1193 	}
1194 	smp_mb();
1195 
1196 	if (kvm_is_ucontrol(vcpu->kvm))
1197 		gmap_free(vcpu->arch.gmap);
1198 
1199 	if (vcpu->kvm->arch.use_cmma)
1200 		kvm_s390_vcpu_unsetup_cmma(vcpu);
1201 	free_page((unsigned long)(vcpu->arch.sie_block));
1202 
1203 	kvm_vcpu_uninit(vcpu);
1204 	kmem_cache_free(kvm_vcpu_cache, vcpu);
1205 }
1206 
1207 static void kvm_free_vcpus(struct kvm *kvm)
1208 {
1209 	unsigned int i;
1210 	struct kvm_vcpu *vcpu;
1211 
1212 	kvm_for_each_vcpu(i, vcpu, kvm)
1213 		kvm_arch_vcpu_destroy(vcpu);
1214 
1215 	mutex_lock(&kvm->lock);
1216 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1217 		kvm->vcpus[i] = NULL;
1218 
1219 	atomic_set(&kvm->online_vcpus, 0);
1220 	mutex_unlock(&kvm->lock);
1221 }
1222 
1223 void kvm_arch_destroy_vm(struct kvm *kvm)
1224 {
1225 	kvm_free_vcpus(kvm);
1226 	free_page((unsigned long)kvm->arch.model.fac);
1227 	free_page((unsigned long)(kvm->arch.sca));
1228 	debug_unregister(kvm->arch.dbf);
1229 	kfree(kvm->arch.crypto.crycb);
1230 	if (!kvm_is_ucontrol(kvm))
1231 		gmap_free(kvm->arch.gmap);
1232 	kvm_s390_destroy_adapters(kvm);
1233 	kvm_s390_clear_float_irqs(kvm);
1234 	KVM_EVENT(3, "vm 0x%p destroyed", kvm);
1235 }
1236 
1237 /* Section: vcpu related */
1238 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1239 {
1240 	vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
1241 	if (!vcpu->arch.gmap)
1242 		return -ENOMEM;
1243 	vcpu->arch.gmap->private = vcpu->kvm;
1244 
1245 	return 0;
1246 }
1247 
1248 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1249 {
1250 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1251 	kvm_clear_async_pf_completion_queue(vcpu);
1252 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1253 				    KVM_SYNC_GPRS |
1254 				    KVM_SYNC_ACRS |
1255 				    KVM_SYNC_CRS |
1256 				    KVM_SYNC_ARCH0 |
1257 				    KVM_SYNC_PFAULT;
1258 	if (test_kvm_facility(vcpu->kvm, 129))
1259 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1260 
1261 	if (kvm_is_ucontrol(vcpu->kvm))
1262 		return __kvm_ucontrol_vcpu_init(vcpu);
1263 
1264 	return 0;
1265 }
1266 
1267 /*
1268  * Backs up the current FP/VX register save area on a particular
1269  * destination.  Used to switch between different register save
1270  * areas.
1271  */
1272 static inline void save_fpu_to(struct fpu *dst)
1273 {
1274 	dst->fpc = current->thread.fpu.fpc;
1275 	dst->regs = current->thread.fpu.regs;
1276 }
1277 
1278 /*
1279  * Switches the FP/VX register save area from which to lazy
1280  * restore register contents.
1281  */
1282 static inline void load_fpu_from(struct fpu *from)
1283 {
1284 	current->thread.fpu.fpc = from->fpc;
1285 	current->thread.fpu.regs = from->regs;
1286 }
1287 
1288 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1289 {
1290 	/* Save host register state */
1291 	save_fpu_regs();
1292 	save_fpu_to(&vcpu->arch.host_fpregs);
1293 
1294 	if (test_kvm_facility(vcpu->kvm, 129)) {
1295 		current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
1296 		/*
1297 		 * Use the register save area in the SIE-control block
1298 		 * for register restore and save in kvm_arch_vcpu_put()
1299 		 */
1300 		current->thread.fpu.vxrs =
1301 			(__vector128 *)&vcpu->run->s.regs.vrs;
1302 	} else
1303 		load_fpu_from(&vcpu->arch.guest_fpregs);
1304 
1305 	if (test_fp_ctl(current->thread.fpu.fpc))
1306 		/* User space provided an invalid FPC, let's clear it */
1307 		current->thread.fpu.fpc = 0;
1308 
1309 	save_access_regs(vcpu->arch.host_acrs);
1310 	restore_access_regs(vcpu->run->s.regs.acrs);
1311 	gmap_enable(vcpu->arch.gmap);
1312 	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1313 }
1314 
1315 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1316 {
1317 	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1318 	gmap_disable(vcpu->arch.gmap);
1319 
1320 	save_fpu_regs();
1321 
1322 	if (test_kvm_facility(vcpu->kvm, 129))
1323 		/*
1324 		 * kvm_arch_vcpu_load() set up the register save area to
1325 		 * the &vcpu->run->s.regs.vrs and, thus, the vector registers
1326 		 * are already saved.  Only the floating-point control must be
1327 		 * copied.
1328 		 */
1329 		vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
1330 	else
1331 		save_fpu_to(&vcpu->arch.guest_fpregs);
1332 	load_fpu_from(&vcpu->arch.host_fpregs);
1333 
1334 	save_access_regs(vcpu->run->s.regs.acrs);
1335 	restore_access_regs(vcpu->arch.host_acrs);
1336 }
1337 
1338 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1339 {
1340 	/* this equals the initial cpu reset in the POP, but we don't switch to ESA */
1341 	vcpu->arch.sie_block->gpsw.mask = 0UL;
1342 	vcpu->arch.sie_block->gpsw.addr = 0UL;
1343 	kvm_s390_set_prefix(vcpu, 0);
1344 	vcpu->arch.sie_block->cputm     = 0UL;
1345 	vcpu->arch.sie_block->ckc       = 0UL;
1346 	vcpu->arch.sie_block->todpr     = 0;
1347 	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1348 	vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
1349 	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1350 	vcpu->arch.guest_fpregs.fpc = 0;
1351 	asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
1352 	vcpu->arch.sie_block->gbea = 1;
1353 	vcpu->arch.sie_block->pp = 0;
1354 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1355 	kvm_clear_async_pf_completion_queue(vcpu);
1356 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1357 		kvm_s390_vcpu_stop(vcpu);
1358 	kvm_s390_clear_local_irqs(vcpu);
1359 }
1360 
1361 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1362 {
1363 	mutex_lock(&vcpu->kvm->lock);
1364 	preempt_disable();
1365 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1366 	preempt_enable();
1367 	mutex_unlock(&vcpu->kvm->lock);
1368 	if (!kvm_is_ucontrol(vcpu->kvm))
1369 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1370 }
1371 
1372 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1373 {
1374 	if (!test_kvm_facility(vcpu->kvm, 76))
1375 		return;
1376 
1377 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1378 
1379 	if (vcpu->kvm->arch.crypto.aes_kw)
1380 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1381 	if (vcpu->kvm->arch.crypto.dea_kw)
1382 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1383 
1384 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1385 }
1386 
1387 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1388 {
1389 	free_page(vcpu->arch.sie_block->cbrlo);
1390 	vcpu->arch.sie_block->cbrlo = 0;
1391 }
1392 
1393 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1394 {
1395 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1396 	if (!vcpu->arch.sie_block->cbrlo)
1397 		return -ENOMEM;
1398 
1399 	vcpu->arch.sie_block->ecb2 |= 0x80;
1400 	vcpu->arch.sie_block->ecb2 &= ~0x08;
1401 	return 0;
1402 }
1403 
1404 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1405 {
1406 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1407 
1408 	vcpu->arch.cpu_id = model->cpu_id;
1409 	vcpu->arch.sie_block->ibc = model->ibc;
1410 	vcpu->arch.sie_block->fac = (int) (long) model->fac->list;
1411 }
1412 
1413 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1414 {
1415 	int rc = 0;
1416 
1417 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1418 						    CPUSTAT_SM |
1419 						    CPUSTAT_STOPPED);
1420 
1421 	if (test_kvm_facility(vcpu->kvm, 78))
1422 		atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1423 	else if (test_kvm_facility(vcpu->kvm, 8))
1424 		atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1425 
1426 	kvm_s390_vcpu_setup_model(vcpu);
1427 
1428 	vcpu->arch.sie_block->ecb   = 6;
1429 	if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
1430 		vcpu->arch.sie_block->ecb |= 0x10;
1431 
1432 	vcpu->arch.sie_block->ecb2  = 8;
1433 	vcpu->arch.sie_block->eca   = 0xC1002000U;
1434 	if (sclp.has_siif)
1435 		vcpu->arch.sie_block->eca |= 1;
1436 	if (sclp.has_sigpif)
1437 		vcpu->arch.sie_block->eca |= 0x10000000U;
1438 	if (test_kvm_facility(vcpu->kvm, 129)) {
1439 		vcpu->arch.sie_block->eca |= 0x00020000;
1440 		vcpu->arch.sie_block->ecd |= 0x20000000;
1441 	}
1442 	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1443 
1444 	if (vcpu->kvm->arch.use_cmma) {
1445 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
1446 		if (rc)
1447 			return rc;
1448 	}
1449 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1450 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1451 
1452 	kvm_s390_vcpu_crypto_setup(vcpu);
1453 
1454 	return rc;
1455 }
1456 
1457 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1458 				      unsigned int id)
1459 {
1460 	struct kvm_vcpu *vcpu;
1461 	struct sie_page *sie_page;
1462 	int rc = -EINVAL;
1463 
1464 	if (id >= KVM_MAX_VCPUS)
1465 		goto out;
1466 
1467 	rc = -ENOMEM;
1468 
1469 	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1470 	if (!vcpu)
1471 		goto out;
1472 
1473 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1474 	if (!sie_page)
1475 		goto out_free_cpu;
1476 
1477 	vcpu->arch.sie_block = &sie_page->sie_block;
1478 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1479 
1480 	vcpu->arch.sie_block->icpua = id;
1481 	if (!kvm_is_ucontrol(kvm)) {
1482 		if (!kvm->arch.sca) {
1483 			WARN_ON_ONCE(1);
1484 			goto out_free_cpu;
1485 		}
1486 		if (!kvm->arch.sca->cpu[id].sda)
1487 			kvm->arch.sca->cpu[id].sda =
1488 				(__u64) vcpu->arch.sie_block;
1489 		vcpu->arch.sie_block->scaoh =
1490 			(__u32)(((__u64)kvm->arch.sca) >> 32);
1491 		vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
1492 		set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
1493 	}
1494 
1495 	spin_lock_init(&vcpu->arch.local_int.lock);
1496 	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
1497 	vcpu->arch.local_int.wq = &vcpu->wq;
1498 	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
1499 
1500 	/*
1501 	 * Allocate a save area for floating-point registers.  If the vector
1502 	 * extension is available, register contents are saved in the SIE
1503 	 * control block.  The allocated save area is still required in
1504 	 * particular places, for example, in kvm_s390_vcpu_store_status().
1505 	 */
1506 	vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
1507 					       GFP_KERNEL);
1508 	if (!vcpu->arch.guest_fpregs.fprs) {
1509 		rc = -ENOMEM;
1510 		goto out_free_sie_block;
1511 	}
1512 
1513 	rc = kvm_vcpu_init(vcpu, kvm, id);
1514 	if (rc)
1515 		goto out_free_sie_block;
1516 	VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
1517 		 vcpu->arch.sie_block);
1518 	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
1519 
1520 	return vcpu;
1521 out_free_sie_block:
1522 	free_page((unsigned long)(vcpu->arch.sie_block));
1523 out_free_cpu:
1524 	kmem_cache_free(kvm_vcpu_cache, vcpu);
1525 out:
1526 	return ERR_PTR(rc);
1527 }
1528 
1529 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
1530 {
1531 	return kvm_s390_vcpu_has_irq(vcpu, 0);
1532 }
1533 
1534 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
1535 {
1536 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1537 	exit_sie(vcpu);
1538 }
1539 
1540 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
1541 {
1542 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1543 }
1544 
1545 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
1546 {
1547 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1548 	exit_sie(vcpu);
1549 }
1550 
1551 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
1552 {
1553 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1554 }
1555 
1556 /*
1557  * Kick a guest cpu out of SIE and wait until SIE is not running.
1558  * If the CPU is not running (e.g. waiting as idle), the function will
1559  * return immediately. */
1560 void exit_sie(struct kvm_vcpu *vcpu)
1561 {
1562 	atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
1563 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
1564 		cpu_relax();
1565 }
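/*
 * Mechanism note for the above: CPUSTAT_STOP_INT requests an intercept, which
 * forces a running SIE instruction to exit, while the PROG_IN_SIE bit in
 * prog0c is maintained by the SIE entry/exit code, so the busy loop only
 * returns once the target VCPU has really left SIE.
 */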
1566 
1567 /* Kick a guest cpu out of SIE to process a request synchronously */
1568 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
1569 {
1570 	kvm_make_request(req, vcpu);
1571 	kvm_s390_vcpu_request(vcpu);
1572 }
1573 
1574 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
1575 {
1576 	int i;
1577 	struct kvm *kvm = gmap->private;
1578 	struct kvm_vcpu *vcpu;
1579 
1580 	kvm_for_each_vcpu(i, vcpu, kvm) {
1581 		/* match against both prefix pages */
1582 		if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
1583 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
1584 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
1585 		}
1586 	}
1587 }
1588 
1589 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
1590 {
1591 	/* kvm common code refers to this, but never calls it */
1592 	BUG();
1593 	return 0;
1594 }
1595 
1596 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
1597 					   struct kvm_one_reg *reg)
1598 {
1599 	int r = -EINVAL;
1600 
1601 	switch (reg->id) {
1602 	case KVM_REG_S390_TODPR:
1603 		r = put_user(vcpu->arch.sie_block->todpr,
1604 			     (u32 __user *)reg->addr);
1605 		break;
1606 	case KVM_REG_S390_EPOCHDIFF:
1607 		r = put_user(vcpu->arch.sie_block->epoch,
1608 			     (u64 __user *)reg->addr);
1609 		break;
1610 	case KVM_REG_S390_CPU_TIMER:
1611 		r = put_user(vcpu->arch.sie_block->cputm,
1612 			     (u64 __user *)reg->addr);
1613 		break;
1614 	case KVM_REG_S390_CLOCK_COMP:
1615 		r = put_user(vcpu->arch.sie_block->ckc,
1616 			     (u64 __user *)reg->addr);
1617 		break;
1618 	case KVM_REG_S390_PFTOKEN:
1619 		r = put_user(vcpu->arch.pfault_token,
1620 			     (u64 __user *)reg->addr);
1621 		break;
1622 	case KVM_REG_S390_PFCOMPARE:
1623 		r = put_user(vcpu->arch.pfault_compare,
1624 			     (u64 __user *)reg->addr);
1625 		break;
1626 	case KVM_REG_S390_PFSELECT:
1627 		r = put_user(vcpu->arch.pfault_select,
1628 			     (u64 __user *)reg->addr);
1629 		break;
1630 	case KVM_REG_S390_PP:
1631 		r = put_user(vcpu->arch.sie_block->pp,
1632 			     (u64 __user *)reg->addr);
1633 		break;
1634 	case KVM_REG_S390_GBEA:
1635 		r = put_user(vcpu->arch.sie_block->gbea,
1636 			     (u64 __user *)reg->addr);
1637 		break;
1638 	default:
1639 		break;
1640 	}
1641 
1642 	return r;
1643 }
1644 
1645 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
1646 					   struct kvm_one_reg *reg)
1647 {
1648 	int r = -EINVAL;
1649 
1650 	switch (reg->id) {
1651 	case KVM_REG_S390_TODPR:
1652 		r = get_user(vcpu->arch.sie_block->todpr,
1653 			     (u32 __user *)reg->addr);
1654 		break;
1655 	case KVM_REG_S390_EPOCHDIFF:
1656 		r = get_user(vcpu->arch.sie_block->epoch,
1657 			     (u64 __user *)reg->addr);
1658 		break;
1659 	case KVM_REG_S390_CPU_TIMER:
1660 		r = get_user(vcpu->arch.sie_block->cputm,
1661 			     (u64 __user *)reg->addr);
1662 		break;
1663 	case KVM_REG_S390_CLOCK_COMP:
1664 		r = get_user(vcpu->arch.sie_block->ckc,
1665 			     (u64 __user *)reg->addr);
1666 		break;
1667 	case KVM_REG_S390_PFTOKEN:
1668 		r = get_user(vcpu->arch.pfault_token,
1669 			     (u64 __user *)reg->addr);
1670 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1671 			kvm_clear_async_pf_completion_queue(vcpu);
1672 		break;
1673 	case KVM_REG_S390_PFCOMPARE:
1674 		r = get_user(vcpu->arch.pfault_compare,
1675 			     (u64 __user *)reg->addr);
1676 		break;
1677 	case KVM_REG_S390_PFSELECT:
1678 		r = get_user(vcpu->arch.pfault_select,
1679 			     (u64 __user *)reg->addr);
1680 		break;
1681 	case KVM_REG_S390_PP:
1682 		r = get_user(vcpu->arch.sie_block->pp,
1683 			     (u64 __user *)reg->addr);
1684 		break;
1685 	case KVM_REG_S390_GBEA:
1686 		r = get_user(vcpu->arch.sie_block->gbea,
1687 			     (u64 __user *)reg->addr);
1688 		break;
1689 	default:
1690 		break;
1691 	}
1692 
1693 	return r;
1694 }
1695 
1696 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
1697 {
1698 	kvm_s390_vcpu_initial_reset(vcpu);
1699 	return 0;
1700 }
1701 
1702 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1703 {
1704 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
1705 	return 0;
1706 }
1707 
1708 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1709 {
1710 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
1711 	return 0;
1712 }
1713 
1714 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
1715 				  struct kvm_sregs *sregs)
1716 {
1717 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
1718 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
1719 	restore_access_regs(vcpu->run->s.regs.acrs);
1720 	return 0;
1721 }
1722 
1723 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
1724 				  struct kvm_sregs *sregs)
1725 {
1726 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
1727 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
1728 	return 0;
1729 }
1730 
1731 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1732 {
1733 	if (test_fp_ctl(fpu->fpc))
1734 		return -EINVAL;
1735 	memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
1736 	vcpu->arch.guest_fpregs.fpc = fpu->fpc;
1737 	save_fpu_regs();
1738 	load_fpu_from(&vcpu->arch.guest_fpregs);
1739 	return 0;
1740 }
1741 
1742 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1743 {
1744 	memcpy(&fpu->fprs, vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
1745 	fpu->fpc = vcpu->arch.guest_fpregs.fpc;
1746 	return 0;
1747 }
1748 
1749 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
1750 {
1751 	int rc = 0;
1752 
1753 	if (!is_vcpu_stopped(vcpu))
1754 		rc = -EBUSY;
1755 	else {
1756 		vcpu->run->psw_mask = psw.mask;
1757 		vcpu->run->psw_addr = psw.addr;
1758 	}
1759 	return rc;
1760 }
1761 
1762 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
1763 				  struct kvm_translation *tr)
1764 {
1765 	return -EINVAL; /* not implemented yet */
1766 }
1767 
1768 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
1769 			      KVM_GUESTDBG_USE_HW_BP | \
1770 			      KVM_GUESTDBG_ENABLE)
1771 
1772 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
1773 					struct kvm_guest_debug *dbg)
1774 {
1775 	int rc = 0;
1776 
1777 	vcpu->guest_debug = 0;
1778 	kvm_s390_clear_bp_data(vcpu);
1779 
1780 	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
1781 		return -EINVAL;
1782 
1783 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
1784 		vcpu->guest_debug = dbg->control;
1785 		/* enforce guest PER */
1786 		atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1787 
1788 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
1789 			rc = kvm_s390_import_bp_data(vcpu, dbg);
1790 	} else {
1791 		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1792 		vcpu->arch.guestdbg.last_bp = 0;
1793 	}
1794 
1795 	if (rc) {
1796 		vcpu->guest_debug = 0;
1797 		kvm_s390_clear_bp_data(vcpu);
1798 		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1799 	}
1800 
1801 	return rc;
1802 }
1803 
1804 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
1805 				    struct kvm_mp_state *mp_state)
1806 {
1807 	/* CHECK_STOP and LOAD are not supported yet */
1808 	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
1809 				       KVM_MP_STATE_OPERATING;
1810 }
1811 
1812 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
1813 				    struct kvm_mp_state *mp_state)
1814 {
1815 	int rc = 0;
1816 
1817 	/* user space knows about this interface - let it control the state */
1818 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
1819 
1820 	switch (mp_state->mp_state) {
1821 	case KVM_MP_STATE_STOPPED:
1822 		kvm_s390_vcpu_stop(vcpu);
1823 		break;
1824 	case KVM_MP_STATE_OPERATING:
1825 		kvm_s390_vcpu_start(vcpu);
1826 		break;
1827 	case KVM_MP_STATE_LOAD:
1828 	case KVM_MP_STATE_CHECK_STOP:
1829 		/* fall through - CHECK_STOP and LOAD are not supported yet */
1830 	default:
1831 		rc = -ENXIO;
1832 	}
1833 
1834 	return rc;
1835 }
1836 
1837 static bool ibs_enabled(struct kvm_vcpu *vcpu)
1838 {
1839 	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
1840 }
1841 
1842 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
1843 {
1844 retry:
1845 	kvm_s390_vcpu_request_handled(vcpu);
1846 	if (!vcpu->requests)
1847 		return 0;
1848 	/*
1849 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
1850 	 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
1851 	 * This ensures that the ipte instruction for this request has
1852 	 * already finished. We might race against a second unmapper that
1853 	 * wants to set the blocking bit. Let's just retry the request loop.
1854 	 */
1855 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
1856 		int rc;
1857 		rc = gmap_ipte_notify(vcpu->arch.gmap,
1858 				      kvm_s390_get_prefix(vcpu),
1859 				      PAGE_SIZE * 2);
1860 		if (rc)
1861 			return rc;
1862 		goto retry;
1863 	}
1864 
1865 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
1866 		vcpu->arch.sie_block->ihcpu = 0xffff;
1867 		goto retry;
1868 	}
1869 
1870 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
1871 		if (!ibs_enabled(vcpu)) {
1872 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
1873 			atomic_or(CPUSTAT_IBS,
1874 					&vcpu->arch.sie_block->cpuflags);
1875 		}
1876 		goto retry;
1877 	}
1878 
1879 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
1880 		if (ibs_enabled(vcpu)) {
1881 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
1882 			atomic_andnot(CPUSTAT_IBS,
1883 					  &vcpu->arch.sie_block->cpuflags);
1884 		}
1885 		goto retry;
1886 	}
1887 
1888 	/* nothing to do, just clear the request */
1889 	clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
1890 
1891 	return 0;
1892 }
1893 
1894 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
1895 {
1896 	struct kvm_vcpu *vcpu;
1897 	int i;
1898 
1899 	mutex_lock(&kvm->lock);
1900 	preempt_disable();
1901 	kvm->arch.epoch = tod - get_tod_clock();
1902 	kvm_s390_vcpu_block_all(kvm);
1903 	kvm_for_each_vcpu(i, vcpu, kvm)
1904 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
1905 	kvm_s390_vcpu_unblock_all(kvm);
1906 	preempt_enable();
1907 	mutex_unlock(&kvm->lock);
1908 }
1909 
1910 /**
1911  * kvm_arch_fault_in_page - fault-in guest page if necessary
1912  * @vcpu: The corresponding virtual cpu
1913  * @gpa: Guest physical address
1914  * @writable: Whether the page should be writable or not
1915  *
1916  * Make sure that a guest page has been faulted-in on the host.
1917  *
1918  * Return: Zero on success, negative error code otherwise.
1919  */
1920 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
1921 {
1922 	return gmap_fault(vcpu->arch.gmap, gpa,
1923 			  writable ? FAULT_FLAG_WRITE : 0);
1924 }
1925 
1926 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
1927 				      unsigned long token)
1928 {
1929 	struct kvm_s390_interrupt inti;
1930 	struct kvm_s390_irq irq;
1931 
1932 	if (start_token) {
1933 		irq.u.ext.ext_params2 = token;
1934 		irq.type = KVM_S390_INT_PFAULT_INIT;
1935 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
1936 	} else {
1937 		inti.type = KVM_S390_INT_PFAULT_DONE;
1938 		inti.parm64 = token;
1939 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
1940 	}
1941 }
1942 
1943 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
1944 				     struct kvm_async_pf *work)
1945 {
1946 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
1947 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
1948 }
1949 
1950 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
1951 				 struct kvm_async_pf *work)
1952 {
1953 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
1954 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
1955 }
1956 
1957 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
1958 			       struct kvm_async_pf *work)
1959 {
1960 	/* s390 will always inject the page directly */
1961 }
1962 
1963 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
1964 {
1965 	/*
1966 	 * s390 will always inject the page directly,
1967 	 * but we still want kvm_check_async_pf_completion to clean up
1968 	 */
1969 	return true;
1970 }
1971 
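/*
 * Try to arm an async page fault for the faulting gmap address. This is
 * only done if the guest has set up a valid pfault token, the PSW mask
 * matches the configured select/compare values, external interrupts and
 * the relevant CR0 subclass are enabled, no interrupt is pending and
 * pfault is enabled on the gmap. A return value of 0 means the fault
 * has to be resolved synchronously.
 */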
1972 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
1973 {
1974 	hva_t hva;
1975 	struct kvm_arch_async_pf arch;
1976 	int rc;
1977 
1978 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1979 		return 0;
1980 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
1981 	    vcpu->arch.pfault_compare)
1982 		return 0;
1983 	if (psw_extint_disabled(vcpu))
1984 		return 0;
1985 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
1986 		return 0;
1987 	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
1988 		return 0;
1989 	if (!vcpu->arch.gmap->pfault_enabled)
1990 		return 0;
1991 
1992 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
1993 	hva += current->thread.gmap_addr & ~PAGE_MASK;
1994 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
1995 		return 0;
1996 
1997 	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
1998 	return rc;
1999 }
2000 
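/*
 * Prepare the VCPU for the next SIE entry: handle completed pfaults,
 * give up the CPU and process machine checks if necessary, deliver
 * pending interrupts, process VCPU requests and patch the PER control
 * registers when guest debugging is enabled.
 */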
2001 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2002 {
2003 	int rc, cpuflags;
2004 
2005 	/*
2006 	 * On s390 notifications for arriving pages will be delivered directly
2007 	 * to the guest but the housekeeping for completed pfaults is
2008 	 * handled outside the worker.
2009 	 */
2010 	kvm_check_async_pf_completion(vcpu);
2011 
2012 	memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);
2013 
2014 	if (need_resched())
2015 		schedule();
2016 
2017 	if (test_cpu_flag(CIF_MCCK_PENDING))
2018 		s390_handle_mcck();
2019 
2020 	if (!kvm_is_ucontrol(vcpu->kvm)) {
2021 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
2022 		if (rc)
2023 			return rc;
2024 	}
2025 
2026 	rc = kvm_s390_handle_requests(vcpu);
2027 	if (rc)
2028 		return rc;
2029 
2030 	if (guestdbg_enabled(vcpu)) {
2031 		kvm_s390_backup_guest_per_regs(vcpu);
2032 		kvm_s390_patch_guest_per_regs(vcpu);
2033 	}
2034 
2035 	vcpu->arch.sie_block->icptcode = 0;
2036 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2037 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2038 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
2039 
2040 	return 0;
2041 }
2042 
2043 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2044 {
2045 	psw_t *psw = &vcpu->arch.sie_block->gpsw;
2046 	u8 opcode;
2047 	int rc;
2048 
2049 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2050 	trace_kvm_s390_sie_fault(vcpu);
2051 
2052 	/*
2053 	 * We want to inject an addressing exception, which is defined as a
2054 	 * suppressing or terminating exception. However, since we came here
2055 	 * by a DAT access exception, the PSW still points to the faulting
2056 	 * instruction since DAT exceptions are nullifying. So we've got
2057 	 * to look up the current opcode to get the length of the instruction
2058 	 * to be able to forward the PSW.
2059 	 */
2060 	rc = read_guest(vcpu, psw->addr, 0, &opcode, 1);
2061 	if (rc)
2062 		return kvm_s390_inject_prog_cond(vcpu, rc);
2063 	psw->addr = __rewind_psw(*psw, -insn_length(opcode));
2064 
2065 	return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
2066 }
2067 
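/*
 * Handle the outcome of a SIE exit: restore the PER registers for guest
 * debugging, translate negative exit reasons into a ucontrol exit or a
 * (potentially asynchronous) guest page fault, and run the in-kernel
 * intercept handlers for regular exits.
 */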
2068 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2069 {
2070 	int rc = -1;
2071 
2072 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2073 		   vcpu->arch.sie_block->icptcode);
2074 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2075 
2076 	if (guestdbg_enabled(vcpu))
2077 		kvm_s390_restore_guest_per_regs(vcpu);
2078 
2079 	if (exit_reason >= 0) {
2080 		rc = 0;
2081 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
2082 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2083 		vcpu->run->s390_ucontrol.trans_exc_code =
2084 						current->thread.gmap_addr;
2085 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
2086 		rc = -EREMOTE;
2087 
2088 	} else if (current->thread.gmap_pfault) {
2089 		trace_kvm_s390_major_guest_pfault(vcpu);
2090 		current->thread.gmap_pfault = 0;
2091 		if (kvm_arch_setup_async_pf(vcpu)) {
2092 			rc = 0;
2093 		} else {
2094 			gpa_t gpa = current->thread.gmap_addr;
2095 			rc = kvm_arch_fault_in_page(vcpu, gpa, 1);
2096 		}
2097 	}
2098 
2099 	if (rc == -1)
2100 		rc = vcpu_post_run_fault_in_sie(vcpu);
2101 
2102 	memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
2103 
2104 	if (rc == 0) {
2105 		if (kvm_is_ucontrol(vcpu->kvm))
2106 			/* Don't exit for host interrupts. */
2107 			rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0;
2108 		else
2109 			rc = kvm_handle_sie_intercept(vcpu);
2110 	}
2111 
2112 	return rc;
2113 }
2114 
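/*
 * The inner VCPU run loop: enter SIE via sie64a() with kvm->srcu
 * dropped and interrupts disabled around guest entry/exit accounting,
 * and keep looping until a signal is pending, a guest debug exit is
 * requested or pre/post processing returns an error.
 */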
2115 static int __vcpu_run(struct kvm_vcpu *vcpu)
2116 {
2117 	int rc, exit_reason;
2118 
2119 	/*
2120 	 * We try to hold kvm->srcu during most of vcpu_run (except when
2121 	 * running the guest), so that memslots and other data are protected.
2122 	 */
2123 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2124 
2125 	do {
2126 		rc = vcpu_pre_run(vcpu);
2127 		if (rc)
2128 			break;
2129 
2130 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2131 		/*
2132 		 * As PF_VCPU will be used in the fault handler, there must be
2133 		 * no uaccess between guest_enter and guest_exit.
2134 		 */
2135 		local_irq_disable();
2136 		__kvm_guest_enter();
2137 		local_irq_enable();
2138 		exit_reason = sie64a(vcpu->arch.sie_block,
2139 				     vcpu->run->s.regs.gprs);
2140 		local_irq_disable();
2141 		__kvm_guest_exit();
2142 		local_irq_enable();
2143 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2144 
2145 		rc = vcpu_post_run(vcpu, exit_reason);
2146 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2147 
2148 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2149 	return rc;
2150 }
2151 
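/* Transfer the register state userspace marked dirty in kvm_run into the VCPU */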
2152 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2153 {
2154 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2155 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2156 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2157 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2158 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2159 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2160 		/* some control register changes require a tlb flush */
2161 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2162 	}
2163 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2164 		vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
2165 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2166 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2167 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2168 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2169 	}
2170 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2171 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2172 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2173 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2174 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2175 			kvm_clear_async_pf_completion_queue(vcpu);
2176 	}
2177 	kvm_run->kvm_dirty_regs = 0;
2178 }
2179 
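/* Transfer the current VCPU register state back into kvm_run for userspace */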
2180 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2181 {
2182 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2183 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2184 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2185 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2186 	kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
2187 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2188 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2189 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2190 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2191 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2192 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2193 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2194 }
2195 
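/*
 * The arch side of the KVM_RUN ioctl: sync registers from kvm_run, run
 * the VCPU and translate the result into an exit reason for userspace
 * (KVM_EXIT_S390_SIEIC for intercepts that must be handled there).
 */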
2196 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2197 {
2198 	int rc;
2199 	sigset_t sigsaved;
2200 
2201 	if (guestdbg_exit_pending(vcpu)) {
2202 		kvm_s390_prepare_debug_exit(vcpu);
2203 		return 0;
2204 	}
2205 
2206 	if (vcpu->sigset_active)
2207 		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2208 
2209 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2210 		kvm_s390_vcpu_start(vcpu);
2211 	} else if (is_vcpu_stopped(vcpu)) {
2212 		pr_err_ratelimited("can't run stopped vcpu %d\n",
2213 				   vcpu->vcpu_id);
2214 		return -EINVAL;
2215 	}
2216 
2217 	sync_regs(vcpu, kvm_run);
2218 
2219 	might_fault();
2220 	rc = __vcpu_run(vcpu);
2221 
2222 	if (signal_pending(current) && !rc) {
2223 		kvm_run->exit_reason = KVM_EXIT_INTR;
2224 		rc = -EINTR;
2225 	}
2226 
2227 	if (guestdbg_exit_pending(vcpu) && !rc)  {
2228 		kvm_s390_prepare_debug_exit(vcpu);
2229 		rc = 0;
2230 	}
2231 
2232 	if (rc == -EOPNOTSUPP) {
2233 		/* intercept cannot be handled in-kernel, prepare kvm_run */
2234 		kvm_run->exit_reason         = KVM_EXIT_S390_SIEIC;
2235 		kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2236 		kvm_run->s390_sieic.ipa      = vcpu->arch.sie_block->ipa;
2237 		kvm_run->s390_sieic.ipb      = vcpu->arch.sie_block->ipb;
2238 		rc = 0;
2239 	}
2240 
2241 	if (rc == -EREMOTE) {
2242 		/* intercept was handled, but userspace support is needed;
2243 		 * kvm_run has been prepared by the handler */
2244 		rc = 0;
2245 	}
2246 
2247 	store_regs(vcpu, kvm_run);
2248 
2249 	if (vcpu->sigset_active)
2250 		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2251 
2252 	vcpu->stat.exit_userspace++;
2253 	return rc;
2254 }
2255 
2256 /*
2257  * store status at address
2258  * we have two special cases:
2259  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2260  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2261  */
2262 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2263 {
2264 	unsigned char archmode = 1;
2265 	unsigned int px;
2266 	u64 clkcomp;
2267 	int rc;
2268 
2269 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2270 		if (write_guest_abs(vcpu, 163, &archmode, 1))
2271 			return -EFAULT;
2272 		gpa = SAVE_AREA_BASE;
2273 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2274 		if (write_guest_real(vcpu, 163, &archmode, 1))
2275 			return -EFAULT;
2276 		gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE);
2277 	}
2278 	rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs),
2279 			     vcpu->arch.guest_fpregs.fprs, 128);
2280 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs),
2281 			      vcpu->run->s.regs.gprs, 128);
2282 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw),
2283 			      &vcpu->arch.sie_block->gpsw, 16);
2284 	px = kvm_s390_get_prefix(vcpu);
2285 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg),
2286 			      &px, 4);
2287 	rc |= write_guest_abs(vcpu,
2288 			      gpa + offsetof(struct save_area, fp_ctrl_reg),
2289 			      &vcpu->arch.guest_fpregs.fpc, 4);
2290 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg),
2291 			      &vcpu->arch.sie_block->todpr, 4);
2292 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer),
2293 			      &vcpu->arch.sie_block->cputm, 8);
2294 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
2295 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp),
2296 			      &clkcomp, 8);
2297 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs),
2298 			      &vcpu->run->s.regs.acrs, 64);
2299 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs),
2300 			      &vcpu->arch.sie_block->gcr, 128);
2301 	return rc ? -EFAULT : 0;
2302 }
2303 
2304 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2305 {
2306 	/*
2307 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2308 	 * copying in vcpu load/put. Let's update our copies before we save
2309 	 * them into the save area.
2310 	 */
2311 	save_fpu_regs();
2312 	if (test_kvm_facility(vcpu->kvm, 129)) {
2313 		/*
2314 		 * If the vector extension is available, the vector registers
2315 		 * which overlap with floating-point registers are saved in
2316 		 * the SIE-control block.  Hence, extract the floating-point
2317 		 * registers and the FPC value and store them in the
2318 		 * guest_fpregs structure.
2319 		 */
2320 		vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc;
2321 		convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs,
2322 				 current->thread.fpu.vxrs);
2323 	} else
2324 		save_fpu_to(&vcpu->arch.guest_fpregs);
2325 	save_access_regs(vcpu->run->s.regs.acrs);
2326 
2327 	return kvm_s390_store_status_unloaded(vcpu, addr);
2328 }
2329 
2330 /*
2331  * store additional status at address
2332  */
2333 int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
2334 					unsigned long gpa)
2335 {
2336 	/* Only bits 0-53 are used for address formation */
2337 	if (!(gpa & ~0x3ff))
2338 		return 0;
2339 
2340 	return write_guest_abs(vcpu, gpa & ~0x3ff,
2341 			       (void *)&vcpu->run->s.regs.vrs, 512);
2342 }
2343 
2344 int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
2345 {
2346 	if (!test_kvm_facility(vcpu->kvm, 129))
2347 		return 0;
2348 
2349 	/*
2350 	 * The guest VXRS are in the host VXRS due to the lazy
2351 	 * copying in vcpu load/put. We can simply call save_fpu_regs()
2352 	 * to save the current register state because we are in the
2353 	 * middle of a load/put cycle.
2354 	 *
2355 	 * Let's update our copies before we save them into the save area.
2356 	 */
2357 	save_fpu_regs();
2358 
2359 	return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
2360 }
2361 
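/*
 * Request disabling/enabling of the IBS facility on a VCPU: cancel a
 * pending request for the opposite state, then issue a synchronous
 * request for the new one.
 */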
2362 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2363 {
2364 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2365 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2366 }
2367 
2368 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2369 {
2370 	unsigned int i;
2371 	struct kvm_vcpu *vcpu;
2372 
2373 	kvm_for_each_vcpu(i, vcpu, kvm) {
2374 		__disable_ibs_on_vcpu(vcpu);
2375 	}
2376 }
2377 
2378 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2379 {
2380 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2381 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2382 }
2383 
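/*
 * Move a VCPU out of the STOPPED state. IBS is only kept enabled while
 * a single VCPU is running, so starting a second VCPU disables it on
 * all VCPUs again.
 */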
2384 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2385 {
2386 	int i, online_vcpus, started_vcpus = 0;
2387 
2388 	if (!is_vcpu_stopped(vcpu))
2389 		return;
2390 
2391 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2392 	/* Only one cpu at a time may enter/leave the STOPPED state. */
2393 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
2394 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2395 
2396 	for (i = 0; i < online_vcpus; i++) {
2397 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2398 			started_vcpus++;
2399 	}
2400 
2401 	if (started_vcpus == 0) {
2402 		/* we're the only active VCPU -> speed it up */
2403 		__enable_ibs_on_vcpu(vcpu);
2404 	} else if (started_vcpus == 1) {
2405 		/*
2406 		 * As we are starting a second VCPU, we have to disable
2407 		 * the IBS facility on all VCPUs to remove potentially
2408 		 * outstanding ENABLE requests.
2409 		 */
2410 		__disable_ibs_on_all_vcpus(vcpu->kvm);
2411 	}
2412 
2413 	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2414 	/*
2415 	 * Another VCPU might have used IBS while we were offline.
2416 	 * Let's play safe and flush the VCPU at startup.
2417 	 */
2418 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2419 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2420 	return;
2421 }
2422 
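/*
 * Move a VCPU into the STOPPED state. If exactly one started VCPU
 * remains afterwards, enable IBS on it to speed it up.
 */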
2423 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2424 {
2425 	int i, online_vcpus, started_vcpus = 0;
2426 	struct kvm_vcpu *started_vcpu = NULL;
2427 
2428 	if (is_vcpu_stopped(vcpu))
2429 		return;
2430 
2431 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2432 	/* Only one cpu at a time may enter/leave the STOPPED state. */
2433 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
2434 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2435 
2436 	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
2437 	kvm_s390_clear_stop_irq(vcpu);
2438 
2439 	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2440 	__disable_ibs_on_vcpu(vcpu);
2441 
2442 	for (i = 0; i < online_vcpus; i++) {
2443 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2444 			started_vcpus++;
2445 			started_vcpu = vcpu->kvm->vcpus[i];
2446 		}
2447 	}
2448 
2449 	if (started_vcpus == 1) {
2450 		/*
2451 		 * As we only have one VCPU left, we want to enable the
2452 		 * IBS facility for that VCPU to speed it up.
2453 		 */
2454 		__enable_ibs_on_vcpu(started_vcpu);
2455 	}
2456 
2457 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2458 	return;
2459 }
2460 
2461 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2462 				     struct kvm_enable_cap *cap)
2463 {
2464 	int r;
2465 
2466 	if (cap->flags)
2467 		return -EINVAL;
2468 
2469 	switch (cap->cap) {
2470 	case KVM_CAP_S390_CSS_SUPPORT:
2471 		if (!vcpu->kvm->arch.css_support) {
2472 			vcpu->kvm->arch.css_support = 1;
2473 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
2474 			trace_kvm_s390_enable_css(vcpu->kvm);
2475 		}
2476 		r = 0;
2477 		break;
2478 	default:
2479 		r = -EINVAL;
2480 		break;
2481 	}
2482 	return r;
2483 }
2484 
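/*
 * Back-end for the KVM_S390_MEM_OP ioctl: read or write guest logical
 * memory through a temporary kernel buffer, optionally only checking
 * whether the access would succeed (KVM_S390_MEMOP_F_CHECK_ONLY), and
 * inject the resulting program exception into the guest if
 * KVM_S390_MEMOP_F_INJECT_EXCEPTION is set and the access failed.
 *
 * Illustrative userspace call on a vcpu fd (field values are examples
 * only):
 *	struct kvm_s390_mem_op op = {
 *		.gaddr	= guest_addr,
 *		.size	= len,
 *		.op	= KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf	= (__u64)(unsigned long)buffer,
 *		.ar	= 0,
 *	};
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 */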
2485 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
2486 				  struct kvm_s390_mem_op *mop)
2487 {
2488 	void __user *uaddr = (void __user *)mop->buf;
2489 	void *tmpbuf = NULL;
2490 	int r, srcu_idx;
2491 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
2492 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
2493 
2494 	if (mop->flags & ~supported_flags)
2495 		return -EINVAL;
2496 
2497 	if (mop->size > MEM_OP_MAX_SIZE)
2498 		return -E2BIG;
2499 
2500 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2501 		tmpbuf = vmalloc(mop->size);
2502 		if (!tmpbuf)
2503 			return -ENOMEM;
2504 	}
2505 
2506 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2507 
2508 	switch (mop->op) {
2509 	case KVM_S390_MEMOP_LOGICAL_READ:
2510 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2511 			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false);
2512 			break;
2513 		}
2514 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2515 		if (r == 0) {
2516 			if (copy_to_user(uaddr, tmpbuf, mop->size))
2517 				r = -EFAULT;
2518 		}
2519 		break;
2520 	case KVM_S390_MEMOP_LOGICAL_WRITE:
2521 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2522 			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true);
2523 			break;
2524 		}
2525 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2526 			r = -EFAULT;
2527 			break;
2528 		}
2529 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2530 		break;
2531 	default:
2532 		r = -EINVAL;
2533 	}
2534 
2535 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
2536 
2537 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
2538 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
2539 
2540 	vfree(tmpbuf);
2541 	return r;
2542 }
2543 
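/*
 * Dispatcher for the s390 specific VCPU ioctls: interrupt injection,
 * store status, initial reset, one-reg access, ucontrol mappings,
 * memory operations and irq state save/restore.
 */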
2544 long kvm_arch_vcpu_ioctl(struct file *filp,
2545 			 unsigned int ioctl, unsigned long arg)
2546 {
2547 	struct kvm_vcpu *vcpu = filp->private_data;
2548 	void __user *argp = (void __user *)arg;
2549 	int idx;
2550 	long r;
2551 
2552 	switch (ioctl) {
2553 	case KVM_S390_IRQ: {
2554 		struct kvm_s390_irq s390irq;
2555 
2556 		r = -EFAULT;
2557 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
2558 			break;
2559 		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2560 		break;
2561 	}
2562 	case KVM_S390_INTERRUPT: {
2563 		struct kvm_s390_interrupt s390int;
2564 		struct kvm_s390_irq s390irq;
2565 
2566 		r = -EFAULT;
2567 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
2568 			break;
2569 		if (s390int_to_s390irq(&s390int, &s390irq))
2570 			return -EINVAL;
2571 		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2572 		break;
2573 	}
2574 	case KVM_S390_STORE_STATUS:
2575 		idx = srcu_read_lock(&vcpu->kvm->srcu);
2576 		r = kvm_s390_vcpu_store_status(vcpu, arg);
2577 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
2578 		break;
2579 	case KVM_S390_SET_INITIAL_PSW: {
2580 		psw_t psw;
2581 
2582 		r = -EFAULT;
2583 		if (copy_from_user(&psw, argp, sizeof(psw)))
2584 			break;
2585 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
2586 		break;
2587 	}
2588 	case KVM_S390_INITIAL_RESET:
2589 		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
2590 		break;
2591 	case KVM_SET_ONE_REG:
2592 	case KVM_GET_ONE_REG: {
2593 		struct kvm_one_reg reg;
2594 		r = -EFAULT;
2595 		if (copy_from_user(&reg, argp, sizeof(reg)))
2596 			break;
2597 		if (ioctl == KVM_SET_ONE_REG)
2598 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
2599 		else
2600 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
2601 		break;
2602 	}
2603 #ifdef CONFIG_KVM_S390_UCONTROL
2604 	case KVM_S390_UCAS_MAP: {
2605 		struct kvm_s390_ucas_mapping ucasmap;
2606 
2607 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2608 			r = -EFAULT;
2609 			break;
2610 		}
2611 
2612 		if (!kvm_is_ucontrol(vcpu->kvm)) {
2613 			r = -EINVAL;
2614 			break;
2615 		}
2616 
2617 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
2618 				     ucasmap.vcpu_addr, ucasmap.length);
2619 		break;
2620 	}
2621 	case KVM_S390_UCAS_UNMAP: {
2622 		struct kvm_s390_ucas_mapping ucasmap;
2623 
2624 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2625 			r = -EFAULT;
2626 			break;
2627 		}
2628 
2629 		if (!kvm_is_ucontrol(vcpu->kvm)) {
2630 			r = -EINVAL;
2631 			break;
2632 		}
2633 
2634 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
2635 			ucasmap.length);
2636 		break;
2637 	}
2638 #endif
2639 	case KVM_S390_VCPU_FAULT: {
2640 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
2641 		break;
2642 	}
2643 	case KVM_ENABLE_CAP:
2644 	{
2645 		struct kvm_enable_cap cap;
2646 		r = -EFAULT;
2647 		if (copy_from_user(&cap, argp, sizeof(cap)))
2648 			break;
2649 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
2650 		break;
2651 	}
2652 	case KVM_S390_MEM_OP: {
2653 		struct kvm_s390_mem_op mem_op;
2654 
2655 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
2656 			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
2657 		else
2658 			r = -EFAULT;
2659 		break;
2660 	}
2661 	case KVM_S390_SET_IRQ_STATE: {
2662 		struct kvm_s390_irq_state irq_state;
2663 
2664 		r = -EFAULT;
2665 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2666 			break;
2667 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
2668 		    irq_state.len == 0 ||
2669 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
2670 			r = -EINVAL;
2671 			break;
2672 		}
2673 		r = kvm_s390_set_irq_state(vcpu,
2674 					   (void __user *) irq_state.buf,
2675 					   irq_state.len);
2676 		break;
2677 	}
2678 	case KVM_S390_GET_IRQ_STATE: {
2679 		struct kvm_s390_irq_state irq_state;
2680 
2681 		r = -EFAULT;
2682 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2683 			break;
2684 		if (irq_state.len == 0) {
2685 			r = -EINVAL;
2686 			break;
2687 		}
2688 		r = kvm_s390_get_irq_state(vcpu,
2689 					   (__u8 __user *)  irq_state.buf,
2690 					   irq_state.len);
2691 		break;
2692 	}
2693 	default:
2694 		r = -ENOTTY;
2695 	}
2696 	return r;
2697 }
2698 
2699 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
2700 {
2701 #ifdef CONFIG_KVM_S390_UCONTROL
2702 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
2703 		 && (kvm_is_ucontrol(vcpu->kvm))) {
2704 		vmf->page = virt_to_page(vcpu->arch.sie_block);
2705 		get_page(vmf->page);
2706 		return 0;
2707 	}
2708 #endif
2709 	return VM_FAULT_SIGBUS;
2710 }
2711 
2712 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
2713 			    unsigned long npages)
2714 {
2715 	return 0;
2716 }
2717 
2718 /* Section: memory related */
2719 int kvm_arch_prepare_memory_region(struct kvm *kvm,
2720 				   struct kvm_memory_slot *memslot,
2721 				   const struct kvm_userspace_memory_region *mem,
2722 				   enum kvm_mr_change change)
2723 {
2724 	/* A few sanity checks. We can have memory slots which have to start
2725 	   and end at a segment boundary (1MB). The memory in userland may be
2726 	   fragmented into various different vmas. It is okay to mmap() and
2727 	   munmap() memory in this slot after doing this call at any time */
2728 
2729 	if (mem->userspace_addr & 0xffffful)
2730 		return -EINVAL;
2731 
2732 	if (mem->memory_size & 0xffffful)
2733 		return -EINVAL;
2734 
2735 	return 0;
2736 }
2737 
2738 void kvm_arch_commit_memory_region(struct kvm *kvm,
2739 				const struct kvm_userspace_memory_region *mem,
2740 				const struct kvm_memory_slot *old,
2741 				const struct kvm_memory_slot *new,
2742 				enum kvm_mr_change change)
2743 {
2744 	int rc;
2745 
2746 	/* If the basics of the memslot do not change, we do not want
2747 	 * to update the gmap. Every update causes several unnecessary
2748 	 * segment translation exceptions. This is usually handled just
2749 	 * fine by the normal fault handler + gmap, but it will also
2750 	 * cause faults on the prefix page of running guest CPUs.
2751 	 */
2752 	if (old->userspace_addr == mem->userspace_addr &&
2753 	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
2754 	    old->npages * PAGE_SIZE == mem->memory_size)
2755 		return;
2756 
2757 	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
2758 		mem->guest_phys_addr, mem->memory_size);
2759 	if (rc)
2760 		pr_warn("failed to commit memory region\n");
2761 	return;
2762 }
2763 
2764 static int __init kvm_s390_init(void)
2765 {
2766 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
2767 }
2768 
2769 static void __exit kvm_s390_exit(void)
2770 {
2771 	kvm_exit();
2772 }
2773 
2774 module_init(kvm_s390_init);
2775 module_exit(kvm_s390_exit);
2776 
2777 /*
2778  * Enable autoloading of the kvm module.
2779  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
2780  * since x86 takes a different approach.
2781  */
2782 #include <linux/miscdevice.h>
2783 MODULE_ALIAS_MISCDEV(KVM_MINOR);
2784 MODULE_ALIAS("devname:kvm");
2785