xref: /openbmc/linux/arch/s390/kvm/kvm-s390.c (revision 2c684d89)
1 /*
2  * hosting zSeries kernel virtual machines
3  *
4  * Copyright IBM Corp. 2008, 2009
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License (version 2 only)
8  * as published by the Free Software Foundation.
9  *
10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
11  *               Christian Borntraeger <borntraeger@de.ibm.com>
12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
14  *               Jason J. Herne <jjherne@us.ibm.com>
15  */
16 
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/module.h>
25 #include <linux/random.h>
26 #include <linux/slab.h>
27 #include <linux/timer.h>
28 #include <linux/vmalloc.h>
29 #include <asm/asm-offsets.h>
30 #include <asm/lowcore.h>
31 #include <asm/etr.h>
32 #include <asm/pgtable.h>
33 #include <asm/nmi.h>
34 #include <asm/switch_to.h>
35 #include <asm/isc.h>
36 #include <asm/sclp.h>
37 #include "kvm-s390.h"
38 #include "gaccess.h"
39 
40 #define KMSG_COMPONENT "kvm-s390"
41 #undef pr_fmt
42 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
43 
44 #define CREATE_TRACE_POINTS
45 #include "trace.h"
46 #include "trace-s390.h"
47 
48 #define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
49 #define LOCAL_IRQS 32
50 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
51 			   (KVM_MAX_VCPUS + LOCAL_IRQS))
52 
53 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
54 
55 struct kvm_stats_debugfs_item debugfs_entries[] = {
56 	{ "userspace_handled", VCPU_STAT(exit_userspace) },
57 	{ "exit_null", VCPU_STAT(exit_null) },
58 	{ "exit_validity", VCPU_STAT(exit_validity) },
59 	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
60 	{ "exit_external_request", VCPU_STAT(exit_external_request) },
61 	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
62 	{ "exit_instruction", VCPU_STAT(exit_instruction) },
63 	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
64 	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
65 	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
66 	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
67 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
68 	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
69 	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
70 	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
71 	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
72 	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
73 	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
74 	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
75 	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
76 	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
77 	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
78 	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
79 	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
80 	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
81 	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
82 	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
83 	{ "instruction_spx", VCPU_STAT(instruction_spx) },
84 	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
85 	{ "instruction_stap", VCPU_STAT(instruction_stap) },
86 	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
87 	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
88 	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
89 	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
90 	{ "instruction_essa", VCPU_STAT(instruction_essa) },
91 	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
92 	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
93 	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
94 	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
95 	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
96 	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
97 	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
98 	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
99 	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
100 	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
101 	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
102 	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
103 	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
104 	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
105 	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
106 	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
107 	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
108 	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
109 	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
110 	{ "diagnose_10", VCPU_STAT(diagnose_10) },
111 	{ "diagnose_44", VCPU_STAT(diagnose_44) },
112 	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
113 	{ "diagnose_258", VCPU_STAT(diagnose_258) },
114 	{ "diagnose_308", VCPU_STAT(diagnose_308) },
115 	{ "diagnose_500", VCPU_STAT(diagnose_500) },
116 	{ NULL }
117 };
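/*
 * Each entry above becomes a debugfs file; with the usual debugfs mount
 * point these counters appear as /sys/kernel/debug/kvm/<name> (summed over
 * all running VMs on this kernel generation), e.g.:
 *
 *   # cat /sys/kernel/debug/kvm/exit_instruction
 *
 * The path assumes the conventional debugfs mount at /sys/kernel/debug.
 */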
118 
119 /* limit of facility bits that KVM may expose to its guests */
120 unsigned long kvm_s390_fac_list_mask[] = {
121 	0xffe6fffbfcfdfc40UL,
122 	0x005e800000000000UL,
123 };
124 
125 unsigned long kvm_s390_fac_list_mask_size(void)
126 {
127 	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
128 	return ARRAY_SIZE(kvm_s390_fac_list_mask);
129 }
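/*
 * The mask uses the architected STFLE bit numbering: facility N lives in
 * doubleword N / 64, at bit position 63 - (N % 64), most significant bit
 * first.  A minimal sketch of the mapping, assuming that numbering:
 *
 *   facility 76 (message-security-assist extension 3, used here for key
 *   wrapping) -> kvm_s390_fac_list_mask[1], bit 1UL << (63 - 12) ==
 *   0x0008000000000000UL, which is set in 0x005e800000000000UL above, so
 *   facility 76 may be passed through to guests.
 *
 *   facility 129 (vector) would live in doubleword 2, which this mask does
 *   not cover, so it is only enabled explicitly via
 *   KVM_CAP_S390_VECTOR_REGISTERS (see kvm_vm_ioctl_enable_cap()).
 */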
130 
131 static struct gmap_notifier gmap_notifier;
132 debug_info_t *kvm_s390_dbf;
133 
134 /* Section: not file related */
135 int kvm_arch_hardware_enable(void)
136 {
137 	/* every s390 is virtualization enabled ;-) */
138 	return 0;
139 }
140 
141 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
142 
143 /*
144  * This callback is executed during stop_machine(). All CPUs are therefore
145  * temporarily stopped. In order not to change guest behavior, we have to
146  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
147  * so a CPU won't be stopped while calculating with the epoch.
148  */
149 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
150 			  void *v)
151 {
152 	struct kvm *kvm;
153 	struct kvm_vcpu *vcpu;
154 	int i;
155 	unsigned long long *delta = v;
156 
157 	list_for_each_entry(kvm, &vm_list, vm_list) {
158 		kvm->arch.epoch -= *delta;
159 		kvm_for_each_vcpu(i, vcpu, kvm) {
160 			vcpu->arch.sie_block->epoch -= *delta;
161 		}
162 	}
163 	return NOTIFY_OK;
164 }
165 
166 static struct notifier_block kvm_clock_notifier = {
167 	.notifier_call = kvm_clock_sync,
168 };
169 
170 int kvm_arch_hardware_setup(void)
171 {
172 	gmap_notifier.notifier_call = kvm_gmap_notifier;
173 	gmap_register_ipte_notifier(&gmap_notifier);
174 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
175 				       &kvm_clock_notifier);
176 	return 0;
177 }
178 
179 void kvm_arch_hardware_unsetup(void)
180 {
181 	gmap_unregister_ipte_notifier(&gmap_notifier);
182 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
183 					 &kvm_clock_notifier);
184 }
185 
186 int kvm_arch_init(void *opaque)
187 {
188 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
189 	if (!kvm_s390_dbf)
190 		return -ENOMEM;
191 
192 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
193 		debug_unregister(kvm_s390_dbf);
194 		return -ENOMEM;
195 	}
196 
197 	/* Register floating interrupt controller interface. */
198 	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
199 }
200 
201 void kvm_arch_exit(void)
202 {
203 	debug_unregister(kvm_s390_dbf);
204 }
205 
206 /* Section: device related */
207 long kvm_arch_dev_ioctl(struct file *filp,
208 			unsigned int ioctl, unsigned long arg)
209 {
210 	if (ioctl == KVM_S390_ENABLE_SIE)
211 		return s390_enable_sie();
212 	return -EINVAL;
213 }
214 
215 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
216 {
217 	int r;
218 
219 	switch (ext) {
220 	case KVM_CAP_S390_PSW:
221 	case KVM_CAP_S390_GMAP:
222 	case KVM_CAP_SYNC_MMU:
223 #ifdef CONFIG_KVM_S390_UCONTROL
224 	case KVM_CAP_S390_UCONTROL:
225 #endif
226 	case KVM_CAP_ASYNC_PF:
227 	case KVM_CAP_SYNC_REGS:
228 	case KVM_CAP_ONE_REG:
229 	case KVM_CAP_ENABLE_CAP:
230 	case KVM_CAP_S390_CSS_SUPPORT:
231 	case KVM_CAP_IOEVENTFD:
232 	case KVM_CAP_DEVICE_CTRL:
233 	case KVM_CAP_ENABLE_CAP_VM:
234 	case KVM_CAP_S390_IRQCHIP:
235 	case KVM_CAP_VM_ATTRIBUTES:
236 	case KVM_CAP_MP_STATE:
237 	case KVM_CAP_S390_INJECT_IRQ:
238 	case KVM_CAP_S390_USER_SIGP:
239 	case KVM_CAP_S390_USER_STSI:
240 	case KVM_CAP_S390_SKEYS:
241 	case KVM_CAP_S390_IRQ_STATE:
242 		r = 1;
243 		break;
244 	case KVM_CAP_S390_MEM_OP:
245 		r = MEM_OP_MAX_SIZE;
246 		break;
247 	case KVM_CAP_NR_VCPUS:
248 	case KVM_CAP_MAX_VCPUS:
249 		r = KVM_MAX_VCPUS;
250 		break;
251 	case KVM_CAP_NR_MEMSLOTS:
252 		r = KVM_USER_MEM_SLOTS;
253 		break;
254 	case KVM_CAP_S390_COW:
255 		r = MACHINE_HAS_ESOP;
256 		break;
257 	case KVM_CAP_S390_VECTOR_REGISTERS:
258 		r = MACHINE_HAS_VX;
259 		break;
260 	default:
261 		r = 0;
262 	}
263 	return r;
264 }
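/*
 * A minimal userspace sketch of querying one of these capabilities; it
 * assumes a VM file descriptor vm_fd obtained via KVM_CREATE_VM and the
 * standard <linux/kvm.h> ioctl interface:
 *
 *   #include <sys/ioctl.h>
 *   #include <linux/kvm.h>
 *
 *   int max = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *   // max is MEM_OP_MAX_SIZE (65536) here, or 0 if the capability is absent
 */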
265 
266 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
267 					struct kvm_memory_slot *memslot)
268 {
269 	gfn_t cur_gfn, last_gfn;
270 	unsigned long address;
271 	struct gmap *gmap = kvm->arch.gmap;
272 
273 	down_read(&gmap->mm->mmap_sem);
274 	/* Loop over all guest pages */
275 	last_gfn = memslot->base_gfn + memslot->npages;
276 	for (cur_gfn = memslot->base_gfn; cur_gfn < last_gfn; cur_gfn++) {
277 		address = gfn_to_hva_memslot(memslot, cur_gfn);
278 
279 		if (gmap_test_and_clear_dirty(address, gmap))
280 			mark_page_dirty(kvm, cur_gfn);
281 	}
282 	up_read(&gmap->mm->mmap_sem);
283 }
284 
285 /* Section: vm related */
286 /*
287  * Get (and clear) the dirty memory log for a memory slot.
288  */
289 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
290 			       struct kvm_dirty_log *log)
291 {
292 	int r;
293 	unsigned long n;
294 	struct kvm_memslots *slots;
295 	struct kvm_memory_slot *memslot;
296 	int is_dirty = 0;
297 
298 	mutex_lock(&kvm->slots_lock);
299 
300 	r = -EINVAL;
301 	if (log->slot >= KVM_USER_MEM_SLOTS)
302 		goto out;
303 
304 	slots = kvm_memslots(kvm);
305 	memslot = id_to_memslot(slots, log->slot);
306 	r = -ENOENT;
307 	if (!memslot->dirty_bitmap)
308 		goto out;
309 
310 	kvm_s390_sync_dirty_log(kvm, memslot);
311 	r = kvm_get_dirty_log(kvm, log, &is_dirty);
312 	if (r)
313 		goto out;
314 
315 	/* Clear the dirty log */
316 	if (is_dirty) {
317 		n = kvm_dirty_bitmap_bytes(memslot);
318 		memset(memslot->dirty_bitmap, 0, n);
319 	}
320 	r = 0;
321 out:
322 	mutex_unlock(&kvm->slots_lock);
323 	return r;
324 }
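/*
 * Userspace retrieves the log through the generic KVM_GET_DIRTY_LOG ioctl.
 * A minimal sketch, assuming vm_fd and a caller-allocated bitmap sized for
 * the memslot (one bit per page, rounded up to 8 bytes):
 *
 *   struct kvm_dirty_log log = {
 *           .slot         = 0,
 *           .dirty_bitmap = bitmap,
 *   };
 *
 *   if (ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log) < 0)
 *           perror("KVM_GET_DIRTY_LOG");
 */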
325 
326 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
327 {
328 	int r;
329 
330 	if (cap->flags)
331 		return -EINVAL;
332 
333 	switch (cap->cap) {
334 	case KVM_CAP_S390_IRQCHIP:
335 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
336 		kvm->arch.use_irqchip = 1;
337 		r = 0;
338 		break;
339 	case KVM_CAP_S390_USER_SIGP:
340 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
341 		kvm->arch.user_sigp = 1;
342 		r = 0;
343 		break;
344 	case KVM_CAP_S390_VECTOR_REGISTERS:
345 		mutex_lock(&kvm->lock);
346 		if (atomic_read(&kvm->online_vcpus)) {
347 			r = -EBUSY;
348 		} else if (MACHINE_HAS_VX) {
349 			set_kvm_facility(kvm->arch.model.fac->mask, 129);
350 			set_kvm_facility(kvm->arch.model.fac->list, 129);
351 			r = 0;
352 		} else
353 			r = -EINVAL;
354 		mutex_unlock(&kvm->lock);
355 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
356 			 r ? "(not available)" : "(success)");
357 		break;
358 	case KVM_CAP_S390_USER_STSI:
359 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
360 		kvm->arch.user_stsi = 1;
361 		r = 0;
362 		break;
363 	default:
364 		r = -EINVAL;
365 		break;
366 	}
367 	return r;
368 }
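/*
 * These VM capabilities are switched on from userspace with KVM_ENABLE_CAP
 * on the VM fd (possible because KVM_CAP_ENABLE_CAP_VM is reported above).
 * A minimal sketch, assuming vm_fd:
 *
 *   struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *
 *   if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap) < 0)
 *           perror("KVM_ENABLE_CAP");
 *   // from now on selected SIGP orders are left to userspace to handle
 */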
369 
370 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
371 {
372 	int ret;
373 
374 	switch (attr->attr) {
375 	case KVM_S390_VM_MEM_LIMIT_SIZE:
376 		ret = 0;
377 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
378 			 kvm->arch.gmap->asce_end);
379 		if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr))
380 			ret = -EFAULT;
381 		break;
382 	default:
383 		ret = -ENXIO;
384 		break;
385 	}
386 	return ret;
387 }
388 
389 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
390 {
391 	int ret;
392 	unsigned int idx;
393 	switch (attr->attr) {
394 	case KVM_S390_VM_MEM_ENABLE_CMMA:
395 		/* enable CMMA only in LPAR, on z10 or later (EDAT-1) */
396 		ret = -EINVAL;
397 		if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)
398 			break;
399 
400 		ret = -EBUSY;
401 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
402 		mutex_lock(&kvm->lock);
403 		if (atomic_read(&kvm->online_vcpus) == 0) {
404 			kvm->arch.use_cmma = 1;
405 			ret = 0;
406 		}
407 		mutex_unlock(&kvm->lock);
408 		break;
409 	case KVM_S390_VM_MEM_CLR_CMMA:
410 		ret = -EINVAL;
411 		if (!kvm->arch.use_cmma)
412 			break;
413 
414 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
415 		mutex_lock(&kvm->lock);
416 		idx = srcu_read_lock(&kvm->srcu);
417 		s390_reset_cmma(kvm->arch.gmap->mm);
418 		srcu_read_unlock(&kvm->srcu, idx);
419 		mutex_unlock(&kvm->lock);
420 		ret = 0;
421 		break;
422 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
423 		unsigned long new_limit;
424 
425 		if (kvm_is_ucontrol(kvm))
426 			return -EINVAL;
427 
428 		if (get_user(new_limit, (u64 __user *)attr->addr))
429 			return -EFAULT;
430 
431 		if (new_limit > kvm->arch.gmap->asce_end)
432 			return -E2BIG;
433 
434 		ret = -EBUSY;
435 		mutex_lock(&kvm->lock);
436 		if (atomic_read(&kvm->online_vcpus) == 0) {
437 			/* gmap_alloc will round the limit up */
438 			struct gmap *new = gmap_alloc(current->mm, new_limit);
439 
440 			if (!new) {
441 				ret = -ENOMEM;
442 			} else {
443 				gmap_free(kvm->arch.gmap);
444 				new->private = kvm;
445 				kvm->arch.gmap = new;
446 				ret = 0;
447 			}
448 		}
449 		mutex_unlock(&kvm->lock);
450 		VM_EVENT(kvm, 3, "SET: max guest memory: %lu bytes", new_limit);
451 		break;
452 	}
453 	default:
454 		ret = -ENXIO;
455 		break;
456 	}
457 	return ret;
458 }
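/*
 * The memory attributes above are driven through the VM device-attribute
 * ioctls.  A minimal sketch of lowering the guest memory limit before any
 * VCPU is created, assuming vm_fd and a hypothetical 2 GB limit:
 *
 *   __u64 limit = 1ULL << 31;
 *   struct kvm_device_attr attr = {
 *           .group = KVM_S390_VM_MEM_CTRL,
 *           .attr  = KVM_S390_VM_MEM_LIMIT_SIZE,
 *           .addr  = (__u64)(unsigned long)&limit,
 *   };
 *
 *   if (ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr) < 0)
 *           perror("KVM_SET_DEVICE_ATTR");
 */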
459 
460 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
461 
462 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
463 {
464 	struct kvm_vcpu *vcpu;
465 	int i;
466 
467 	if (!test_kvm_facility(kvm, 76))
468 		return -EINVAL;
469 
470 	mutex_lock(&kvm->lock);
471 	switch (attr->attr) {
472 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
473 		get_random_bytes(
474 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
475 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
476 		kvm->arch.crypto.aes_kw = 1;
477 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
478 		break;
479 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
480 		get_random_bytes(
481 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
482 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
483 		kvm->arch.crypto.dea_kw = 1;
484 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
485 		break;
486 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
487 		kvm->arch.crypto.aes_kw = 0;
488 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
489 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
490 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
491 		break;
492 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
493 		kvm->arch.crypto.dea_kw = 0;
494 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
495 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
496 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
497 		break;
498 	default:
499 		mutex_unlock(&kvm->lock);
500 		return -ENXIO;
501 	}
502 
503 	kvm_for_each_vcpu(i, vcpu, kvm) {
504 		kvm_s390_vcpu_crypto_setup(vcpu);
505 		exit_sie(vcpu);
506 	}
507 	mutex_unlock(&kvm->lock);
508 	return 0;
509 }
510 
511 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
512 {
513 	u8 gtod_high;
514 
515 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
516 					   sizeof(gtod_high)))
517 		return -EFAULT;
518 
519 	if (gtod_high != 0)
520 		return -EINVAL;
521 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
522 
523 	return 0;
524 }
525 
526 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
527 {
528 	u64 gtod;
529 
530 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
531 		return -EFAULT;
532 
533 	kvm_s390_set_tod_clock(kvm, gtod);
534 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
535 	return 0;
536 }
537 
538 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
539 {
540 	int ret;
541 
542 	if (attr->flags)
543 		return -EINVAL;
544 
545 	switch (attr->attr) {
546 	case KVM_S390_VM_TOD_HIGH:
547 		ret = kvm_s390_set_tod_high(kvm, attr);
548 		break;
549 	case KVM_S390_VM_TOD_LOW:
550 		ret = kvm_s390_set_tod_low(kvm, attr);
551 		break;
552 	default:
553 		ret = -ENXIO;
554 		break;
555 	}
556 	return ret;
557 }
558 
559 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
560 {
561 	u8 gtod_high = 0;
562 
563 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
564 					 sizeof(gtod_high)))
565 		return -EFAULT;
566 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
567 
568 	return 0;
569 }
570 
571 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
572 {
573 	u64 gtod;
574 
575 	gtod = kvm_s390_get_tod_clock_fast(kvm);
576 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
577 		return -EFAULT;
578 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
579 
580 	return 0;
581 }
582 
583 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
584 {
585 	int ret;
586 
587 	if (attr->flags)
588 		return -EINVAL;
589 
590 	switch (attr->attr) {
591 	case KVM_S390_VM_TOD_HIGH:
592 		ret = kvm_s390_get_tod_high(kvm, attr);
593 		break;
594 	case KVM_S390_VM_TOD_LOW:
595 		ret = kvm_s390_get_tod_low(kvm, attr);
596 		break;
597 	default:
598 		ret = -ENXIO;
599 		break;
600 	}
601 	return ret;
602 }
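/*
 * The guest TOD clock is likewise driven through the device-attribute
 * interface.  A minimal sketch, assuming vm_fd and a 64-bit TOD value (the
 * epoch extension, KVM_S390_VM_TOD_HIGH, must remain 0 on this kernel):
 *
 *   __u64 tod = 0;  // hypothetical guest TOD value
 *   struct kvm_device_attr attr = {
 *           .group = KVM_S390_VM_TOD,
 *           .attr  = KVM_S390_VM_TOD_LOW,
 *           .addr  = (__u64)(unsigned long)&tod,
 *   };
 *
 *   ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);   // set the guest TOD
 *   ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);   // read it back
 */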
603 
604 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
605 {
606 	struct kvm_s390_vm_cpu_processor *proc;
607 	int ret = 0;
608 
609 	mutex_lock(&kvm->lock);
610 	if (atomic_read(&kvm->online_vcpus)) {
611 		ret = -EBUSY;
612 		goto out;
613 	}
614 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
615 	if (!proc) {
616 		ret = -ENOMEM;
617 		goto out;
618 	}
619 	if (!copy_from_user(proc, (void __user *)attr->addr,
620 			    sizeof(*proc))) {
621 		memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
622 		       sizeof(struct cpuid));
623 		kvm->arch.model.ibc = proc->ibc;
624 		memcpy(kvm->arch.model.fac->list, proc->fac_list,
625 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
626 	} else
627 		ret = -EFAULT;
628 	kfree(proc);
629 out:
630 	mutex_unlock(&kvm->lock);
631 	return ret;
632 }
633 
634 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
635 {
636 	int ret = -ENXIO;
637 
638 	switch (attr->attr) {
639 	case KVM_S390_VM_CPU_PROCESSOR:
640 		ret = kvm_s390_set_processor(kvm, attr);
641 		break;
642 	}
643 	return ret;
644 }
645 
646 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
647 {
648 	struct kvm_s390_vm_cpu_processor *proc;
649 	int ret = 0;
650 
651 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
652 	if (!proc) {
653 		ret = -ENOMEM;
654 		goto out;
655 	}
656 	memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
657 	proc->ibc = kvm->arch.model.ibc;
658 	memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE);
659 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
660 		ret = -EFAULT;
661 	kfree(proc);
662 out:
663 	return ret;
664 }
665 
666 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
667 {
668 	struct kvm_s390_vm_cpu_machine *mach;
669 	int ret = 0;
670 
671 	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
672 	if (!mach) {
673 		ret = -ENOMEM;
674 		goto out;
675 	}
676 	get_cpu_id((struct cpuid *) &mach->cpuid);
677 	mach->ibc = sclp.ibc;
678 	memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
679 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
680 	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
681 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
682 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
683 		ret = -EFAULT;
684 	kfree(mach);
685 out:
686 	return ret;
687 }
688 
689 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
690 {
691 	int ret = -ENXIO;
692 
693 	switch (attr->attr) {
694 	case KVM_S390_VM_CPU_PROCESSOR:
695 		ret = kvm_s390_get_processor(kvm, attr);
696 		break;
697 	case KVM_S390_VM_CPU_MACHINE:
698 		ret = kvm_s390_get_machine(kvm, attr);
699 		break;
700 	}
701 	return ret;
702 }
703 
704 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
705 {
706 	int ret;
707 
708 	switch (attr->group) {
709 	case KVM_S390_VM_MEM_CTRL:
710 		ret = kvm_s390_set_mem_control(kvm, attr);
711 		break;
712 	case KVM_S390_VM_TOD:
713 		ret = kvm_s390_set_tod(kvm, attr);
714 		break;
715 	case KVM_S390_VM_CPU_MODEL:
716 		ret = kvm_s390_set_cpu_model(kvm, attr);
717 		break;
718 	case KVM_S390_VM_CRYPTO:
719 		ret = kvm_s390_vm_set_crypto(kvm, attr);
720 		break;
721 	default:
722 		ret = -ENXIO;
723 		break;
724 	}
725 
726 	return ret;
727 }
728 
729 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
730 {
731 	int ret;
732 
733 	switch (attr->group) {
734 	case KVM_S390_VM_MEM_CTRL:
735 		ret = kvm_s390_get_mem_control(kvm, attr);
736 		break;
737 	case KVM_S390_VM_TOD:
738 		ret = kvm_s390_get_tod(kvm, attr);
739 		break;
740 	case KVM_S390_VM_CPU_MODEL:
741 		ret = kvm_s390_get_cpu_model(kvm, attr);
742 		break;
743 	default:
744 		ret = -ENXIO;
745 		break;
746 	}
747 
748 	return ret;
749 }
750 
751 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
752 {
753 	int ret;
754 
755 	switch (attr->group) {
756 	case KVM_S390_VM_MEM_CTRL:
757 		switch (attr->attr) {
758 		case KVM_S390_VM_MEM_ENABLE_CMMA:
759 		case KVM_S390_VM_MEM_CLR_CMMA:
760 		case KVM_S390_VM_MEM_LIMIT_SIZE:
761 			ret = 0;
762 			break;
763 		default:
764 			ret = -ENXIO;
765 			break;
766 		}
767 		break;
768 	case KVM_S390_VM_TOD:
769 		switch (attr->attr) {
770 		case KVM_S390_VM_TOD_LOW:
771 		case KVM_S390_VM_TOD_HIGH:
772 			ret = 0;
773 			break;
774 		default:
775 			ret = -ENXIO;
776 			break;
777 		}
778 		break;
779 	case KVM_S390_VM_CPU_MODEL:
780 		switch (attr->attr) {
781 		case KVM_S390_VM_CPU_PROCESSOR:
782 		case KVM_S390_VM_CPU_MACHINE:
783 			ret = 0;
784 			break;
785 		default:
786 			ret = -ENXIO;
787 			break;
788 		}
789 		break;
790 	case KVM_S390_VM_CRYPTO:
791 		switch (attr->attr) {
792 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
793 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
794 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
795 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
796 			ret = 0;
797 			break;
798 		default:
799 			ret = -ENXIO;
800 			break;
801 		}
802 		break;
803 	default:
804 		ret = -ENXIO;
805 		break;
806 	}
807 
808 	return ret;
809 }
810 
811 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
812 {
813 	uint8_t *keys;
814 	uint64_t hva;
815 	unsigned long curkey;
816 	int i, r = 0;
817 
818 	if (args->flags != 0)
819 		return -EINVAL;
820 
821 	/* Is this guest using storage keys? */
822 	if (!mm_use_skey(current->mm))
823 		return KVM_S390_GET_SKEYS_NONE;
824 
825 	/* Enforce sane limit on memory allocation */
826 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
827 		return -EINVAL;
828 
829 	keys = kmalloc_array(args->count, sizeof(uint8_t),
830 			     GFP_KERNEL | __GFP_NOWARN);
831 	if (!keys)
832 		keys = vmalloc(sizeof(uint8_t) * args->count);
833 	if (!keys)
834 		return -ENOMEM;
835 
836 	for (i = 0; i < args->count; i++) {
837 		hva = gfn_to_hva(kvm, args->start_gfn + i);
838 		if (kvm_is_error_hva(hva)) {
839 			r = -EFAULT;
840 			goto out;
841 		}
842 
843 		curkey = get_guest_storage_key(current->mm, hva);
844 		if (IS_ERR_VALUE(curkey)) {
845 			r = curkey;
846 			goto out;
847 		}
848 		keys[i] = curkey;
849 	}
850 
851 	r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
852 			 sizeof(uint8_t) * args->count);
853 	if (r)
854 		r = -EFAULT;
855 out:
856 	kvfree(keys);
857 	return r;
858 }
859 
860 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
861 {
862 	uint8_t *keys;
863 	uint64_t hva;
864 	int i, r = 0;
865 
866 	if (args->flags != 0)
867 		return -EINVAL;
868 
869 	/* Enforce sane limit on memory allocation */
870 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
871 		return -EINVAL;
872 
873 	keys = kmalloc_array(args->count, sizeof(uint8_t),
874 			     GFP_KERNEL | __GFP_NOWARN);
875 	if (!keys)
876 		keys = vmalloc(sizeof(uint8_t) * args->count);
877 	if (!keys)
878 		return -ENOMEM;
879 
880 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
881 			   sizeof(uint8_t) * args->count);
882 	if (r) {
883 		r = -EFAULT;
884 		goto out;
885 	}
886 
887 	/* Enable storage key handling for the guest */
888 	r = s390_enable_skey();
889 	if (r)
890 		goto out;
891 
892 	for (i = 0; i < args->count; i++) {
893 		hva = gfn_to_hva(kvm, args->start_gfn + i);
894 		if (kvm_is_error_hva(hva)) {
895 			r = -EFAULT;
896 			goto out;
897 		}
898 
899 		/* Lowest order bit is reserved */
900 		if (keys[i] & 0x01) {
901 			r = -EINVAL;
902 			goto out;
903 		}
904 
905 		r = set_guest_storage_key(current->mm, hva,
906 					  (unsigned long)keys[i], 0);
907 		if (r)
908 			goto out;
909 	}
910 out:
911 	kvfree(keys);
912 	return r;
913 }
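/*
 * Storage keys are transferred in bulk with the two handlers above.  A
 * minimal sketch of reading the keys of the first 256 guest pages,
 * assuming vm_fd and a guest that already uses storage keys:
 *
 *   __u8 keys[256];
 *   struct kvm_s390_skeys args = {
 *           .start_gfn     = 0,
 *           .count         = 256,
 *           .skeydata_addr = (__u64)(unsigned long)keys,
 *   };
 *
 *   int r = ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 *   // r == KVM_S390_GET_SKEYS_NONE means the guest does not use keys yet
 */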
914 
915 long kvm_arch_vm_ioctl(struct file *filp,
916 		       unsigned int ioctl, unsigned long arg)
917 {
918 	struct kvm *kvm = filp->private_data;
919 	void __user *argp = (void __user *)arg;
920 	struct kvm_device_attr attr;
921 	int r;
922 
923 	switch (ioctl) {
924 	case KVM_S390_INTERRUPT: {
925 		struct kvm_s390_interrupt s390int;
926 
927 		r = -EFAULT;
928 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
929 			break;
930 		r = kvm_s390_inject_vm(kvm, &s390int);
931 		break;
932 	}
933 	case KVM_ENABLE_CAP: {
934 		struct kvm_enable_cap cap;
935 		r = -EFAULT;
936 		if (copy_from_user(&cap, argp, sizeof(cap)))
937 			break;
938 		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
939 		break;
940 	}
941 	case KVM_CREATE_IRQCHIP: {
942 		struct kvm_irq_routing_entry routing;
943 
944 		r = -EINVAL;
945 		if (kvm->arch.use_irqchip) {
946 			/* Set up dummy routing. */
947 			memset(&routing, 0, sizeof(routing));
948 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
949 		}
950 		break;
951 	}
952 	case KVM_SET_DEVICE_ATTR: {
953 		r = -EFAULT;
954 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
955 			break;
956 		r = kvm_s390_vm_set_attr(kvm, &attr);
957 		break;
958 	}
959 	case KVM_GET_DEVICE_ATTR: {
960 		r = -EFAULT;
961 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
962 			break;
963 		r = kvm_s390_vm_get_attr(kvm, &attr);
964 		break;
965 	}
966 	case KVM_HAS_DEVICE_ATTR: {
967 		r = -EFAULT;
968 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
969 			break;
970 		r = kvm_s390_vm_has_attr(kvm, &attr);
971 		break;
972 	}
973 	case KVM_S390_GET_SKEYS: {
974 		struct kvm_s390_skeys args;
975 
976 		r = -EFAULT;
977 		if (copy_from_user(&args, argp,
978 				   sizeof(struct kvm_s390_skeys)))
979 			break;
980 		r = kvm_s390_get_skeys(kvm, &args);
981 		break;
982 	}
983 	case KVM_S390_SET_SKEYS: {
984 		struct kvm_s390_skeys args;
985 
986 		r = -EFAULT;
987 		if (copy_from_user(&args, argp,
988 				   sizeof(struct kvm_s390_skeys)))
989 			break;
990 		r = kvm_s390_set_skeys(kvm, &args);
991 		break;
992 	}
993 	default:
994 		r = -ENOTTY;
995 	}
996 
997 	return r;
998 }
999 
1000 static int kvm_s390_query_ap_config(u8 *config)
1001 {
1002 	u32 fcn_code = 0x04000000UL;
1003 	u32 cc = 0;
1004 
1005 	memset(config, 0, 128);
1006 	asm volatile(
1007 		"lgr 0,%1\n"
1008 		"lgr 2,%2\n"
1009 		".long 0xb2af0000\n"		/* PQAP(QCI) */
1010 		"0: ipm %0\n"
1011 		"srl %0,28\n"
1012 		"1:\n"
1013 		EX_TABLE(0b, 1b)
1014 		: "+r" (cc)
1015 		: "r" (fcn_code), "r" (config)
1016 		: "cc", "0", "2", "memory"
1017 	);
1018 
1019 	return cc;
1020 }
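/*
 * The inline assembly above issues PQAP; the opcode 0xb2af0000 is coded
 * directly so that older assemblers without the mnemonic still build it.
 * General register 0 carries the QCI function code (0x04000000, query the
 * crypto configuration), general register 2 the address of the 128-byte
 * response block.  The condition code is extracted with ipm/srl, and the
 * EX_TABLE entry turns a possible exception (e.g. when the AP instructions
 * are not installed) into a return with cc left at its initialized value,
 * so the zeroed config block then reads as "APXA not available".
 */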
1021 
1022 static int kvm_s390_apxa_installed(void)
1023 {
1024 	u8 config[128];
1025 	int cc;
1026 
1027 	if (test_facility(2) && test_facility(12)) {
1028 		cc = kvm_s390_query_ap_config(config);
1029 
1030 		if (cc)
1031 			pr_err("PQAP(QCI) failed with cc=%d", cc);
1032 		else
1033 			pr_err("PQAP(QCI) failed with cc=%d\n", cc);
1034 	}
1035 
1036 	return 0;
1037 }
1038 
1039 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1040 {
1041 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1042 
1043 	if (kvm_s390_apxa_installed())
1044 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1045 	else
1046 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1047 }
1048 
1049 static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
1050 {
1051 	get_cpu_id(cpu_id);
1052 	cpu_id->version = 0xff;
1053 }
1054 
1055 static int kvm_s390_crypto_init(struct kvm *kvm)
1056 {
1057 	if (!test_kvm_facility(kvm, 76))
1058 		return 0;
1059 
1060 	kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
1061 					 GFP_KERNEL | GFP_DMA);
1062 	if (!kvm->arch.crypto.crycb)
1063 		return -ENOMEM;
1064 
1065 	kvm_s390_set_crycb_format(kvm);
1066 
1067 	/* Enable AES/DEA protected key functions by default */
1068 	kvm->arch.crypto.aes_kw = 1;
1069 	kvm->arch.crypto.dea_kw = 1;
1070 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1071 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1072 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1073 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1074 
1075 	return 0;
1076 }
1077 
1078 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1079 {
1080 	int i, rc;
1081 	char debug_name[16];
1082 	static unsigned long sca_offset;
1083 
1084 	rc = -EINVAL;
1085 #ifdef CONFIG_KVM_S390_UCONTROL
1086 	if (type & ~KVM_VM_S390_UCONTROL)
1087 		goto out_err;
1088 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1089 		goto out_err;
1090 #else
1091 	if (type)
1092 		goto out_err;
1093 #endif
1094 
1095 	rc = s390_enable_sie();
1096 	if (rc)
1097 		goto out_err;
1098 
1099 	rc = -ENOMEM;
1100 
1101 	kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
1102 	if (!kvm->arch.sca)
1103 		goto out_err;
1104 	spin_lock(&kvm_lock);
1105 	sca_offset += 16;
1106 	if (sca_offset + sizeof(struct sca_block) > PAGE_SIZE)
1107 		sca_offset = 0;
1108 	kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
1109 	spin_unlock(&kvm_lock);
1110 
1111 	sprintf(debug_name, "kvm-%u", current->pid);
1112 
1113 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1114 	if (!kvm->arch.dbf)
1115 		goto out_err;
1116 
1117 	/*
1118 	 * The architectural maximum amount of facilities is 16 kbit. To store
1119 	 * this amount, 2 kbyte of memory is required. Thus we need a full
1120 	 * page to hold the guest facility list (arch.model.fac->list) and the
1121 	 * facility mask (arch.model.fac->mask). The page's address has to be
1122 	 * 31-bit addressable (hence GFP_DMA) and word aligned.
1123 	 */
1124 	kvm->arch.model.fac =
1125 		(struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1126 	if (!kvm->arch.model.fac)
1127 		goto out_err;
1128 
1129 	/* Populate the facility mask initially. */
1130 	memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
1131 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1132 	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1133 		if (i < kvm_s390_fac_list_mask_size())
1134 			kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i];
1135 		else
1136 			kvm->arch.model.fac->mask[i] = 0UL;
1137 	}
1138 
1139 	/* Populate the facility list initially. */
1140 	memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask,
1141 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1142 
1143 	kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
1144 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1145 
1146 	if (kvm_s390_crypto_init(kvm) < 0)
1147 		goto out_err;
1148 
1149 	spin_lock_init(&kvm->arch.float_int.lock);
1150 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
1151 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1152 	init_waitqueue_head(&kvm->arch.ipte_wq);
1153 	mutex_init(&kvm->arch.ipte_mutex);
1154 
1155 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1156 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
1157 
1158 	if (type & KVM_VM_S390_UCONTROL) {
1159 		kvm->arch.gmap = NULL;
1160 	} else {
1161 		kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1);
1162 		if (!kvm->arch.gmap)
1163 			goto out_err;
1164 		kvm->arch.gmap->private = kvm;
1165 		kvm->arch.gmap->pfault_enabled = 0;
1166 	}
1167 
1168 	kvm->arch.css_support = 0;
1169 	kvm->arch.use_irqchip = 0;
1170 	kvm->arch.epoch = 0;
1171 
1172 	spin_lock_init(&kvm->arch.start_stop_lock);
1173 	KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid);
1174 
1175 	return 0;
1176 out_err:
1177 	kfree(kvm->arch.crypto.crycb);
1178 	free_page((unsigned long)kvm->arch.model.fac);
1179 	debug_unregister(kvm->arch.dbf);
1180 	free_page((unsigned long)(kvm->arch.sca));
1181 	KVM_EVENT(3, "creation of vm failed: %d", rc);
1182 	return rc;
1183 }
1184 
1185 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1186 {
1187 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1188 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1189 	kvm_s390_clear_local_irqs(vcpu);
1190 	kvm_clear_async_pf_completion_queue(vcpu);
1191 	if (!kvm_is_ucontrol(vcpu->kvm)) {
1192 		clear_bit(63 - vcpu->vcpu_id,
1193 			  (unsigned long *) &vcpu->kvm->arch.sca->mcn);
1194 		if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
1195 		    (__u64) vcpu->arch.sie_block)
1196 			vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
1197 	}
1198 	smp_mb();
1199 
1200 	if (kvm_is_ucontrol(vcpu->kvm))
1201 		gmap_free(vcpu->arch.gmap);
1202 
1203 	if (vcpu->kvm->arch.use_cmma)
1204 		kvm_s390_vcpu_unsetup_cmma(vcpu);
1205 	free_page((unsigned long)(vcpu->arch.sie_block));
1206 
1207 	kvm_vcpu_uninit(vcpu);
1208 	kmem_cache_free(kvm_vcpu_cache, vcpu);
1209 }
1210 
1211 static void kvm_free_vcpus(struct kvm *kvm)
1212 {
1213 	unsigned int i;
1214 	struct kvm_vcpu *vcpu;
1215 
1216 	kvm_for_each_vcpu(i, vcpu, kvm)
1217 		kvm_arch_vcpu_destroy(vcpu);
1218 
1219 	mutex_lock(&kvm->lock);
1220 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1221 		kvm->vcpus[i] = NULL;
1222 
1223 	atomic_set(&kvm->online_vcpus, 0);
1224 	mutex_unlock(&kvm->lock);
1225 }
1226 
1227 void kvm_arch_destroy_vm(struct kvm *kvm)
1228 {
1229 	kvm_free_vcpus(kvm);
1230 	free_page((unsigned long)kvm->arch.model.fac);
1231 	free_page((unsigned long)(kvm->arch.sca));
1232 	debug_unregister(kvm->arch.dbf);
1233 	kfree(kvm->arch.crypto.crycb);
1234 	if (!kvm_is_ucontrol(kvm))
1235 		gmap_free(kvm->arch.gmap);
1236 	kvm_s390_destroy_adapters(kvm);
1237 	kvm_s390_clear_float_irqs(kvm);
1238 	KVM_EVENT(3, "vm 0x%p destroyed", kvm);
1239 }
1240 
1241 /* Section: vcpu related */
1242 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1243 {
1244 	vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
1245 	if (!vcpu->arch.gmap)
1246 		return -ENOMEM;
1247 	vcpu->arch.gmap->private = vcpu->kvm;
1248 
1249 	return 0;
1250 }
1251 
1252 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1253 {
1254 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1255 	kvm_clear_async_pf_completion_queue(vcpu);
1256 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1257 				    KVM_SYNC_GPRS |
1258 				    KVM_SYNC_ACRS |
1259 				    KVM_SYNC_CRS |
1260 				    KVM_SYNC_ARCH0 |
1261 				    KVM_SYNC_PFAULT;
1262 	if (test_kvm_facility(vcpu->kvm, 129))
1263 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1264 
1265 	if (kvm_is_ucontrol(vcpu->kvm))
1266 		return __kvm_ucontrol_vcpu_init(vcpu);
1267 
1268 	return 0;
1269 }
1270 
1271 /*
1272  * Backs up the current FP/VX register save area to a particular
1273  * destination.  Used to switch between different register save
1274  * areas.
1275  */
1276 static inline void save_fpu_to(struct fpu *dst)
1277 {
1278 	dst->fpc = current->thread.fpu.fpc;
1279 	dst->regs = current->thread.fpu.regs;
1280 }
1281 
1282 /*
1283  * Switches the FP/VX register save area from which register
1284  * contents are lazily restored.
1285  */
1286 static inline void load_fpu_from(struct fpu *from)
1287 {
1288 	current->thread.fpu.fpc = from->fpc;
1289 	current->thread.fpu.regs = from->regs;
1290 }
1291 
1292 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1293 {
1294 	/* Save host register state */
1295 	save_fpu_regs();
1296 	save_fpu_to(&vcpu->arch.host_fpregs);
1297 
1298 	if (test_kvm_facility(vcpu->kvm, 129)) {
1299 		current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
1300 		/*
1301 		 * Use the register save area in the SIE-control block
1302 		 * for register restore and save in kvm_arch_vcpu_put()
1303 		 */
1304 		current->thread.fpu.vxrs =
1305 			(__vector128 *)&vcpu->run->s.regs.vrs;
1306 	} else
1307 		load_fpu_from(&vcpu->arch.guest_fpregs);
1308 
1309 	if (test_fp_ctl(current->thread.fpu.fpc))
1310 		/* User space provided an invalid FPC, let's clear it */
1311 		current->thread.fpu.fpc = 0;
1312 
1313 	save_access_regs(vcpu->arch.host_acrs);
1314 	restore_access_regs(vcpu->run->s.regs.acrs);
1315 	gmap_enable(vcpu->arch.gmap);
1316 	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1317 }
1318 
1319 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1320 {
1321 	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1322 	gmap_disable(vcpu->arch.gmap);
1323 
1324 	save_fpu_regs();
1325 
1326 	if (test_kvm_facility(vcpu->kvm, 129))
1327 		/*
1328 		 * kvm_arch_vcpu_load() set up the register save area to
1329 		 * the &vcpu->run->s.regs.vrs and, thus, the vector registers
1330 		 * are already saved.  Only the floating-point control must be
1331 		 * copied.
1332 		 */
1333 		vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
1334 	else
1335 		save_fpu_to(&vcpu->arch.guest_fpregs);
1336 	load_fpu_from(&vcpu->arch.host_fpregs);
1337 
1338 	save_access_regs(vcpu->run->s.regs.acrs);
1339 	restore_access_regs(vcpu->arch.host_acrs);
1340 }
1341 
1342 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1343 {
1344 	/* this equals initial cpu reset in pop, but we don't switch to ESA */
1345 	vcpu->arch.sie_block->gpsw.mask = 0UL;
1346 	vcpu->arch.sie_block->gpsw.addr = 0UL;
1347 	kvm_s390_set_prefix(vcpu, 0);
1348 	vcpu->arch.sie_block->cputm     = 0UL;
1349 	vcpu->arch.sie_block->ckc       = 0UL;
1350 	vcpu->arch.sie_block->todpr     = 0;
1351 	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1352 	vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
1353 	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1354 	vcpu->arch.guest_fpregs.fpc = 0;
1355 	asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
1356 	vcpu->arch.sie_block->gbea = 1;
1357 	vcpu->arch.sie_block->pp = 0;
1358 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1359 	kvm_clear_async_pf_completion_queue(vcpu);
1360 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1361 		kvm_s390_vcpu_stop(vcpu);
1362 	kvm_s390_clear_local_irqs(vcpu);
1363 }
1364 
1365 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1366 {
1367 	mutex_lock(&vcpu->kvm->lock);
1368 	preempt_disable();
1369 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1370 	preempt_enable();
1371 	mutex_unlock(&vcpu->kvm->lock);
1372 	if (!kvm_is_ucontrol(vcpu->kvm))
1373 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1374 }
1375 
1376 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1377 {
1378 	if (!test_kvm_facility(vcpu->kvm, 76))
1379 		return;
1380 
1381 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1382 
1383 	if (vcpu->kvm->arch.crypto.aes_kw)
1384 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1385 	if (vcpu->kvm->arch.crypto.dea_kw)
1386 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1387 
1388 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1389 }
1390 
1391 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1392 {
1393 	free_page(vcpu->arch.sie_block->cbrlo);
1394 	vcpu->arch.sie_block->cbrlo = 0;
1395 }
1396 
1397 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1398 {
1399 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1400 	if (!vcpu->arch.sie_block->cbrlo)
1401 		return -ENOMEM;
1402 
1403 	vcpu->arch.sie_block->ecb2 |= 0x80;
1404 	vcpu->arch.sie_block->ecb2 &= ~0x08;
1405 	return 0;
1406 }
1407 
1408 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1409 {
1410 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1411 
1412 	vcpu->arch.cpu_id = model->cpu_id;
1413 	vcpu->arch.sie_block->ibc = model->ibc;
1414 	vcpu->arch.sie_block->fac = (int) (long) model->fac->list;
1415 }
1416 
1417 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1418 {
1419 	int rc = 0;
1420 
1421 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1422 						    CPUSTAT_SM |
1423 						    CPUSTAT_STOPPED);
1424 
1425 	if (test_kvm_facility(vcpu->kvm, 78))
1426 		atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1427 	else if (test_kvm_facility(vcpu->kvm, 8))
1428 		atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1429 
1430 	kvm_s390_vcpu_setup_model(vcpu);
1431 
1432 	vcpu->arch.sie_block->ecb   = 6;
1433 	if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
1434 		vcpu->arch.sie_block->ecb |= 0x10;
1435 
1436 	vcpu->arch.sie_block->ecb2  = 8;
1437 	vcpu->arch.sie_block->eca   = 0xC1002000U;
1438 	if (sclp.has_siif)
1439 		vcpu->arch.sie_block->eca |= 1;
1440 	if (sclp.has_sigpif)
1441 		vcpu->arch.sie_block->eca |= 0x10000000U;
1442 	if (test_kvm_facility(vcpu->kvm, 129)) {
1443 		vcpu->arch.sie_block->eca |= 0x00020000;
1444 		vcpu->arch.sie_block->ecd |= 0x20000000;
1445 	}
1446 	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1447 
1448 	if (vcpu->kvm->arch.use_cmma) {
1449 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
1450 		if (rc)
1451 			return rc;
1452 	}
1453 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1454 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1455 
1456 	kvm_s390_vcpu_crypto_setup(vcpu);
1457 
1458 	return rc;
1459 }
1460 
1461 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1462 				      unsigned int id)
1463 {
1464 	struct kvm_vcpu *vcpu;
1465 	struct sie_page *sie_page;
1466 	int rc = -EINVAL;
1467 
1468 	if (id >= KVM_MAX_VCPUS)
1469 		goto out;
1470 
1471 	rc = -ENOMEM;
1472 
1473 	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1474 	if (!vcpu)
1475 		goto out;
1476 
1477 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1478 	if (!sie_page)
1479 		goto out_free_cpu;
1480 
1481 	vcpu->arch.sie_block = &sie_page->sie_block;
1482 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1483 
1484 	vcpu->arch.sie_block->icpua = id;
1485 	if (!kvm_is_ucontrol(kvm)) {
1486 		if (!kvm->arch.sca) {
1487 			WARN_ON_ONCE(1);
1488 			goto out_free_cpu;
1489 		}
1490 		if (!kvm->arch.sca->cpu[id].sda)
1491 			kvm->arch.sca->cpu[id].sda =
1492 				(__u64) vcpu->arch.sie_block;
1493 		vcpu->arch.sie_block->scaoh =
1494 			(__u32)(((__u64)kvm->arch.sca) >> 32);
1495 		vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
1496 		set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
1497 	}
1498 
1499 	spin_lock_init(&vcpu->arch.local_int.lock);
1500 	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
1501 	vcpu->arch.local_int.wq = &vcpu->wq;
1502 	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
1503 
1504 	/*
1505 	 * Allocate a save area for floating-point registers.  If the vector
1506 	 * extension is available, register contents are saved in the SIE
1507 	 * control block.  The allocated save area is still required in
1508 	 * particular places, for example, in kvm_s390_vcpu_store_status().
1509 	 */
1510 	vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
1511 					       GFP_KERNEL);
1512 	if (!vcpu->arch.guest_fpregs.fprs) {
1513 		rc = -ENOMEM;
1514 		goto out_free_sie_block;
1515 	}
1516 
1517 	rc = kvm_vcpu_init(vcpu, kvm, id);
1518 	if (rc)
1519 		goto out_free_sie_block;
1520 	VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
1521 		 vcpu->arch.sie_block);
1522 	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
1523 
1524 	return vcpu;
1525 out_free_sie_block:
1526 	free_page((unsigned long)(vcpu->arch.sie_block));
1527 out_free_cpu:
1528 	kmem_cache_free(kvm_vcpu_cache, vcpu);
1529 out:
1530 	return ERR_PTR(rc);
1531 }
1532 
1533 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
1534 {
1535 	return kvm_s390_vcpu_has_irq(vcpu, 0);
1536 }
1537 
1538 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
1539 {
1540 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1541 	exit_sie(vcpu);
1542 }
1543 
1544 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
1545 {
1546 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1547 }
1548 
1549 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
1550 {
1551 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1552 	exit_sie(vcpu);
1553 }
1554 
1555 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
1556 {
1557 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1558 }
1559 
1560 /*
1561  * Kick a guest cpu out of SIE and wait until SIE is not running.
1562  * If the CPU is not running (e.g. waiting as idle) the function will
1563  * return immediately. */
1564 void exit_sie(struct kvm_vcpu *vcpu)
1565 {
1566 	atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
1567 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
1568 		cpu_relax();
1569 }
1570 
1571 /* Kick a guest cpu out of SIE to process a request synchronously */
1572 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
1573 {
1574 	kvm_make_request(req, vcpu);
1575 	kvm_s390_vcpu_request(vcpu);
1576 }
1577 
1578 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
1579 {
1580 	int i;
1581 	struct kvm *kvm = gmap->private;
1582 	struct kvm_vcpu *vcpu;
1583 
1584 	kvm_for_each_vcpu(i, vcpu, kvm) {
1585 		/* match against both prefix pages */
1586 		if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
1587 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
1588 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
1589 		}
1590 	}
1591 }
1592 
1593 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
1594 {
1595 	/* kvm common code refers to this, but never calls it */
1596 	BUG();
1597 	return 0;
1598 }
1599 
1600 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
1601 					   struct kvm_one_reg *reg)
1602 {
1603 	int r = -EINVAL;
1604 
1605 	switch (reg->id) {
1606 	case KVM_REG_S390_TODPR:
1607 		r = put_user(vcpu->arch.sie_block->todpr,
1608 			     (u32 __user *)reg->addr);
1609 		break;
1610 	case KVM_REG_S390_EPOCHDIFF:
1611 		r = put_user(vcpu->arch.sie_block->epoch,
1612 			     (u64 __user *)reg->addr);
1613 		break;
1614 	case KVM_REG_S390_CPU_TIMER:
1615 		r = put_user(vcpu->arch.sie_block->cputm,
1616 			     (u64 __user *)reg->addr);
1617 		break;
1618 	case KVM_REG_S390_CLOCK_COMP:
1619 		r = put_user(vcpu->arch.sie_block->ckc,
1620 			     (u64 __user *)reg->addr);
1621 		break;
1622 	case KVM_REG_S390_PFTOKEN:
1623 		r = put_user(vcpu->arch.pfault_token,
1624 			     (u64 __user *)reg->addr);
1625 		break;
1626 	case KVM_REG_S390_PFCOMPARE:
1627 		r = put_user(vcpu->arch.pfault_compare,
1628 			     (u64 __user *)reg->addr);
1629 		break;
1630 	case KVM_REG_S390_PFSELECT:
1631 		r = put_user(vcpu->arch.pfault_select,
1632 			     (u64 __user *)reg->addr);
1633 		break;
1634 	case KVM_REG_S390_PP:
1635 		r = put_user(vcpu->arch.sie_block->pp,
1636 			     (u64 __user *)reg->addr);
1637 		break;
1638 	case KVM_REG_S390_GBEA:
1639 		r = put_user(vcpu->arch.sie_block->gbea,
1640 			     (u64 __user *)reg->addr);
1641 		break;
1642 	default:
1643 		break;
1644 	}
1645 
1646 	return r;
1647 }
1648 
1649 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
1650 					   struct kvm_one_reg *reg)
1651 {
1652 	int r = -EINVAL;
1653 
1654 	switch (reg->id) {
1655 	case KVM_REG_S390_TODPR:
1656 		r = get_user(vcpu->arch.sie_block->todpr,
1657 			     (u32 __user *)reg->addr);
1658 		break;
1659 	case KVM_REG_S390_EPOCHDIFF:
1660 		r = get_user(vcpu->arch.sie_block->epoch,
1661 			     (u64 __user *)reg->addr);
1662 		break;
1663 	case KVM_REG_S390_CPU_TIMER:
1664 		r = get_user(vcpu->arch.sie_block->cputm,
1665 			     (u64 __user *)reg->addr);
1666 		break;
1667 	case KVM_REG_S390_CLOCK_COMP:
1668 		r = get_user(vcpu->arch.sie_block->ckc,
1669 			     (u64 __user *)reg->addr);
1670 		break;
1671 	case KVM_REG_S390_PFTOKEN:
1672 		r = get_user(vcpu->arch.pfault_token,
1673 			     (u64 __user *)reg->addr);
1674 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1675 			kvm_clear_async_pf_completion_queue(vcpu);
1676 		break;
1677 	case KVM_REG_S390_PFCOMPARE:
1678 		r = get_user(vcpu->arch.pfault_compare,
1679 			     (u64 __user *)reg->addr);
1680 		break;
1681 	case KVM_REG_S390_PFSELECT:
1682 		r = get_user(vcpu->arch.pfault_select,
1683 			     (u64 __user *)reg->addr);
1684 		break;
1685 	case KVM_REG_S390_PP:
1686 		r = get_user(vcpu->arch.sie_block->pp,
1687 			     (u64 __user *)reg->addr);
1688 		break;
1689 	case KVM_REG_S390_GBEA:
1690 		r = get_user(vcpu->arch.sie_block->gbea,
1691 			     (u64 __user *)reg->addr);
1692 		break;
1693 	default:
1694 		break;
1695 	}
1696 
1697 	return r;
1698 }
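/*
 * These registers are reached from userspace through the generic
 * KVM_GET_ONE_REG/KVM_SET_ONE_REG ioctls on the VCPU fd.  A minimal sketch
 * of reading the epoch difference, assuming vcpu_fd:
 *
 *   __u64 epochdiff;
 *   struct kvm_one_reg reg = {
 *           .id   = KVM_REG_S390_EPOCHDIFF,
 *           .addr = (__u64)(unsigned long)&epochdiff,
 *   };
 *
 *   if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
 *           perror("KVM_GET_ONE_REG");
 */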
1699 
1700 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
1701 {
1702 	kvm_s390_vcpu_initial_reset(vcpu);
1703 	return 0;
1704 }
1705 
1706 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1707 {
1708 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
1709 	return 0;
1710 }
1711 
1712 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1713 {
1714 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
1715 	return 0;
1716 }
1717 
1718 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
1719 				  struct kvm_sregs *sregs)
1720 {
1721 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
1722 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
1723 	restore_access_regs(vcpu->run->s.regs.acrs);
1724 	return 0;
1725 }
1726 
1727 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
1728 				  struct kvm_sregs *sregs)
1729 {
1730 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
1731 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
1732 	return 0;
1733 }
1734 
1735 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1736 {
1737 	if (test_fp_ctl(fpu->fpc))
1738 		return -EINVAL;
1739 	memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
1740 	vcpu->arch.guest_fpregs.fpc = fpu->fpc;
1741 	save_fpu_regs();
1742 	load_fpu_from(&vcpu->arch.guest_fpregs);
1743 	return 0;
1744 }
1745 
1746 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1747 {
1748 	memcpy(&fpu->fprs, vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
1749 	fpu->fpc = vcpu->arch.guest_fpregs.fpc;
1750 	return 0;
1751 }
1752 
1753 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
1754 {
1755 	int rc = 0;
1756 
1757 	if (!is_vcpu_stopped(vcpu))
1758 		rc = -EBUSY;
1759 	else {
1760 		vcpu->run->psw_mask = psw.mask;
1761 		vcpu->run->psw_addr = psw.addr;
1762 	}
1763 	return rc;
1764 }
1765 
1766 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
1767 				  struct kvm_translation *tr)
1768 {
1769 	return -EINVAL; /* not implemented yet */
1770 }
1771 
1772 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
1773 			      KVM_GUESTDBG_USE_HW_BP | \
1774 			      KVM_GUESTDBG_ENABLE)
1775 
1776 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
1777 					struct kvm_guest_debug *dbg)
1778 {
1779 	int rc = 0;
1780 
1781 	vcpu->guest_debug = 0;
1782 	kvm_s390_clear_bp_data(vcpu);
1783 
1784 	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
1785 		return -EINVAL;
1786 
1787 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
1788 		vcpu->guest_debug = dbg->control;
1789 		/* enforce guest PER */
1790 		atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1791 
1792 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
1793 			rc = kvm_s390_import_bp_data(vcpu, dbg);
1794 	} else {
1795 		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1796 		vcpu->arch.guestdbg.last_bp = 0;
1797 	}
1798 
1799 	if (rc) {
1800 		vcpu->guest_debug = 0;
1801 		kvm_s390_clear_bp_data(vcpu);
1802 		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1803 	}
1804 
1805 	return rc;
1806 }
1807 
1808 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
1809 				    struct kvm_mp_state *mp_state)
1810 {
1811 	/* CHECK_STOP and LOAD are not supported yet */
1812 	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
1813 				       KVM_MP_STATE_OPERATING;
1814 }
1815 
1816 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
1817 				    struct kvm_mp_state *mp_state)
1818 {
1819 	int rc = 0;
1820 
1821 	/* user space knows about this interface - let it control the state */
1822 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
1823 
1824 	switch (mp_state->mp_state) {
1825 	case KVM_MP_STATE_STOPPED:
1826 		kvm_s390_vcpu_stop(vcpu);
1827 		break;
1828 	case KVM_MP_STATE_OPERATING:
1829 		kvm_s390_vcpu_start(vcpu);
1830 		break;
1831 	case KVM_MP_STATE_LOAD:
1832 	case KVM_MP_STATE_CHECK_STOP:
1833 		/* fall through - CHECK_STOP and LOAD are not supported yet */
1834 	default:
1835 		rc = -ENXIO;
1836 	}
1837 
1838 	return rc;
1839 }
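/*
 * A minimal sketch of stopping a VCPU from userspace via the interface
 * above (advertised through KVM_CAP_MP_STATE), assuming vcpu_fd:
 *
 *   struct kvm_mp_state st = { .mp_state = KVM_MP_STATE_STOPPED };
 *
 *   if (ioctl(vcpu_fd, KVM_SET_MP_STATE, &st) < 0)
 *           perror("KVM_SET_MP_STATE");
 *   // this also flags user_cpu_state_ctrl, see the comment above
 */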
1840 
1841 static bool ibs_enabled(struct kvm_vcpu *vcpu)
1842 {
1843 	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
1844 }
1845 
1846 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
1847 {
1848 retry:
1849 	kvm_s390_vcpu_request_handled(vcpu);
1850 	if (!vcpu->requests)
1851 		return 0;
1852 	/*
1853 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
1854 	 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
1855 	 * This ensures that the ipte instruction for this request has
1856 	 * already finished. We might race against a second unmapper that
1857 	 * wants to set the blocking bit. Lets just retry the request loop.
1858 	 * wants to set the blocking bit. Let's just retry the request loop.
1859 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
1860 		int rc;
1861 		rc = gmap_ipte_notify(vcpu->arch.gmap,
1862 				      kvm_s390_get_prefix(vcpu),
1863 				      PAGE_SIZE * 2);
1864 		if (rc)
1865 			return rc;
1866 		goto retry;
1867 	}
1868 
1869 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
1870 		vcpu->arch.sie_block->ihcpu = 0xffff;
1871 		goto retry;
1872 	}
1873 
1874 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
1875 		if (!ibs_enabled(vcpu)) {
1876 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
1877 			atomic_or(CPUSTAT_IBS,
1878 					&vcpu->arch.sie_block->cpuflags);
1879 		}
1880 		goto retry;
1881 	}
1882 
1883 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
1884 		if (ibs_enabled(vcpu)) {
1885 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
1886 			atomic_andnot(CPUSTAT_IBS,
1887 					  &vcpu->arch.sie_block->cpuflags);
1888 		}
1889 		goto retry;
1890 	}
1891 
1892 	/* nothing to do, just clear the request */
1893 	clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
1894 
1895 	return 0;
1896 }
1897 
1898 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
1899 {
1900 	struct kvm_vcpu *vcpu;
1901 	int i;
1902 
1903 	mutex_lock(&kvm->lock);
1904 	preempt_disable();
1905 	kvm->arch.epoch = tod - get_tod_clock();
1906 	kvm_s390_vcpu_block_all(kvm);
1907 	kvm_for_each_vcpu(i, vcpu, kvm)
1908 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
1909 	kvm_s390_vcpu_unblock_all(kvm);
1910 	preempt_enable();
1911 	mutex_unlock(&kvm->lock);
1912 }
1913 
1914 /**
1915  * kvm_arch_fault_in_page - fault-in guest page if necessary
1916  * @vcpu: The corresponding virtual cpu
1917  * @gpa: Guest physical address
1918  * @writable: Whether the page should be writable or not
1919  *
1920  * Make sure that a guest page has been faulted-in on the host.
1921  *
1922  * Return: Zero on success, negative error code otherwise.
1923  */
1924 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
1925 {
1926 	return gmap_fault(vcpu->arch.gmap, gpa,
1927 			  writable ? FAULT_FLAG_WRITE : 0);
1928 }
1929 
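/*
 * Pfault tokens are delivered as external interrupts: the "init" token is
 * injected into the faulting VCPU itself (kvm_s390_inject_vcpu), while the
 * "done" token is posted as a floating interrupt to the whole VM
 * (kvm_s390_inject_vm).
 */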
1930 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
1931 				      unsigned long token)
1932 {
1933 	struct kvm_s390_interrupt inti;
1934 	struct kvm_s390_irq irq;
1935 
1936 	if (start_token) {
1937 		irq.u.ext.ext_params2 = token;
1938 		irq.type = KVM_S390_INT_PFAULT_INIT;
1939 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
1940 	} else {
1941 		inti.type = KVM_S390_INT_PFAULT_DONE;
1942 		inti.parm64 = token;
1943 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
1944 	}
1945 }
1946 
1947 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
1948 				     struct kvm_async_pf *work)
1949 {
1950 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
1951 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
1952 }
1953 
1954 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
1955 				 struct kvm_async_pf *work)
1956 {
1957 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
1958 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
1959 }
1960 
1961 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
1962 			       struct kvm_async_pf *work)
1963 {
1964 	/* s390 will always inject the page directly */
1965 }
1966 
1967 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
1968 {
1969 	/*
1970 	 * s390 will always inject the page directly,
1971 	 * but we still want check_async_completion to clean up
1972 	 */
1973 	return true;
1974 }
1975 
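/*
 * Decide whether a host fault may be handled asynchronously via the pfault
 * mechanism.  The early exits below fall back to synchronous handling: no
 * valid pfault token, PSW mask not matching the select/compare criteria,
 * external interrupts disabled, an interrupt already pending, the required
 * bit (0x200) not set in CR0, or pfault disabled for this gmap.
 */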
1976 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
1977 {
1978 	hva_t hva;
1979 	struct kvm_arch_async_pf arch;
1980 	int rc;
1981 
1982 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1983 		return 0;
1984 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
1985 	    vcpu->arch.pfault_compare)
1986 		return 0;
1987 	if (psw_extint_disabled(vcpu))
1988 		return 0;
1989 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
1990 		return 0;
1991 	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
1992 		return 0;
1993 	if (!vcpu->arch.gmap->pfault_enabled)
1994 		return 0;
1995 
1996 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
1997 	hva += current->thread.gmap_addr & ~PAGE_MASK;
1998 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
1999 		return 0;
2000 
2001 	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2002 	return rc;
2003 }
2004 
2005 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2006 {
2007 	int rc, cpuflags;
2008 
2009 	/*
2010 	 * On s390, notifications for arriving pages will be delivered directly
2011 	 * to the guest, but the housekeeping for completed pfaults is
2012 	 * handled outside the worker.
2013 	 */
2014 	kvm_check_async_pf_completion(vcpu);
2015 
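	/* mirror guest r14/r15 into the gg14/gg15 fields of the SIE block */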
2016 	memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);
2017 
2018 	if (need_resched())
2019 		schedule();
2020 
2021 	if (test_cpu_flag(CIF_MCCK_PENDING))
2022 		s390_handle_mcck();
2023 
2024 	if (!kvm_is_ucontrol(vcpu->kvm)) {
2025 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
2026 		if (rc)
2027 			return rc;
2028 	}
2029 
2030 	rc = kvm_s390_handle_requests(vcpu);
2031 	if (rc)
2032 		return rc;
2033 
2034 	if (guestdbg_enabled(vcpu)) {
2035 		kvm_s390_backup_guest_per_regs(vcpu);
2036 		kvm_s390_patch_guest_per_regs(vcpu);
2037 	}
2038 
2039 	vcpu->arch.sie_block->icptcode = 0;
2040 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2041 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2042 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
2043 
2044 	return 0;
2045 }
2046 
2047 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2048 {
2049 	psw_t *psw = &vcpu->arch.sie_block->gpsw;
2050 	u8 opcode;
2051 	int rc;
2052 
2053 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2054 	trace_kvm_s390_sie_fault(vcpu);
2055 
2056 	/*
2057 	 * We want to inject an addressing exception, which is defined as a
2058 	 * suppressing or terminating exception. However, since we came here
2059 	 * by a DAT access exception, the PSW still points to the faulting
2060 	 * instruction since DAT exceptions are nullifying. So we've got
2061 	 * to look up the current opcode to get the length of the instruction
2062 	 * to be able to forward the PSW.
2063 	 */
2064 	rc = read_guest(vcpu, psw->addr, 0, &opcode, 1);
2065 	if (rc)
2066 		return kvm_s390_inject_prog_cond(vcpu, rc);
2067 	psw->addr = __rewind_psw(*psw, -insn_length(opcode));
2068 
2069 	return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
2070 }
2071 
2072 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2073 {
2074 	int rc = -1;
2075 
2076 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2077 		   vcpu->arch.sie_block->icptcode);
2078 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2079 
2080 	if (guestdbg_enabled(vcpu))
2081 		kvm_s390_restore_guest_per_regs(vcpu);
2082 
2083 	if (exit_reason >= 0) {
2084 		rc = 0;
2085 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
2086 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2087 		vcpu->run->s390_ucontrol.trans_exc_code =
2088 						current->thread.gmap_addr;
2089 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
2090 		rc = -EREMOTE;
2091 
2092 	} else if (current->thread.gmap_pfault) {
2093 		trace_kvm_s390_major_guest_pfault(vcpu);
2094 		current->thread.gmap_pfault = 0;
2095 		if (kvm_arch_setup_async_pf(vcpu)) {
2096 			rc = 0;
2097 		} else {
2098 			gpa_t gpa = current->thread.gmap_addr;
2099 			rc = kvm_arch_fault_in_page(vcpu, gpa, 1);
2100 		}
2101 	}
2102 
2103 	if (rc == -1)
2104 		rc = vcpu_post_run_fault_in_sie(vcpu);
2105 
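	/* copy the possibly modified gg14/gg15 back into guest r14/r15 */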
2106 	memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
2107 
2108 	if (rc == 0) {
2109 		if (kvm_is_ucontrol(vcpu->kvm))
2110 			/* Don't exit for host interrupts. */
2111 			rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0;
2112 		else
2113 			rc = kvm_handle_sie_intercept(vcpu);
2114 	}
2115 
2116 	return rc;
2117 }
2118 
2119 static int __vcpu_run(struct kvm_vcpu *vcpu)
2120 {
2121 	int rc, exit_reason;
2122 
2123 	/*
2124 	 * We try to hold kvm->srcu during most of vcpu_run (except when
2125 	 * running the guest), so that memslots (and other stuff) are protected
2126 	 */
2127 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2128 
2129 	do {
2130 		rc = vcpu_pre_run(vcpu);
2131 		if (rc)
2132 			break;
2133 
2134 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2135 		/*
2136 		 * As PF_VCPU will be used in the fault handler, there must be
2137 		 * no uaccess between guest_enter and guest_exit.
2138 		 */
2139 		local_irq_disable();
2140 		__kvm_guest_enter();
2141 		local_irq_enable();
2142 		exit_reason = sie64a(vcpu->arch.sie_block,
2143 				     vcpu->run->s.regs.gprs);
2144 		local_irq_disable();
2145 		__kvm_guest_exit();
2146 		local_irq_enable();
2147 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2148 
2149 		rc = vcpu_post_run(vcpu, exit_reason);
2150 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2151 
2152 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2153 	return rc;
2154 }
2155 
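/*
 * Transfer the register state that user space marked dirty in kvm_run
 * into the VCPU/SIE control block before (re)entering the guest.
 */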
2156 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2157 {
2158 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2159 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2160 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2161 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2162 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2163 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2164 		/* some control register changes require a tlb flush */
2165 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2166 	}
2167 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2168 		vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
2169 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2170 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2171 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2172 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2173 	}
2174 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2175 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2176 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2177 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2178 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2179 			kvm_clear_async_pf_completion_queue(vcpu);
2180 	}
2181 	kvm_run->kvm_dirty_regs = 0;
2182 }
2183 
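/*
 * Mirror the current VCPU/SIE register state back into kvm_run so that
 * user space sees up-to-date values after KVM_RUN returns.
 */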
2184 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2185 {
2186 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2187 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2188 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2189 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2190 	kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
2191 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2192 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2193 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2194 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2195 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2196 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2197 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2198 }
2199 
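/*
 * The KVM_RUN ioctl: synchronize register state from kvm_run, run the
 * guest until an exit that needs user space attention, translate unhandled
 * intercepts into kvm_run exit data and copy the register state back.
 */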
2200 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2201 {
2202 	int rc;
2203 	sigset_t sigsaved;
2204 
2205 	if (guestdbg_exit_pending(vcpu)) {
2206 		kvm_s390_prepare_debug_exit(vcpu);
2207 		return 0;
2208 	}
2209 
2210 	if (vcpu->sigset_active)
2211 		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2212 
2213 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2214 		kvm_s390_vcpu_start(vcpu);
2215 	} else if (is_vcpu_stopped(vcpu)) {
2216 		pr_err_ratelimited("can't run stopped vcpu %d\n",
2217 				   vcpu->vcpu_id);
2218 		return -EINVAL;
2219 	}
2220 
2221 	sync_regs(vcpu, kvm_run);
2222 
2223 	might_fault();
2224 	rc = __vcpu_run(vcpu);
2225 
2226 	if (signal_pending(current) && !rc) {
2227 		kvm_run->exit_reason = KVM_EXIT_INTR;
2228 		rc = -EINTR;
2229 	}
2230 
2231 	if (guestdbg_exit_pending(vcpu) && !rc)  {
2232 		kvm_s390_prepare_debug_exit(vcpu);
2233 		rc = 0;
2234 	}
2235 
2236 	if (rc == -EOPNOTSUPP) {
2237 		/* intercept cannot be handled in-kernel, prepare kvm-run */
2238 		kvm_run->exit_reason         = KVM_EXIT_S390_SIEIC;
2239 		kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2240 		kvm_run->s390_sieic.ipa      = vcpu->arch.sie_block->ipa;
2241 		kvm_run->s390_sieic.ipb      = vcpu->arch.sie_block->ipb;
2242 		rc = 0;
2243 	}
2244 
2245 	if (rc == -EREMOTE) {
2246 		/* intercept was handled, but userspace support is needed;
2247 		 * kvm_run has been prepared by the handler */
2248 		rc = 0;
2249 	}
2250 
2251 	store_regs(vcpu, kvm_run);
2252 
2253 	if (vcpu->sigset_active)
2254 		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2255 
2256 	vcpu->stat.exit_userspace++;
2257 	return rc;
2258 }
2259 
2260 /*
2261  * store status at address
2262  * we have two special cases:
2263  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2264  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2265  */
2266 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2267 {
2268 	unsigned char archmode = 1;
2269 	unsigned int px;
2270 	u64 clkcomp;
2271 	int rc;
2272 
2273 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2274 		if (write_guest_abs(vcpu, 163, &archmode, 1))
2275 			return -EFAULT;
2276 		gpa = SAVE_AREA_BASE;
2277 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2278 		if (write_guest_real(vcpu, 163, &archmode, 1))
2279 			return -EFAULT;
2280 		gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE);
2281 	}
2282 	rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs),
2283 			     vcpu->arch.guest_fpregs.fprs, 128);
2284 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs),
2285 			      vcpu->run->s.regs.gprs, 128);
2286 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw),
2287 			      &vcpu->arch.sie_block->gpsw, 16);
2288 	px = kvm_s390_get_prefix(vcpu);
2289 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg),
2290 			      &px, 4);
2291 	rc |= write_guest_abs(vcpu,
2292 			      gpa + offsetof(struct save_area, fp_ctrl_reg),
2293 			      &vcpu->arch.guest_fpregs.fpc, 4);
2294 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg),
2295 			      &vcpu->arch.sie_block->todpr, 4);
2296 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer),
2297 			      &vcpu->arch.sie_block->cputm, 8);
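	/* the clock comparator is stored shifted right by 8 bits */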
2298 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
2299 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp),
2300 			      &clkcomp, 8);
2301 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs),
2302 			      &vcpu->run->s.regs.acrs, 64);
2303 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs),
2304 			      &vcpu->arch.sie_block->gcr, 128);
2305 	return rc ? -EFAULT : 0;
2306 }
2307 
2308 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2309 {
2310 	/*
2311 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2312 	 * copying in vcpu load/put. Let's update our copies before we save
2313 	 * them into the save area.
2314 	 */
2315 	save_fpu_regs();
2316 	if (test_kvm_facility(vcpu->kvm, 129)) {
2317 		/*
2318 		 * If the vector extension is available, the vector registers
2319 		 * which overlap with the floating-point registers are saved in
2320 		 * the SIE-control block.  Hence, extract the floating-point
2321 		 * registers and the FPC value and store them in the
2322 		 * guest_fpregs structure.
2323 		 */
2324 		vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc;
2325 		convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs,
2326 				 current->thread.fpu.vxrs);
2327 	} else
2328 		save_fpu_to(&vcpu->arch.guest_fpregs);
2329 	save_access_regs(vcpu->run->s.regs.acrs);
2330 
2331 	return kvm_s390_store_status_unloaded(vcpu, addr);
2332 }
2333 
2334 /*
2335  * store additional status at address
2336  */
2337 int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
2338 					unsigned long gpa)
2339 {
2340 	/* Only bits 0-53 are used for address formation */
2341 	if (!(gpa & ~0x3ff))
2342 		return 0;
2343 
2344 	return write_guest_abs(vcpu, gpa & ~0x3ff,
2345 			       (void *)&vcpu->run->s.regs.vrs, 512);
2346 }
2347 
2348 int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
2349 {
2350 	if (!test_kvm_facility(vcpu->kvm, 129))
2351 		return 0;
2352 
2353 	/*
2354 	 * The guest VXRS are in the host VXRS due to the lazy
2355 	 * copying in vcpu load/put. We can simply call save_fpu_regs()
2356 	 * to save the current register state because we are in the
2357 	 * middle of a load/put cycle.
2358 	 *
2359 	 * Let's update our copies before we save it into the save area.
2360 	 */
2361 	save_fpu_regs();
2362 
2363 	return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
2364 }
2365 
2366 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2367 {
2368 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2369 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2370 }
2371 
2372 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2373 {
2374 	unsigned int i;
2375 	struct kvm_vcpu *vcpu;
2376 
2377 	kvm_for_each_vcpu(i, vcpu, kvm) {
2378 		__disable_ibs_on_vcpu(vcpu);
2379 	}
2380 }
2381 
2382 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2383 {
2384 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2385 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2386 }
2387 
2388 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2389 {
2390 	int i, online_vcpus, started_vcpus = 0;
2391 
2392 	if (!is_vcpu_stopped(vcpu))
2393 		return;
2394 
2395 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2396 	/* Only one cpu at a time may enter/leave the STOPPED state. */
2397 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
2398 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2399 
2400 	for (i = 0; i < online_vcpus; i++) {
2401 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2402 			started_vcpus++;
2403 	}
2404 
2405 	if (started_vcpus == 0) {
2406 		/* we're the only active VCPU -> speed it up */
2407 		__enable_ibs_on_vcpu(vcpu);
2408 	} else if (started_vcpus == 1) {
2409 		/*
2410 		 * As we are starting a second VCPU, we have to disable
2411 		 * the IBS facility on all VCPUs to remove potentially
2412 		 * outstanding ENABLE requests.
2413 		 */
2414 		__disable_ibs_on_all_vcpus(vcpu->kvm);
2415 	}
2416 
2417 	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2418 	/*
2419 	 * Another VCPU might have used IBS while we were offline.
2420 	 * Let's play safe and flush the VCPU at startup.
2421 	 */
2422 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2423 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2424 	return;
2425 }
2426 
2427 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2428 {
2429 	int i, online_vcpus, started_vcpus = 0;
2430 	struct kvm_vcpu *started_vcpu = NULL;
2431 
2432 	if (is_vcpu_stopped(vcpu))
2433 		return;
2434 
2435 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2436 	/* Only one cpu at a time may enter/leave the STOPPED state. */
2437 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
2438 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2439 
2440 	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
2441 	kvm_s390_clear_stop_irq(vcpu);
2442 
2443 	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2444 	__disable_ibs_on_vcpu(vcpu);
2445 
2446 	for (i = 0; i < online_vcpus; i++) {
2447 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2448 			started_vcpus++;
2449 			started_vcpu = vcpu->kvm->vcpus[i];
2450 		}
2451 	}
2452 
2453 	if (started_vcpus == 1) {
2454 		/*
2455 		 * As we only have one VCPU left, we want to enable the
2456 		 * IBS facility for that VCPU to speed it up.
2457 		 */
2458 		__enable_ibs_on_vcpu(started_vcpu);
2459 	}
2460 
2461 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2462 	return;
2463 }
2464 
2465 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2466 				     struct kvm_enable_cap *cap)
2467 {
2468 	int r;
2469 
2470 	if (cap->flags)
2471 		return -EINVAL;
2472 
2473 	switch (cap->cap) {
2474 	case KVM_CAP_S390_CSS_SUPPORT:
2475 		if (!vcpu->kvm->arch.css_support) {
2476 			vcpu->kvm->arch.css_support = 1;
2477 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
2478 			trace_kvm_s390_enable_css(vcpu->kvm);
2479 		}
2480 		r = 0;
2481 		break;
2482 	default:
2483 		r = -EINVAL;
2484 		break;
2485 	}
2486 	return r;
2487 }
2488 
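/*
 * Handle the KVM_S390_MEM_OP ioctl on a VCPU: read or write guest logical
 * memory through a temporary kernel buffer, or merely check the access
 * permissions when KVM_S390_MEMOP_F_CHECK_ONLY is set.  On an access error
 * the architected program interruption can optionally be injected into the
 * guest (KVM_S390_MEMOP_F_INJECT_EXCEPTION).
 */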
2489 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
2490 				  struct kvm_s390_mem_op *mop)
2491 {
2492 	void __user *uaddr = (void __user *)mop->buf;
2493 	void *tmpbuf = NULL;
2494 	int r, srcu_idx;
2495 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
2496 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
2497 
2498 	if (mop->flags & ~supported_flags)
2499 		return -EINVAL;
2500 
2501 	if (mop->size > MEM_OP_MAX_SIZE)
2502 		return -E2BIG;
2503 
2504 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2505 		tmpbuf = vmalloc(mop->size);
2506 		if (!tmpbuf)
2507 			return -ENOMEM;
2508 	}
2509 
2510 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2511 
2512 	switch (mop->op) {
2513 	case KVM_S390_MEMOP_LOGICAL_READ:
2514 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2515 			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false);
2516 			break;
2517 		}
2518 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2519 		if (r == 0) {
2520 			if (copy_to_user(uaddr, tmpbuf, mop->size))
2521 				r = -EFAULT;
2522 		}
2523 		break;
2524 	case KVM_S390_MEMOP_LOGICAL_WRITE:
2525 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2526 			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true);
2527 			break;
2528 		}
2529 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2530 			r = -EFAULT;
2531 			break;
2532 		}
2533 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2534 		break;
2535 	default:
2536 		r = -EINVAL;
2537 	}
2538 
2539 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
2540 
2541 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
2542 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
2543 
2544 	vfree(tmpbuf);
2545 	return r;
2546 }
2547 
2548 long kvm_arch_vcpu_ioctl(struct file *filp,
2549 			 unsigned int ioctl, unsigned long arg)
2550 {
2551 	struct kvm_vcpu *vcpu = filp->private_data;
2552 	void __user *argp = (void __user *)arg;
2553 	int idx;
2554 	long r;
2555 
2556 	switch (ioctl) {
2557 	case KVM_S390_IRQ: {
2558 		struct kvm_s390_irq s390irq;
2559 
2560 		r = -EFAULT;
2561 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
2562 			break;
2563 		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2564 		break;
2565 	}
2566 	case KVM_S390_INTERRUPT: {
2567 		struct kvm_s390_interrupt s390int;
2568 		struct kvm_s390_irq s390irq;
2569 
2570 		r = -EFAULT;
2571 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
2572 			break;
2573 		if (s390int_to_s390irq(&s390int, &s390irq))
2574 			return -EINVAL;
2575 		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2576 		break;
2577 	}
2578 	case KVM_S390_STORE_STATUS:
2579 		idx = srcu_read_lock(&vcpu->kvm->srcu);
2580 		r = kvm_s390_vcpu_store_status(vcpu, arg);
2581 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
2582 		break;
2583 	case KVM_S390_SET_INITIAL_PSW: {
2584 		psw_t psw;
2585 
2586 		r = -EFAULT;
2587 		if (copy_from_user(&psw, argp, sizeof(psw)))
2588 			break;
2589 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
2590 		break;
2591 	}
2592 	case KVM_S390_INITIAL_RESET:
2593 		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
2594 		break;
2595 	case KVM_SET_ONE_REG:
2596 	case KVM_GET_ONE_REG: {
2597 		struct kvm_one_reg reg;
2598 		r = -EFAULT;
2599 		if (copy_from_user(&reg, argp, sizeof(reg)))
2600 			break;
2601 		if (ioctl == KVM_SET_ONE_REG)
2602 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
2603 		else
2604 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
2605 		break;
2606 	}
2607 #ifdef CONFIG_KVM_S390_UCONTROL
2608 	case KVM_S390_UCAS_MAP: {
2609 		struct kvm_s390_ucas_mapping ucasmap;
2610 
2611 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2612 			r = -EFAULT;
2613 			break;
2614 		}
2615 
2616 		if (!kvm_is_ucontrol(vcpu->kvm)) {
2617 			r = -EINVAL;
2618 			break;
2619 		}
2620 
2621 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
2622 				     ucasmap.vcpu_addr, ucasmap.length);
2623 		break;
2624 	}
2625 	case KVM_S390_UCAS_UNMAP: {
2626 		struct kvm_s390_ucas_mapping ucasmap;
2627 
2628 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2629 			r = -EFAULT;
2630 			break;
2631 		}
2632 
2633 		if (!kvm_is_ucontrol(vcpu->kvm)) {
2634 			r = -EINVAL;
2635 			break;
2636 		}
2637 
2638 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
2639 			ucasmap.length);
2640 		break;
2641 	}
2642 #endif
2643 	case KVM_S390_VCPU_FAULT: {
2644 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
2645 		break;
2646 	}
2647 	case KVM_ENABLE_CAP:
2648 	{
2649 		struct kvm_enable_cap cap;
2650 		r = -EFAULT;
2651 		if (copy_from_user(&cap, argp, sizeof(cap)))
2652 			break;
2653 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
2654 		break;
2655 	}
2656 	case KVM_S390_MEM_OP: {
2657 		struct kvm_s390_mem_op mem_op;
2658 
2659 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
2660 			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
2661 		else
2662 			r = -EFAULT;
2663 		break;
2664 	}
2665 	case KVM_S390_SET_IRQ_STATE: {
2666 		struct kvm_s390_irq_state irq_state;
2667 
2668 		r = -EFAULT;
2669 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2670 			break;
2671 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
2672 		    irq_state.len == 0 ||
2673 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
2674 			r = -EINVAL;
2675 			break;
2676 		}
2677 		r = kvm_s390_set_irq_state(vcpu,
2678 					   (void __user *) irq_state.buf,
2679 					   irq_state.len);
2680 		break;
2681 	}
2682 	case KVM_S390_GET_IRQ_STATE: {
2683 		struct kvm_s390_irq_state irq_state;
2684 
2685 		r = -EFAULT;
2686 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2687 			break;
2688 		if (irq_state.len == 0) {
2689 			r = -EINVAL;
2690 			break;
2691 		}
2692 		r = kvm_s390_get_irq_state(vcpu,
2693 					   (__u8 __user *)  irq_state.buf,
2694 					   irq_state.len);
2695 		break;
2696 	}
2697 	default:
2698 		r = -ENOTTY;
2699 	}
2700 	return r;
2701 }
2702 
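/*
 * For user-controlled VMs the SIE control block can be mmap()ed by user
 * space at KVM_S390_SIE_PAGE_OFFSET; all other faults on the vcpu mapping
 * raise SIGBUS.
 */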
2703 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
2704 {
2705 #ifdef CONFIG_KVM_S390_UCONTROL
2706 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
2707 		 && (kvm_is_ucontrol(vcpu->kvm))) {
2708 		vmf->page = virt_to_page(vcpu->arch.sie_block);
2709 		get_page(vmf->page);
2710 		return 0;
2711 	}
2712 #endif
2713 	return VM_FAULT_SIGBUS;
2714 }
2715 
2716 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
2717 			    unsigned long npages)
2718 {
2719 	return 0;
2720 }
2721 
2722 /* Section: memory related */
2723 int kvm_arch_prepare_memory_region(struct kvm *kvm,
2724 				   struct kvm_memory_slot *memslot,
2725 				   const struct kvm_userspace_memory_region *mem,
2726 				   enum kvm_mr_change change)
2727 {
2728 	/* A few sanity checks. We can have memory slots which have to be
2729 	   located/ended at a segment boundary (1MB). The memory in userland may
2730 	   be fragmented into various different vmas. It is okay to mmap() and
2731 	   munmap() memory in this slot after doing this call at any time. */
2732 
2733 	if (mem->userspace_addr & 0xffffful)
2734 		return -EINVAL;
2735 
2736 	if (mem->memory_size & 0xffffful)
2737 		return -EINVAL;
2738 
2739 	return 0;
2740 }
2741 
2742 void kvm_arch_commit_memory_region(struct kvm *kvm,
2743 				const struct kvm_userspace_memory_region *mem,
2744 				const struct kvm_memory_slot *old,
2745 				const struct kvm_memory_slot *new,
2746 				enum kvm_mr_change change)
2747 {
2748 	int rc;
2749 
2750 	/* If the basics of the memslot do not change, we do not want
2751 	 * to update the gmap. Every update causes several unnecessary
2752 	 * segment translation exceptions. This is usually handled just
2753 	 * fine by the normal fault handler + gmap, but it will also
2754 	 * cause faults on the prefix page of running guest CPUs.
2755 	 */
2756 	if (old->userspace_addr == mem->userspace_addr &&
2757 	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
2758 	    old->npages * PAGE_SIZE == mem->memory_size)
2759 		return;
2760 
2761 	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
2762 		mem->guest_phys_addr, mem->memory_size);
2763 	if (rc)
2764 		pr_warn("failed to commit memory region\n");
2765 	return;
2766 }
2767 
2768 static int __init kvm_s390_init(void)
2769 {
2770 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
2771 }
2772 
2773 static void __exit kvm_s390_exit(void)
2774 {
2775 	kvm_exit();
2776 }
2777 
2778 module_init(kvm_s390_init);
2779 module_exit(kvm_s390_exit);
2780 
2781 /*
2782  * Enable autoloading of the kvm module.
2783  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
2784  * since x86 takes a different approach.
2785  */
2786 #include <linux/miscdevice.h>
2787 MODULE_ALIAS_MISCDEV(KVM_MINOR);
2788 MODULE_ALIAS("devname:kvm");
2789