xref: /openbmc/linux/arch/s390/kvm/kvm-s390.c (revision 2eb5f31b)
1 /*
2  * hosting zSeries kernel virtual machines
3  *
4  * Copyright IBM Corp. 2008, 2009
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License (version 2 only)
8  * as published by the Free Software Foundation.
9  *
10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
11  *               Christian Borntraeger <borntraeger@de.ibm.com>
12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
14  *               Jason J. Herne <jjherne@us.ibm.com>
15  */
16 
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/module.h>
25 #include <linux/random.h>
26 #include <linux/slab.h>
27 #include <linux/timer.h>
28 #include <linux/vmalloc.h>
29 #include <asm/asm-offsets.h>
30 #include <asm/lowcore.h>
31 #include <asm/etr.h>
32 #include <asm/pgtable.h>
33 #include <asm/nmi.h>
34 #include <asm/switch_to.h>
35 #include <asm/isc.h>
36 #include <asm/sclp.h>
37 #include "kvm-s390.h"
38 #include "gaccess.h"
39 
40 #define KMSG_COMPONENT "kvm-s390"
41 #undef pr_fmt
42 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
43 
44 #define CREATE_TRACE_POINTS
45 #include "trace.h"
46 #include "trace-s390.h"
47 
48 #define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
49 #define LOCAL_IRQS 32
50 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
51 			   (KVM_MAX_VCPUS + LOCAL_IRQS))
52 
53 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
54 
55 struct kvm_stats_debugfs_item debugfs_entries[] = {
56 	{ "userspace_handled", VCPU_STAT(exit_userspace) },
57 	{ "exit_null", VCPU_STAT(exit_null) },
58 	{ "exit_validity", VCPU_STAT(exit_validity) },
59 	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
60 	{ "exit_external_request", VCPU_STAT(exit_external_request) },
61 	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
62 	{ "exit_instruction", VCPU_STAT(exit_instruction) },
63 	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
64 	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
65 	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
66 	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
67 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
68 	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
69 	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
70 	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
71 	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
72 	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
73 	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
74 	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
75 	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
76 	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
77 	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
78 	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
79 	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
80 	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
81 	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
82 	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
83 	{ "instruction_spx", VCPU_STAT(instruction_spx) },
84 	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
85 	{ "instruction_stap", VCPU_STAT(instruction_stap) },
86 	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
87 	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
88 	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
89 	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
90 	{ "instruction_essa", VCPU_STAT(instruction_essa) },
91 	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
92 	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
93 	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
94 	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
95 	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
96 	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
97 	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
98 	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
99 	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
100 	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
101 	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
102 	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
103 	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
104 	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
105 	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
106 	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
107 	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
108 	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
109 	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
110 	{ "diagnose_10", VCPU_STAT(diagnose_10) },
111 	{ "diagnose_44", VCPU_STAT(diagnose_44) },
112 	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
113 	{ "diagnose_258", VCPU_STAT(diagnose_258) },
114 	{ "diagnose_308", VCPU_STAT(diagnose_308) },
115 	{ "diagnose_500", VCPU_STAT(diagnose_500) },
116 	{ NULL }
117 };
118 
119 /* upper facilities limit for kvm */
120 unsigned long kvm_s390_fac_list_mask[] = {
121 	0xffe6fffbfcfdfc40UL,
122 	0x005e800000000000UL,
123 };
124 
125 unsigned long kvm_s390_fac_list_mask_size(void)
126 {
127 	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
128 	return ARRAY_SIZE(kvm_s390_fac_list_mask);
129 }
130 
131 static struct gmap_notifier gmap_notifier;
132 debug_info_t *kvm_s390_dbf;
133 
134 /* Section: not file related */
135 int kvm_arch_hardware_enable(void)
136 {
137 	/* every s390 is virtualization enabled ;-) */
138 	return 0;
139 }
140 
141 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
142 
143 /*
144  * This callback is executed during stop_machine(). All CPUs are therefore
145  * temporarily stopped. In order not to change guest behavior, we have to
146  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
147  * so a CPU won't be stopped while calculating with the epoch.
148  */
149 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
150 			  void *v)
151 {
152 	struct kvm *kvm;
153 	struct kvm_vcpu *vcpu;
154 	int i;
155 	unsigned long long *delta = v;
156 
157 	list_for_each_entry(kvm, &vm_list, vm_list) {
158 		kvm->arch.epoch -= *delta;
159 		kvm_for_each_vcpu(i, vcpu, kvm) {
160 			vcpu->arch.sie_block->epoch -= *delta;
161 		}
162 	}
163 	return NOTIFY_OK;
164 }
165 
166 static struct notifier_block kvm_clock_notifier = {
167 	.notifier_call = kvm_clock_sync,
168 };
169 
170 int kvm_arch_hardware_setup(void)
171 {
172 	gmap_notifier.notifier_call = kvm_gmap_notifier;
173 	gmap_register_ipte_notifier(&gmap_notifier);
174 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
175 				       &kvm_clock_notifier);
176 	return 0;
177 }
178 
179 void kvm_arch_hardware_unsetup(void)
180 {
181 	gmap_unregister_ipte_notifier(&gmap_notifier);
182 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
183 					 &kvm_clock_notifier);
184 }
185 
186 int kvm_arch_init(void *opaque)
187 {
188 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
189 	if (!kvm_s390_dbf)
190 		return -ENOMEM;
191 
192 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
193 		debug_unregister(kvm_s390_dbf);
194 		return -ENOMEM;
195 	}
196 
197 	/* Register floating interrupt controller interface. */
198 	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
199 }
200 
201 void kvm_arch_exit(void)
202 {
203 	debug_unregister(kvm_s390_dbf);
204 }
205 
206 /* Section: device related */
207 long kvm_arch_dev_ioctl(struct file *filp,
208 			unsigned int ioctl, unsigned long arg)
209 {
210 	if (ioctl == KVM_S390_ENABLE_SIE)
211 		return s390_enable_sie();
212 	return -EINVAL;
213 }
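
/*
 * Illustrative sketch, not part of this file: the only device ioctl handled
 * above is KVM_S390_ENABLE_SIE, issued by userspace on the /dev/kvm file
 * descriptor (kvm_fd below is assumed to come from open("/dev/kvm", O_RDWR)):
 *
 *	if (ioctl(kvm_fd, KVM_S390_ENABLE_SIE, 0) < 0)
 *		err(1, "KVM_S390_ENABLE_SIE");
 */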
214 
215 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
216 {
217 	int r;
218 
219 	switch (ext) {
220 	case KVM_CAP_S390_PSW:
221 	case KVM_CAP_S390_GMAP:
222 	case KVM_CAP_SYNC_MMU:
223 #ifdef CONFIG_KVM_S390_UCONTROL
224 	case KVM_CAP_S390_UCONTROL:
225 #endif
226 	case KVM_CAP_ASYNC_PF:
227 	case KVM_CAP_SYNC_REGS:
228 	case KVM_CAP_ONE_REG:
229 	case KVM_CAP_ENABLE_CAP:
230 	case KVM_CAP_S390_CSS_SUPPORT:
231 	case KVM_CAP_IOEVENTFD:
232 	case KVM_CAP_DEVICE_CTRL:
233 	case KVM_CAP_ENABLE_CAP_VM:
234 	case KVM_CAP_S390_IRQCHIP:
235 	case KVM_CAP_VM_ATTRIBUTES:
236 	case KVM_CAP_MP_STATE:
237 	case KVM_CAP_S390_INJECT_IRQ:
238 	case KVM_CAP_S390_USER_SIGP:
239 	case KVM_CAP_S390_USER_STSI:
240 	case KVM_CAP_S390_SKEYS:
241 	case KVM_CAP_S390_IRQ_STATE:
242 		r = 1;
243 		break;
244 	case KVM_CAP_S390_MEM_OP:
245 		r = MEM_OP_MAX_SIZE;
246 		break;
247 	case KVM_CAP_NR_VCPUS:
248 	case KVM_CAP_MAX_VCPUS:
249 		r = KVM_MAX_VCPUS;
250 		break;
251 	case KVM_CAP_NR_MEMSLOTS:
252 		r = KVM_USER_MEM_SLOTS;
253 		break;
254 	case KVM_CAP_S390_COW:
255 		r = MACHINE_HAS_ESOP;
256 		break;
257 	case KVM_CAP_S390_VECTOR_REGISTERS:
258 		r = MACHINE_HAS_VX;
259 		break;
260 	default:
261 		r = 0;
262 	}
263 	return r;
264 }
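
/*
 * Illustrative sketch, not part of this file: userspace queries the
 * capabilities above with KVM_CHECK_EXTENSION. Some of them return a value
 * rather than a boolean, e.g. KVM_CAP_S390_MEM_OP reports the maximum
 * transfer size (vm_fd is assumed to be a VM file descriptor):
 *
 *	int max_mem_op = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *	int max_vcpus  = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPUS);
 */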
265 
266 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
267 					struct kvm_memory_slot *memslot)
268 {
269 	gfn_t cur_gfn, last_gfn;
270 	unsigned long address;
271 	struct gmap *gmap = kvm->arch.gmap;
272 
273 	down_read(&gmap->mm->mmap_sem);
274 	/* Loop over all guest pages */
275 	last_gfn = memslot->base_gfn + memslot->npages;
276 	for (cur_gfn = memslot->base_gfn; cur_gfn < last_gfn; cur_gfn++) {
277 		address = gfn_to_hva_memslot(memslot, cur_gfn);
278 
279 		if (gmap_test_and_clear_dirty(address, gmap))
280 			mark_page_dirty(kvm, cur_gfn);
281 	}
282 	up_read(&gmap->mm->mmap_sem);
283 }
284 
285 /* Section: vm related */
286 /*
287  * Get (and clear) the dirty memory log for a memory slot.
288  */
289 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
290 			       struct kvm_dirty_log *log)
291 {
292 	int r;
293 	unsigned long n;
294 	struct kvm_memslots *slots;
295 	struct kvm_memory_slot *memslot;
296 	int is_dirty = 0;
297 
298 	mutex_lock(&kvm->slots_lock);
299 
300 	r = -EINVAL;
301 	if (log->slot >= KVM_USER_MEM_SLOTS)
302 		goto out;
303 
304 	slots = kvm_memslots(kvm);
305 	memslot = id_to_memslot(slots, log->slot);
306 	r = -ENOENT;
307 	if (!memslot->dirty_bitmap)
308 		goto out;
309 
310 	kvm_s390_sync_dirty_log(kvm, memslot);
311 	r = kvm_get_dirty_log(kvm, log, &is_dirty);
312 	if (r)
313 		goto out;
314 
315 	/* Clear the dirty log */
316 	if (is_dirty) {
317 		n = kvm_dirty_bitmap_bytes(memslot);
318 		memset(memslot->dirty_bitmap, 0, n);
319 	}
320 	r = 0;
321 out:
322 	mutex_unlock(&kvm->slots_lock);
323 	return r;
324 }
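
/*
 * Illustrative sketch, not part of this file: fetching (and thereby clearing)
 * the dirty bitmap of memory slot 0. The buffer must provide one bit per page
 * of the slot (vm_fd and bitmap are assumptions):
 *
 *	struct kvm_dirty_log log = {
 *		.slot = 0,
 *		.dirty_bitmap = bitmap,
 *	};
 *	ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);
 */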
325 
326 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
327 {
328 	int r;
329 
330 	if (cap->flags)
331 		return -EINVAL;
332 
333 	switch (cap->cap) {
334 	case KVM_CAP_S390_IRQCHIP:
335 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
336 		kvm->arch.use_irqchip = 1;
337 		r = 0;
338 		break;
339 	case KVM_CAP_S390_USER_SIGP:
340 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
341 		kvm->arch.user_sigp = 1;
342 		r = 0;
343 		break;
344 	case KVM_CAP_S390_VECTOR_REGISTERS:
345 		if (MACHINE_HAS_VX) {
346 			set_kvm_facility(kvm->arch.model.fac->mask, 129);
347 			set_kvm_facility(kvm->arch.model.fac->list, 129);
348 			r = 0;
349 		} else
350 			r = -EINVAL;
351 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
352 			 r ? "(not available)" : "(success)");
353 		break;
354 	case KVM_CAP_S390_USER_STSI:
355 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
356 		kvm->arch.user_stsi = 1;
357 		r = 0;
358 		break;
359 	default:
360 		r = -EINVAL;
361 		break;
362 	}
363 	return r;
364 }
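
/*
 * Illustrative sketch, not part of this file: enabling one of the VM
 * capabilities handled above, here the in-kernel irqchip (vm_fd is assumed
 * to be a VM file descriptor; flags must be zero):
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_IRQCHIP };
 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 */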
365 
366 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
367 {
368 	int ret;
369 
370 	switch (attr->attr) {
371 	case KVM_S390_VM_MEM_LIMIT_SIZE:
372 		ret = 0;
373 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
374 			 kvm->arch.gmap->asce_end);
375 		if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr))
376 			ret = -EFAULT;
377 		break;
378 	default:
379 		ret = -ENXIO;
380 		break;
381 	}
382 	return ret;
383 }
384 
385 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
386 {
387 	int ret;
388 	unsigned int idx;
389 	switch (attr->attr) {
390 	case KVM_S390_VM_MEM_ENABLE_CMMA:
391 		/* enable CMMA only for z10 and later (EDAT_1) */
392 		ret = -EINVAL;
393 		if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)
394 			break;
395 
396 		ret = -EBUSY;
397 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
398 		mutex_lock(&kvm->lock);
399 		if (atomic_read(&kvm->online_vcpus) == 0) {
400 			kvm->arch.use_cmma = 1;
401 			ret = 0;
402 		}
403 		mutex_unlock(&kvm->lock);
404 		break;
405 	case KVM_S390_VM_MEM_CLR_CMMA:
406 		ret = -EINVAL;
407 		if (!kvm->arch.use_cmma)
408 			break;
409 
410 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
411 		mutex_lock(&kvm->lock);
412 		idx = srcu_read_lock(&kvm->srcu);
413 		s390_reset_cmma(kvm->arch.gmap->mm);
414 		srcu_read_unlock(&kvm->srcu, idx);
415 		mutex_unlock(&kvm->lock);
416 		ret = 0;
417 		break;
418 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
419 		unsigned long new_limit;
420 
421 		if (kvm_is_ucontrol(kvm))
422 			return -EINVAL;
423 
424 		if (get_user(new_limit, (u64 __user *)attr->addr))
425 			return -EFAULT;
426 
427 		if (new_limit > kvm->arch.gmap->asce_end)
428 			return -E2BIG;
429 
430 		ret = -EBUSY;
431 		mutex_lock(&kvm->lock);
432 		if (atomic_read(&kvm->online_vcpus) == 0) {
433 			/* gmap_alloc will round the limit up */
434 			struct gmap *new = gmap_alloc(current->mm, new_limit);
435 
436 			if (!new) {
437 				ret = -ENOMEM;
438 			} else {
439 				gmap_free(kvm->arch.gmap);
440 				new->private = kvm;
441 				kvm->arch.gmap = new;
442 				ret = 0;
443 			}
444 		}
445 		mutex_unlock(&kvm->lock);
446 		VM_EVENT(kvm, 3, "SET: max guest memory: %lu bytes", new_limit);
447 		break;
448 	}
449 	default:
450 		ret = -ENXIO;
451 		break;
452 	}
453 	return ret;
454 }
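
/*
 * Illustrative sketch, not part of this file: the memory attributes above are
 * reached via KVM_SET_DEVICE_ATTR on the VM file descriptor, e.g. limiting
 * guest memory before any VCPU exists (vm_fd and new_limit are assumptions):
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MEM_CTRL,
 *		.attr  = KVM_S390_VM_MEM_LIMIT_SIZE,
 *		.addr  = (__u64)(unsigned long)&new_limit,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */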
455 
456 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
457 
458 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
459 {
460 	struct kvm_vcpu *vcpu;
461 	int i;
462 
463 	if (!test_kvm_facility(kvm, 76))
464 		return -EINVAL;
465 
466 	mutex_lock(&kvm->lock);
467 	switch (attr->attr) {
468 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
469 		get_random_bytes(
470 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
471 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
472 		kvm->arch.crypto.aes_kw = 1;
473 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
474 		break;
475 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
476 		get_random_bytes(
477 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
478 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
479 		kvm->arch.crypto.dea_kw = 1;
480 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
481 		break;
482 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
483 		kvm->arch.crypto.aes_kw = 0;
484 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
485 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
486 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
487 		break;
488 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
489 		kvm->arch.crypto.dea_kw = 0;
490 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
491 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
492 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
493 		break;
494 	default:
495 		mutex_unlock(&kvm->lock);
496 		return -ENXIO;
497 	}
498 
499 	kvm_for_each_vcpu(i, vcpu, kvm) {
500 		kvm_s390_vcpu_crypto_setup(vcpu);
501 		exit_sie(vcpu);
502 	}
503 	mutex_unlock(&kvm->lock);
504 	return 0;
505 }
506 
507 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
508 {
509 	u8 gtod_high;
510 
511 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
512 					   sizeof(gtod_high)))
513 		return -EFAULT;
514 
515 	if (gtod_high != 0)
516 		return -EINVAL;
517 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x\n", gtod_high);
518 
519 	return 0;
520 }
521 
522 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
523 {
524 	struct kvm_vcpu *cur_vcpu;
525 	unsigned int vcpu_idx;
526 	u64 host_tod, gtod;
527 	int r;
528 
529 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
530 		return -EFAULT;
531 
532 	r = store_tod_clock(&host_tod);
533 	if (r)
534 		return r;
535 
536 	mutex_lock(&kvm->lock);
537 	preempt_disable();
538 	kvm->arch.epoch = gtod - host_tod;
539 	kvm_s390_vcpu_block_all(kvm);
540 	kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm)
541 		cur_vcpu->arch.sie_block->epoch = kvm->arch.epoch;
542 	kvm_s390_vcpu_unblock_all(kvm);
543 	preempt_enable();
544 	mutex_unlock(&kvm->lock);
545 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx\n", gtod);
546 	return 0;
547 }
548 
549 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
550 {
551 	int ret;
552 
553 	if (attr->flags)
554 		return -EINVAL;
555 
556 	switch (attr->attr) {
557 	case KVM_S390_VM_TOD_HIGH:
558 		ret = kvm_s390_set_tod_high(kvm, attr);
559 		break;
560 	case KVM_S390_VM_TOD_LOW:
561 		ret = kvm_s390_set_tod_low(kvm, attr);
562 		break;
563 	default:
564 		ret = -ENXIO;
565 		break;
566 	}
567 	return ret;
568 }
569 
570 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
571 {
572 	u8 gtod_high = 0;
573 
574 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
575 					 sizeof(gtod_high)))
576 		return -EFAULT;
577 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x\n", gtod_high);
578 
579 	return 0;
580 }
581 
582 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
583 {
584 	u64 host_tod, gtod;
585 	int r;
586 
587 	r = store_tod_clock(&host_tod);
588 	if (r)
589 		return r;
590 
591 	preempt_disable();
592 	gtod = host_tod + kvm->arch.epoch;
593 	preempt_enable();
594 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
595 		return -EFAULT;
596 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx\n", gtod);
597 
598 	return 0;
599 }
600 
601 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
602 {
603 	int ret;
604 
605 	if (attr->flags)
606 		return -EINVAL;
607 
608 	switch (attr->attr) {
609 	case KVM_S390_VM_TOD_HIGH:
610 		ret = kvm_s390_get_tod_high(kvm, attr);
611 		break;
612 	case KVM_S390_VM_TOD_LOW:
613 		ret = kvm_s390_get_tod_low(kvm, attr);
614 		break;
615 	default:
616 		ret = -ENXIO;
617 		break;
618 	}
619 	return ret;
620 }
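
/*
 * Illustrative sketch, not part of this file: reading the guest TOD base via
 * KVM_GET_DEVICE_ATTR; the returned value is the host TOD plus the per-VM
 * epoch maintained above (vm_fd is an assumption):
 *
 *	__u64 gtod;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_TOD,
 *		.attr  = KVM_S390_VM_TOD_LOW,
 *		.addr  = (__u64)(unsigned long)&gtod,
 *	};
 *	ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);
 */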
621 
622 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
623 {
624 	struct kvm_s390_vm_cpu_processor *proc;
625 	int ret = 0;
626 
627 	mutex_lock(&kvm->lock);
628 	if (atomic_read(&kvm->online_vcpus)) {
629 		ret = -EBUSY;
630 		goto out;
631 	}
632 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
633 	if (!proc) {
634 		ret = -ENOMEM;
635 		goto out;
636 	}
637 	if (!copy_from_user(proc, (void __user *)attr->addr,
638 			    sizeof(*proc))) {
639 		memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
640 		       sizeof(struct cpuid));
641 		kvm->arch.model.ibc = proc->ibc;
642 		memcpy(kvm->arch.model.fac->list, proc->fac_list,
643 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
644 	} else
645 		ret = -EFAULT;
646 	kfree(proc);
647 out:
648 	mutex_unlock(&kvm->lock);
649 	return ret;
650 }
651 
652 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
653 {
654 	int ret = -ENXIO;
655 
656 	switch (attr->attr) {
657 	case KVM_S390_VM_CPU_PROCESSOR:
658 		ret = kvm_s390_set_processor(kvm, attr);
659 		break;
660 	}
661 	return ret;
662 }
663 
664 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
665 {
666 	struct kvm_s390_vm_cpu_processor *proc;
667 	int ret = 0;
668 
669 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
670 	if (!proc) {
671 		ret = -ENOMEM;
672 		goto out;
673 	}
674 	memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
675 	proc->ibc = kvm->arch.model.ibc;
676 	memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE);
677 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
678 		ret = -EFAULT;
679 	kfree(proc);
680 out:
681 	return ret;
682 }
683 
684 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
685 {
686 	struct kvm_s390_vm_cpu_machine *mach;
687 	int ret = 0;
688 
689 	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
690 	if (!mach) {
691 		ret = -ENOMEM;
692 		goto out;
693 	}
694 	get_cpu_id((struct cpuid *) &mach->cpuid);
695 	mach->ibc = sclp.ibc;
696 	memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
697 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
698 	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
699 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
700 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
701 		ret = -EFAULT;
702 	kfree(mach);
703 out:
704 	return ret;
705 }
706 
707 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
708 {
709 	int ret = -ENXIO;
710 
711 	switch (attr->attr) {
712 	case KVM_S390_VM_CPU_PROCESSOR:
713 		ret = kvm_s390_get_processor(kvm, attr);
714 		break;
715 	case KVM_S390_VM_CPU_MACHINE:
716 		ret = kvm_s390_get_machine(kvm, attr);
717 		break;
718 	}
719 	return ret;
720 }
721 
722 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
723 {
724 	int ret;
725 
726 	switch (attr->group) {
727 	case KVM_S390_VM_MEM_CTRL:
728 		ret = kvm_s390_set_mem_control(kvm, attr);
729 		break;
730 	case KVM_S390_VM_TOD:
731 		ret = kvm_s390_set_tod(kvm, attr);
732 		break;
733 	case KVM_S390_VM_CPU_MODEL:
734 		ret = kvm_s390_set_cpu_model(kvm, attr);
735 		break;
736 	case KVM_S390_VM_CRYPTO:
737 		ret = kvm_s390_vm_set_crypto(kvm, attr);
738 		break;
739 	default:
740 		ret = -ENXIO;
741 		break;
742 	}
743 
744 	return ret;
745 }
746 
747 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
748 {
749 	int ret;
750 
751 	switch (attr->group) {
752 	case KVM_S390_VM_MEM_CTRL:
753 		ret = kvm_s390_get_mem_control(kvm, attr);
754 		break;
755 	case KVM_S390_VM_TOD:
756 		ret = kvm_s390_get_tod(kvm, attr);
757 		break;
758 	case KVM_S390_VM_CPU_MODEL:
759 		ret = kvm_s390_get_cpu_model(kvm, attr);
760 		break;
761 	default:
762 		ret = -ENXIO;
763 		break;
764 	}
765 
766 	return ret;
767 }
768 
769 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
770 {
771 	int ret;
772 
773 	switch (attr->group) {
774 	case KVM_S390_VM_MEM_CTRL:
775 		switch (attr->attr) {
776 		case KVM_S390_VM_MEM_ENABLE_CMMA:
777 		case KVM_S390_VM_MEM_CLR_CMMA:
778 		case KVM_S390_VM_MEM_LIMIT_SIZE:
779 			ret = 0;
780 			break;
781 		default:
782 			ret = -ENXIO;
783 			break;
784 		}
785 		break;
786 	case KVM_S390_VM_TOD:
787 		switch (attr->attr) {
788 		case KVM_S390_VM_TOD_LOW:
789 		case KVM_S390_VM_TOD_HIGH:
790 			ret = 0;
791 			break;
792 		default:
793 			ret = -ENXIO;
794 			break;
795 		}
796 		break;
797 	case KVM_S390_VM_CPU_MODEL:
798 		switch (attr->attr) {
799 		case KVM_S390_VM_CPU_PROCESSOR:
800 		case KVM_S390_VM_CPU_MACHINE:
801 			ret = 0;
802 			break;
803 		default:
804 			ret = -ENXIO;
805 			break;
806 		}
807 		break;
808 	case KVM_S390_VM_CRYPTO:
809 		switch (attr->attr) {
810 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
811 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
812 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
813 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
814 			ret = 0;
815 			break;
816 		default:
817 			ret = -ENXIO;
818 			break;
819 		}
820 		break;
821 	default:
822 		ret = -ENXIO;
823 		break;
824 	}
825 
826 	return ret;
827 }
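
/*
 * Illustrative sketch, not part of this file: KVM_HAS_DEVICE_ATTR lets
 * userspace probe an attribute before using it; 0 means supported, -1 with
 * errno ENXIO means it is not (vm_fd is an assumption):
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CRYPTO,
 *		.attr  = KVM_S390_VM_CRYPTO_ENABLE_AES_KW,
 *	};
 *	int supported = ioctl(vm_fd, KVM_HAS_DEVICE_ATTR, &attr) == 0;
 */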
828 
829 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
830 {
831 	uint8_t *keys;
832 	uint64_t hva;
833 	unsigned long curkey;
834 	int i, r = 0;
835 
836 	if (args->flags != 0)
837 		return -EINVAL;
838 
839 	/* Is this guest using storage keys? */
840 	if (!mm_use_skey(current->mm))
841 		return KVM_S390_GET_SKEYS_NONE;
842 
843 	/* Enforce sane limit on memory allocation */
844 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
845 		return -EINVAL;
846 
847 	keys = kmalloc_array(args->count, sizeof(uint8_t),
848 			     GFP_KERNEL | __GFP_NOWARN);
849 	if (!keys)
850 		keys = vmalloc(sizeof(uint8_t) * args->count);
851 	if (!keys)
852 		return -ENOMEM;
853 
854 	for (i = 0; i < args->count; i++) {
855 		hva = gfn_to_hva(kvm, args->start_gfn + i);
856 		if (kvm_is_error_hva(hva)) {
857 			r = -EFAULT;
858 			goto out;
859 		}
860 
861 		curkey = get_guest_storage_key(current->mm, hva);
862 		if (IS_ERR_VALUE(curkey)) {
863 			r = curkey;
864 			goto out;
865 		}
866 		keys[i] = curkey;
867 	}
868 
869 	r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
870 			 sizeof(uint8_t) * args->count);
871 	if (r)
872 		r = -EFAULT;
873 out:
874 	kvfree(keys);
875 	return r;
876 }
877 
878 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
879 {
880 	uint8_t *keys;
881 	uint64_t hva;
882 	int i, r = 0;
883 
884 	if (args->flags != 0)
885 		return -EINVAL;
886 
887 	/* Enforce sane limit on memory allocation */
888 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
889 		return -EINVAL;
890 
891 	keys = kmalloc_array(args->count, sizeof(uint8_t),
892 			     GFP_KERNEL | __GFP_NOWARN);
893 	if (!keys)
894 		keys = vmalloc(sizeof(uint8_t) * args->count);
895 	if (!keys)
896 		return -ENOMEM;
897 
898 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
899 			   sizeof(uint8_t) * args->count);
900 	if (r) {
901 		r = -EFAULT;
902 		goto out;
903 	}
904 
905 	/* Enable storage key handling for the guest */
906 	r = s390_enable_skey();
907 	if (r)
908 		goto out;
909 
910 	for (i = 0; i < args->count; i++) {
911 		hva = gfn_to_hva(kvm, args->start_gfn + i);
912 		if (kvm_is_error_hva(hva)) {
913 			r = -EFAULT;
914 			goto out;
915 		}
916 
917 		/* Lowest order bit is reserved */
918 		if (keys[i] & 0x01) {
919 			r = -EINVAL;
920 			goto out;
921 		}
922 
923 		r = set_guest_storage_key(current->mm, hva,
924 					  (unsigned long)keys[i], 0);
925 		if (r)
926 			goto out;
927 	}
928 out:
929 	kvfree(keys);
930 	return r;
931 }
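
/*
 * Illustrative sketch, not part of this file: saving the storage keys of the
 * first `count` guest pages, e.g. for migration (vm_fd, count and keys are
 * assumptions; count must not exceed KVM_S390_SKEYS_MAX):
 *
 *	struct kvm_s390_skeys args = {
 *		.start_gfn     = 0,
 *		.count         = count,
 *		.skeydata_addr = (__u64)(unsigned long)keys,
 *	};
 *	ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 */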
932 
933 long kvm_arch_vm_ioctl(struct file *filp,
934 		       unsigned int ioctl, unsigned long arg)
935 {
936 	struct kvm *kvm = filp->private_data;
937 	void __user *argp = (void __user *)arg;
938 	struct kvm_device_attr attr;
939 	int r;
940 
941 	switch (ioctl) {
942 	case KVM_S390_INTERRUPT: {
943 		struct kvm_s390_interrupt s390int;
944 
945 		r = -EFAULT;
946 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
947 			break;
948 		r = kvm_s390_inject_vm(kvm, &s390int);
949 		break;
950 	}
951 	case KVM_ENABLE_CAP: {
952 		struct kvm_enable_cap cap;
953 		r = -EFAULT;
954 		if (copy_from_user(&cap, argp, sizeof(cap)))
955 			break;
956 		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
957 		break;
958 	}
959 	case KVM_CREATE_IRQCHIP: {
960 		struct kvm_irq_routing_entry routing;
961 
962 		r = -EINVAL;
963 		if (kvm->arch.use_irqchip) {
964 			/* Set up dummy routing. */
965 			memset(&routing, 0, sizeof(routing));
966 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
967 		}
968 		break;
969 	}
970 	case KVM_SET_DEVICE_ATTR: {
971 		r = -EFAULT;
972 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
973 			break;
974 		r = kvm_s390_vm_set_attr(kvm, &attr);
975 		break;
976 	}
977 	case KVM_GET_DEVICE_ATTR: {
978 		r = -EFAULT;
979 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
980 			break;
981 		r = kvm_s390_vm_get_attr(kvm, &attr);
982 		break;
983 	}
984 	case KVM_HAS_DEVICE_ATTR: {
985 		r = -EFAULT;
986 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
987 			break;
988 		r = kvm_s390_vm_has_attr(kvm, &attr);
989 		break;
990 	}
991 	case KVM_S390_GET_SKEYS: {
992 		struct kvm_s390_skeys args;
993 
994 		r = -EFAULT;
995 		if (copy_from_user(&args, argp,
996 				   sizeof(struct kvm_s390_skeys)))
997 			break;
998 		r = kvm_s390_get_skeys(kvm, &args);
999 		break;
1000 	}
1001 	case KVM_S390_SET_SKEYS: {
1002 		struct kvm_s390_skeys args;
1003 
1004 		r = -EFAULT;
1005 		if (copy_from_user(&args, argp,
1006 				   sizeof(struct kvm_s390_skeys)))
1007 			break;
1008 		r = kvm_s390_set_skeys(kvm, &args);
1009 		break;
1010 	}
1011 	default:
1012 		r = -ENOTTY;
1013 	}
1014 
1015 	return r;
1016 }
1017 
1018 static int kvm_s390_query_ap_config(u8 *config)
1019 {
1020 	u32 fcn_code = 0x04000000UL;
1021 	u32 cc = 0;
1022 
1023 	memset(config, 0, 128);
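	/*
	 * PQAP(QCI) expects the function code in general register 0 and the
	 * address of the 128-byte config block in general register 2; the
	 * .long 0xb2af0000 below encodes the PQAP instruction itself, ipm/srl
	 * extract its condition code, and the EX_TABLE entry provides a fixup
	 * so that a PQAP raising an exception is skipped instead of crashing.
	 */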
1024 	asm volatile(
1025 		"lgr 0,%1\n"
1026 		"lgr 2,%2\n"
1027 		".long 0xb2af0000\n"		/* PQAP(QCI) */
1028 		"0: ipm %0\n"
1029 		"srl %0,28\n"
1030 		"1:\n"
1031 		EX_TABLE(0b, 1b)
1032 		: "+r" (cc)
1033 		: "r" (fcn_code), "r" (config)
1034 		: "cc", "0", "2", "memory"
1035 	);
1036 
1037 	return cc;
1038 }
1039 
1040 static int kvm_s390_apxa_installed(void)
1041 {
1042 	u8 config[128];
1043 	int cc;
1044 
1045 	if (test_facility(2) && test_facility(12)) {
1046 		cc = kvm_s390_query_ap_config(config);
1047 
1048 		if (cc)
1049 			pr_err("PQAP(QCI) failed with cc=%d", cc);
1050 		else
1051 			return config[0] & 0x40;
1052 	}
1053 
1054 	return 0;
1055 }
1056 
1057 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1058 {
1059 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1060 
1061 	if (kvm_s390_apxa_installed())
1062 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1063 	else
1064 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1065 }
1066 
1067 static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
1068 {
1069 	get_cpu_id(cpu_id);
1070 	cpu_id->version = 0xff;
1071 }
1072 
1073 static int kvm_s390_crypto_init(struct kvm *kvm)
1074 {
1075 	if (!test_kvm_facility(kvm, 76))
1076 		return 0;
1077 
1078 	kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
1079 					 GFP_KERNEL | GFP_DMA);
1080 	if (!kvm->arch.crypto.crycb)
1081 		return -ENOMEM;
1082 
1083 	kvm_s390_set_crycb_format(kvm);
1084 
1085 	/* Enable AES/DEA protected key functions by default */
1086 	kvm->arch.crypto.aes_kw = 1;
1087 	kvm->arch.crypto.dea_kw = 1;
1088 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1089 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1090 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1091 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1092 
1093 	return 0;
1094 }
1095 
1096 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1097 {
1098 	int i, rc;
1099 	char debug_name[16];
1100 	static unsigned long sca_offset;
1101 
1102 	rc = -EINVAL;
1103 #ifdef CONFIG_KVM_S390_UCONTROL
1104 	if (type & ~KVM_VM_S390_UCONTROL)
1105 		goto out_err;
1106 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1107 		goto out_err;
1108 #else
1109 	if (type)
1110 		goto out_err;
1111 #endif
1112 
1113 	rc = s390_enable_sie();
1114 	if (rc)
1115 		goto out_err;
1116 
1117 	rc = -ENOMEM;
1118 
1119 	kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
1120 	if (!kvm->arch.sca)
1121 		goto out_err;
1122 	spin_lock(&kvm_lock);
1123 	sca_offset = (sca_offset + 16) & 0x7f0;
1124 	kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
1125 	spin_unlock(&kvm_lock);
1126 
1127 	sprintf(debug_name, "kvm-%u", current->pid);
1128 
1129 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1130 	if (!kvm->arch.dbf)
1131 		goto out_err;
1132 
1133 	/*
1134 	 * The architectural maximum amount of facilities is 16 kbit. To store
1135 	 * this amount, 2 kbyte of memory is required. Thus we need a full
1136 	 * page to hold the guest facility list (arch.model.fac->list) and the
1137 	 * facility mask (arch.model.fac->mask). Its address size has to be
1138 	 * facility mask (arch.model.fac->mask). Its address has to fit into
1139 	 * 31 bits and be word aligned.
1140 	kvm->arch.model.fac =
1141 		(struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1142 	if (!kvm->arch.model.fac)
1143 		goto out_err;
1144 
1145 	/* Populate the facility mask initially. */
1146 	memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
1147 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1148 	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1149 		if (i < kvm_s390_fac_list_mask_size())
1150 			kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i];
1151 		else
1152 			kvm->arch.model.fac->mask[i] = 0UL;
1153 	}
1154 
1155 	/* Populate the facility list initially. */
1156 	memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask,
1157 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1158 
1159 	kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
1160 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1161 
1162 	if (kvm_s390_crypto_init(kvm) < 0)
1163 		goto out_err;
1164 
1165 	spin_lock_init(&kvm->arch.float_int.lock);
1166 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
1167 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1168 	init_waitqueue_head(&kvm->arch.ipte_wq);
1169 	mutex_init(&kvm->arch.ipte_mutex);
1170 
1171 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1172 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
1173 
1174 	if (type & KVM_VM_S390_UCONTROL) {
1175 		kvm->arch.gmap = NULL;
1176 	} else {
1177 		kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1);
1178 		if (!kvm->arch.gmap)
1179 			goto out_err;
1180 		kvm->arch.gmap->private = kvm;
1181 		kvm->arch.gmap->pfault_enabled = 0;
1182 	}
1183 
1184 	kvm->arch.css_support = 0;
1185 	kvm->arch.use_irqchip = 0;
1186 	kvm->arch.epoch = 0;
1187 
1188 	spin_lock_init(&kvm->arch.start_stop_lock);
1189 	KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid);
1190 
1191 	return 0;
1192 out_err:
1193 	kfree(kvm->arch.crypto.crycb);
1194 	free_page((unsigned long)kvm->arch.model.fac);
1195 	debug_unregister(kvm->arch.dbf);
1196 	free_page((unsigned long)(kvm->arch.sca));
1197 	KVM_EVENT(3, "creation of vm failed: %d", rc);
1198 	return rc;
1199 }
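
/*
 * Illustrative sketch, not part of this file: kvm_arch_init_vm() runs as part
 * of the KVM_CREATE_VM ioctl, so all of the setup above is triggered from
 * userspace by something like (kvm_fd is assumed to be an open /dev/kvm fd;
 * a type of 0 requests a regular, non-ucontrol VM):
 *
 *	int vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, 0);
 */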
1200 
1201 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1202 {
1203 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1204 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1205 	kvm_s390_clear_local_irqs(vcpu);
1206 	kvm_clear_async_pf_completion_queue(vcpu);
1207 	if (!kvm_is_ucontrol(vcpu->kvm)) {
1208 		clear_bit(63 - vcpu->vcpu_id,
1209 			  (unsigned long *) &vcpu->kvm->arch.sca->mcn);
1210 		if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
1211 		    (__u64) vcpu->arch.sie_block)
1212 			vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
1213 	}
1214 	smp_mb();
1215 
1216 	if (kvm_is_ucontrol(vcpu->kvm))
1217 		gmap_free(vcpu->arch.gmap);
1218 
1219 	if (vcpu->kvm->arch.use_cmma)
1220 		kvm_s390_vcpu_unsetup_cmma(vcpu);
1221 	free_page((unsigned long)(vcpu->arch.sie_block));
1222 
1223 	kvm_vcpu_uninit(vcpu);
1224 	kmem_cache_free(kvm_vcpu_cache, vcpu);
1225 }
1226 
1227 static void kvm_free_vcpus(struct kvm *kvm)
1228 {
1229 	unsigned int i;
1230 	struct kvm_vcpu *vcpu;
1231 
1232 	kvm_for_each_vcpu(i, vcpu, kvm)
1233 		kvm_arch_vcpu_destroy(vcpu);
1234 
1235 	mutex_lock(&kvm->lock);
1236 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1237 		kvm->vcpus[i] = NULL;
1238 
1239 	atomic_set(&kvm->online_vcpus, 0);
1240 	mutex_unlock(&kvm->lock);
1241 }
1242 
1243 void kvm_arch_destroy_vm(struct kvm *kvm)
1244 {
1245 	kvm_free_vcpus(kvm);
1246 	free_page((unsigned long)kvm->arch.model.fac);
1247 	free_page((unsigned long)(kvm->arch.sca));
1248 	debug_unregister(kvm->arch.dbf);
1249 	kfree(kvm->arch.crypto.crycb);
1250 	if (!kvm_is_ucontrol(kvm))
1251 		gmap_free(kvm->arch.gmap);
1252 	kvm_s390_destroy_adapters(kvm);
1253 	kvm_s390_clear_float_irqs(kvm);
1254 	KVM_EVENT(3, "vm 0x%p destroyed", kvm);
1255 }
1256 
1257 /* Section: vcpu related */
1258 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1259 {
1260 	vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
1261 	if (!vcpu->arch.gmap)
1262 		return -ENOMEM;
1263 	vcpu->arch.gmap->private = vcpu->kvm;
1264 
1265 	return 0;
1266 }
1267 
1268 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1269 {
1270 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1271 	kvm_clear_async_pf_completion_queue(vcpu);
1272 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1273 				    KVM_SYNC_GPRS |
1274 				    KVM_SYNC_ACRS |
1275 				    KVM_SYNC_CRS |
1276 				    KVM_SYNC_ARCH0 |
1277 				    KVM_SYNC_PFAULT;
1278 	if (test_kvm_facility(vcpu->kvm, 129))
1279 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1280 
1281 	if (kvm_is_ucontrol(vcpu->kvm))
1282 		return __kvm_ucontrol_vcpu_init(vcpu);
1283 
1284 	return 0;
1285 }
1286 
1287 /*
1288  * Backs up the current FP/VX register save area to a particular
1289  * destination.  Used to switch between different register save
1290  * areas.
1291  */
1292 static inline void save_fpu_to(struct fpu *dst)
1293 {
1294 	dst->fpc = current->thread.fpu.fpc;
1295 	dst->flags = current->thread.fpu.flags;
1296 	dst->regs = current->thread.fpu.regs;
1297 }
1298 
1299 /*
1300  * Switches the FP/VX register save area from which register
1301  * contents are lazily restored.
1302  */
1303 static inline void load_fpu_from(struct fpu *from)
1304 {
1305 	current->thread.fpu.fpc = from->fpc;
1306 	current->thread.fpu.flags = from->flags;
1307 	current->thread.fpu.regs = from->regs;
1308 }
1309 
1310 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1311 {
1312 	/* Save host register state */
1313 	save_fpu_regs();
1314 	save_fpu_to(&vcpu->arch.host_fpregs);
1315 
1316 	if (test_kvm_facility(vcpu->kvm, 129)) {
1317 		current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
1318 		current->thread.fpu.flags = FPU_USE_VX;
1319 		/*
1320 		 * Use the register save area in the SIE-control block
1321 		 * for register restore and save in kvm_arch_vcpu_put()
1322 		 */
1323 		current->thread.fpu.vxrs =
1324 			(__vector128 *)&vcpu->run->s.regs.vrs;
1325 		/* Always enable the vector extension for KVM */
1326 		__ctl_set_vx();
1327 	} else
1328 		load_fpu_from(&vcpu->arch.guest_fpregs);
1329 
1330 	if (test_fp_ctl(current->thread.fpu.fpc))
1331 		/* User space provided an invalid FPC, let's clear it */
1332 		current->thread.fpu.fpc = 0;
1333 
1334 	save_access_regs(vcpu->arch.host_acrs);
1335 	restore_access_regs(vcpu->run->s.regs.acrs);
1336 	gmap_enable(vcpu->arch.gmap);
1337 	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1338 }
1339 
1340 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1341 {
1342 	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1343 	gmap_disable(vcpu->arch.gmap);
1344 
1345 	save_fpu_regs();
1346 
1347 	if (test_kvm_facility(vcpu->kvm, 129))
1348 		/*
1349 		 * kvm_arch_vcpu_load() set up the register save area to point
1350 		 * to &vcpu->run->s.regs.vrs and, thus, the vector registers
1351 		 * are already saved.  Only the floating-point control must be
1352 		 * copied.
1353 		 */
1354 		vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
1355 	else
1356 		save_fpu_to(&vcpu->arch.guest_fpregs);
1357 	load_fpu_from(&vcpu->arch.host_fpregs);
1358 
1359 	save_access_regs(vcpu->run->s.regs.acrs);
1360 	restore_access_regs(vcpu->arch.host_acrs);
1361 }
1362 
1363 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1364 {
1365 	/* this equals the initial cpu reset in POP, but we don't switch to ESA */
1366 	vcpu->arch.sie_block->gpsw.mask = 0UL;
1367 	vcpu->arch.sie_block->gpsw.addr = 0UL;
1368 	kvm_s390_set_prefix(vcpu, 0);
1369 	vcpu->arch.sie_block->cputm     = 0UL;
1370 	vcpu->arch.sie_block->ckc       = 0UL;
1371 	vcpu->arch.sie_block->todpr     = 0;
1372 	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1373 	vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
1374 	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1375 	vcpu->arch.guest_fpregs.fpc = 0;
1376 	asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
1377 	vcpu->arch.sie_block->gbea = 1;
1378 	vcpu->arch.sie_block->pp = 0;
1379 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1380 	kvm_clear_async_pf_completion_queue(vcpu);
1381 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1382 		kvm_s390_vcpu_stop(vcpu);
1383 	kvm_s390_clear_local_irqs(vcpu);
1384 }
1385 
1386 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1387 {
1388 	mutex_lock(&vcpu->kvm->lock);
1389 	preempt_disable();
1390 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1391 	preempt_enable();
1392 	mutex_unlock(&vcpu->kvm->lock);
1393 	if (!kvm_is_ucontrol(vcpu->kvm))
1394 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1395 }
1396 
1397 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1398 {
1399 	if (!test_kvm_facility(vcpu->kvm, 76))
1400 		return;
1401 
1402 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1403 
1404 	if (vcpu->kvm->arch.crypto.aes_kw)
1405 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1406 	if (vcpu->kvm->arch.crypto.dea_kw)
1407 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1408 
1409 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1410 }
1411 
1412 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1413 {
1414 	free_page(vcpu->arch.sie_block->cbrlo);
1415 	vcpu->arch.sie_block->cbrlo = 0;
1416 }
1417 
1418 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1419 {
1420 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1421 	if (!vcpu->arch.sie_block->cbrlo)
1422 		return -ENOMEM;
1423 
1424 	vcpu->arch.sie_block->ecb2 |= 0x80;
1425 	vcpu->arch.sie_block->ecb2 &= ~0x08;
1426 	return 0;
1427 }
1428 
1429 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1430 {
1431 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1432 
1433 	vcpu->arch.cpu_id = model->cpu_id;
1434 	vcpu->arch.sie_block->ibc = model->ibc;
1435 	vcpu->arch.sie_block->fac = (int) (long) model->fac->list;
1436 }
1437 
1438 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1439 {
1440 	int rc = 0;
1441 
1442 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1443 						    CPUSTAT_SM |
1444 						    CPUSTAT_STOPPED);
1445 
1446 	if (test_kvm_facility(vcpu->kvm, 78))
1447 		atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1448 	else if (test_kvm_facility(vcpu->kvm, 8))
1449 		atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1450 
1451 	kvm_s390_vcpu_setup_model(vcpu);
1452 
1453 	vcpu->arch.sie_block->ecb   = 6;
1454 	if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
1455 		vcpu->arch.sie_block->ecb |= 0x10;
1456 
1457 	vcpu->arch.sie_block->ecb2  = 8;
1458 	vcpu->arch.sie_block->eca   = 0xC1002000U;
1459 	if (sclp.has_siif)
1460 		vcpu->arch.sie_block->eca |= 1;
1461 	if (sclp.has_sigpif)
1462 		vcpu->arch.sie_block->eca |= 0x10000000U;
1463 	if (test_kvm_facility(vcpu->kvm, 129)) {
1464 		vcpu->arch.sie_block->eca |= 0x00020000;
1465 		vcpu->arch.sie_block->ecd |= 0x20000000;
1466 	}
1467 	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1468 
1469 	if (vcpu->kvm->arch.use_cmma) {
1470 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
1471 		if (rc)
1472 			return rc;
1473 	}
1474 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1475 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1476 
1477 	kvm_s390_vcpu_crypto_setup(vcpu);
1478 
1479 	return rc;
1480 }
1481 
1482 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1483 				      unsigned int id)
1484 {
1485 	struct kvm_vcpu *vcpu;
1486 	struct sie_page *sie_page;
1487 	int rc = -EINVAL;
1488 
1489 	if (id >= KVM_MAX_VCPUS)
1490 		goto out;
1491 
1492 	rc = -ENOMEM;
1493 
1494 	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1495 	if (!vcpu)
1496 		goto out;
1497 
1498 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1499 	if (!sie_page)
1500 		goto out_free_cpu;
1501 
1502 	vcpu->arch.sie_block = &sie_page->sie_block;
1503 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1504 
1505 	vcpu->arch.sie_block->icpua = id;
1506 	if (!kvm_is_ucontrol(kvm)) {
1507 		if (!kvm->arch.sca) {
1508 			WARN_ON_ONCE(1);
1509 			goto out_free_cpu;
1510 		}
1511 		if (!kvm->arch.sca->cpu[id].sda)
1512 			kvm->arch.sca->cpu[id].sda =
1513 				(__u64) vcpu->arch.sie_block;
1514 		vcpu->arch.sie_block->scaoh =
1515 			(__u32)(((__u64)kvm->arch.sca) >> 32);
1516 		vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
1517 		set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
1518 	}
1519 
1520 	spin_lock_init(&vcpu->arch.local_int.lock);
1521 	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
1522 	vcpu->arch.local_int.wq = &vcpu->wq;
1523 	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
1524 
1525 	/*
1526 	 * Allocate a save area for floating-point registers.  If the vector
1527 	 * extension is available, register contents are saved in the SIE
1528 	 * control block.  The allocated save area is still required in
1529 	 * particular places, for example, in kvm_s390_vcpu_store_status().
1530 	 */
1531 	vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
1532 					       GFP_KERNEL);
1533 	if (!vcpu->arch.guest_fpregs.fprs) {
1534 		rc = -ENOMEM;
1535 		goto out_free_sie_block;
1536 	}
1537 
1538 	rc = kvm_vcpu_init(vcpu, kvm, id);
1539 	if (rc)
1540 		goto out_free_sie_block;
1541 	VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
1542 		 vcpu->arch.sie_block);
1543 	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
1544 
1545 	return vcpu;
1546 out_free_sie_block:
1547 	free_page((unsigned long)(vcpu->arch.sie_block));
1548 out_free_cpu:
1549 	kmem_cache_free(kvm_vcpu_cache, vcpu);
1550 out:
1551 	return ERR_PTR(rc);
1552 }
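
/*
 * Illustrative sketch, not part of this file: this constructor backs the
 * KVM_CREATE_VCPU ioctl, so a VCPU with id 0 is created from userspace
 * roughly as follows (vm_fd is an assumption):
 *
 *	int vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);
 */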
1553 
1554 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
1555 {
1556 	return kvm_s390_vcpu_has_irq(vcpu, 0);
1557 }
1558 
1559 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
1560 {
1561 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1562 	exit_sie(vcpu);
1563 }
1564 
1565 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
1566 {
1567 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1568 }
1569 
1570 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
1571 {
1572 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1573 	exit_sie(vcpu);
1574 }
1575 
1576 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
1577 {
1578 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1579 }
1580 
1581 /*
1582  * Kick a guest cpu out of SIE and wait until SIE is not running.
1583  * If the CPU is not running (e.g. waiting as idle) the function will
1584  * return immediately. */
1585 void exit_sie(struct kvm_vcpu *vcpu)
1586 {
1587 	atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
1588 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
1589 		cpu_relax();
1590 }
1591 
1592 /* Kick a guest cpu out of SIE to process a request synchronously */
1593 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
1594 {
1595 	kvm_make_request(req, vcpu);
1596 	kvm_s390_vcpu_request(vcpu);
1597 }
1598 
1599 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
1600 {
1601 	int i;
1602 	struct kvm *kvm = gmap->private;
1603 	struct kvm_vcpu *vcpu;
1604 
1605 	kvm_for_each_vcpu(i, vcpu, kvm) {
1606 		/* match against both prefix pages */
1607 		if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
1608 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
1609 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
1610 		}
1611 	}
1612 }
1613 
1614 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
1615 {
1616 	/* kvm common code refers to this, but never calls it */
1617 	BUG();
1618 	return 0;
1619 }
1620 
1621 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
1622 					   struct kvm_one_reg *reg)
1623 {
1624 	int r = -EINVAL;
1625 
1626 	switch (reg->id) {
1627 	case KVM_REG_S390_TODPR:
1628 		r = put_user(vcpu->arch.sie_block->todpr,
1629 			     (u32 __user *)reg->addr);
1630 		break;
1631 	case KVM_REG_S390_EPOCHDIFF:
1632 		r = put_user(vcpu->arch.sie_block->epoch,
1633 			     (u64 __user *)reg->addr);
1634 		break;
1635 	case KVM_REG_S390_CPU_TIMER:
1636 		r = put_user(vcpu->arch.sie_block->cputm,
1637 			     (u64 __user *)reg->addr);
1638 		break;
1639 	case KVM_REG_S390_CLOCK_COMP:
1640 		r = put_user(vcpu->arch.sie_block->ckc,
1641 			     (u64 __user *)reg->addr);
1642 		break;
1643 	case KVM_REG_S390_PFTOKEN:
1644 		r = put_user(vcpu->arch.pfault_token,
1645 			     (u64 __user *)reg->addr);
1646 		break;
1647 	case KVM_REG_S390_PFCOMPARE:
1648 		r = put_user(vcpu->arch.pfault_compare,
1649 			     (u64 __user *)reg->addr);
1650 		break;
1651 	case KVM_REG_S390_PFSELECT:
1652 		r = put_user(vcpu->arch.pfault_select,
1653 			     (u64 __user *)reg->addr);
1654 		break;
1655 	case KVM_REG_S390_PP:
1656 		r = put_user(vcpu->arch.sie_block->pp,
1657 			     (u64 __user *)reg->addr);
1658 		break;
1659 	case KVM_REG_S390_GBEA:
1660 		r = put_user(vcpu->arch.sie_block->gbea,
1661 			     (u64 __user *)reg->addr);
1662 		break;
1663 	default:
1664 		break;
1665 	}
1666 
1667 	return r;
1668 }
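
/*
 * Illustrative sketch, not part of this file: the registers above are
 * accessed from userspace through the ONE_REG interface on the VCPU file
 * descriptor, for example reading the CPU timer (vcpu_fd is an assumption):
 *
 *	__u64 val;
 *	struct kvm_one_reg reg = {
 *		.id   = KVM_REG_S390_CPU_TIMER,
 *		.addr = (__u64)(unsigned long)&val,
 *	};
 *	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
 */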
1669 
1670 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
1671 					   struct kvm_one_reg *reg)
1672 {
1673 	int r = -EINVAL;
1674 
1675 	switch (reg->id) {
1676 	case KVM_REG_S390_TODPR:
1677 		r = get_user(vcpu->arch.sie_block->todpr,
1678 			     (u32 __user *)reg->addr);
1679 		break;
1680 	case KVM_REG_S390_EPOCHDIFF:
1681 		r = get_user(vcpu->arch.sie_block->epoch,
1682 			     (u64 __user *)reg->addr);
1683 		break;
1684 	case KVM_REG_S390_CPU_TIMER:
1685 		r = get_user(vcpu->arch.sie_block->cputm,
1686 			     (u64 __user *)reg->addr);
1687 		break;
1688 	case KVM_REG_S390_CLOCK_COMP:
1689 		r = get_user(vcpu->arch.sie_block->ckc,
1690 			     (u64 __user *)reg->addr);
1691 		break;
1692 	case KVM_REG_S390_PFTOKEN:
1693 		r = get_user(vcpu->arch.pfault_token,
1694 			     (u64 __user *)reg->addr);
1695 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1696 			kvm_clear_async_pf_completion_queue(vcpu);
1697 		break;
1698 	case KVM_REG_S390_PFCOMPARE:
1699 		r = get_user(vcpu->arch.pfault_compare,
1700 			     (u64 __user *)reg->addr);
1701 		break;
1702 	case KVM_REG_S390_PFSELECT:
1703 		r = get_user(vcpu->arch.pfault_select,
1704 			     (u64 __user *)reg->addr);
1705 		break;
1706 	case KVM_REG_S390_PP:
1707 		r = get_user(vcpu->arch.sie_block->pp,
1708 			     (u64 __user *)reg->addr);
1709 		break;
1710 	case KVM_REG_S390_GBEA:
1711 		r = get_user(vcpu->arch.sie_block->gbea,
1712 			     (u64 __user *)reg->addr);
1713 		break;
1714 	default:
1715 		break;
1716 	}
1717 
1718 	return r;
1719 }
1720 
1721 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
1722 {
1723 	kvm_s390_vcpu_initial_reset(vcpu);
1724 	return 0;
1725 }
1726 
1727 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1728 {
1729 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
1730 	return 0;
1731 }
1732 
1733 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1734 {
1735 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
1736 	return 0;
1737 }
1738 
1739 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
1740 				  struct kvm_sregs *sregs)
1741 {
1742 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
1743 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
1744 	restore_access_regs(vcpu->run->s.regs.acrs);
1745 	return 0;
1746 }
1747 
1748 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
1749 				  struct kvm_sregs *sregs)
1750 {
1751 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
1752 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
1753 	return 0;
1754 }
1755 
1756 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1757 {
1758 	if (test_fp_ctl(fpu->fpc))
1759 		return -EINVAL;
1760 	memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
1761 	vcpu->arch.guest_fpregs.fpc = fpu->fpc;
1762 	save_fpu_regs();
1763 	load_fpu_from(&vcpu->arch.guest_fpregs);
1764 	return 0;
1765 }
1766 
1767 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1768 {
1769 	memcpy(&fpu->fprs, vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
1770 	fpu->fpc = vcpu->arch.guest_fpregs.fpc;
1771 	return 0;
1772 }
1773 
1774 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
1775 {
1776 	int rc = 0;
1777 
1778 	if (!is_vcpu_stopped(vcpu))
1779 		rc = -EBUSY;
1780 	else {
1781 		vcpu->run->psw_mask = psw.mask;
1782 		vcpu->run->psw_addr = psw.addr;
1783 	}
1784 	return rc;
1785 }
1786 
1787 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
1788 				  struct kvm_translation *tr)
1789 {
1790 	return -EINVAL; /* not implemented yet */
1791 }
1792 
1793 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
1794 			      KVM_GUESTDBG_USE_HW_BP | \
1795 			      KVM_GUESTDBG_ENABLE)
1796 
1797 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
1798 					struct kvm_guest_debug *dbg)
1799 {
1800 	int rc = 0;
1801 
1802 	vcpu->guest_debug = 0;
1803 	kvm_s390_clear_bp_data(vcpu);
1804 
1805 	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
1806 		return -EINVAL;
1807 
1808 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
1809 		vcpu->guest_debug = dbg->control;
1810 		/* enforce guest PER */
1811 		atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1812 
1813 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
1814 			rc = kvm_s390_import_bp_data(vcpu, dbg);
1815 	} else {
1816 		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1817 		vcpu->arch.guestdbg.last_bp = 0;
1818 	}
1819 
1820 	if (rc) {
1821 		vcpu->guest_debug = 0;
1822 		kvm_s390_clear_bp_data(vcpu);
1823 		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1824 	}
1825 
1826 	return rc;
1827 }
1828 
1829 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
1830 				    struct kvm_mp_state *mp_state)
1831 {
1832 	/* CHECK_STOP and LOAD are not supported yet */
1833 	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
1834 				       KVM_MP_STATE_OPERATING;
1835 }
1836 
1837 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
1838 				    struct kvm_mp_state *mp_state)
1839 {
1840 	int rc = 0;
1841 
1842 	/* user space knows about this interface - let it control the state */
1843 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
1844 
1845 	switch (mp_state->mp_state) {
1846 	case KVM_MP_STATE_STOPPED:
1847 		kvm_s390_vcpu_stop(vcpu);
1848 		break;
1849 	case KVM_MP_STATE_OPERATING:
1850 		kvm_s390_vcpu_start(vcpu);
1851 		break;
1852 	case KVM_MP_STATE_LOAD:
1853 	case KVM_MP_STATE_CHECK_STOP:
1854 		/* fall through - CHECK_STOP and LOAD are not supported yet */
1855 	default:
1856 		rc = -ENXIO;
1857 	}
1858 
1859 	return rc;
1860 }
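
/*
 * Illustrative sketch, not part of this file: once userspace has used this
 * interface it owns the stopped/operating state of the VCPU, e.g. stopping
 * it (vcpu_fd is an assumption):
 *
 *	struct kvm_mp_state st = { .mp_state = KVM_MP_STATE_STOPPED };
 *	ioctl(vcpu_fd, KVM_SET_MP_STATE, &st);
 */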
1861 
1862 static bool ibs_enabled(struct kvm_vcpu *vcpu)
1863 {
1864 	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
1865 }
1866 
1867 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
1868 {
1869 retry:
1870 	kvm_s390_vcpu_request_handled(vcpu);
1871 	if (!vcpu->requests)
1872 		return 0;
1873 	/*
1874 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
1875 	 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
1876 	 * This ensures that the ipte instruction for this request has
1877 	 * already finished. We might race against a second unmapper that
1878 	 * wants to set the blocking bit. Let's just retry the request loop.
1879 	 */
1880 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
1881 		int rc;
1882 		rc = gmap_ipte_notify(vcpu->arch.gmap,
1883 				      kvm_s390_get_prefix(vcpu),
1884 				      PAGE_SIZE * 2);
1885 		if (rc)
1886 			return rc;
1887 		goto retry;
1888 	}
1889 
1890 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
1891 		vcpu->arch.sie_block->ihcpu = 0xffff;
1892 		goto retry;
1893 	}
1894 
1895 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
1896 		if (!ibs_enabled(vcpu)) {
1897 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
1898 			atomic_or(CPUSTAT_IBS,
1899 					&vcpu->arch.sie_block->cpuflags);
1900 		}
1901 		goto retry;
1902 	}
1903 
1904 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
1905 		if (ibs_enabled(vcpu)) {
1906 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
1907 			atomic_andnot(CPUSTAT_IBS,
1908 					  &vcpu->arch.sie_block->cpuflags);
1909 		}
1910 		goto retry;
1911 	}
1912 
1913 	/* nothing to do, just clear the request */
1914 	clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
1915 
1916 	return 0;
1917 }
1918 
1919 /**
1920  * kvm_arch_fault_in_page - fault-in guest page if necessary
1921  * @vcpu: The corresponding virtual cpu
1922  * @gpa: Guest physical address
1923  * @writable: Whether the page should be writable or not
1924  *
1925  * Make sure that a guest page has been faulted-in on the host.
1926  *
1927  * Return: Zero on success, negative error code otherwise.
1928  */
1929 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
1930 {
1931 	return gmap_fault(vcpu->arch.gmap, gpa,
1932 			  writable ? FAULT_FLAG_WRITE : 0);
1933 }
1934 
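/*
 * Inject a pfault token into the guest: a PFAULT_INIT interrupt on the
 * vcpu when the token marks the start of an async page fault, or a
 * PFAULT_DONE interrupt on the vm once the fault has been resolved.
 */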
1935 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
1936 				      unsigned long token)
1937 {
1938 	struct kvm_s390_interrupt inti;
1939 	struct kvm_s390_irq irq;
1940 
1941 	if (start_token) {
1942 		irq.u.ext.ext_params2 = token;
1943 		irq.type = KVM_S390_INT_PFAULT_INIT;
1944 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
1945 	} else {
1946 		inti.type = KVM_S390_INT_PFAULT_DONE;
1947 		inti.parm64 = token;
1948 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
1949 	}
1950 }
1951 
1952 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
1953 				     struct kvm_async_pf *work)
1954 {
1955 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
1956 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
1957 }
1958 
1959 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
1960 				 struct kvm_async_pf *work)
1961 {
1962 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
1963 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
1964 }
1965 
1966 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
1967 			       struct kvm_async_pf *work)
1968 {
1969 	/* s390 will always inject the page directly */
1970 }
1971 
1972 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
1973 {
1974 	/*
1975 	 * s390 will always inject the page directly,
1976 	 * but we still want check_async_completion to clean up
1977 	 */
1978 	return true;
1979 }
1980 
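/*
 * Try to arm an async page fault for the current host fault. This only
 * happens if the guest has a valid pfault token, the PSW matches the
 * pfault select/compare masks, external interrupts and the matching CR0
 * submask bit (0x200) are enabled, no interrupt is pending and pfault is
 * enabled for the gmap. The guest's token is read from real memory and
 * the faulting gmap address plus its hva are handed to kvm_setup_async_pf().
 */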
1981 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
1982 {
1983 	hva_t hva;
1984 	struct kvm_arch_async_pf arch;
1985 	int rc;
1986 
1987 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1988 		return 0;
1989 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
1990 	    vcpu->arch.pfault_compare)
1991 		return 0;
1992 	if (psw_extint_disabled(vcpu))
1993 		return 0;
1994 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
1995 		return 0;
1996 	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
1997 		return 0;
1998 	if (!vcpu->arch.gmap->pfault_enabled)
1999 		return 0;
2000 
2001 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2002 	hva += current->thread.gmap_addr & ~PAGE_MASK;
2003 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2004 		return 0;
2005 
2006 	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2007 	return rc;
2008 }
2009 
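/*
 * Prepare for entering SIE: handle completed async page faults, sync
 * gprs 14 and 15 into the sie block, give up the cpu on a pending
 * reschedule or machine check, deliver pending interrupts (non-ucontrol
 * guests only), process vcpu requests and set up PER patching when
 * guest debugging is active.
 */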
2010 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2011 {
2012 	int rc, cpuflags;
2013 
2014 	/*
2015 	 * On s390 notifications for arriving pages will be delivered directly
2016 	 * to the guest, but the housekeeping for completed pfaults is
2017 	 * handled outside the worker.
2018 	 */
2019 	kvm_check_async_pf_completion(vcpu);
2020 
2021 	memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);
2022 
2023 	if (need_resched())
2024 		schedule();
2025 
2026 	if (test_cpu_flag(CIF_MCCK_PENDING))
2027 		s390_handle_mcck();
2028 
2029 	if (!kvm_is_ucontrol(vcpu->kvm)) {
2030 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
2031 		if (rc)
2032 			return rc;
2033 	}
2034 
2035 	rc = kvm_s390_handle_requests(vcpu);
2036 	if (rc)
2037 		return rc;
2038 
2039 	if (guestdbg_enabled(vcpu)) {
2040 		kvm_s390_backup_guest_per_regs(vcpu);
2041 		kvm_s390_patch_guest_per_regs(vcpu);
2042 	}
2043 
2044 	vcpu->arch.sie_block->icptcode = 0;
2045 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2046 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2047 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
2048 
2049 	return 0;
2050 }
2051 
2052 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2053 {
2054 	psw_t *psw = &vcpu->arch.sie_block->gpsw;
2055 	u8 opcode;
2056 	int rc;
2057 
2058 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2059 	trace_kvm_s390_sie_fault(vcpu);
2060 
2061 	/*
2062 	 * We want to inject an addressing exception, which is defined as a
2063 	 * suppressing or terminating exception. However, since we came here
2064 	 * by a DAT access exception, the PSW still points to the faulting
2065 	 * instruction since DAT exceptions are nullifying. So we've got
2066 	 * to look up the current opcode to get the length of the instruction
2067 	 * to be able to forward the PSW.
2068 	 */
2069 	rc = read_guest(vcpu, psw->addr, 0, &opcode, 1);
2070 	if (rc)
2071 		return kvm_s390_inject_prog_cond(vcpu, rc);
2072 	psw->addr = __rewind_psw(*psw, -insn_length(opcode));
2073 
2074 	return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
2075 }
2076 
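/*
 * Handle the outcome of a SIE exit: a valid exit reason is passed on to
 * the intercept handlers, a ucontrol fault is reported to userspace as
 * KVM_EXIT_S390_UCONTROL, and a guest page fault is either turned into
 * an async page fault or faulted in synchronously. Anything else is a
 * fault inside SIE itself and gets an addressing exception injected.
 */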
2077 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2078 {
2079 	int rc = -1;
2080 
2081 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2082 		   vcpu->arch.sie_block->icptcode);
2083 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2084 
2085 	if (guestdbg_enabled(vcpu))
2086 		kvm_s390_restore_guest_per_regs(vcpu);
2087 
2088 	if (exit_reason >= 0) {
2089 		rc = 0;
2090 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
2091 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2092 		vcpu->run->s390_ucontrol.trans_exc_code =
2093 						current->thread.gmap_addr;
2094 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
2095 		rc = -EREMOTE;
2096 
2097 	} else if (current->thread.gmap_pfault) {
2098 		trace_kvm_s390_major_guest_pfault(vcpu);
2099 		current->thread.gmap_pfault = 0;
2100 		if (kvm_arch_setup_async_pf(vcpu)) {
2101 			rc = 0;
2102 		} else {
2103 			gpa_t gpa = current->thread.gmap_addr;
2104 			rc = kvm_arch_fault_in_page(vcpu, gpa, 1);
2105 		}
2106 	}
2107 
2108 	if (rc == -1)
2109 		rc = vcpu_post_run_fault_in_sie(vcpu);
2110 
2111 	memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
2112 
2113 	if (rc == 0) {
2114 		if (kvm_is_ucontrol(vcpu->kvm))
2115 			/* Don't exit for host interrupts. */
2116 			rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0;
2117 		else
2118 			rc = kvm_handle_sie_intercept(vcpu);
2119 	}
2120 
2121 	return rc;
2122 }
2123 
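/*
 * The main run loop: hold kvm->srcu while preparing for and processing
 * the results of a guest run, drop it around the actual sie64a() call,
 * and keep looping until a signal is pending, a guest debug exit is
 * requested or vcpu_pre_run()/vcpu_post_run() report an error.
 */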
2124 static int __vcpu_run(struct kvm_vcpu *vcpu)
2125 {
2126 	int rc, exit_reason;
2127 
2128 	/*
2129 	 * We try to hold kvm->srcu during most of vcpu_run (except when
2130 	 * running the guest), so that memslots (and other stuff) are protected
2131 	 */
2132 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2133 
2134 	do {
2135 		rc = vcpu_pre_run(vcpu);
2136 		if (rc)
2137 			break;
2138 
2139 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2140 		/*
2141 		 * As PF_VCPU will be used in the fault handler, there must be
2142 		 * no uaccess between guest_enter and guest_exit.
2143 		 */
2144 		local_irq_disable();
2145 		__kvm_guest_enter();
2146 		local_irq_enable();
2147 		exit_reason = sie64a(vcpu->arch.sie_block,
2148 				     vcpu->run->s.regs.gprs);
2149 		local_irq_disable();
2150 		__kvm_guest_exit();
2151 		local_irq_enable();
2152 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2153 
2154 		rc = vcpu_post_run(vcpu, exit_reason);
2155 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2156 
2157 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2158 	return rc;
2159 }
2160 
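/*
 * Copy the guest PSW and any register state that userspace marked dirty
 * in kvm_run into the SIE control block and vcpu. Control register
 * changes additionally request a TLB flush, and an invalid pfault token
 * cancels queued async page faults.
 */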
2161 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2162 {
2163 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2164 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2165 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2166 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2167 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2168 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2169 		/* some control register changes require a tlb flush */
2170 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2171 	}
2172 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2173 		vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
2174 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2175 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2176 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2177 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2178 	}
2179 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2180 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2181 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2182 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2183 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2184 			kvm_clear_async_pf_completion_queue(vcpu);
2185 	}
2186 	kvm_run->kvm_dirty_regs = 0;
2187 }
2188 
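/*
 * Mirror of sync_regs(): copy the guest PSW, prefix, control, timer and
 * pfault state back into kvm_run so userspace sees the state left by
 * the last guest run.
 */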
2189 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2190 {
2191 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2192 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2193 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2194 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2195 	kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
2196 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2197 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2198 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2199 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2200 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2201 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2202 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2203 }
2204 
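/*
 * Top-level KVM_RUN handler: honour pending debug exits, apply the vcpu
 * signal mask, auto-start the vcpu unless userspace controls the cpu
 * state, sync registers, run the inner loop and translate the result
 * into an exit reason (KVM_EXIT_INTR, KVM_EXIT_S390_SIEIC, ...) before
 * storing the registers back.
 */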
2205 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2206 {
2207 	int rc;
2208 	sigset_t sigsaved;
2209 
2210 	if (guestdbg_exit_pending(vcpu)) {
2211 		kvm_s390_prepare_debug_exit(vcpu);
2212 		return 0;
2213 	}
2214 
2215 	if (vcpu->sigset_active)
2216 		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2217 
2218 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2219 		kvm_s390_vcpu_start(vcpu);
2220 	} else if (is_vcpu_stopped(vcpu)) {
2221 		pr_err_ratelimited("can't run stopped vcpu %d\n",
2222 				   vcpu->vcpu_id);
2223 		return -EINVAL;
2224 	}
2225 
2226 	sync_regs(vcpu, kvm_run);
2227 
2228 	might_fault();
2229 	rc = __vcpu_run(vcpu);
2230 
2231 	if (signal_pending(current) && !rc) {
2232 		kvm_run->exit_reason = KVM_EXIT_INTR;
2233 		rc = -EINTR;
2234 	}
2235 
2236 	if (guestdbg_exit_pending(vcpu) && !rc)  {
2237 		kvm_s390_prepare_debug_exit(vcpu);
2238 		rc = 0;
2239 	}
2240 
2241 	if (rc == -EOPNOTSUPP) {
2242 		/* intercept cannot be handled in-kernel, prepare kvm-run */
2243 		kvm_run->exit_reason         = KVM_EXIT_S390_SIEIC;
2244 		kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2245 		kvm_run->s390_sieic.ipa      = vcpu->arch.sie_block->ipa;
2246 		kvm_run->s390_sieic.ipb      = vcpu->arch.sie_block->ipb;
2247 		rc = 0;
2248 	}
2249 
2250 	if (rc == -EREMOTE) {
2251 		/* intercept was handled, but userspace support is needed;
2252 		 * kvm_run has already been prepared by the handler */
2253 		rc = 0;
2254 	}
2255 
2256 	store_regs(vcpu, kvm_run);
2257 
2258 	if (vcpu->sigset_active)
2259 		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2260 
2261 	vcpu->stat.exit_userspace++;
2262 	return rc;
2263 }
2264 
2265 /*
2266  * store status at address
2267  * we have two special cases:
2268  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2269  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2270  */
2271 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2272 {
2273 	unsigned char archmode = 1;
2274 	unsigned int px;
2275 	u64 clkcomp;
2276 	int rc;
2277 
2278 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2279 		if (write_guest_abs(vcpu, 163, &archmode, 1))
2280 			return -EFAULT;
2281 		gpa = SAVE_AREA_BASE;
2282 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2283 		if (write_guest_real(vcpu, 163, &archmode, 1))
2284 			return -EFAULT;
2285 		gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE);
2286 	}
2287 	rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs),
2288 			     vcpu->arch.guest_fpregs.fprs, 128);
2289 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs),
2290 			      vcpu->run->s.regs.gprs, 128);
2291 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw),
2292 			      &vcpu->arch.sie_block->gpsw, 16);
2293 	px = kvm_s390_get_prefix(vcpu);
2294 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg),
2295 			      &px, 4);
2296 	rc |= write_guest_abs(vcpu,
2297 			      gpa + offsetof(struct save_area, fp_ctrl_reg),
2298 			      &vcpu->arch.guest_fpregs.fpc, 4);
2299 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg),
2300 			      &vcpu->arch.sie_block->todpr, 4);
2301 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer),
2302 			      &vcpu->arch.sie_block->cputm, 8);
2303 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
2304 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp),
2305 			      &clkcomp, 8);
2306 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs),
2307 			      &vcpu->run->s.regs.acrs, 64);
2308 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs),
2309 			      &vcpu->arch.sie_block->gcr, 128);
2310 	return rc ? -EFAULT : 0;
2311 }
2312 
2313 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2314 {
2315 	/*
2316 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2317 	 * copying in vcpu load/put. Let's update our copies before we save
2318 	 * them into the save area.
2319 	 */
2320 	save_fpu_regs();
2321 	if (test_kvm_facility(vcpu->kvm, 129)) {
2322 		/*
2323 		 * If the vector extension is available, the vector registers
2324 		 * which overlap with the floating-point registers are saved in
2325 		 * the SIE-control block.  Hence, extract the floating-point
2326 		 * registers and the FPC value and store them in the
2327 		 * guest_fpregs structure.
2328 		 */
2329 		WARN_ON(!is_vx_task(current));	  /* XXX remove later */
2330 		vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc;
2331 		convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs,
2332 				 current->thread.fpu.vxrs);
2333 	} else
2334 		save_fpu_to(&vcpu->arch.guest_fpregs);
2335 	save_access_regs(vcpu->run->s.regs.acrs);
2336 
2337 	return kvm_s390_store_status_unloaded(vcpu, addr);
2338 }
2339 
2340 /*
2341  * store additional status at address
2342  */
2343 int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
2344 					unsigned long gpa)
2345 {
2346 	/* Only bits 0-53 are used for address formation */
2347 	if (!(gpa & ~0x3ff))
2348 		return 0;
2349 
2350 	return write_guest_abs(vcpu, gpa & ~0x3ff,
2351 			       (void *)&vcpu->run->s.regs.vrs, 512);
2352 }
2353 
2354 int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
2355 {
2356 	if (!test_kvm_facility(vcpu->kvm, 129))
2357 		return 0;
2358 
2359 	/*
2360 	 * The guest VXRS are in the host VXRS due to the lazy
2361 	 * copying in vcpu load/put. We can simply call save_fpu_regs()
2362 	 * to save the current register state because we are in the
2363 	 * middle of a load/put cycle.
2364 	 *
2365 	 * Let's update our copies before we save it into the save area.
2366 	 */
2367 	save_fpu_regs();
2368 
2369 	return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
2370 }
2371 
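/*
 * IBS only pays off while a single vcpu is running (see
 * kvm_s390_vcpu_start/stop below). These helpers cancel a pending
 * request for the opposite IBS state before synchronously requesting
 * the new one.
 */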
2372 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2373 {
2374 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2375 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2376 }
2377 
2378 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2379 {
2380 	unsigned int i;
2381 	struct kvm_vcpu *vcpu;
2382 
2383 	kvm_for_each_vcpu(i, vcpu, kvm) {
2384 		__disable_ibs_on_vcpu(vcpu);
2385 	}
2386 }
2387 
2388 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2389 {
2390 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2391 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2392 }
2393 
2394 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2395 {
2396 	int i, online_vcpus, started_vcpus = 0;
2397 
2398 	if (!is_vcpu_stopped(vcpu))
2399 		return;
2400 
2401 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2402 	/* Only one cpu at a time may enter/leave the STOPPED state. */
2403 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
2404 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2405 
2406 	for (i = 0; i < online_vcpus; i++) {
2407 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2408 			started_vcpus++;
2409 	}
2410 
2411 	if (started_vcpus == 0) {
2412 		/* we're the only active VCPU -> speed it up */
2413 		__enable_ibs_on_vcpu(vcpu);
2414 	} else if (started_vcpus == 1) {
2415 		/*
2416 		 * As we are starting a second VCPU, we have to disable
2417 		 * the IBS facility on all VCPUs to remove potentially
2418 		 * outstanding ENABLE requests.
2419 		 */
2420 		__disable_ibs_on_all_vcpus(vcpu->kvm);
2421 	}
2422 
2423 	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2424 	/*
2425 	 * Another VCPU might have used IBS while we were offline.
2426 	 * Let's play safe and flush the VCPU at startup.
2427 	 */
2428 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2429 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2430 	return;
2431 }
2432 
2433 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2434 {
2435 	int i, online_vcpus, started_vcpus = 0;
2436 	struct kvm_vcpu *started_vcpu = NULL;
2437 
2438 	if (is_vcpu_stopped(vcpu))
2439 		return;
2440 
2441 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2442 	/* Only one cpu at a time may enter/leave the STOPPED state. */
2443 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
2444 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2445 
2446 	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
2447 	kvm_s390_clear_stop_irq(vcpu);
2448 
2449 	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2450 	__disable_ibs_on_vcpu(vcpu);
2451 
2452 	for (i = 0; i < online_vcpus; i++) {
2453 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2454 			started_vcpus++;
2455 			started_vcpu = vcpu->kvm->vcpus[i];
2456 		}
2457 	}
2458 
2459 	if (started_vcpus == 1) {
2460 		/*
2461 		 * As we only have one VCPU left, we want to enable the
2462 		 * IBS facility for that VCPU to speed it up.
2463 		 */
2464 		__enable_ibs_on_vcpu(started_vcpu);
2465 	}
2466 
2467 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2468 	return;
2469 }
2470 
2471 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2472 				     struct kvm_enable_cap *cap)
2473 {
2474 	int r;
2475 
2476 	if (cap->flags)
2477 		return -EINVAL;
2478 
2479 	switch (cap->cap) {
2480 	case KVM_CAP_S390_CSS_SUPPORT:
2481 		if (!vcpu->kvm->arch.css_support) {
2482 			vcpu->kvm->arch.css_support = 1;
2483 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
2484 			trace_kvm_s390_enable_css(vcpu->kvm);
2485 		}
2486 		r = 0;
2487 		break;
2488 	default:
2489 		r = -EINVAL;
2490 		break;
2491 	}
2492 	return r;
2493 }
2494 
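/*
 * Implement KVM_S390_MEM_OP: read or write guest logical memory through
 * a temporary kernel buffer, or only check accessibility when
 * KVM_S390_MEMOP_F_CHECK_ONLY is set, and optionally inject the
 * resulting program exception into the guest.
 */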
2495 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
2496 				  struct kvm_s390_mem_op *mop)
2497 {
2498 	void __user *uaddr = (void __user *)mop->buf;
2499 	void *tmpbuf = NULL;
2500 	int r, srcu_idx;
2501 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
2502 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
2503 
2504 	if (mop->flags & ~supported_flags)
2505 		return -EINVAL;
2506 
2507 	if (mop->size > MEM_OP_MAX_SIZE)
2508 		return -E2BIG;
2509 
2510 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2511 		tmpbuf = vmalloc(mop->size);
2512 		if (!tmpbuf)
2513 			return -ENOMEM;
2514 	}
2515 
2516 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2517 
2518 	switch (mop->op) {
2519 	case KVM_S390_MEMOP_LOGICAL_READ:
2520 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2521 			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false);
2522 			break;
2523 		}
2524 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2525 		if (r == 0) {
2526 			if (copy_to_user(uaddr, tmpbuf, mop->size))
2527 				r = -EFAULT;
2528 		}
2529 		break;
2530 	case KVM_S390_MEMOP_LOGICAL_WRITE:
2531 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2532 			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true);
2533 			break;
2534 		}
2535 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2536 			r = -EFAULT;
2537 			break;
2538 		}
2539 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2540 		break;
2541 	default:
2542 		r = -EINVAL;
2543 	}
2544 
2545 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
2546 
2547 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
2548 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
2549 
2550 	vfree(tmpbuf);
2551 	return r;
2552 }
2553 
2554 long kvm_arch_vcpu_ioctl(struct file *filp,
2555 			 unsigned int ioctl, unsigned long arg)
2556 {
2557 	struct kvm_vcpu *vcpu = filp->private_data;
2558 	void __user *argp = (void __user *)arg;
2559 	int idx;
2560 	long r;
2561 
2562 	switch (ioctl) {
2563 	case KVM_S390_IRQ: {
2564 		struct kvm_s390_irq s390irq;
2565 
2566 		r = -EFAULT;
2567 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
2568 			break;
2569 		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2570 		break;
2571 	}
2572 	case KVM_S390_INTERRUPT: {
2573 		struct kvm_s390_interrupt s390int;
2574 		struct kvm_s390_irq s390irq;
2575 
2576 		r = -EFAULT;
2577 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
2578 			break;
2579 		if (s390int_to_s390irq(&s390int, &s390irq))
2580 			return -EINVAL;
2581 		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2582 		break;
2583 	}
2584 	case KVM_S390_STORE_STATUS:
2585 		idx = srcu_read_lock(&vcpu->kvm->srcu);
2586 		r = kvm_s390_vcpu_store_status(vcpu, arg);
2587 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
2588 		break;
2589 	case KVM_S390_SET_INITIAL_PSW: {
2590 		psw_t psw;
2591 
2592 		r = -EFAULT;
2593 		if (copy_from_user(&psw, argp, sizeof(psw)))
2594 			break;
2595 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
2596 		break;
2597 	}
2598 	case KVM_S390_INITIAL_RESET:
2599 		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
2600 		break;
2601 	case KVM_SET_ONE_REG:
2602 	case KVM_GET_ONE_REG: {
2603 		struct kvm_one_reg reg;
2604 		r = -EFAULT;
2605 		if (copy_from_user(&reg, argp, sizeof(reg)))
2606 			break;
2607 		if (ioctl == KVM_SET_ONE_REG)
2608 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
2609 		else
2610 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
2611 		break;
2612 	}
2613 #ifdef CONFIG_KVM_S390_UCONTROL
2614 	case KVM_S390_UCAS_MAP: {
2615 		struct kvm_s390_ucas_mapping ucasmap;
2616 
2617 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2618 			r = -EFAULT;
2619 			break;
2620 		}
2621 
2622 		if (!kvm_is_ucontrol(vcpu->kvm)) {
2623 			r = -EINVAL;
2624 			break;
2625 		}
2626 
2627 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
2628 				     ucasmap.vcpu_addr, ucasmap.length);
2629 		break;
2630 	}
2631 	case KVM_S390_UCAS_UNMAP: {
2632 		struct kvm_s390_ucas_mapping ucasmap;
2633 
2634 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2635 			r = -EFAULT;
2636 			break;
2637 		}
2638 
2639 		if (!kvm_is_ucontrol(vcpu->kvm)) {
2640 			r = -EINVAL;
2641 			break;
2642 		}
2643 
2644 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
2645 			ucasmap.length);
2646 		break;
2647 	}
2648 #endif
2649 	case KVM_S390_VCPU_FAULT: {
2650 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
2651 		break;
2652 	}
2653 	case KVM_ENABLE_CAP:
2654 	{
2655 		struct kvm_enable_cap cap;
2656 		r = -EFAULT;
2657 		if (copy_from_user(&cap, argp, sizeof(cap)))
2658 			break;
2659 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
2660 		break;
2661 	}
2662 	case KVM_S390_MEM_OP: {
2663 		struct kvm_s390_mem_op mem_op;
2664 
2665 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
2666 			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
2667 		else
2668 			r = -EFAULT;
2669 		break;
2670 	}
2671 	case KVM_S390_SET_IRQ_STATE: {
2672 		struct kvm_s390_irq_state irq_state;
2673 
2674 		r = -EFAULT;
2675 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2676 			break;
2677 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
2678 		    irq_state.len == 0 ||
2679 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
2680 			r = -EINVAL;
2681 			break;
2682 		}
2683 		r = kvm_s390_set_irq_state(vcpu,
2684 					   (void __user *) irq_state.buf,
2685 					   irq_state.len);
2686 		break;
2687 	}
2688 	case KVM_S390_GET_IRQ_STATE: {
2689 		struct kvm_s390_irq_state irq_state;
2690 
2691 		r = -EFAULT;
2692 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2693 			break;
2694 		if (irq_state.len == 0) {
2695 			r = -EINVAL;
2696 			break;
2697 		}
2698 		r = kvm_s390_get_irq_state(vcpu,
2699 					   (__u8 __user *)  irq_state.buf,
2700 					   irq_state.len);
2701 		break;
2702 	}
2703 	default:
2704 		r = -ENOTTY;
2705 	}
2706 	return r;
2707 }
2708 
2709 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
2710 {
2711 #ifdef CONFIG_KVM_S390_UCONTROL
2712 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
2713 		 && (kvm_is_ucontrol(vcpu->kvm))) {
2714 		vmf->page = virt_to_page(vcpu->arch.sie_block);
2715 		get_page(vmf->page);
2716 		return 0;
2717 	}
2718 #endif
2719 	return VM_FAULT_SIGBUS;
2720 }
2721 
2722 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
2723 			    unsigned long npages)
2724 {
2725 	return 0;
2726 }
2727 
2728 /* Section: memory related */
2729 int kvm_arch_prepare_memory_region(struct kvm *kvm,
2730 				   struct kvm_memory_slot *memslot,
2731 				   const struct kvm_userspace_memory_region *mem,
2732 				   enum kvm_mr_change change)
2733 {
2734 	/* A few sanity checks. Memory slots have to start and end at a
2735 	   segment boundary (1 MB). The memory in userland may be fragmented
2736 	   into various different vmas. It is okay to mmap() and munmap() in
2737 	   this slot at any time after this call. */
2738 
2739 	if (mem->userspace_addr & 0xffffful)
2740 		return -EINVAL;
2741 
2742 	if (mem->memory_size & 0xffffful)
2743 		return -EINVAL;
2744 
2745 	return 0;
2746 }
2747 
2748 void kvm_arch_commit_memory_region(struct kvm *kvm,
2749 				const struct kvm_userspace_memory_region *mem,
2750 				const struct kvm_memory_slot *old,
2751 				const struct kvm_memory_slot *new,
2752 				enum kvm_mr_change change)
2753 {
2754 	int rc;
2755 
2756 	/* If the basics of the memslot do not change, we do not want
2757 	 * to update the gmap. Every update causes several unnecessary
2758 	 * segment translation exceptions. This is usually handled just
2759 	 * fine by the normal fault handler + gmap, but it will also
2760 	 * cause faults on the prefix page of running guest CPUs.
2761 	 */
2762 	if (old->userspace_addr == mem->userspace_addr &&
2763 	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
2764 	    old->npages * PAGE_SIZE == mem->memory_size)
2765 		return;
2766 
2767 	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
2768 		mem->guest_phys_addr, mem->memory_size);
2769 	if (rc)
2770 		pr_warn("failed to commit memory region\n");
2771 	return;
2772 }
2773 
2774 static int __init kvm_s390_init(void)
2775 {
2776 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
2777 }
2778 
2779 static void __exit kvm_s390_exit(void)
2780 {
2781 	kvm_exit();
2782 }
2783 
2784 module_init(kvm_s390_init);
2785 module_exit(kvm_s390_exit);
2786 
2787 /*
2788  * Enable autoloading of the kvm module.
2789  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
2790  * since x86 takes a different approach.
2791  */
2792 #include <linux/miscdevice.h>
2793 MODULE_ALIAS_MISCDEV(KVM_MINOR);
2794 MODULE_ALIAS("devname:kvm");
2795