xref: /openbmc/linux/arch/s390/kvm/kvm-s390.c (revision 77a87824)
1 /*
2  * hosting zSeries kernel virtual machines
3  *
4  * Copyright IBM Corp. 2008, 2009
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License (version 2 only)
8  * as published by the Free Software Foundation.
9  *
10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
11  *               Christian Borntraeger <borntraeger@de.ibm.com>
12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
14  *               Jason J. Herne <jjherne@us.ibm.com>
15  */
16 
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/module.h>
25 #include <linux/random.h>
26 #include <linux/slab.h>
27 #include <linux/timer.h>
28 #include <linux/vmalloc.h>
29 #include <asm/asm-offsets.h>
30 #include <asm/lowcore.h>
31 #include <asm/stp.h>
32 #include <asm/pgtable.h>
33 #include <asm/gmap.h>
34 #include <asm/nmi.h>
35 #include <asm/switch_to.h>
36 #include <asm/isc.h>
37 #include <asm/sclp.h>
38 #include "kvm-s390.h"
39 #include "gaccess.h"
40 
41 #define KMSG_COMPONENT "kvm-s390"
42 #undef pr_fmt
43 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
44 
45 #define CREATE_TRACE_POINTS
46 #include "trace.h"
47 #include "trace-s390.h"
48 
49 #define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
50 #define LOCAL_IRQS 32
51 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
52 			   (KVM_MAX_VCPUS + LOCAL_IRQS))
53 
54 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
55 
56 struct kvm_stats_debugfs_item debugfs_entries[] = {
57 	{ "userspace_handled", VCPU_STAT(exit_userspace) },
58 	{ "exit_null", VCPU_STAT(exit_null) },
59 	{ "exit_validity", VCPU_STAT(exit_validity) },
60 	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
61 	{ "exit_external_request", VCPU_STAT(exit_external_request) },
62 	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
63 	{ "exit_instruction", VCPU_STAT(exit_instruction) },
64 	{ "exit_pei", VCPU_STAT(exit_pei) },
65 	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
66 	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
67 	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
68 	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
69 	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
70 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
71 	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
72 	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
73 	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
74 	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
75 	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
76 	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
77 	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
78 	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
79 	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
80 	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
81 	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
82 	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
83 	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
84 	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
85 	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
86 	{ "instruction_spx", VCPU_STAT(instruction_spx) },
87 	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
88 	{ "instruction_stap", VCPU_STAT(instruction_stap) },
89 	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
90 	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
91 	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
92 	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
93 	{ "instruction_essa", VCPU_STAT(instruction_essa) },
94 	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
95 	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
96 	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
97 	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
98 	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
99 	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
100 	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
101 	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
102 	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
103 	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
104 	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
105 	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
106 	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
107 	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
108 	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
109 	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
110 	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
111 	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
112 	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
113 	{ "diagnose_10", VCPU_STAT(diagnose_10) },
114 	{ "diagnose_44", VCPU_STAT(diagnose_44) },
115 	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
116 	{ "diagnose_258", VCPU_STAT(diagnose_258) },
117 	{ "diagnose_308", VCPU_STAT(diagnose_308) },
118 	{ "diagnose_500", VCPU_STAT(diagnose_500) },
119 	{ NULL }
120 };
121 
122 /* upper facilities limit for kvm */
123 unsigned long kvm_s390_fac_list_mask[16] = {
124 	0xffe6000000000000UL,
125 	0x005e000000000000UL,
126 };
127 
128 unsigned long kvm_s390_fac_list_mask_size(void)
129 {
130 	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
131 	return ARRAY_SIZE(kvm_s390_fac_list_mask);
132 }
133 
134 static struct gmap_notifier gmap_notifier;
135 debug_info_t *kvm_s390_dbf;
136 
137 /* Section: not file related */
138 int kvm_arch_hardware_enable(void)
139 {
140 	/* every s390 is virtualization enabled ;-) */
141 	return 0;
142 }
143 
144 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
145 
146 /*
147  * This callback is executed during stop_machine(). All CPUs are therefore
148  * temporarily stopped. In order not to change guest behavior, we have to
149  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
150  * so a CPU won't be stopped while calculating with the epoch.
151  */
152 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
153 			  void *v)
154 {
155 	struct kvm *kvm;
156 	struct kvm_vcpu *vcpu;
157 	int i;
158 	unsigned long long *delta = v;
159 
160 	list_for_each_entry(kvm, &vm_list, vm_list) {
161 		kvm->arch.epoch -= *delta;
162 		kvm_for_each_vcpu(i, vcpu, kvm) {
163 			vcpu->arch.sie_block->epoch -= *delta;
164 			if (vcpu->arch.cputm_enabled)
165 				vcpu->arch.cputm_start += *delta;
166 		}
167 	}
168 	return NOTIFY_OK;
169 }
170 
171 static struct notifier_block kvm_clock_notifier = {
172 	.notifier_call = kvm_clock_sync,
173 };
174 
175 int kvm_arch_hardware_setup(void)
176 {
177 	gmap_notifier.notifier_call = kvm_gmap_notifier;
178 	gmap_register_ipte_notifier(&gmap_notifier);
179 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
180 				       &kvm_clock_notifier);
181 	return 0;
182 }
183 
184 void kvm_arch_hardware_unsetup(void)
185 {
186 	gmap_unregister_ipte_notifier(&gmap_notifier);
187 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
188 					 &kvm_clock_notifier);
189 }
190 
191 int kvm_arch_init(void *opaque)
192 {
193 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
194 	if (!kvm_s390_dbf)
195 		return -ENOMEM;
196 
197 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
198 		debug_unregister(kvm_s390_dbf);
199 		return -ENOMEM;
200 	}
201 
202 	/* Register floating interrupt controller interface. */
203 	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
204 }
205 
206 void kvm_arch_exit(void)
207 {
208 	debug_unregister(kvm_s390_dbf);
209 }
210 
211 /* Section: device related */
212 long kvm_arch_dev_ioctl(struct file *filp,
213 			unsigned int ioctl, unsigned long arg)
214 {
215 	if (ioctl == KVM_S390_ENABLE_SIE)
216 		return s390_enable_sie();
217 	return -EINVAL;
218 }
219 
220 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
221 {
222 	int r;
223 
224 	switch (ext) {
225 	case KVM_CAP_S390_PSW:
226 	case KVM_CAP_S390_GMAP:
227 	case KVM_CAP_SYNC_MMU:
228 #ifdef CONFIG_KVM_S390_UCONTROL
229 	case KVM_CAP_S390_UCONTROL:
230 #endif
231 	case KVM_CAP_ASYNC_PF:
232 	case KVM_CAP_SYNC_REGS:
233 	case KVM_CAP_ONE_REG:
234 	case KVM_CAP_ENABLE_CAP:
235 	case KVM_CAP_S390_CSS_SUPPORT:
236 	case KVM_CAP_IOEVENTFD:
237 	case KVM_CAP_DEVICE_CTRL:
238 	case KVM_CAP_ENABLE_CAP_VM:
239 	case KVM_CAP_S390_IRQCHIP:
240 	case KVM_CAP_VM_ATTRIBUTES:
241 	case KVM_CAP_MP_STATE:
242 	case KVM_CAP_S390_INJECT_IRQ:
243 	case KVM_CAP_S390_USER_SIGP:
244 	case KVM_CAP_S390_USER_STSI:
245 	case KVM_CAP_S390_SKEYS:
246 	case KVM_CAP_S390_IRQ_STATE:
247 		r = 1;
248 		break;
249 	case KVM_CAP_S390_MEM_OP:
250 		r = MEM_OP_MAX_SIZE;
251 		break;
252 	case KVM_CAP_NR_VCPUS:
253 	case KVM_CAP_MAX_VCPUS:
254 		r = sclp.has_esca ? KVM_S390_ESCA_CPU_SLOTS
255 				  : KVM_S390_BSCA_CPU_SLOTS;
256 		break;
257 	case KVM_CAP_NR_MEMSLOTS:
258 		r = KVM_USER_MEM_SLOTS;
259 		break;
260 	case KVM_CAP_S390_COW:
261 		r = MACHINE_HAS_ESOP;
262 		break;
263 	case KVM_CAP_S390_VECTOR_REGISTERS:
264 		r = MACHINE_HAS_VX;
265 		break;
266 	case KVM_CAP_S390_RI:
267 		r = test_facility(64);
268 		break;
269 	default:
270 		r = 0;
271 	}
272 	return r;
273 }
274 
275 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
276 					struct kvm_memory_slot *memslot)
277 {
278 	gfn_t cur_gfn, last_gfn;
279 	unsigned long address;
280 	struct gmap *gmap = kvm->arch.gmap;
281 
282 	/* Loop over all guest pages */
283 	last_gfn = memslot->base_gfn + memslot->npages;
284 	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
285 		address = gfn_to_hva_memslot(memslot, cur_gfn);
286 
287 		if (test_and_clear_guest_dirty(gmap->mm, address))
288 			mark_page_dirty(kvm, cur_gfn);
289 		if (fatal_signal_pending(current))
290 			return;
291 		cond_resched();
292 	}
293 }
294 
295 /* Section: vm related */
296 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
297 
298 /*
299  * Get (and clear) the dirty memory log for a memory slot.
300  */
301 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
302 			       struct kvm_dirty_log *log)
303 {
304 	int r;
305 	unsigned long n;
306 	struct kvm_memslots *slots;
307 	struct kvm_memory_slot *memslot;
308 	int is_dirty = 0;
309 
310 	mutex_lock(&kvm->slots_lock);
311 
312 	r = -EINVAL;
313 	if (log->slot >= KVM_USER_MEM_SLOTS)
314 		goto out;
315 
316 	slots = kvm_memslots(kvm);
317 	memslot = id_to_memslot(slots, log->slot);
318 	r = -ENOENT;
319 	if (!memslot->dirty_bitmap)
320 		goto out;
321 
322 	kvm_s390_sync_dirty_log(kvm, memslot);
323 	r = kvm_get_dirty_log(kvm, log, &is_dirty);
324 	if (r)
325 		goto out;
326 
327 	/* Clear the dirty log */
328 	if (is_dirty) {
329 		n = kvm_dirty_bitmap_bytes(memslot);
330 		memset(memslot->dirty_bitmap, 0, n);
331 	}
332 	r = 0;
333 out:
334 	mutex_unlock(&kvm->slots_lock);
335 	return r;
336 }
337 
338 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
339 {
340 	int r;
341 
342 	if (cap->flags)
343 		return -EINVAL;
344 
345 	switch (cap->cap) {
346 	case KVM_CAP_S390_IRQCHIP:
347 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
348 		kvm->arch.use_irqchip = 1;
349 		r = 0;
350 		break;
351 	case KVM_CAP_S390_USER_SIGP:
352 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
353 		kvm->arch.user_sigp = 1;
354 		r = 0;
355 		break;
356 	case KVM_CAP_S390_VECTOR_REGISTERS:
357 		mutex_lock(&kvm->lock);
358 		if (atomic_read(&kvm->online_vcpus)) {
359 			r = -EBUSY;
360 		} else if (MACHINE_HAS_VX) {
361 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
362 			set_kvm_facility(kvm->arch.model.fac_list, 129);
363 			r = 0;
364 		} else
365 			r = -EINVAL;
366 		mutex_unlock(&kvm->lock);
367 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
368 			 r ? "(not available)" : "(success)");
369 		break;
370 	case KVM_CAP_S390_RI:
371 		r = -EINVAL;
372 		mutex_lock(&kvm->lock);
373 		if (atomic_read(&kvm->online_vcpus)) {
374 			r = -EBUSY;
375 		} else if (test_facility(64)) {
376 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
377 			set_kvm_facility(kvm->arch.model.fac_list, 64);
378 			r = 0;
379 		}
380 		mutex_unlock(&kvm->lock);
381 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
382 			 r ? "(not available)" : "(success)");
383 		break;
384 	case KVM_CAP_S390_USER_STSI:
385 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
386 		kvm->arch.user_stsi = 1;
387 		r = 0;
388 		break;
389 	default:
390 		r = -EINVAL;
391 		break;
392 	}
393 	return r;
394 }
395 
396 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
397 {
398 	int ret;
399 
400 	switch (attr->attr) {
401 	case KVM_S390_VM_MEM_LIMIT_SIZE:
402 		ret = 0;
403 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
404 			 kvm->arch.mem_limit);
405 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
406 			ret = -EFAULT;
407 		break;
408 	default:
409 		ret = -ENXIO;
410 		break;
411 	}
412 	return ret;
413 }
414 
415 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
416 {
417 	int ret;
418 	unsigned int idx;
419 	switch (attr->attr) {
420 	case KVM_S390_VM_MEM_ENABLE_CMMA:
421 		/* enable CMMA only for z10 and later (EDAT_1) */
422 		ret = -EINVAL;
423 		if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)
424 			break;
425 
426 		ret = -EBUSY;
427 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
428 		mutex_lock(&kvm->lock);
429 		if (atomic_read(&kvm->online_vcpus) == 0) {
430 			kvm->arch.use_cmma = 1;
431 			ret = 0;
432 		}
433 		mutex_unlock(&kvm->lock);
434 		break;
435 	case KVM_S390_VM_MEM_CLR_CMMA:
436 		ret = -EINVAL;
437 		if (!kvm->arch.use_cmma)
438 			break;
439 
440 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
441 		mutex_lock(&kvm->lock);
442 		idx = srcu_read_lock(&kvm->srcu);
443 		s390_reset_cmma(kvm->arch.gmap->mm);
444 		srcu_read_unlock(&kvm->srcu, idx);
445 		mutex_unlock(&kvm->lock);
446 		ret = 0;
447 		break;
448 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
449 		unsigned long new_limit;
450 
451 		if (kvm_is_ucontrol(kvm))
452 			return -EINVAL;
453 
454 		if (get_user(new_limit, (u64 __user *)attr->addr))
455 			return -EFAULT;
456 
457 		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
458 		    new_limit > kvm->arch.mem_limit)
459 			return -E2BIG;
460 
461 		if (!new_limit)
462 			return -EINVAL;
463 
464 		/* gmap_alloc takes last usable address */
465 		if (new_limit != KVM_S390_NO_MEM_LIMIT)
466 			new_limit -= 1;
467 
468 		ret = -EBUSY;
469 		mutex_lock(&kvm->lock);
470 		if (atomic_read(&kvm->online_vcpus) == 0) {
471 			/* gmap_alloc will round the limit up */
472 			struct gmap *new = gmap_alloc(current->mm, new_limit);
473 
474 			if (!new) {
475 				ret = -ENOMEM;
476 			} else {
477 				gmap_free(kvm->arch.gmap);
478 				new->private = kvm;
479 				kvm->arch.gmap = new;
480 				ret = 0;
481 			}
482 		}
483 		mutex_unlock(&kvm->lock);
484 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
485 		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
486 			 (void *) kvm->arch.gmap->asce);
487 		break;
488 	}
489 	default:
490 		ret = -ENXIO;
491 		break;
492 	}
493 	return ret;
494 }
495 
496 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
497 
498 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
499 {
500 	struct kvm_vcpu *vcpu;
501 	int i;
502 
503 	if (!test_kvm_facility(kvm, 76))
504 		return -EINVAL;
505 
506 	mutex_lock(&kvm->lock);
507 	switch (attr->attr) {
508 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
509 		get_random_bytes(
510 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
511 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
512 		kvm->arch.crypto.aes_kw = 1;
513 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
514 		break;
515 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
516 		get_random_bytes(
517 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
518 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
519 		kvm->arch.crypto.dea_kw = 1;
520 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
521 		break;
522 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
523 		kvm->arch.crypto.aes_kw = 0;
524 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
525 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
526 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
527 		break;
528 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
529 		kvm->arch.crypto.dea_kw = 0;
530 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
531 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
532 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
533 		break;
534 	default:
535 		mutex_unlock(&kvm->lock);
536 		return -ENXIO;
537 	}
538 
539 	kvm_for_each_vcpu(i, vcpu, kvm) {
540 		kvm_s390_vcpu_crypto_setup(vcpu);
541 		exit_sie(vcpu);
542 	}
543 	mutex_unlock(&kvm->lock);
544 	return 0;
545 }
546 
547 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
548 {
549 	u8 gtod_high;
550 
551 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
552 					   sizeof(gtod_high)))
553 		return -EFAULT;
554 
555 	if (gtod_high != 0)
556 		return -EINVAL;
557 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
558 
559 	return 0;
560 }
561 
562 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
563 {
564 	u64 gtod;
565 
566 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
567 		return -EFAULT;
568 
569 	kvm_s390_set_tod_clock(kvm, gtod);
570 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
571 	return 0;
572 }
573 
574 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
575 {
576 	int ret;
577 
578 	if (attr->flags)
579 		return -EINVAL;
580 
581 	switch (attr->attr) {
582 	case KVM_S390_VM_TOD_HIGH:
583 		ret = kvm_s390_set_tod_high(kvm, attr);
584 		break;
585 	case KVM_S390_VM_TOD_LOW:
586 		ret = kvm_s390_set_tod_low(kvm, attr);
587 		break;
588 	default:
589 		ret = -ENXIO;
590 		break;
591 	}
592 	return ret;
593 }
594 
595 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
596 {
597 	u8 gtod_high = 0;
598 
599 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
600 					 sizeof(gtod_high)))
601 		return -EFAULT;
602 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
603 
604 	return 0;
605 }
606 
607 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
608 {
609 	u64 gtod;
610 
611 	gtod = kvm_s390_get_tod_clock_fast(kvm);
612 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
613 		return -EFAULT;
614 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
615 
616 	return 0;
617 }
618 
619 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
620 {
621 	int ret;
622 
623 	if (attr->flags)
624 		return -EINVAL;
625 
626 	switch (attr->attr) {
627 	case KVM_S390_VM_TOD_HIGH:
628 		ret = kvm_s390_get_tod_high(kvm, attr);
629 		break;
630 	case KVM_S390_VM_TOD_LOW:
631 		ret = kvm_s390_get_tod_low(kvm, attr);
632 		break;
633 	default:
634 		ret = -ENXIO;
635 		break;
636 	}
637 	return ret;
638 }
639 
640 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
641 {
642 	struct kvm_s390_vm_cpu_processor *proc;
643 	u16 lowest_ibc, unblocked_ibc;
644 	int ret = 0;
645 
646 	mutex_lock(&kvm->lock);
647 	if (atomic_read(&kvm->online_vcpus)) {
648 		ret = -EBUSY;
649 		goto out;
650 	}
651 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
652 	if (!proc) {
653 		ret = -ENOMEM;
654 		goto out;
655 	}
656 	if (!copy_from_user(proc, (void __user *)attr->addr,
657 			    sizeof(*proc))) {
658 		kvm->arch.model.cpuid = proc->cpuid;
659 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
660 		unblocked_ibc = sclp.ibc & 0xfff;
661 		if (lowest_ibc && proc->ibc) {
662 			if (proc->ibc > unblocked_ibc)
663 				kvm->arch.model.ibc = unblocked_ibc;
664 			else if (proc->ibc < lowest_ibc)
665 				kvm->arch.model.ibc = lowest_ibc;
666 			else
667 				kvm->arch.model.ibc = proc->ibc;
668 		}
669 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
670 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
671 	} else
672 		ret = -EFAULT;
673 	kfree(proc);
674 out:
675 	mutex_unlock(&kvm->lock);
676 	return ret;
677 }
678 
679 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
680 {
681 	int ret = -ENXIO;
682 
683 	switch (attr->attr) {
684 	case KVM_S390_VM_CPU_PROCESSOR:
685 		ret = kvm_s390_set_processor(kvm, attr);
686 		break;
687 	}
688 	return ret;
689 }
690 
691 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
692 {
693 	struct kvm_s390_vm_cpu_processor *proc;
694 	int ret = 0;
695 
696 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
697 	if (!proc) {
698 		ret = -ENOMEM;
699 		goto out;
700 	}
701 	proc->cpuid = kvm->arch.model.cpuid;
702 	proc->ibc = kvm->arch.model.ibc;
703 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
704 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
705 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
706 		ret = -EFAULT;
707 	kfree(proc);
708 out:
709 	return ret;
710 }
711 
712 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
713 {
714 	struct kvm_s390_vm_cpu_machine *mach;
715 	int ret = 0;
716 
717 	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
718 	if (!mach) {
719 		ret = -ENOMEM;
720 		goto out;
721 	}
722 	get_cpu_id((struct cpuid *) &mach->cpuid);
723 	mach->ibc = sclp.ibc;
724 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
725 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
726 	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
727 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
728 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
729 		ret = -EFAULT;
730 	kfree(mach);
731 out:
732 	return ret;
733 }
734 
735 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
736 {
737 	int ret = -ENXIO;
738 
739 	switch (attr->attr) {
740 	case KVM_S390_VM_CPU_PROCESSOR:
741 		ret = kvm_s390_get_processor(kvm, attr);
742 		break;
743 	case KVM_S390_VM_CPU_MACHINE:
744 		ret = kvm_s390_get_machine(kvm, attr);
745 		break;
746 	}
747 	return ret;
748 }
749 
750 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
751 {
752 	int ret;
753 
754 	switch (attr->group) {
755 	case KVM_S390_VM_MEM_CTRL:
756 		ret = kvm_s390_set_mem_control(kvm, attr);
757 		break;
758 	case KVM_S390_VM_TOD:
759 		ret = kvm_s390_set_tod(kvm, attr);
760 		break;
761 	case KVM_S390_VM_CPU_MODEL:
762 		ret = kvm_s390_set_cpu_model(kvm, attr);
763 		break;
764 	case KVM_S390_VM_CRYPTO:
765 		ret = kvm_s390_vm_set_crypto(kvm, attr);
766 		break;
767 	default:
768 		ret = -ENXIO;
769 		break;
770 	}
771 
772 	return ret;
773 }
774 
775 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
776 {
777 	int ret;
778 
779 	switch (attr->group) {
780 	case KVM_S390_VM_MEM_CTRL:
781 		ret = kvm_s390_get_mem_control(kvm, attr);
782 		break;
783 	case KVM_S390_VM_TOD:
784 		ret = kvm_s390_get_tod(kvm, attr);
785 		break;
786 	case KVM_S390_VM_CPU_MODEL:
787 		ret = kvm_s390_get_cpu_model(kvm, attr);
788 		break;
789 	default:
790 		ret = -ENXIO;
791 		break;
792 	}
793 
794 	return ret;
795 }
796 
797 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
798 {
799 	int ret;
800 
801 	switch (attr->group) {
802 	case KVM_S390_VM_MEM_CTRL:
803 		switch (attr->attr) {
804 		case KVM_S390_VM_MEM_ENABLE_CMMA:
805 		case KVM_S390_VM_MEM_CLR_CMMA:
806 		case KVM_S390_VM_MEM_LIMIT_SIZE:
807 			ret = 0;
808 			break;
809 		default:
810 			ret = -ENXIO;
811 			break;
812 		}
813 		break;
814 	case KVM_S390_VM_TOD:
815 		switch (attr->attr) {
816 		case KVM_S390_VM_TOD_LOW:
817 		case KVM_S390_VM_TOD_HIGH:
818 			ret = 0;
819 			break;
820 		default:
821 			ret = -ENXIO;
822 			break;
823 		}
824 		break;
825 	case KVM_S390_VM_CPU_MODEL:
826 		switch (attr->attr) {
827 		case KVM_S390_VM_CPU_PROCESSOR:
828 		case KVM_S390_VM_CPU_MACHINE:
829 			ret = 0;
830 			break;
831 		default:
832 			ret = -ENXIO;
833 			break;
834 		}
835 		break;
836 	case KVM_S390_VM_CRYPTO:
837 		switch (attr->attr) {
838 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
839 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
840 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
841 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
842 			ret = 0;
843 			break;
844 		default:
845 			ret = -ENXIO;
846 			break;
847 		}
848 		break;
849 	default:
850 		ret = -ENXIO;
851 		break;
852 	}
853 
854 	return ret;
855 }
856 
857 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
858 {
859 	uint8_t *keys;
860 	uint64_t hva;
861 	unsigned long curkey;
862 	int i, r = 0;
863 
864 	if (args->flags != 0)
865 		return -EINVAL;
866 
867 	/* Is this guest using storage keys? */
868 	if (!mm_use_skey(current->mm))
869 		return KVM_S390_GET_SKEYS_NONE;
870 
871 	/* Enforce sane limit on memory allocation */
872 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
873 		return -EINVAL;
874 
875 	keys = kmalloc_array(args->count, sizeof(uint8_t),
876 			     GFP_KERNEL | __GFP_NOWARN);
877 	if (!keys)
878 		keys = vmalloc(sizeof(uint8_t) * args->count);
879 	if (!keys)
880 		return -ENOMEM;
881 
882 	for (i = 0; i < args->count; i++) {
883 		hva = gfn_to_hva(kvm, args->start_gfn + i);
884 		if (kvm_is_error_hva(hva)) {
885 			r = -EFAULT;
886 			goto out;
887 		}
888 
889 		curkey = get_guest_storage_key(current->mm, hva);
890 		if (IS_ERR_VALUE(curkey)) {
891 			r = curkey;
892 			goto out;
893 		}
894 		keys[i] = curkey;
895 	}
896 
897 	r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
898 			 sizeof(uint8_t) * args->count);
899 	if (r)
900 		r = -EFAULT;
901 out:
902 	kvfree(keys);
903 	return r;
904 }
905 
906 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
907 {
908 	uint8_t *keys;
909 	uint64_t hva;
910 	int i, r = 0;
911 
912 	if (args->flags != 0)
913 		return -EINVAL;
914 
915 	/* Enforce sane limit on memory allocation */
916 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
917 		return -EINVAL;
918 
919 	keys = kmalloc_array(args->count, sizeof(uint8_t),
920 			     GFP_KERNEL | __GFP_NOWARN);
921 	if (!keys)
922 		keys = vmalloc(sizeof(uint8_t) * args->count);
923 	if (!keys)
924 		return -ENOMEM;
925 
926 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
927 			   sizeof(uint8_t) * args->count);
928 	if (r) {
929 		r = -EFAULT;
930 		goto out;
931 	}
932 
933 	/* Enable storage key handling for the guest */
934 	r = s390_enable_skey();
935 	if (r)
936 		goto out;
937 
938 	for (i = 0; i < args->count; i++) {
939 		hva = gfn_to_hva(kvm, args->start_gfn + i);
940 		if (kvm_is_error_hva(hva)) {
941 			r = -EFAULT;
942 			goto out;
943 		}
944 
945 		/* Lowest order bit is reserved */
946 		if (keys[i] & 0x01) {
947 			r = -EINVAL;
948 			goto out;
949 		}
950 
951 		r = set_guest_storage_key(current->mm, hva,
952 					  (unsigned long)keys[i], 0);
953 		if (r)
954 			goto out;
955 	}
956 out:
957 	kvfree(keys);
958 	return r;
959 }
960 
961 long kvm_arch_vm_ioctl(struct file *filp,
962 		       unsigned int ioctl, unsigned long arg)
963 {
964 	struct kvm *kvm = filp->private_data;
965 	void __user *argp = (void __user *)arg;
966 	struct kvm_device_attr attr;
967 	int r;
968 
969 	switch (ioctl) {
970 	case KVM_S390_INTERRUPT: {
971 		struct kvm_s390_interrupt s390int;
972 
973 		r = -EFAULT;
974 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
975 			break;
976 		r = kvm_s390_inject_vm(kvm, &s390int);
977 		break;
978 	}
979 	case KVM_ENABLE_CAP: {
980 		struct kvm_enable_cap cap;
981 		r = -EFAULT;
982 		if (copy_from_user(&cap, argp, sizeof(cap)))
983 			break;
984 		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
985 		break;
986 	}
987 	case KVM_CREATE_IRQCHIP: {
988 		struct kvm_irq_routing_entry routing;
989 
990 		r = -EINVAL;
991 		if (kvm->arch.use_irqchip) {
992 			/* Set up dummy routing. */
993 			memset(&routing, 0, sizeof(routing));
994 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
995 		}
996 		break;
997 	}
998 	case KVM_SET_DEVICE_ATTR: {
999 		r = -EFAULT;
1000 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1001 			break;
1002 		r = kvm_s390_vm_set_attr(kvm, &attr);
1003 		break;
1004 	}
1005 	case KVM_GET_DEVICE_ATTR: {
1006 		r = -EFAULT;
1007 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1008 			break;
1009 		r = kvm_s390_vm_get_attr(kvm, &attr);
1010 		break;
1011 	}
1012 	case KVM_HAS_DEVICE_ATTR: {
1013 		r = -EFAULT;
1014 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1015 			break;
1016 		r = kvm_s390_vm_has_attr(kvm, &attr);
1017 		break;
1018 	}
1019 	case KVM_S390_GET_SKEYS: {
1020 		struct kvm_s390_skeys args;
1021 
1022 		r = -EFAULT;
1023 		if (copy_from_user(&args, argp,
1024 				   sizeof(struct kvm_s390_skeys)))
1025 			break;
1026 		r = kvm_s390_get_skeys(kvm, &args);
1027 		break;
1028 	}
1029 	case KVM_S390_SET_SKEYS: {
1030 		struct kvm_s390_skeys args;
1031 
1032 		r = -EFAULT;
1033 		if (copy_from_user(&args, argp,
1034 				   sizeof(struct kvm_s390_skeys)))
1035 			break;
1036 		r = kvm_s390_set_skeys(kvm, &args);
1037 		break;
1038 	}
1039 	default:
1040 		r = -ENOTTY;
1041 	}
1042 
1043 	return r;
1044 }
1045 
1046 static int kvm_s390_query_ap_config(u8 *config)
1047 {
1048 	u32 fcn_code = 0x04000000UL;
1049 	u32 cc = 0;
1050 
1051 	memset(config, 0, 128);
1052 	asm volatile(
1053 		"lgr 0,%1\n"
1054 		"lgr 2,%2\n"
1055 		".long 0xb2af0000\n"		/* PQAP(QCI) */
1056 		"0: ipm %0\n"
1057 		"srl %0,28\n"
1058 		"1:\n"
1059 		EX_TABLE(0b, 1b)
1060 		: "+r" (cc)
1061 		: "r" (fcn_code), "r" (config)
1062 		: "cc", "0", "2", "memory"
1063 	);
1064 
1065 	return cc;
1066 }
1067 
1068 static int kvm_s390_apxa_installed(void)
1069 {
1070 	u8 config[128];
1071 	int cc;
1072 
1073 	if (test_facility(12)) {
1074 		cc = kvm_s390_query_ap_config(config);
1075 
1076 		if (cc)
1077 			pr_err("PQAP(QCI) failed with cc=%d", cc);
1078 		else
1079 			return config[0] & 0x40;
1080 	}
1081 
1082 	return 0;
1083 }
1084 
1085 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1086 {
1087 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1088 
1089 	if (kvm_s390_apxa_installed())
1090 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1091 	else
1092 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1093 }
1094 
1095 static u64 kvm_s390_get_initial_cpuid(void)
1096 {
1097 	struct cpuid cpuid;
1098 
1099 	get_cpu_id(&cpuid);
1100 	cpuid.version = 0xff;
1101 	return *((u64 *) &cpuid);
1102 }
1103 
1104 static void kvm_s390_crypto_init(struct kvm *kvm)
1105 {
1106 	if (!test_kvm_facility(kvm, 76))
1107 		return;
1108 
1109 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1110 	kvm_s390_set_crycb_format(kvm);
1111 
1112 	/* Enable AES/DEA protected key functions by default */
1113 	kvm->arch.crypto.aes_kw = 1;
1114 	kvm->arch.crypto.dea_kw = 1;
1115 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1116 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1117 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1118 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1119 }
1120 
1121 static void sca_dispose(struct kvm *kvm)
1122 {
1123 	if (kvm->arch.use_esca)
1124 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1125 	else
1126 		free_page((unsigned long)(kvm->arch.sca));
1127 	kvm->arch.sca = NULL;
1128 }
1129 
1130 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1131 {
1132 	int i, rc;
1133 	char debug_name[16];
1134 	static unsigned long sca_offset;
1135 
1136 	rc = -EINVAL;
1137 #ifdef CONFIG_KVM_S390_UCONTROL
1138 	if (type & ~KVM_VM_S390_UCONTROL)
1139 		goto out_err;
1140 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1141 		goto out_err;
1142 #else
1143 	if (type)
1144 		goto out_err;
1145 #endif
1146 
1147 	rc = s390_enable_sie();
1148 	if (rc)
1149 		goto out_err;
1150 
1151 	rc = -ENOMEM;
1152 
1153 	kvm->arch.use_esca = 0; /* start with basic SCA */
1154 	rwlock_init(&kvm->arch.sca_lock);
1155 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(GFP_KERNEL);
1156 	if (!kvm->arch.sca)
1157 		goto out_err;
1158 	spin_lock(&kvm_lock);
1159 	sca_offset += 16;
1160 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1161 		sca_offset = 0;
1162 	kvm->arch.sca = (struct bsca_block *)
1163 			((char *) kvm->arch.sca + sca_offset);
1164 	spin_unlock(&kvm_lock);
1165 
1166 	sprintf(debug_name, "kvm-%u", current->pid);
1167 
1168 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1169 	if (!kvm->arch.dbf)
1170 		goto out_err;
1171 
1172 	kvm->arch.sie_page2 =
1173 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1174 	if (!kvm->arch.sie_page2)
1175 		goto out_err;
1176 
1177 	/* Populate the facility mask initially. */
1178 	memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1179 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1180 	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1181 		if (i < kvm_s390_fac_list_mask_size())
1182 			kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1183 		else
1184 			kvm->arch.model.fac_mask[i] = 0UL;
1185 	}
1186 
1187 	/* Populate the facility list initially. */
1188 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1189 	memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1190 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1191 
1192 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1193 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1194 
1195 	kvm_s390_crypto_init(kvm);
1196 
1197 	spin_lock_init(&kvm->arch.float_int.lock);
1198 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
1199 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1200 	init_waitqueue_head(&kvm->arch.ipte_wq);
1201 	mutex_init(&kvm->arch.ipte_mutex);
1202 
1203 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1204 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
1205 
1206 	if (type & KVM_VM_S390_UCONTROL) {
1207 		kvm->arch.gmap = NULL;
1208 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1209 	} else {
1210 		if (sclp.hamax == U64_MAX)
1211 			kvm->arch.mem_limit = TASK_MAX_SIZE;
1212 		else
1213 			kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
1214 						    sclp.hamax + 1);
1215 		kvm->arch.gmap = gmap_alloc(current->mm, kvm->arch.mem_limit - 1);
1216 		if (!kvm->arch.gmap)
1217 			goto out_err;
1218 		kvm->arch.gmap->private = kvm;
1219 		kvm->arch.gmap->pfault_enabled = 0;
1220 	}
1221 
1222 	kvm->arch.css_support = 0;
1223 	kvm->arch.use_irqchip = 0;
1224 	kvm->arch.epoch = 0;
1225 
1226 	spin_lock_init(&kvm->arch.start_stop_lock);
1227 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1228 
1229 	return 0;
1230 out_err:
1231 	free_page((unsigned long)kvm->arch.sie_page2);
1232 	debug_unregister(kvm->arch.dbf);
1233 	sca_dispose(kvm);
1234 	KVM_EVENT(3, "creation of vm failed: %d", rc);
1235 	return rc;
1236 }
1237 
1238 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1239 {
1240 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1241 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1242 	kvm_s390_clear_local_irqs(vcpu);
1243 	kvm_clear_async_pf_completion_queue(vcpu);
1244 	if (!kvm_is_ucontrol(vcpu->kvm))
1245 		sca_del_vcpu(vcpu);
1246 
1247 	if (kvm_is_ucontrol(vcpu->kvm))
1248 		gmap_free(vcpu->arch.gmap);
1249 
1250 	if (vcpu->kvm->arch.use_cmma)
1251 		kvm_s390_vcpu_unsetup_cmma(vcpu);
1252 	free_page((unsigned long)(vcpu->arch.sie_block));
1253 
1254 	kvm_vcpu_uninit(vcpu);
1255 	kmem_cache_free(kvm_vcpu_cache, vcpu);
1256 }
1257 
1258 static void kvm_free_vcpus(struct kvm *kvm)
1259 {
1260 	unsigned int i;
1261 	struct kvm_vcpu *vcpu;
1262 
1263 	kvm_for_each_vcpu(i, vcpu, kvm)
1264 		kvm_arch_vcpu_destroy(vcpu);
1265 
1266 	mutex_lock(&kvm->lock);
1267 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1268 		kvm->vcpus[i] = NULL;
1269 
1270 	atomic_set(&kvm->online_vcpus, 0);
1271 	mutex_unlock(&kvm->lock);
1272 }
1273 
1274 void kvm_arch_destroy_vm(struct kvm *kvm)
1275 {
1276 	kvm_free_vcpus(kvm);
1277 	sca_dispose(kvm);
1278 	debug_unregister(kvm->arch.dbf);
1279 	free_page((unsigned long)kvm->arch.sie_page2);
1280 	if (!kvm_is_ucontrol(kvm))
1281 		gmap_free(kvm->arch.gmap);
1282 	kvm_s390_destroy_adapters(kvm);
1283 	kvm_s390_clear_float_irqs(kvm);
1284 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1285 }
1286 
1287 /* Section: vcpu related */
1288 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1289 {
1290 	vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
1291 	if (!vcpu->arch.gmap)
1292 		return -ENOMEM;
1293 	vcpu->arch.gmap->private = vcpu->kvm;
1294 
1295 	return 0;
1296 }
1297 
1298 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1299 {
1300 	read_lock(&vcpu->kvm->arch.sca_lock);
1301 	if (vcpu->kvm->arch.use_esca) {
1302 		struct esca_block *sca = vcpu->kvm->arch.sca;
1303 
1304 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1305 		sca->cpu[vcpu->vcpu_id].sda = 0;
1306 	} else {
1307 		struct bsca_block *sca = vcpu->kvm->arch.sca;
1308 
1309 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1310 		sca->cpu[vcpu->vcpu_id].sda = 0;
1311 	}
1312 	read_unlock(&vcpu->kvm->arch.sca_lock);
1313 }
1314 
1315 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1316 {
1317 	read_lock(&vcpu->kvm->arch.sca_lock);
1318 	if (vcpu->kvm->arch.use_esca) {
1319 		struct esca_block *sca = vcpu->kvm->arch.sca;
1320 
1321 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1322 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1323 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1324 		vcpu->arch.sie_block->ecb2 |= 0x04U;
1325 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1326 	} else {
1327 		struct bsca_block *sca = vcpu->kvm->arch.sca;
1328 
1329 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1330 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1331 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1332 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1333 	}
1334 	read_unlock(&vcpu->kvm->arch.sca_lock);
1335 }
1336 
1337 /* Basic SCA to Extended SCA data copy routines */
1338 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1339 {
1340 	d->sda = s->sda;
1341 	d->sigp_ctrl.c = s->sigp_ctrl.c;
1342 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1343 }
1344 
1345 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1346 {
1347 	int i;
1348 
1349 	d->ipte_control = s->ipte_control;
1350 	d->mcn[0] = s->mcn;
1351 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1352 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1353 }
1354 
1355 static int sca_switch_to_extended(struct kvm *kvm)
1356 {
1357 	struct bsca_block *old_sca = kvm->arch.sca;
1358 	struct esca_block *new_sca;
1359 	struct kvm_vcpu *vcpu;
1360 	unsigned int vcpu_idx;
1361 	u32 scaol, scaoh;
1362 
1363 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1364 	if (!new_sca)
1365 		return -ENOMEM;
1366 
1367 	scaoh = (u32)((u64)(new_sca) >> 32);
1368 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
1369 
1370 	kvm_s390_vcpu_block_all(kvm);
1371 	write_lock(&kvm->arch.sca_lock);
1372 
1373 	sca_copy_b_to_e(new_sca, old_sca);
1374 
1375 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1376 		vcpu->arch.sie_block->scaoh = scaoh;
1377 		vcpu->arch.sie_block->scaol = scaol;
1378 		vcpu->arch.sie_block->ecb2 |= 0x04U;
1379 	}
1380 	kvm->arch.sca = new_sca;
1381 	kvm->arch.use_esca = 1;
1382 
1383 	write_unlock(&kvm->arch.sca_lock);
1384 	kvm_s390_vcpu_unblock_all(kvm);
1385 
1386 	free_page((unsigned long)old_sca);
1387 
1388 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
1389 		 old_sca, kvm->arch.sca);
1390 	return 0;
1391 }
1392 
1393 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1394 {
1395 	int rc;
1396 
1397 	if (id < KVM_S390_BSCA_CPU_SLOTS)
1398 		return true;
1399 	if (!sclp.has_esca)
1400 		return false;
1401 
1402 	mutex_lock(&kvm->lock);
1403 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1404 	mutex_unlock(&kvm->lock);
1405 
1406 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1407 }
1408 
1409 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1410 {
1411 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1412 	kvm_clear_async_pf_completion_queue(vcpu);
1413 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1414 				    KVM_SYNC_GPRS |
1415 				    KVM_SYNC_ACRS |
1416 				    KVM_SYNC_CRS |
1417 				    KVM_SYNC_ARCH0 |
1418 				    KVM_SYNC_PFAULT;
1419 	if (test_kvm_facility(vcpu->kvm, 64))
1420 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1421 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
1422 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1423 	 */
1424 	if (MACHINE_HAS_VX)
1425 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1426 	else
1427 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
1428 
1429 	if (kvm_is_ucontrol(vcpu->kvm))
1430 		return __kvm_ucontrol_vcpu_init(vcpu);
1431 
1432 	return 0;
1433 }
1434 
1435 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1436 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1437 {
1438 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
1439 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1440 	vcpu->arch.cputm_start = get_tod_clock_fast();
1441 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1442 }
1443 
1444 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1445 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1446 {
1447 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
1448 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1449 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1450 	vcpu->arch.cputm_start = 0;
1451 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1452 }
1453 
1454 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1455 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1456 {
1457 	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
1458 	vcpu->arch.cputm_enabled = true;
1459 	__start_cpu_timer_accounting(vcpu);
1460 }
1461 
1462 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1463 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1464 {
1465 	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
1466 	__stop_cpu_timer_accounting(vcpu);
1467 	vcpu->arch.cputm_enabled = false;
1468 }
1469 
1470 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1471 {
1472 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1473 	__enable_cpu_timer_accounting(vcpu);
1474 	preempt_enable();
1475 }
1476 
1477 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1478 {
1479 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1480 	__disable_cpu_timer_accounting(vcpu);
1481 	preempt_enable();
1482 }
1483 
1484 /* set the cpu timer - may only be called from the VCPU thread itself */
1485 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
1486 {
1487 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1488 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1489 	if (vcpu->arch.cputm_enabled)
1490 		vcpu->arch.cputm_start = get_tod_clock_fast();
1491 	vcpu->arch.sie_block->cputm = cputm;
1492 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1493 	preempt_enable();
1494 }
1495 
1496 /* update and get the cpu timer - can also be called from other VCPU threads */
1497 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
1498 {
1499 	unsigned int seq;
1500 	__u64 value;
1501 
1502 	if (unlikely(!vcpu->arch.cputm_enabled))
1503 		return vcpu->arch.sie_block->cputm;
1504 
1505 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1506 	do {
1507 		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
1508 		/*
1509 		 * If the writer would ever execute a read in the critical
1510 		 * section, e.g. in irq context, we have a deadlock.
1511 		 */
1512 		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
1513 		value = vcpu->arch.sie_block->cputm;
1514 		/* if cputm_start is 0, accounting is being started/stopped */
1515 		if (likely(vcpu->arch.cputm_start))
1516 			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1517 	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
1518 	preempt_enable();
1519 	return value;
1520 }
1521 
1522 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1523 {
1524 	/* Save host register state */
1525 	save_fpu_regs();
1526 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
1527 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
1528 
1529 	if (MACHINE_HAS_VX)
1530 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
1531 	else
1532 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
1533 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
1534 	if (test_fp_ctl(current->thread.fpu.fpc))
1535 		/* User space provided an invalid FPC, let's clear it */
1536 		current->thread.fpu.fpc = 0;
1537 
1538 	save_access_regs(vcpu->arch.host_acrs);
1539 	restore_access_regs(vcpu->run->s.regs.acrs);
1540 	gmap_enable(vcpu->arch.gmap);
1541 	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1542 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1543 		__start_cpu_timer_accounting(vcpu);
1544 	vcpu->cpu = cpu;
1545 }
1546 
1547 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1548 {
1549 	vcpu->cpu = -1;
1550 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1551 		__stop_cpu_timer_accounting(vcpu);
1552 	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1553 	gmap_disable(vcpu->arch.gmap);
1554 
1555 	/* Save guest register state */
1556 	save_fpu_regs();
1557 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
1558 
1559 	/* Restore host register state */
1560 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
1561 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
1562 
1563 	save_access_regs(vcpu->run->s.regs.acrs);
1564 	restore_access_regs(vcpu->arch.host_acrs);
1565 }
1566 
1567 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1568 {
1569 	/* this equals initial cpu reset in pop, but we don't switch to ESA */
1570 	vcpu->arch.sie_block->gpsw.mask = 0UL;
1571 	vcpu->arch.sie_block->gpsw.addr = 0UL;
1572 	kvm_s390_set_prefix(vcpu, 0);
1573 	kvm_s390_set_cpu_timer(vcpu, 0);
1574 	vcpu->arch.sie_block->ckc       = 0UL;
1575 	vcpu->arch.sie_block->todpr     = 0;
1576 	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1577 	vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
1578 	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1579 	/* make sure the new fpc will be lazily loaded */
1580 	save_fpu_regs();
1581 	current->thread.fpu.fpc = 0;
1582 	vcpu->arch.sie_block->gbea = 1;
1583 	vcpu->arch.sie_block->pp = 0;
1584 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1585 	kvm_clear_async_pf_completion_queue(vcpu);
1586 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1587 		kvm_s390_vcpu_stop(vcpu);
1588 	kvm_s390_clear_local_irqs(vcpu);
1589 }
1590 
1591 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1592 {
1593 	mutex_lock(&vcpu->kvm->lock);
1594 	preempt_disable();
1595 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1596 	preempt_enable();
1597 	mutex_unlock(&vcpu->kvm->lock);
1598 	if (!kvm_is_ucontrol(vcpu->kvm)) {
1599 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1600 		sca_add_vcpu(vcpu);
1601 	}
1602 
1603 }
1604 
1605 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1606 {
1607 	if (!test_kvm_facility(vcpu->kvm, 76))
1608 		return;
1609 
1610 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1611 
1612 	if (vcpu->kvm->arch.crypto.aes_kw)
1613 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1614 	if (vcpu->kvm->arch.crypto.dea_kw)
1615 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1616 
1617 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1618 }
1619 
1620 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1621 {
1622 	free_page(vcpu->arch.sie_block->cbrlo);
1623 	vcpu->arch.sie_block->cbrlo = 0;
1624 }
1625 
1626 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1627 {
1628 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1629 	if (!vcpu->arch.sie_block->cbrlo)
1630 		return -ENOMEM;
1631 
1632 	vcpu->arch.sie_block->ecb2 |= 0x80;
1633 	vcpu->arch.sie_block->ecb2 &= ~0x08;
1634 	return 0;
1635 }
1636 
1637 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1638 {
1639 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1640 
1641 	vcpu->arch.sie_block->ibc = model->ibc;
1642 	if (test_kvm_facility(vcpu->kvm, 7))
1643 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
1644 }
1645 
1646 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1647 {
1648 	int rc = 0;
1649 
1650 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1651 						    CPUSTAT_SM |
1652 						    CPUSTAT_STOPPED);
1653 
1654 	if (test_kvm_facility(vcpu->kvm, 78))
1655 		atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1656 	else if (test_kvm_facility(vcpu->kvm, 8))
1657 		atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1658 
1659 	kvm_s390_vcpu_setup_model(vcpu);
1660 
1661 	vcpu->arch.sie_block->ecb = 0x02;
1662 	if (test_kvm_facility(vcpu->kvm, 9))
1663 		vcpu->arch.sie_block->ecb |= 0x04;
1664 	if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
1665 		vcpu->arch.sie_block->ecb |= 0x10;
1666 
1667 	if (test_kvm_facility(vcpu->kvm, 8))
1668 		vcpu->arch.sie_block->ecb2 |= 0x08;
1669 	vcpu->arch.sie_block->eca   = 0xC1002000U;
1670 	if (sclp.has_siif)
1671 		vcpu->arch.sie_block->eca |= 1;
1672 	if (sclp.has_sigpif)
1673 		vcpu->arch.sie_block->eca |= 0x10000000U;
1674 	if (test_kvm_facility(vcpu->kvm, 64))
1675 		vcpu->arch.sie_block->ecb3 |= 0x01;
1676 	if (test_kvm_facility(vcpu->kvm, 129)) {
1677 		vcpu->arch.sie_block->eca |= 0x00020000;
1678 		vcpu->arch.sie_block->ecd |= 0x20000000;
1679 	}
1680 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
1681 	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1682 
1683 	if (vcpu->kvm->arch.use_cmma) {
1684 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
1685 		if (rc)
1686 			return rc;
1687 	}
1688 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1689 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1690 
1691 	kvm_s390_vcpu_crypto_setup(vcpu);
1692 
1693 	return rc;
1694 }
1695 
1696 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1697 				      unsigned int id)
1698 {
1699 	struct kvm_vcpu *vcpu;
1700 	struct sie_page *sie_page;
1701 	int rc = -EINVAL;
1702 
1703 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
1704 		goto out;
1705 
1706 	rc = -ENOMEM;
1707 
1708 	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1709 	if (!vcpu)
1710 		goto out;
1711 
1712 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1713 	if (!sie_page)
1714 		goto out_free_cpu;
1715 
1716 	vcpu->arch.sie_block = &sie_page->sie_block;
1717 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1718 
1719 	vcpu->arch.sie_block->icpua = id;
1720 	spin_lock_init(&vcpu->arch.local_int.lock);
1721 	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
1722 	vcpu->arch.local_int.wq = &vcpu->wq;
1723 	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
1724 	seqcount_init(&vcpu->arch.cputm_seqcount);
1725 
1726 	rc = kvm_vcpu_init(vcpu, kvm, id);
1727 	if (rc)
1728 		goto out_free_sie_block;
1729 	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
1730 		 vcpu->arch.sie_block);
1731 	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
1732 
1733 	return vcpu;
1734 out_free_sie_block:
1735 	free_page((unsigned long)(vcpu->arch.sie_block));
1736 out_free_cpu:
1737 	kmem_cache_free(kvm_vcpu_cache, vcpu);
1738 out:
1739 	return ERR_PTR(rc);
1740 }
1741 
1742 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
1743 {
1744 	return kvm_s390_vcpu_has_irq(vcpu, 0);
1745 }
1746 
1747 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
1748 {
1749 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1750 	exit_sie(vcpu);
1751 }
1752 
1753 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
1754 {
1755 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1756 }
1757 
1758 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
1759 {
1760 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1761 	exit_sie(vcpu);
1762 }
1763 
1764 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
1765 {
1766 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1767 }
1768 
1769 /*
1770  * Kick a guest cpu out of SIE and wait until SIE is not running.
1771  * If the CPU is not running (e.g. waiting as idle) the function will
1772  * return immediately. */
1773 void exit_sie(struct kvm_vcpu *vcpu)
1774 {
1775 	atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
1776 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
1777 		cpu_relax();
1778 }
1779 
1780 /* Kick a guest cpu out of SIE to process a request synchronously */
1781 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
1782 {
1783 	kvm_make_request(req, vcpu);
1784 	kvm_s390_vcpu_request(vcpu);
1785 }
1786 
1787 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
1788 {
1789 	int i;
1790 	struct kvm *kvm = gmap->private;
1791 	struct kvm_vcpu *vcpu;
1792 
1793 	kvm_for_each_vcpu(i, vcpu, kvm) {
1794 		/* match against both prefix pages */
1795 		if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
1796 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
1797 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
1798 		}
1799 	}
1800 }
1801 
1802 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
1803 {
1804 	/* kvm common code refers to this, but never calls it */
1805 	BUG();
1806 	return 0;
1807 }
1808 
1809 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
1810 					   struct kvm_one_reg *reg)
1811 {
1812 	int r = -EINVAL;
1813 
1814 	switch (reg->id) {
1815 	case KVM_REG_S390_TODPR:
1816 		r = put_user(vcpu->arch.sie_block->todpr,
1817 			     (u32 __user *)reg->addr);
1818 		break;
1819 	case KVM_REG_S390_EPOCHDIFF:
1820 		r = put_user(vcpu->arch.sie_block->epoch,
1821 			     (u64 __user *)reg->addr);
1822 		break;
1823 	case KVM_REG_S390_CPU_TIMER:
1824 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
1825 			     (u64 __user *)reg->addr);
1826 		break;
1827 	case KVM_REG_S390_CLOCK_COMP:
1828 		r = put_user(vcpu->arch.sie_block->ckc,
1829 			     (u64 __user *)reg->addr);
1830 		break;
1831 	case KVM_REG_S390_PFTOKEN:
1832 		r = put_user(vcpu->arch.pfault_token,
1833 			     (u64 __user *)reg->addr);
1834 		break;
1835 	case KVM_REG_S390_PFCOMPARE:
1836 		r = put_user(vcpu->arch.pfault_compare,
1837 			     (u64 __user *)reg->addr);
1838 		break;
1839 	case KVM_REG_S390_PFSELECT:
1840 		r = put_user(vcpu->arch.pfault_select,
1841 			     (u64 __user *)reg->addr);
1842 		break;
1843 	case KVM_REG_S390_PP:
1844 		r = put_user(vcpu->arch.sie_block->pp,
1845 			     (u64 __user *)reg->addr);
1846 		break;
1847 	case KVM_REG_S390_GBEA:
1848 		r = put_user(vcpu->arch.sie_block->gbea,
1849 			     (u64 __user *)reg->addr);
1850 		break;
1851 	default:
1852 		break;
1853 	}
1854 
1855 	return r;
1856 }
1857 
1858 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
1859 					   struct kvm_one_reg *reg)
1860 {
1861 	int r = -EINVAL;
1862 	__u64 val;
1863 
1864 	switch (reg->id) {
1865 	case KVM_REG_S390_TODPR:
1866 		r = get_user(vcpu->arch.sie_block->todpr,
1867 			     (u32 __user *)reg->addr);
1868 		break;
1869 	case KVM_REG_S390_EPOCHDIFF:
1870 		r = get_user(vcpu->arch.sie_block->epoch,
1871 			     (u64 __user *)reg->addr);
1872 		break;
1873 	case KVM_REG_S390_CPU_TIMER:
1874 		r = get_user(val, (u64 __user *)reg->addr);
1875 		if (!r)
1876 			kvm_s390_set_cpu_timer(vcpu, val);
1877 		break;
1878 	case KVM_REG_S390_CLOCK_COMP:
1879 		r = get_user(vcpu->arch.sie_block->ckc,
1880 			     (u64 __user *)reg->addr);
1881 		break;
1882 	case KVM_REG_S390_PFTOKEN:
1883 		r = get_user(vcpu->arch.pfault_token,
1884 			     (u64 __user *)reg->addr);
1885 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1886 			kvm_clear_async_pf_completion_queue(vcpu);
1887 		break;
1888 	case KVM_REG_S390_PFCOMPARE:
1889 		r = get_user(vcpu->arch.pfault_compare,
1890 			     (u64 __user *)reg->addr);
1891 		break;
1892 	case KVM_REG_S390_PFSELECT:
1893 		r = get_user(vcpu->arch.pfault_select,
1894 			     (u64 __user *)reg->addr);
1895 		break;
1896 	case KVM_REG_S390_PP:
1897 		r = get_user(vcpu->arch.sie_block->pp,
1898 			     (u64 __user *)reg->addr);
1899 		break;
1900 	case KVM_REG_S390_GBEA:
1901 		r = get_user(vcpu->arch.sie_block->gbea,
1902 			     (u64 __user *)reg->addr);
1903 		break;
1904 	default:
1905 		break;
1906 	}
1907 
1908 	return r;
1909 }
1910 
1911 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
1912 {
1913 	kvm_s390_vcpu_initial_reset(vcpu);
1914 	return 0;
1915 }
1916 
1917 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1918 {
1919 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
1920 	return 0;
1921 }
1922 
1923 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1924 {
1925 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
1926 	return 0;
1927 }
1928 
1929 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
1930 				  struct kvm_sregs *sregs)
1931 {
1932 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
1933 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
1934 	restore_access_regs(vcpu->run->s.regs.acrs);
1935 	return 0;
1936 }
1937 
1938 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
1939 				  struct kvm_sregs *sregs)
1940 {
1941 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
1942 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
1943 	return 0;
1944 }
1945 
1946 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1947 {
1948 	/* make sure the new values will be lazily loaded */
1949 	save_fpu_regs();
1950 	if (test_fp_ctl(fpu->fpc))
1951 		return -EINVAL;
1952 	current->thread.fpu.fpc = fpu->fpc;
1953 	if (MACHINE_HAS_VX)
1954 		convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
1955 	else
1956 		memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));
1957 	return 0;
1958 }
1959 
1960 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1961 {
1962 	/* make sure we have the latest values */
1963 	save_fpu_regs();
1964 	if (MACHINE_HAS_VX)
1965 		convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
1966 	else
1967 		memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
1968 	fpu->fpc = current->thread.fpu.fpc;
1969 	return 0;
1970 }
1971 
1972 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
1973 {
1974 	int rc = 0;
1975 
1976 	if (!is_vcpu_stopped(vcpu))
1977 		rc = -EBUSY;
1978 	else {
1979 		vcpu->run->psw_mask = psw.mask;
1980 		vcpu->run->psw_addr = psw.addr;
1981 	}
1982 	return rc;
1983 }
1984 
1985 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
1986 				  struct kvm_translation *tr)
1987 {
1988 	return -EINVAL; /* not implemented yet */
1989 }
1990 
1991 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
1992 			      KVM_GUESTDBG_USE_HW_BP | \
1993 			      KVM_GUESTDBG_ENABLE)
1994 
1995 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
1996 					struct kvm_guest_debug *dbg)
1997 {
1998 	int rc = 0;
1999 
2000 	vcpu->guest_debug = 0;
2001 	kvm_s390_clear_bp_data(vcpu);
2002 
2003 	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2004 		return -EINVAL;
2005 
2006 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
2007 		vcpu->guest_debug = dbg->control;
2008 		/* enforce guest PER */
2009 		atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2010 
2011 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2012 			rc = kvm_s390_import_bp_data(vcpu, dbg);
2013 	} else {
2014 		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2015 		vcpu->arch.guestdbg.last_bp = 0;
2016 	}
2017 
2018 	if (rc) {
2019 		vcpu->guest_debug = 0;
2020 		kvm_s390_clear_bp_data(vcpu);
2021 		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2022 	}
2023 
2024 	return rc;
2025 }
2026 
2027 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2028 				    struct kvm_mp_state *mp_state)
2029 {
2030 	/* CHECK_STOP and LOAD are not supported yet */
2031 	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2032 				       KVM_MP_STATE_OPERATING;
2033 }
2034 
2035 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2036 				    struct kvm_mp_state *mp_state)
2037 {
2038 	int rc = 0;
2039 
2040 	/* user space knows about this interface - let it control the state */
2041 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2042 
2043 	switch (mp_state->mp_state) {
2044 	case KVM_MP_STATE_STOPPED:
2045 		kvm_s390_vcpu_stop(vcpu);
2046 		break;
2047 	case KVM_MP_STATE_OPERATING:
2048 		kvm_s390_vcpu_start(vcpu);
2049 		break;
2050 	case KVM_MP_STATE_LOAD:
2051 	case KVM_MP_STATE_CHECK_STOP:
2052 		/* fall through - CHECK_STOP and LOAD are not supported yet */
2053 	default:
2054 		rc = -ENXIO;
2055 	}
2056 
2057 	return rc;
2058 }
2059 
2060 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2061 {
2062 	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2063 }
2064 
2065 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2066 {
2067 retry:
2068 	kvm_s390_vcpu_request_handled(vcpu);
2069 	if (!vcpu->requests)
2070 		return 0;
2071 	/*
2072 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2073 	 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
2074 	 * This ensures that the ipte instruction for this request has
2075 	 * already finished. We might race against a second unmapper that
2076 	 * wants to set the blocking bit. Lets just retry the request loop.
2077 	 */
2078 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2079 		int rc;
2080 		rc = gmap_ipte_notify(vcpu->arch.gmap,
2081 				      kvm_s390_get_prefix(vcpu),
2082 				      PAGE_SIZE * 2);
2083 		if (rc)
2084 			return rc;
2085 		goto retry;
2086 	}
2087 
2088 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2089 		vcpu->arch.sie_block->ihcpu = 0xffff;
2090 		goto retry;
2091 	}
2092 
2093 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2094 		if (!ibs_enabled(vcpu)) {
2095 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2096 			atomic_or(CPUSTAT_IBS,
2097 					&vcpu->arch.sie_block->cpuflags);
2098 		}
2099 		goto retry;
2100 	}
2101 
2102 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2103 		if (ibs_enabled(vcpu)) {
2104 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2105 			atomic_andnot(CPUSTAT_IBS,
2106 					  &vcpu->arch.sie_block->cpuflags);
2107 		}
2108 		goto retry;
2109 	}
2110 
2111 	/* nothing to do, just clear the request */
2112 	clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
2113 
2114 	return 0;
2115 }
2116 
2117 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2118 {
2119 	struct kvm_vcpu *vcpu;
2120 	int i;
2121 
2122 	mutex_lock(&kvm->lock);
2123 	preempt_disable();
2124 	kvm->arch.epoch = tod - get_tod_clock();
2125 	kvm_s390_vcpu_block_all(kvm);
2126 	kvm_for_each_vcpu(i, vcpu, kvm)
2127 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2128 	kvm_s390_vcpu_unblock_all(kvm);
2129 	preempt_enable();
2130 	mutex_unlock(&kvm->lock);
2131 }
2132 
2133 /**
2134  * kvm_arch_fault_in_page - fault-in guest page if necessary
2135  * @vcpu: The corresponding virtual cpu
2136  * @gpa: Guest physical address
2137  * @writable: Whether the page should be writable or not
2138  *
2139  * Make sure that a guest page has been faulted-in on the host.
2140  *
2141  * Return: Zero on success, negative error code otherwise.
2142  */
2143 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2144 {
2145 	return gmap_fault(vcpu->arch.gmap, gpa,
2146 			  writable ? FAULT_FLAG_WRITE : 0);
2147 }
2148 
2149 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2150 				      unsigned long token)
2151 {
2152 	struct kvm_s390_interrupt inti;
2153 	struct kvm_s390_irq irq;
2154 
2155 	if (start_token) {
2156 		irq.u.ext.ext_params2 = token;
2157 		irq.type = KVM_S390_INT_PFAULT_INIT;
2158 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2159 	} else {
2160 		inti.type = KVM_S390_INT_PFAULT_DONE;
2161 		inti.parm64 = token;
2162 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2163 	}
2164 }
2165 
2166 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2167 				     struct kvm_async_pf *work)
2168 {
2169 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2170 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2171 }
2172 
2173 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2174 				 struct kvm_async_pf *work)
2175 {
2176 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2177 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2178 }
2179 
2180 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
2181 			       struct kvm_async_pf *work)
2182 {
2183 	/* s390 will always inject the page directly */
2184 }
2185 
2186 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2187 {
2188 	/*
2189 	 * s390 will always inject the page directly,
2190 	 * but we still want check_async_completion to cleanup
2191 	 */
2192 	return true;
2193 }
2194 
2195 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2196 {
2197 	hva_t hva;
2198 	struct kvm_arch_async_pf arch;
2199 	int rc;
2200 
2201 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2202 		return 0;
2203 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2204 	    vcpu->arch.pfault_compare)
2205 		return 0;
2206 	if (psw_extint_disabled(vcpu))
2207 		return 0;
2208 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
2209 		return 0;
2210 	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2211 		return 0;
2212 	if (!vcpu->arch.gmap->pfault_enabled)
2213 		return 0;
2214 
2215 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2216 	hva += current->thread.gmap_addr & ~PAGE_MASK;
2217 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2218 		return 0;
2219 
2220 	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2221 	return rc;
2222 }
2223 
2224 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2225 {
2226 	int rc, cpuflags;
2227 
2228 	/*
2229 	 * On s390 notifications for arriving pages will be delivered directly
2230 	 * to the guest but the house keeping for completed pfaults is
2231 	 * handled outside the worker.
2232 	 */
2233 	kvm_check_async_pf_completion(vcpu);
2234 
2235 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2236 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2237 
2238 	if (need_resched())
2239 		schedule();
2240 
2241 	if (test_cpu_flag(CIF_MCCK_PENDING))
2242 		s390_handle_mcck();
2243 
2244 	if (!kvm_is_ucontrol(vcpu->kvm)) {
2245 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
2246 		if (rc)
2247 			return rc;
2248 	}
2249 
2250 	rc = kvm_s390_handle_requests(vcpu);
2251 	if (rc)
2252 		return rc;
2253 
2254 	if (guestdbg_enabled(vcpu)) {
2255 		kvm_s390_backup_guest_per_regs(vcpu);
2256 		kvm_s390_patch_guest_per_regs(vcpu);
2257 	}
2258 
2259 	vcpu->arch.sie_block->icptcode = 0;
2260 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2261 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2262 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
2263 
2264 	return 0;
2265 }
2266 
2267 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2268 {
2269 	struct kvm_s390_pgm_info pgm_info = {
2270 		.code = PGM_ADDRESSING,
2271 	};
2272 	u8 opcode, ilen;
2273 	int rc;
2274 
2275 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2276 	trace_kvm_s390_sie_fault(vcpu);
2277 
2278 	/*
2279 	 * We want to inject an addressing exception, which is defined as a
2280 	 * suppressing or terminating exception. However, since we came here
2281 	 * by a DAT access exception, the PSW still points to the faulting
2282 	 * instruction since DAT exceptions are nullifying. So we've got
2283 	 * to look up the current opcode to get the length of the instruction
2284 	 * to be able to forward the PSW.
2285 	 */
2286 	rc = read_guest_instr(vcpu, &opcode, 1);
2287 	ilen = insn_length(opcode);
2288 	if (rc < 0) {
2289 		return rc;
2290 	} else if (rc) {
2291 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
2292 		 * Forward by arbitrary ilc, injection will take care of
2293 		 * nullification if necessary.
2294 		 */
2295 		pgm_info = vcpu->arch.pgm;
2296 		ilen = 4;
2297 	}
2298 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
2299 	kvm_s390_forward_psw(vcpu, ilen);
2300 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
2301 }
2302 
2303 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2304 {
2305 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2306 		   vcpu->arch.sie_block->icptcode);
2307 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2308 
2309 	if (guestdbg_enabled(vcpu))
2310 		kvm_s390_restore_guest_per_regs(vcpu);
2311 
2312 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2313 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2314 
2315 	if (vcpu->arch.sie_block->icptcode > 0) {
2316 		int rc = kvm_handle_sie_intercept(vcpu);
2317 
2318 		if (rc != -EOPNOTSUPP)
2319 			return rc;
2320 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2321 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2322 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2323 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2324 		return -EREMOTE;
2325 	} else if (exit_reason != -EFAULT) {
2326 		vcpu->stat.exit_null++;
2327 		return 0;
2328 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
2329 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2330 		vcpu->run->s390_ucontrol.trans_exc_code =
2331 						current->thread.gmap_addr;
2332 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
2333 		return -EREMOTE;
2334 	} else if (current->thread.gmap_pfault) {
2335 		trace_kvm_s390_major_guest_pfault(vcpu);
2336 		current->thread.gmap_pfault = 0;
2337 		if (kvm_arch_setup_async_pf(vcpu))
2338 			return 0;
2339 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2340 	}
2341 	return vcpu_post_run_fault_in_sie(vcpu);
2342 }
2343 
2344 static int __vcpu_run(struct kvm_vcpu *vcpu)
2345 {
2346 	int rc, exit_reason;
2347 
2348 	/*
2349 	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
2350 	 * ning the guest), so that memslots (and other stuff) are protected
2351 	 */
2352 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2353 
2354 	do {
2355 		rc = vcpu_pre_run(vcpu);
2356 		if (rc)
2357 			break;
2358 
2359 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2360 		/*
2361 		 * As PF_VCPU will be used in fault handler, between
2362 		 * guest_enter and guest_exit should be no uaccess.
2363 		 */
2364 		local_irq_disable();
2365 		__kvm_guest_enter();
2366 		__disable_cpu_timer_accounting(vcpu);
2367 		local_irq_enable();
2368 		exit_reason = sie64a(vcpu->arch.sie_block,
2369 				     vcpu->run->s.regs.gprs);
2370 		local_irq_disable();
2371 		__enable_cpu_timer_accounting(vcpu);
2372 		__kvm_guest_exit();
2373 		local_irq_enable();
2374 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2375 
2376 		rc = vcpu_post_run(vcpu, exit_reason);
2377 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2378 
2379 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2380 	return rc;
2381 }
2382 
2383 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2384 {
2385 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2386 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2387 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2388 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2389 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2390 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2391 		/* some control register changes require a tlb flush */
2392 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2393 	}
2394 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2395 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
2396 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2397 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2398 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2399 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2400 	}
2401 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2402 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2403 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2404 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2405 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2406 			kvm_clear_async_pf_completion_queue(vcpu);
2407 	}
2408 	kvm_run->kvm_dirty_regs = 0;
2409 }
2410 
2411 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2412 {
2413 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2414 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2415 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2416 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2417 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
2418 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2419 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2420 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2421 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2422 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2423 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2424 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2425 }
2426 
2427 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2428 {
2429 	int rc;
2430 	sigset_t sigsaved;
2431 
2432 	if (guestdbg_exit_pending(vcpu)) {
2433 		kvm_s390_prepare_debug_exit(vcpu);
2434 		return 0;
2435 	}
2436 
2437 	if (vcpu->sigset_active)
2438 		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2439 
2440 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2441 		kvm_s390_vcpu_start(vcpu);
2442 	} else if (is_vcpu_stopped(vcpu)) {
2443 		pr_err_ratelimited("can't run stopped vcpu %d\n",
2444 				   vcpu->vcpu_id);
2445 		return -EINVAL;
2446 	}
2447 
2448 	sync_regs(vcpu, kvm_run);
2449 	enable_cpu_timer_accounting(vcpu);
2450 
2451 	might_fault();
2452 	rc = __vcpu_run(vcpu);
2453 
2454 	if (signal_pending(current) && !rc) {
2455 		kvm_run->exit_reason = KVM_EXIT_INTR;
2456 		rc = -EINTR;
2457 	}
2458 
2459 	if (guestdbg_exit_pending(vcpu) && !rc)  {
2460 		kvm_s390_prepare_debug_exit(vcpu);
2461 		rc = 0;
2462 	}
2463 
2464 	if (rc == -EREMOTE) {
2465 		/* userspace support is needed, kvm_run has been prepared */
2466 		rc = 0;
2467 	}
2468 
2469 	disable_cpu_timer_accounting(vcpu);
2470 	store_regs(vcpu, kvm_run);
2471 
2472 	if (vcpu->sigset_active)
2473 		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2474 
2475 	vcpu->stat.exit_userspace++;
2476 	return rc;
2477 }
2478 
2479 /*
2480  * store status at address
2481  * we use have two special cases:
2482  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2483  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2484  */
2485 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2486 {
2487 	unsigned char archmode = 1;
2488 	freg_t fprs[NUM_FPRS];
2489 	unsigned int px;
2490 	u64 clkcomp, cputm;
2491 	int rc;
2492 
2493 	px = kvm_s390_get_prefix(vcpu);
2494 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2495 		if (write_guest_abs(vcpu, 163, &archmode, 1))
2496 			return -EFAULT;
2497 		gpa = 0;
2498 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2499 		if (write_guest_real(vcpu, 163, &archmode, 1))
2500 			return -EFAULT;
2501 		gpa = px;
2502 	} else
2503 		gpa -= __LC_FPREGS_SAVE_AREA;
2504 
2505 	/* manually convert vector registers if necessary */
2506 	if (MACHINE_HAS_VX) {
2507 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
2508 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2509 				     fprs, 128);
2510 	} else {
2511 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2512 				     vcpu->run->s.regs.fprs, 128);
2513 	}
2514 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2515 			      vcpu->run->s.regs.gprs, 128);
2516 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2517 			      &vcpu->arch.sie_block->gpsw, 16);
2518 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2519 			      &px, 4);
2520 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2521 			      &vcpu->run->s.regs.fpc, 4);
2522 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2523 			      &vcpu->arch.sie_block->todpr, 4);
2524 	cputm = kvm_s390_get_cpu_timer(vcpu);
2525 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2526 			      &cputm, 8);
2527 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
2528 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2529 			      &clkcomp, 8);
2530 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
2531 			      &vcpu->run->s.regs.acrs, 64);
2532 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
2533 			      &vcpu->arch.sie_block->gcr, 128);
2534 	return rc ? -EFAULT : 0;
2535 }
2536 
2537 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2538 {
2539 	/*
2540 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2541 	 * copying in vcpu load/put. Lets update our copies before we save
2542 	 * it into the save area
2543 	 */
2544 	save_fpu_regs();
2545 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2546 	save_access_regs(vcpu->run->s.regs.acrs);
2547 
2548 	return kvm_s390_store_status_unloaded(vcpu, addr);
2549 }
2550 
2551 /*
2552  * store additional status at address
2553  */
2554 int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
2555 					unsigned long gpa)
2556 {
2557 	/* Only bits 0-53 are used for address formation */
2558 	if (!(gpa & ~0x3ff))
2559 		return 0;
2560 
2561 	return write_guest_abs(vcpu, gpa & ~0x3ff,
2562 			       (void *)&vcpu->run->s.regs.vrs, 512);
2563 }
2564 
2565 int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
2566 {
2567 	if (!test_kvm_facility(vcpu->kvm, 129))
2568 		return 0;
2569 
2570 	/*
2571 	 * The guest VXRS are in the host VXRs due to the lazy
2572 	 * copying in vcpu load/put. We can simply call save_fpu_regs()
2573 	 * to save the current register state because we are in the
2574 	 * middle of a load/put cycle.
2575 	 *
2576 	 * Let's update our copies before we save it into the save area.
2577 	 */
2578 	save_fpu_regs();
2579 
2580 	return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
2581 }
2582 
2583 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2584 {
2585 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2586 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2587 }
2588 
2589 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2590 {
2591 	unsigned int i;
2592 	struct kvm_vcpu *vcpu;
2593 
2594 	kvm_for_each_vcpu(i, vcpu, kvm) {
2595 		__disable_ibs_on_vcpu(vcpu);
2596 	}
2597 }
2598 
2599 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2600 {
2601 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2602 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2603 }
2604 
2605 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2606 {
2607 	int i, online_vcpus, started_vcpus = 0;
2608 
2609 	if (!is_vcpu_stopped(vcpu))
2610 		return;
2611 
2612 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2613 	/* Only one cpu at a time may enter/leave the STOPPED state. */
2614 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
2615 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2616 
2617 	for (i = 0; i < online_vcpus; i++) {
2618 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2619 			started_vcpus++;
2620 	}
2621 
2622 	if (started_vcpus == 0) {
2623 		/* we're the only active VCPU -> speed it up */
2624 		__enable_ibs_on_vcpu(vcpu);
2625 	} else if (started_vcpus == 1) {
2626 		/*
2627 		 * As we are starting a second VCPU, we have to disable
2628 		 * the IBS facility on all VCPUs to remove potentially
2629 		 * oustanding ENABLE requests.
2630 		 */
2631 		__disable_ibs_on_all_vcpus(vcpu->kvm);
2632 	}
2633 
2634 	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2635 	/*
2636 	 * Another VCPU might have used IBS while we were offline.
2637 	 * Let's play safe and flush the VCPU at startup.
2638 	 */
2639 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2640 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2641 	return;
2642 }
2643 
2644 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2645 {
2646 	int i, online_vcpus, started_vcpus = 0;
2647 	struct kvm_vcpu *started_vcpu = NULL;
2648 
2649 	if (is_vcpu_stopped(vcpu))
2650 		return;
2651 
2652 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2653 	/* Only one cpu at a time may enter/leave the STOPPED state. */
2654 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
2655 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2656 
2657 	/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
2658 	kvm_s390_clear_stop_irq(vcpu);
2659 
2660 	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2661 	__disable_ibs_on_vcpu(vcpu);
2662 
2663 	for (i = 0; i < online_vcpus; i++) {
2664 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2665 			started_vcpus++;
2666 			started_vcpu = vcpu->kvm->vcpus[i];
2667 		}
2668 	}
2669 
2670 	if (started_vcpus == 1) {
2671 		/*
2672 		 * As we only have one VCPU left, we want to enable the
2673 		 * IBS facility for that VCPU to speed it up.
2674 		 */
2675 		__enable_ibs_on_vcpu(started_vcpu);
2676 	}
2677 
2678 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2679 	return;
2680 }
2681 
2682 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2683 				     struct kvm_enable_cap *cap)
2684 {
2685 	int r;
2686 
2687 	if (cap->flags)
2688 		return -EINVAL;
2689 
2690 	switch (cap->cap) {
2691 	case KVM_CAP_S390_CSS_SUPPORT:
2692 		if (!vcpu->kvm->arch.css_support) {
2693 			vcpu->kvm->arch.css_support = 1;
2694 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
2695 			trace_kvm_s390_enable_css(vcpu->kvm);
2696 		}
2697 		r = 0;
2698 		break;
2699 	default:
2700 		r = -EINVAL;
2701 		break;
2702 	}
2703 	return r;
2704 }
2705 
2706 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
2707 				  struct kvm_s390_mem_op *mop)
2708 {
2709 	void __user *uaddr = (void __user *)mop->buf;
2710 	void *tmpbuf = NULL;
2711 	int r, srcu_idx;
2712 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
2713 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
2714 
2715 	if (mop->flags & ~supported_flags)
2716 		return -EINVAL;
2717 
2718 	if (mop->size > MEM_OP_MAX_SIZE)
2719 		return -E2BIG;
2720 
2721 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2722 		tmpbuf = vmalloc(mop->size);
2723 		if (!tmpbuf)
2724 			return -ENOMEM;
2725 	}
2726 
2727 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2728 
2729 	switch (mop->op) {
2730 	case KVM_S390_MEMOP_LOGICAL_READ:
2731 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2732 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
2733 					    mop->size, GACC_FETCH);
2734 			break;
2735 		}
2736 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2737 		if (r == 0) {
2738 			if (copy_to_user(uaddr, tmpbuf, mop->size))
2739 				r = -EFAULT;
2740 		}
2741 		break;
2742 	case KVM_S390_MEMOP_LOGICAL_WRITE:
2743 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2744 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
2745 					    mop->size, GACC_STORE);
2746 			break;
2747 		}
2748 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2749 			r = -EFAULT;
2750 			break;
2751 		}
2752 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2753 		break;
2754 	default:
2755 		r = -EINVAL;
2756 	}
2757 
2758 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
2759 
2760 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
2761 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
2762 
2763 	vfree(tmpbuf);
2764 	return r;
2765 }
2766 
2767 long kvm_arch_vcpu_ioctl(struct file *filp,
2768 			 unsigned int ioctl, unsigned long arg)
2769 {
2770 	struct kvm_vcpu *vcpu = filp->private_data;
2771 	void __user *argp = (void __user *)arg;
2772 	int idx;
2773 	long r;
2774 
2775 	switch (ioctl) {
2776 	case KVM_S390_IRQ: {
2777 		struct kvm_s390_irq s390irq;
2778 
2779 		r = -EFAULT;
2780 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
2781 			break;
2782 		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2783 		break;
2784 	}
2785 	case KVM_S390_INTERRUPT: {
2786 		struct kvm_s390_interrupt s390int;
2787 		struct kvm_s390_irq s390irq;
2788 
2789 		r = -EFAULT;
2790 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
2791 			break;
2792 		if (s390int_to_s390irq(&s390int, &s390irq))
2793 			return -EINVAL;
2794 		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2795 		break;
2796 	}
2797 	case KVM_S390_STORE_STATUS:
2798 		idx = srcu_read_lock(&vcpu->kvm->srcu);
2799 		r = kvm_s390_vcpu_store_status(vcpu, arg);
2800 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
2801 		break;
2802 	case KVM_S390_SET_INITIAL_PSW: {
2803 		psw_t psw;
2804 
2805 		r = -EFAULT;
2806 		if (copy_from_user(&psw, argp, sizeof(psw)))
2807 			break;
2808 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
2809 		break;
2810 	}
2811 	case KVM_S390_INITIAL_RESET:
2812 		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
2813 		break;
2814 	case KVM_SET_ONE_REG:
2815 	case KVM_GET_ONE_REG: {
2816 		struct kvm_one_reg reg;
2817 		r = -EFAULT;
2818 		if (copy_from_user(&reg, argp, sizeof(reg)))
2819 			break;
2820 		if (ioctl == KVM_SET_ONE_REG)
2821 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
2822 		else
2823 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
2824 		break;
2825 	}
2826 #ifdef CONFIG_KVM_S390_UCONTROL
2827 	case KVM_S390_UCAS_MAP: {
2828 		struct kvm_s390_ucas_mapping ucasmap;
2829 
2830 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2831 			r = -EFAULT;
2832 			break;
2833 		}
2834 
2835 		if (!kvm_is_ucontrol(vcpu->kvm)) {
2836 			r = -EINVAL;
2837 			break;
2838 		}
2839 
2840 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
2841 				     ucasmap.vcpu_addr, ucasmap.length);
2842 		break;
2843 	}
2844 	case KVM_S390_UCAS_UNMAP: {
2845 		struct kvm_s390_ucas_mapping ucasmap;
2846 
2847 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2848 			r = -EFAULT;
2849 			break;
2850 		}
2851 
2852 		if (!kvm_is_ucontrol(vcpu->kvm)) {
2853 			r = -EINVAL;
2854 			break;
2855 		}
2856 
2857 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
2858 			ucasmap.length);
2859 		break;
2860 	}
2861 #endif
2862 	case KVM_S390_VCPU_FAULT: {
2863 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
2864 		break;
2865 	}
2866 	case KVM_ENABLE_CAP:
2867 	{
2868 		struct kvm_enable_cap cap;
2869 		r = -EFAULT;
2870 		if (copy_from_user(&cap, argp, sizeof(cap)))
2871 			break;
2872 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
2873 		break;
2874 	}
2875 	case KVM_S390_MEM_OP: {
2876 		struct kvm_s390_mem_op mem_op;
2877 
2878 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
2879 			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
2880 		else
2881 			r = -EFAULT;
2882 		break;
2883 	}
2884 	case KVM_S390_SET_IRQ_STATE: {
2885 		struct kvm_s390_irq_state irq_state;
2886 
2887 		r = -EFAULT;
2888 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2889 			break;
2890 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
2891 		    irq_state.len == 0 ||
2892 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
2893 			r = -EINVAL;
2894 			break;
2895 		}
2896 		r = kvm_s390_set_irq_state(vcpu,
2897 					   (void __user *) irq_state.buf,
2898 					   irq_state.len);
2899 		break;
2900 	}
2901 	case KVM_S390_GET_IRQ_STATE: {
2902 		struct kvm_s390_irq_state irq_state;
2903 
2904 		r = -EFAULT;
2905 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2906 			break;
2907 		if (irq_state.len == 0) {
2908 			r = -EINVAL;
2909 			break;
2910 		}
2911 		r = kvm_s390_get_irq_state(vcpu,
2912 					   (__u8 __user *)  irq_state.buf,
2913 					   irq_state.len);
2914 		break;
2915 	}
2916 	default:
2917 		r = -ENOTTY;
2918 	}
2919 	return r;
2920 }
2921 
2922 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
2923 {
2924 #ifdef CONFIG_KVM_S390_UCONTROL
2925 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
2926 		 && (kvm_is_ucontrol(vcpu->kvm))) {
2927 		vmf->page = virt_to_page(vcpu->arch.sie_block);
2928 		get_page(vmf->page);
2929 		return 0;
2930 	}
2931 #endif
2932 	return VM_FAULT_SIGBUS;
2933 }
2934 
2935 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
2936 			    unsigned long npages)
2937 {
2938 	return 0;
2939 }
2940 
2941 /* Section: memory related */
2942 int kvm_arch_prepare_memory_region(struct kvm *kvm,
2943 				   struct kvm_memory_slot *memslot,
2944 				   const struct kvm_userspace_memory_region *mem,
2945 				   enum kvm_mr_change change)
2946 {
2947 	/* A few sanity checks. We can have memory slots which have to be
2948 	   located/ended at a segment boundary (1MB). The memory in userland is
2949 	   ok to be fragmented into various different vmas. It is okay to mmap()
2950 	   and munmap() stuff in this slot after doing this call at any time */
2951 
2952 	if (mem->userspace_addr & 0xffffful)
2953 		return -EINVAL;
2954 
2955 	if (mem->memory_size & 0xffffful)
2956 		return -EINVAL;
2957 
2958 	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
2959 		return -EINVAL;
2960 
2961 	return 0;
2962 }
2963 
2964 void kvm_arch_commit_memory_region(struct kvm *kvm,
2965 				const struct kvm_userspace_memory_region *mem,
2966 				const struct kvm_memory_slot *old,
2967 				const struct kvm_memory_slot *new,
2968 				enum kvm_mr_change change)
2969 {
2970 	int rc;
2971 
2972 	/* If the basics of the memslot do not change, we do not want
2973 	 * to update the gmap. Every update causes several unnecessary
2974 	 * segment translation exceptions. This is usually handled just
2975 	 * fine by the normal fault handler + gmap, but it will also
2976 	 * cause faults on the prefix page of running guest CPUs.
2977 	 */
2978 	if (old->userspace_addr == mem->userspace_addr &&
2979 	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
2980 	    old->npages * PAGE_SIZE == mem->memory_size)
2981 		return;
2982 
2983 	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
2984 		mem->guest_phys_addr, mem->memory_size);
2985 	if (rc)
2986 		pr_warn("failed to commit memory region\n");
2987 	return;
2988 }
2989 
2990 static inline unsigned long nonhyp_mask(int i)
2991 {
2992 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
2993 
2994 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
2995 }
2996 
2997 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
2998 {
2999 	vcpu->valid_wakeup = false;
3000 }
3001 
3002 static int __init kvm_s390_init(void)
3003 {
3004 	int i;
3005 
3006 	if (!sclp.has_sief2) {
3007 		pr_info("SIE not available\n");
3008 		return -ENODEV;
3009 	}
3010 
3011 	for (i = 0; i < 16; i++)
3012 		kvm_s390_fac_list_mask[i] |=
3013 			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3014 
3015 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3016 }
3017 
3018 static void __exit kvm_s390_exit(void)
3019 {
3020 	kvm_exit();
3021 }
3022 
3023 module_init(kvm_s390_init);
3024 module_exit(kvm_s390_exit);
3025 
3026 /*
3027  * Enable autoloading of the kvm module.
3028  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3029  * since x86 takes a different approach.
3030  */
3031 #include <linux/miscdevice.h>
3032 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3033 MODULE_ALIAS("devname:kvm");
3034