xref: /openbmc/linux/arch/s390/kvm/kvm-s390.c (revision 8571e645)
1 /*
2  * hosting zSeries kernel virtual machines
3  *
4  * Copyright IBM Corp. 2008, 2009
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License (version 2 only)
8  * as published by the Free Software Foundation.
9  *
10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
11  *               Christian Borntraeger <borntraeger@de.ibm.com>
12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
14  *               Jason J. Herne <jjherne@us.ibm.com>
15  */
16 
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/module.h>
25 #include <linux/random.h>
26 #include <linux/slab.h>
27 #include <linux/timer.h>
28 #include <linux/vmalloc.h>
29 #include <asm/asm-offsets.h>
30 #include <asm/lowcore.h>
31 #include <asm/etr.h>
32 #include <asm/pgtable.h>
33 #include <asm/gmap.h>
34 #include <asm/nmi.h>
35 #include <asm/switch_to.h>
36 #include <asm/isc.h>
37 #include <asm/sclp.h>
38 #include "kvm-s390.h"
39 #include "gaccess.h"
40 
41 #define KMSG_COMPONENT "kvm-s390"
42 #undef pr_fmt
43 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
44 
45 #define CREATE_TRACE_POINTS
46 #include "trace.h"
47 #include "trace-s390.h"
48 
49 #define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
50 #define LOCAL_IRQS 32
51 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
52 			   (KVM_MAX_VCPUS + LOCAL_IRQS))
53 
54 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
55 
56 struct kvm_stats_debugfs_item debugfs_entries[] = {
57 	{ "userspace_handled", VCPU_STAT(exit_userspace) },
58 	{ "exit_null", VCPU_STAT(exit_null) },
59 	{ "exit_validity", VCPU_STAT(exit_validity) },
60 	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
61 	{ "exit_external_request", VCPU_STAT(exit_external_request) },
62 	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
63 	{ "exit_instruction", VCPU_STAT(exit_instruction) },
64 	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
65 	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
66 	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
67 	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
68 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
69 	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
70 	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
71 	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
72 	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
73 	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
74 	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
75 	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
76 	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
77 	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
78 	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
79 	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
80 	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
81 	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
82 	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
83 	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
84 	{ "instruction_spx", VCPU_STAT(instruction_spx) },
85 	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
86 	{ "instruction_stap", VCPU_STAT(instruction_stap) },
87 	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
88 	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
89 	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
90 	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
91 	{ "instruction_essa", VCPU_STAT(instruction_essa) },
92 	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
93 	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
94 	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
95 	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
96 	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
97 	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
98 	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
99 	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
100 	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
101 	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
102 	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
103 	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
104 	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
105 	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
106 	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
107 	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
108 	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
109 	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
110 	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
111 	{ "diagnose_10", VCPU_STAT(diagnose_10) },
112 	{ "diagnose_44", VCPU_STAT(diagnose_44) },
113 	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
114 	{ "diagnose_258", VCPU_STAT(diagnose_258) },
115 	{ "diagnose_308", VCPU_STAT(diagnose_308) },
116 	{ "diagnose_500", VCPU_STAT(diagnose_500) },
117 	{ NULL }
118 };
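
/*
 * Each entry above names a counter in the per-vcpu stat area; the generic
 * KVM code walks this table and creates one debugfs file per counter.
 * Illustrative read, assuming debugfs is mounted in the usual place:
 *
 *	# cat /sys/kernel/debug/kvm/exit_instruction
 */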
119 
120 /* upper limit on the facilities that kvm exposes to guests */
121 unsigned long kvm_s390_fac_list_mask[] = {
122 	0xffe6fffbfcfdfc40UL,
123 	0x005e800000000000UL,
124 };
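
/*
 * Facility bits follow the STFLE numbering: facility F lives in word
 * F / 64 at bit position F % 64, counted from the left.  Worked example:
 * facility 76 (message-security-assist extension 3, which gates the key
 * wrapping support below) is word 1, bit 12, i.e. mask bit
 * 1UL << (63 - 12) = 0x0008000000000000UL, which is set in
 * 0x005e800000000000UL above.  kvm_arch_init_vm() ANDs this mask with the
 * host facility list to build the facilities visible to guests.
 */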
125 
126 unsigned long kvm_s390_fac_list_mask_size(void)
127 {
128 	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
129 	return ARRAY_SIZE(kvm_s390_fac_list_mask);
130 }
131 
132 static struct gmap_notifier gmap_notifier;
133 debug_info_t *kvm_s390_dbf;
134 
135 /* Section: not file related */
136 int kvm_arch_hardware_enable(void)
137 {
138 	/* every s390 is virtualization enabled ;-) */
139 	return 0;
140 }
141 
142 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
143 
144 /*
145  * This callback is executed during stop_machine(). All CPUs are therefore
146  * temporarily stopped. In order not to change guest behavior, we have to
147  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
148  * so a CPU won't be stopped while calculating with the epoch.
149  */
150 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
151 			  void *v)
152 {
153 	struct kvm *kvm;
154 	struct kvm_vcpu *vcpu;
155 	int i;
156 	unsigned long long *delta = v;
157 
158 	list_for_each_entry(kvm, &vm_list, vm_list) {
159 		kvm->arch.epoch -= *delta;
160 		kvm_for_each_vcpu(i, vcpu, kvm) {
161 			vcpu->arch.sie_block->epoch -= *delta;
162 			if (vcpu->arch.cputm_enabled)
163 				vcpu->arch.cputm_start += *delta;
164 		}
165 	}
166 	return NOTIFY_OK;
167 }
168 
169 static struct notifier_block kvm_clock_notifier = {
170 	.notifier_call = kvm_clock_sync,
171 };
172 
173 int kvm_arch_hardware_setup(void)
174 {
175 	gmap_notifier.notifier_call = kvm_gmap_notifier;
176 	gmap_register_ipte_notifier(&gmap_notifier);
177 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
178 				       &kvm_clock_notifier);
179 	return 0;
180 }
181 
182 void kvm_arch_hardware_unsetup(void)
183 {
184 	gmap_unregister_ipte_notifier(&gmap_notifier);
185 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
186 					 &kvm_clock_notifier);
187 }
188 
189 int kvm_arch_init(void *opaque)
190 {
191 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
192 	if (!kvm_s390_dbf)
193 		return -ENOMEM;
194 
195 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
196 		debug_unregister(kvm_s390_dbf);
197 		return -ENOMEM;
198 	}
199 
200 	/* Register floating interrupt controller interface. */
201 	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
202 }
203 
204 void kvm_arch_exit(void)
205 {
206 	debug_unregister(kvm_s390_dbf);
207 }
208 
209 /* Section: device related */
210 long kvm_arch_dev_ioctl(struct file *filp,
211 			unsigned int ioctl, unsigned long arg)
212 {
213 	if (ioctl == KVM_S390_ENABLE_SIE)
214 		return s390_enable_sie();
215 	return -EINVAL;
216 }
217 
218 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
219 {
220 	int r;
221 
222 	switch (ext) {
223 	case KVM_CAP_S390_PSW:
224 	case KVM_CAP_S390_GMAP:
225 	case KVM_CAP_SYNC_MMU:
226 #ifdef CONFIG_KVM_S390_UCONTROL
227 	case KVM_CAP_S390_UCONTROL:
228 #endif
229 	case KVM_CAP_ASYNC_PF:
230 	case KVM_CAP_SYNC_REGS:
231 	case KVM_CAP_ONE_REG:
232 	case KVM_CAP_ENABLE_CAP:
233 	case KVM_CAP_S390_CSS_SUPPORT:
234 	case KVM_CAP_IOEVENTFD:
235 	case KVM_CAP_DEVICE_CTRL:
236 	case KVM_CAP_ENABLE_CAP_VM:
237 	case KVM_CAP_S390_IRQCHIP:
238 	case KVM_CAP_VM_ATTRIBUTES:
239 	case KVM_CAP_MP_STATE:
240 	case KVM_CAP_S390_INJECT_IRQ:
241 	case KVM_CAP_S390_USER_SIGP:
242 	case KVM_CAP_S390_USER_STSI:
243 	case KVM_CAP_S390_SKEYS:
244 	case KVM_CAP_S390_IRQ_STATE:
245 		r = 1;
246 		break;
247 	case KVM_CAP_S390_MEM_OP:
248 		r = MEM_OP_MAX_SIZE;
249 		break;
250 	case KVM_CAP_NR_VCPUS:
251 	case KVM_CAP_MAX_VCPUS:
252 		r = sclp.has_esca ? KVM_S390_ESCA_CPU_SLOTS
253 				  : KVM_S390_BSCA_CPU_SLOTS;
254 		break;
255 	case KVM_CAP_NR_MEMSLOTS:
256 		r = KVM_USER_MEM_SLOTS;
257 		break;
258 	case KVM_CAP_S390_COW:
259 		r = MACHINE_HAS_ESOP;
260 		break;
261 	case KVM_CAP_S390_VECTOR_REGISTERS:
262 		r = MACHINE_HAS_VX;
263 		break;
264 	case KVM_CAP_S390_RI:
265 		r = test_facility(64);
266 		break;
267 	default:
268 		r = 0;
269 	}
270 	return r;
271 }
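
/*
 * Userspace probes these capabilities with KVM_CHECK_EXTENSION; the value
 * returned is 'r' above.  Minimal illustrative sketch ('vm_fd' is assumed
 * to be an open KVM VM file descriptor, error handling omitted):
 *
 *	int max = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *
 * A positive 'max' means KVM_S390_MEM_OP is available and gives the
 * largest supported transfer size.
 */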
272 
273 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
274 					struct kvm_memory_slot *memslot)
275 {
276 	gfn_t cur_gfn, last_gfn;
277 	unsigned long address;
278 	struct gmap *gmap = kvm->arch.gmap;
279 
280 	/* Loop over all guest pages */
281 	last_gfn = memslot->base_gfn + memslot->npages;
282 	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
283 		address = gfn_to_hva_memslot(memslot, cur_gfn);
284 
285 		if (test_and_clear_guest_dirty(gmap->mm, address))
286 			mark_page_dirty(kvm, cur_gfn);
287 		if (fatal_signal_pending(current))
288 			return;
289 		cond_resched();
290 	}
291 }
292 
293 /* Section: vm related */
294 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
295 
296 /*
297  * Get (and clear) the dirty memory log for a memory slot.
298  */
299 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
300 			       struct kvm_dirty_log *log)
301 {
302 	int r;
303 	unsigned long n;
304 	struct kvm_memslots *slots;
305 	struct kvm_memory_slot *memslot;
306 	int is_dirty = 0;
307 
308 	mutex_lock(&kvm->slots_lock);
309 
310 	r = -EINVAL;
311 	if (log->slot >= KVM_USER_MEM_SLOTS)
312 		goto out;
313 
314 	slots = kvm_memslots(kvm);
315 	memslot = id_to_memslot(slots, log->slot);
316 	r = -ENOENT;
317 	if (!memslot->dirty_bitmap)
318 		goto out;
319 
320 	kvm_s390_sync_dirty_log(kvm, memslot);
321 	r = kvm_get_dirty_log(kvm, log, &is_dirty);
322 	if (r)
323 		goto out;
324 
325 	/* Clear the dirty log */
326 	if (is_dirty) {
327 		n = kvm_dirty_bitmap_bytes(memslot);
328 		memset(memslot->dirty_bitmap, 0, n);
329 	}
330 	r = 0;
331 out:
332 	mutex_unlock(&kvm->slots_lock);
333 	return r;
334 }
335 
336 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
337 {
338 	int r;
339 
340 	if (cap->flags)
341 		return -EINVAL;
342 
343 	switch (cap->cap) {
344 	case KVM_CAP_S390_IRQCHIP:
345 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
346 		kvm->arch.use_irqchip = 1;
347 		r = 0;
348 		break;
349 	case KVM_CAP_S390_USER_SIGP:
350 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
351 		kvm->arch.user_sigp = 1;
352 		r = 0;
353 		break;
354 	case KVM_CAP_S390_VECTOR_REGISTERS:
355 		mutex_lock(&kvm->lock);
356 		if (atomic_read(&kvm->online_vcpus)) {
357 			r = -EBUSY;
358 		} else if (MACHINE_HAS_VX) {
359 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
360 			set_kvm_facility(kvm->arch.model.fac_list, 129);
361 			r = 0;
362 		} else
363 			r = -EINVAL;
364 		mutex_unlock(&kvm->lock);
365 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
366 			 r ? "(not available)" : "(success)");
367 		break;
368 	case KVM_CAP_S390_RI:
369 		r = -EINVAL;
370 		mutex_lock(&kvm->lock);
371 		if (atomic_read(&kvm->online_vcpus)) {
372 			r = -EBUSY;
373 		} else if (test_facility(64)) {
374 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
375 			set_kvm_facility(kvm->arch.model.fac_list, 64);
376 			r = 0;
377 		}
378 		mutex_unlock(&kvm->lock);
379 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
380 			 r ? "(not available)" : "(success)");
381 		break;
382 	case KVM_CAP_S390_USER_STSI:
383 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
384 		kvm->arch.user_stsi = 1;
385 		r = 0;
386 		break;
387 	default:
388 		r = -EINVAL;
389 		break;
390 	}
391 	return r;
392 }
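
/*
 * The capabilities handled above are switched on per VM with
 * KVM_ENABLE_CAP.  Illustrative sketch ('vm_fd' assumed, cap->flags must
 * be zero):
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *
 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 */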
393 
394 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
395 {
396 	int ret;
397 
398 	switch (attr->attr) {
399 	case KVM_S390_VM_MEM_LIMIT_SIZE:
400 		ret = 0;
401 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
402 			 kvm->arch.mem_limit);
403 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
404 			ret = -EFAULT;
405 		break;
406 	default:
407 		ret = -ENXIO;
408 		break;
409 	}
410 	return ret;
411 }
412 
413 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
414 {
415 	int ret;
416 	unsigned int idx;
417 	switch (attr->attr) {
418 	case KVM_S390_VM_MEM_ENABLE_CMMA:
419 		/* enable CMMA only for z10 and later (EDAT_1) */
420 		ret = -EINVAL;
421 		if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)
422 			break;
423 
424 		ret = -EBUSY;
425 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
426 		mutex_lock(&kvm->lock);
427 		if (atomic_read(&kvm->online_vcpus) == 0) {
428 			kvm->arch.use_cmma = 1;
429 			ret = 0;
430 		}
431 		mutex_unlock(&kvm->lock);
432 		break;
433 	case KVM_S390_VM_MEM_CLR_CMMA:
434 		ret = -EINVAL;
435 		if (!kvm->arch.use_cmma)
436 			break;
437 
438 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
439 		mutex_lock(&kvm->lock);
440 		idx = srcu_read_lock(&kvm->srcu);
441 		s390_reset_cmma(kvm->arch.gmap->mm);
442 		srcu_read_unlock(&kvm->srcu, idx);
443 		mutex_unlock(&kvm->lock);
444 		ret = 0;
445 		break;
446 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
447 		unsigned long new_limit;
448 
449 		if (kvm_is_ucontrol(kvm))
450 			return -EINVAL;
451 
452 		if (get_user(new_limit, (u64 __user *)attr->addr))
453 			return -EFAULT;
454 
455 		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
456 		    new_limit > kvm->arch.mem_limit)
457 			return -E2BIG;
458 
459 		if (!new_limit)
460 			return -EINVAL;
461 
462 		/* gmap_alloc takes last usable address */
463 		if (new_limit != KVM_S390_NO_MEM_LIMIT)
464 			new_limit -= 1;
465 
466 		ret = -EBUSY;
467 		mutex_lock(&kvm->lock);
468 		if (atomic_read(&kvm->online_vcpus) == 0) {
469 			/* gmap_alloc will round the limit up */
470 			struct gmap *new = gmap_alloc(current->mm, new_limit);
471 
472 			if (!new) {
473 				ret = -ENOMEM;
474 			} else {
475 				gmap_free(kvm->arch.gmap);
476 				new->private = kvm;
477 				kvm->arch.gmap = new;
478 				ret = 0;
479 			}
480 		}
481 		mutex_unlock(&kvm->lock);
482 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
483 		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
484 			 (void *) kvm->arch.gmap->asce);
485 		break;
486 	}
487 	default:
488 		ret = -ENXIO;
489 		break;
490 	}
491 	return ret;
492 }
493 
494 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
495 
496 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
497 {
498 	struct kvm_vcpu *vcpu;
499 	int i;
500 
501 	if (!test_kvm_facility(kvm, 76))
502 		return -EINVAL;
503 
504 	mutex_lock(&kvm->lock);
505 	switch (attr->attr) {
506 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
507 		get_random_bytes(
508 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
509 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
510 		kvm->arch.crypto.aes_kw = 1;
511 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
512 		break;
513 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
514 		get_random_bytes(
515 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
516 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
517 		kvm->arch.crypto.dea_kw = 1;
518 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
519 		break;
520 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
521 		kvm->arch.crypto.aes_kw = 0;
522 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
523 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
524 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
525 		break;
526 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
527 		kvm->arch.crypto.dea_kw = 0;
528 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
529 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
530 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
531 		break;
532 	default:
533 		mutex_unlock(&kvm->lock);
534 		return -ENXIO;
535 	}
536 
537 	kvm_for_each_vcpu(i, vcpu, kvm) {
538 		kvm_s390_vcpu_crypto_setup(vcpu);
539 		exit_sie(vcpu);
540 	}
541 	mutex_unlock(&kvm->lock);
542 	return 0;
543 }
544 
545 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
546 {
547 	u8 gtod_high;
548 
549 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
550 					   sizeof(gtod_high)))
551 		return -EFAULT;
552 
553 	if (gtod_high != 0)
554 		return -EINVAL;
555 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
556 
557 	return 0;
558 }
559 
560 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
561 {
562 	u64 gtod;
563 
564 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
565 		return -EFAULT;
566 
567 	kvm_s390_set_tod_clock(kvm, gtod);
568 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
569 	return 0;
570 }
571 
572 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
573 {
574 	int ret;
575 
576 	if (attr->flags)
577 		return -EINVAL;
578 
579 	switch (attr->attr) {
580 	case KVM_S390_VM_TOD_HIGH:
581 		ret = kvm_s390_set_tod_high(kvm, attr);
582 		break;
583 	case KVM_S390_VM_TOD_LOW:
584 		ret = kvm_s390_set_tod_low(kvm, attr);
585 		break;
586 	default:
587 		ret = -ENXIO;
588 		break;
589 	}
590 	return ret;
591 }
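
/*
 * Userspace reaches the TOD handlers above through KVM_SET_DEVICE_ATTR on
 * the VM fd.  Illustrative sketch setting the low (base) part of the guest
 * TOD clock ('vm_fd' assumed):
 *
 *	__u64 tod = 0;
 *	struct kvm_device_attr attr = {
 *		.group	= KVM_S390_VM_TOD,
 *		.attr	= KVM_S390_VM_TOD_LOW,
 *		.addr	= (__u64) &tod,
 *	};
 *
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */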
592 
593 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
594 {
595 	u8 gtod_high = 0;
596 
597 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
598 					 sizeof(gtod_high)))
599 		return -EFAULT;
600 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
601 
602 	return 0;
603 }
604 
605 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
606 {
607 	u64 gtod;
608 
609 	gtod = kvm_s390_get_tod_clock_fast(kvm);
610 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
611 		return -EFAULT;
612 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
613 
614 	return 0;
615 }
616 
617 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
618 {
619 	int ret;
620 
621 	if (attr->flags)
622 		return -EINVAL;
623 
624 	switch (attr->attr) {
625 	case KVM_S390_VM_TOD_HIGH:
626 		ret = kvm_s390_get_tod_high(kvm, attr);
627 		break;
628 	case KVM_S390_VM_TOD_LOW:
629 		ret = kvm_s390_get_tod_low(kvm, attr);
630 		break;
631 	default:
632 		ret = -ENXIO;
633 		break;
634 	}
635 	return ret;
636 }
637 
638 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
639 {
640 	struct kvm_s390_vm_cpu_processor *proc;
641 	int ret = 0;
642 
643 	mutex_lock(&kvm->lock);
644 	if (atomic_read(&kvm->online_vcpus)) {
645 		ret = -EBUSY;
646 		goto out;
647 	}
648 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
649 	if (!proc) {
650 		ret = -ENOMEM;
651 		goto out;
652 	}
653 	if (!copy_from_user(proc, (void __user *)attr->addr,
654 			    sizeof(*proc))) {
655 		memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
656 		       sizeof(struct cpuid));
657 		kvm->arch.model.ibc = proc->ibc;
658 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
659 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
660 	} else
661 		ret = -EFAULT;
662 	kfree(proc);
663 out:
664 	mutex_unlock(&kvm->lock);
665 	return ret;
666 }
667 
668 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
669 {
670 	int ret = -ENXIO;
671 
672 	switch (attr->attr) {
673 	case KVM_S390_VM_CPU_PROCESSOR:
674 		ret = kvm_s390_set_processor(kvm, attr);
675 		break;
676 	}
677 	return ret;
678 }
679 
680 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
681 {
682 	struct kvm_s390_vm_cpu_processor *proc;
683 	int ret = 0;
684 
685 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
686 	if (!proc) {
687 		ret = -ENOMEM;
688 		goto out;
689 	}
690 	memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
691 	proc->ibc = kvm->arch.model.ibc;
692 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
693 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
694 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
695 		ret = -EFAULT;
696 	kfree(proc);
697 out:
698 	return ret;
699 }
700 
701 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
702 {
703 	struct kvm_s390_vm_cpu_machine *mach;
704 	int ret = 0;
705 
706 	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
707 	if (!mach) {
708 		ret = -ENOMEM;
709 		goto out;
710 	}
711 	get_cpu_id((struct cpuid *) &mach->cpuid);
712 	mach->ibc = sclp.ibc;
713 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
714 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
715 	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
716 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
717 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
718 		ret = -EFAULT;
719 	kfree(mach);
720 out:
721 	return ret;
722 }
723 
724 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
725 {
726 	int ret = -ENXIO;
727 
728 	switch (attr->attr) {
729 	case KVM_S390_VM_CPU_PROCESSOR:
730 		ret = kvm_s390_get_processor(kvm, attr);
731 		break;
732 	case KVM_S390_VM_CPU_MACHINE:
733 		ret = kvm_s390_get_machine(kvm, attr);
734 		break;
735 	}
736 	return ret;
737 }
738 
739 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
740 {
741 	int ret;
742 
743 	switch (attr->group) {
744 	case KVM_S390_VM_MEM_CTRL:
745 		ret = kvm_s390_set_mem_control(kvm, attr);
746 		break;
747 	case KVM_S390_VM_TOD:
748 		ret = kvm_s390_set_tod(kvm, attr);
749 		break;
750 	case KVM_S390_VM_CPU_MODEL:
751 		ret = kvm_s390_set_cpu_model(kvm, attr);
752 		break;
753 	case KVM_S390_VM_CRYPTO:
754 		ret = kvm_s390_vm_set_crypto(kvm, attr);
755 		break;
756 	default:
757 		ret = -ENXIO;
758 		break;
759 	}
760 
761 	return ret;
762 }
763 
764 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
765 {
766 	int ret;
767 
768 	switch (attr->group) {
769 	case KVM_S390_VM_MEM_CTRL:
770 		ret = kvm_s390_get_mem_control(kvm, attr);
771 		break;
772 	case KVM_S390_VM_TOD:
773 		ret = kvm_s390_get_tod(kvm, attr);
774 		break;
775 	case KVM_S390_VM_CPU_MODEL:
776 		ret = kvm_s390_get_cpu_model(kvm, attr);
777 		break;
778 	default:
779 		ret = -ENXIO;
780 		break;
781 	}
782 
783 	return ret;
784 }
785 
786 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
787 {
788 	int ret;
789 
790 	switch (attr->group) {
791 	case KVM_S390_VM_MEM_CTRL:
792 		switch (attr->attr) {
793 		case KVM_S390_VM_MEM_ENABLE_CMMA:
794 		case KVM_S390_VM_MEM_CLR_CMMA:
795 		case KVM_S390_VM_MEM_LIMIT_SIZE:
796 			ret = 0;
797 			break;
798 		default:
799 			ret = -ENXIO;
800 			break;
801 		}
802 		break;
803 	case KVM_S390_VM_TOD:
804 		switch (attr->attr) {
805 		case KVM_S390_VM_TOD_LOW:
806 		case KVM_S390_VM_TOD_HIGH:
807 			ret = 0;
808 			break;
809 		default:
810 			ret = -ENXIO;
811 			break;
812 		}
813 		break;
814 	case KVM_S390_VM_CPU_MODEL:
815 		switch (attr->attr) {
816 		case KVM_S390_VM_CPU_PROCESSOR:
817 		case KVM_S390_VM_CPU_MACHINE:
818 			ret = 0;
819 			break;
820 		default:
821 			ret = -ENXIO;
822 			break;
823 		}
824 		break;
825 	case KVM_S390_VM_CRYPTO:
826 		switch (attr->attr) {
827 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
828 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
829 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
830 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
831 			ret = 0;
832 			break;
833 		default:
834 			ret = -ENXIO;
835 			break;
836 		}
837 		break;
838 	default:
839 		ret = -ENXIO;
840 		break;
841 	}
842 
843 	return ret;
844 }
845 
846 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
847 {
848 	uint8_t *keys;
849 	uint64_t hva;
850 	unsigned long curkey;
851 	int i, r = 0;
852 
853 	if (args->flags != 0)
854 		return -EINVAL;
855 
856 	/* Is this guest using storage keys? */
857 	if (!mm_use_skey(current->mm))
858 		return KVM_S390_GET_SKEYS_NONE;
859 
860 	/* Enforce sane limit on memory allocation */
861 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
862 		return -EINVAL;
863 
864 	keys = kmalloc_array(args->count, sizeof(uint8_t),
865 			     GFP_KERNEL | __GFP_NOWARN);
866 	if (!keys)
867 		keys = vmalloc(sizeof(uint8_t) * args->count);
868 	if (!keys)
869 		return -ENOMEM;
870 
871 	for (i = 0; i < args->count; i++) {
872 		hva = gfn_to_hva(kvm, args->start_gfn + i);
873 		if (kvm_is_error_hva(hva)) {
874 			r = -EFAULT;
875 			goto out;
876 		}
877 
878 		curkey = get_guest_storage_key(current->mm, hva);
879 		if (IS_ERR_VALUE(curkey)) {
880 			r = curkey;
881 			goto out;
882 		}
883 		keys[i] = curkey;
884 	}
885 
886 	r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
887 			 sizeof(uint8_t) * args->count);
888 	if (r)
889 		r = -EFAULT;
890 out:
891 	kvfree(keys);
892 	return r;
893 }
894 
895 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
896 {
897 	uint8_t *keys;
898 	uint64_t hva;
899 	int i, r = 0;
900 
901 	if (args->flags != 0)
902 		return -EINVAL;
903 
904 	/* Enforce sane limit on memory allocation */
905 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
906 		return -EINVAL;
907 
908 	keys = kmalloc_array(args->count, sizeof(uint8_t),
909 			     GFP_KERNEL | __GFP_NOWARN);
910 	if (!keys)
911 		keys = vmalloc(sizeof(uint8_t) * args->count);
912 	if (!keys)
913 		return -ENOMEM;
914 
915 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
916 			   sizeof(uint8_t) * args->count);
917 	if (r) {
918 		r = -EFAULT;
919 		goto out;
920 	}
921 
922 	/* Enable storage key handling for the guest */
923 	r = s390_enable_skey();
924 	if (r)
925 		goto out;
926 
927 	for (i = 0; i < args->count; i++) {
928 		hva = gfn_to_hva(kvm, args->start_gfn + i);
929 		if (kvm_is_error_hva(hva)) {
930 			r = -EFAULT;
931 			goto out;
932 		}
933 
934 		/* Lowest order bit is reserved */
935 		if (keys[i] & 0x01) {
936 			r = -EINVAL;
937 			goto out;
938 		}
939 
940 		r = set_guest_storage_key(current->mm, hva,
941 					  (unsigned long)keys[i], 0);
942 		if (r)
943 			goto out;
944 	}
945 out:
946 	kvfree(keys);
947 	return r;
948 }
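
/*
 * Both storage key ioctls take a struct kvm_s390_skeys that describes a
 * guest frame range and a userspace buffer holding one key byte per page.
 * Illustrative sketch reading the keys of the first 256 guest pages
 * ('vm_fd' assumed):
 *
 *	uint8_t buf[256];
 *	struct kvm_s390_skeys args = {
 *		.start_gfn	= 0,
 *		.count		= 256,
 *		.skeydata_addr	= (__u64) buf,
 *	};
 *
 *	ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 */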
949 
950 long kvm_arch_vm_ioctl(struct file *filp,
951 		       unsigned int ioctl, unsigned long arg)
952 {
953 	struct kvm *kvm = filp->private_data;
954 	void __user *argp = (void __user *)arg;
955 	struct kvm_device_attr attr;
956 	int r;
957 
958 	switch (ioctl) {
959 	case KVM_S390_INTERRUPT: {
960 		struct kvm_s390_interrupt s390int;
961 
962 		r = -EFAULT;
963 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
964 			break;
965 		r = kvm_s390_inject_vm(kvm, &s390int);
966 		break;
967 	}
968 	case KVM_ENABLE_CAP: {
969 		struct kvm_enable_cap cap;
970 		r = -EFAULT;
971 		if (copy_from_user(&cap, argp, sizeof(cap)))
972 			break;
973 		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
974 		break;
975 	}
976 	case KVM_CREATE_IRQCHIP: {
977 		struct kvm_irq_routing_entry routing;
978 
979 		r = -EINVAL;
980 		if (kvm->arch.use_irqchip) {
981 			/* Set up dummy routing. */
982 			memset(&routing, 0, sizeof(routing));
983 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
984 		}
985 		break;
986 	}
987 	case KVM_SET_DEVICE_ATTR: {
988 		r = -EFAULT;
989 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
990 			break;
991 		r = kvm_s390_vm_set_attr(kvm, &attr);
992 		break;
993 	}
994 	case KVM_GET_DEVICE_ATTR: {
995 		r = -EFAULT;
996 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
997 			break;
998 		r = kvm_s390_vm_get_attr(kvm, &attr);
999 		break;
1000 	}
1001 	case KVM_HAS_DEVICE_ATTR: {
1002 		r = -EFAULT;
1003 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1004 			break;
1005 		r = kvm_s390_vm_has_attr(kvm, &attr);
1006 		break;
1007 	}
1008 	case KVM_S390_GET_SKEYS: {
1009 		struct kvm_s390_skeys args;
1010 
1011 		r = -EFAULT;
1012 		if (copy_from_user(&args, argp,
1013 				   sizeof(struct kvm_s390_skeys)))
1014 			break;
1015 		r = kvm_s390_get_skeys(kvm, &args);
1016 		break;
1017 	}
1018 	case KVM_S390_SET_SKEYS: {
1019 		struct kvm_s390_skeys args;
1020 
1021 		r = -EFAULT;
1022 		if (copy_from_user(&args, argp,
1023 				   sizeof(struct kvm_s390_skeys)))
1024 			break;
1025 		r = kvm_s390_set_skeys(kvm, &args);
1026 		break;
1027 	}
1028 	default:
1029 		r = -ENOTTY;
1030 	}
1031 
1032 	return r;
1033 }
1034 
1035 static int kvm_s390_query_ap_config(u8 *config)
1036 {
1037 	u32 fcn_code = 0x04000000UL;
1038 	u32 cc = 0;
1039 
1040 	memset(config, 0, 128);
1041 	asm volatile(
1042 		"lgr 0,%1\n"
1043 		"lgr 2,%2\n"
1044 		".long 0xb2af0000\n"		/* PQAP(QCI) */
1045 		"0: ipm %0\n"
1046 		"srl %0,28\n"
1047 		"1:\n"
1048 		EX_TABLE(0b, 1b)
1049 		: "+r" (cc)
1050 		: "r" (fcn_code), "r" (config)
1051 		: "cc", "0", "2", "memory"
1052 	);
1053 
1054 	return cc;
1055 }
1056 
1057 static int kvm_s390_apxa_installed(void)
1058 {
1059 	u8 config[128];
1060 	int cc;
1061 
1062 	if (test_facility(12)) {
1063 		cc = kvm_s390_query_ap_config(config);
1064 
1065 		if (cc)
1066 			pr_err("PQAP(QCI) failed with cc=%d", cc);
1067 		else
1068 			return config[0] & 0x40;
1069 	}
1070 
1071 	return 0;
1072 }
1073 
1074 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1075 {
1076 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1077 
1078 	if (kvm_s390_apxa_installed())
1079 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1080 	else
1081 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1082 }
1083 
1084 static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
1085 {
1086 	get_cpu_id(cpu_id);
1087 	cpu_id->version = 0xff;
1088 }
1089 
1090 static void kvm_s390_crypto_init(struct kvm *kvm)
1091 {
1092 	if (!test_kvm_facility(kvm, 76))
1093 		return;
1094 
1095 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1096 	kvm_s390_set_crycb_format(kvm);
1097 
1098 	/* Enable AES/DEA protected key functions by default */
1099 	kvm->arch.crypto.aes_kw = 1;
1100 	kvm->arch.crypto.dea_kw = 1;
1101 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1102 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1103 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1104 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1105 }
1106 
1107 static void sca_dispose(struct kvm *kvm)
1108 {
1109 	if (kvm->arch.use_esca)
1110 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1111 	else
1112 		free_page((unsigned long)(kvm->arch.sca));
1113 	kvm->arch.sca = NULL;
1114 }
1115 
1116 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1117 {
1118 	int i, rc;
1119 	char debug_name[16];
1120 	static unsigned long sca_offset;
1121 
1122 	rc = -EINVAL;
1123 #ifdef CONFIG_KVM_S390_UCONTROL
1124 	if (type & ~KVM_VM_S390_UCONTROL)
1125 		goto out_err;
1126 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1127 		goto out_err;
1128 #else
1129 	if (type)
1130 		goto out_err;
1131 #endif
1132 
1133 	rc = s390_enable_sie();
1134 	if (rc)
1135 		goto out_err;
1136 
1137 	rc = -ENOMEM;
1138 
1139 	kvm->arch.use_esca = 0; /* start with basic SCA */
1140 	rwlock_init(&kvm->arch.sca_lock);
1141 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(GFP_KERNEL);
1142 	if (!kvm->arch.sca)
1143 		goto out_err;
1144 	spin_lock(&kvm_lock);
1145 	sca_offset += 16;
1146 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1147 		sca_offset = 0;
1148 	kvm->arch.sca = (struct bsca_block *)
1149 			((char *) kvm->arch.sca + sca_offset);
1150 	spin_unlock(&kvm_lock);
1151 
1152 	sprintf(debug_name, "kvm-%u", current->pid);
1153 
1154 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1155 	if (!kvm->arch.dbf)
1156 		goto out_err;
1157 
1158 	kvm->arch.sie_page2 =
1159 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1160 	if (!kvm->arch.sie_page2)
1161 		goto out_err;
1162 
1163 	/* Populate the facility mask initially. */
1164 	memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1165 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1166 	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1167 		if (i < kvm_s390_fac_list_mask_size())
1168 			kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1169 		else
1170 			kvm->arch.model.fac_mask[i] = 0UL;
1171 	}
1172 
1173 	/* Populate the facility list initially. */
1174 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1175 	memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1176 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1177 
1178 	kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
1179 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1180 
1181 	kvm_s390_crypto_init(kvm);
1182 
1183 	spin_lock_init(&kvm->arch.float_int.lock);
1184 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
1185 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1186 	init_waitqueue_head(&kvm->arch.ipte_wq);
1187 	mutex_init(&kvm->arch.ipte_mutex);
1188 
1189 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1190 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
1191 
1192 	if (type & KVM_VM_S390_UCONTROL) {
1193 		kvm->arch.gmap = NULL;
1194 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1195 	} else {
1196 		if (sclp.hamax == U64_MAX)
1197 			kvm->arch.mem_limit = TASK_MAX_SIZE;
1198 		else
1199 			kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
1200 						    sclp.hamax + 1);
1201 		kvm->arch.gmap = gmap_alloc(current->mm, kvm->arch.mem_limit - 1);
1202 		if (!kvm->arch.gmap)
1203 			goto out_err;
1204 		kvm->arch.gmap->private = kvm;
1205 		kvm->arch.gmap->pfault_enabled = 0;
1206 	}
1207 
1208 	kvm->arch.css_support = 0;
1209 	kvm->arch.use_irqchip = 0;
1210 	kvm->arch.epoch = 0;
1211 
1212 	spin_lock_init(&kvm->arch.start_stop_lock);
1213 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1214 
1215 	return 0;
1216 out_err:
1217 	free_page((unsigned long)kvm->arch.sie_page2);
1218 	debug_unregister(kvm->arch.dbf);
1219 	sca_dispose(kvm);
1220 	KVM_EVENT(3, "creation of vm failed: %d", rc);
1221 	return rc;
1222 }
1223 
1224 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1225 {
1226 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1227 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1228 	kvm_s390_clear_local_irqs(vcpu);
1229 	kvm_clear_async_pf_completion_queue(vcpu);
1230 	if (!kvm_is_ucontrol(vcpu->kvm))
1231 		sca_del_vcpu(vcpu);
1232 
1233 	if (kvm_is_ucontrol(vcpu->kvm))
1234 		gmap_free(vcpu->arch.gmap);
1235 
1236 	if (vcpu->kvm->arch.use_cmma)
1237 		kvm_s390_vcpu_unsetup_cmma(vcpu);
1238 	free_page((unsigned long)(vcpu->arch.sie_block));
1239 
1240 	kvm_vcpu_uninit(vcpu);
1241 	kmem_cache_free(kvm_vcpu_cache, vcpu);
1242 }
1243 
1244 static void kvm_free_vcpus(struct kvm *kvm)
1245 {
1246 	unsigned int i;
1247 	struct kvm_vcpu *vcpu;
1248 
1249 	kvm_for_each_vcpu(i, vcpu, kvm)
1250 		kvm_arch_vcpu_destroy(vcpu);
1251 
1252 	mutex_lock(&kvm->lock);
1253 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1254 		kvm->vcpus[i] = NULL;
1255 
1256 	atomic_set(&kvm->online_vcpus, 0);
1257 	mutex_unlock(&kvm->lock);
1258 }
1259 
1260 void kvm_arch_destroy_vm(struct kvm *kvm)
1261 {
1262 	kvm_free_vcpus(kvm);
1263 	sca_dispose(kvm);
1264 	debug_unregister(kvm->arch.dbf);
1265 	free_page((unsigned long)kvm->arch.sie_page2);
1266 	if (!kvm_is_ucontrol(kvm))
1267 		gmap_free(kvm->arch.gmap);
1268 	kvm_s390_destroy_adapters(kvm);
1269 	kvm_s390_clear_float_irqs(kvm);
1270 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1271 }
1272 
1273 /* Section: vcpu related */
1274 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1275 {
1276 	vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
1277 	if (!vcpu->arch.gmap)
1278 		return -ENOMEM;
1279 	vcpu->arch.gmap->private = vcpu->kvm;
1280 
1281 	return 0;
1282 }
1283 
1284 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1285 {
1286 	read_lock(&vcpu->kvm->arch.sca_lock);
1287 	if (vcpu->kvm->arch.use_esca) {
1288 		struct esca_block *sca = vcpu->kvm->arch.sca;
1289 
1290 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1291 		sca->cpu[vcpu->vcpu_id].sda = 0;
1292 	} else {
1293 		struct bsca_block *sca = vcpu->kvm->arch.sca;
1294 
1295 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1296 		sca->cpu[vcpu->vcpu_id].sda = 0;
1297 	}
1298 	read_unlock(&vcpu->kvm->arch.sca_lock);
1299 }
1300 
1301 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1302 {
1303 	read_lock(&vcpu->kvm->arch.sca_lock);
1304 	if (vcpu->kvm->arch.use_esca) {
1305 		struct esca_block *sca = vcpu->kvm->arch.sca;
1306 
1307 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1308 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1309 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1310 		vcpu->arch.sie_block->ecb2 |= 0x04U;
1311 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1312 	} else {
1313 		struct bsca_block *sca = vcpu->kvm->arch.sca;
1314 
1315 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1316 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1317 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1318 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1319 	}
1320 	read_unlock(&vcpu->kvm->arch.sca_lock);
1321 }
1322 
1323 /* Basic SCA to Extended SCA data copy routines */
1324 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1325 {
1326 	d->sda = s->sda;
1327 	d->sigp_ctrl.c = s->sigp_ctrl.c;
1328 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1329 }
1330 
1331 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1332 {
1333 	int i;
1334 
1335 	d->ipte_control = s->ipte_control;
1336 	d->mcn[0] = s->mcn;
1337 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1338 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1339 }
1340 
1341 static int sca_switch_to_extended(struct kvm *kvm)
1342 {
1343 	struct bsca_block *old_sca = kvm->arch.sca;
1344 	struct esca_block *new_sca;
1345 	struct kvm_vcpu *vcpu;
1346 	unsigned int vcpu_idx;
1347 	u32 scaol, scaoh;
1348 
1349 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1350 	if (!new_sca)
1351 		return -ENOMEM;
1352 
1353 	scaoh = (u32)((u64)(new_sca) >> 32);
1354 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
1355 
1356 	kvm_s390_vcpu_block_all(kvm);
1357 	write_lock(&kvm->arch.sca_lock);
1358 
1359 	sca_copy_b_to_e(new_sca, old_sca);
1360 
1361 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1362 		vcpu->arch.sie_block->scaoh = scaoh;
1363 		vcpu->arch.sie_block->scaol = scaol;
1364 		vcpu->arch.sie_block->ecb2 |= 0x04U;
1365 	}
1366 	kvm->arch.sca = new_sca;
1367 	kvm->arch.use_esca = 1;
1368 
1369 	write_unlock(&kvm->arch.sca_lock);
1370 	kvm_s390_vcpu_unblock_all(kvm);
1371 
1372 	free_page((unsigned long)old_sca);
1373 
1374 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
1375 		 old_sca, kvm->arch.sca);
1376 	return 0;
1377 }
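
/*
 * The switch above happens lazily, only once a vcpu id beyond the 64
 * basic SCA slots is requested (see sca_can_add_vcpu() below): all vcpus
 * are blocked and kicked out of SIE, the entries are copied, every SIE
 * block is pointed at the new origin and the 0x04 bit in ecb2 marks the
 * extended SCA format.
 */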
1378 
1379 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1380 {
1381 	int rc;
1382 
1383 	if (id < KVM_S390_BSCA_CPU_SLOTS)
1384 		return true;
1385 	if (!sclp.has_esca)
1386 		return false;
1387 
1388 	mutex_lock(&kvm->lock);
1389 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1390 	mutex_unlock(&kvm->lock);
1391 
1392 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1393 }
1394 
1395 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1396 {
1397 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1398 	kvm_clear_async_pf_completion_queue(vcpu);
1399 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1400 				    KVM_SYNC_GPRS |
1401 				    KVM_SYNC_ACRS |
1402 				    KVM_SYNC_CRS |
1403 				    KVM_SYNC_ARCH0 |
1404 				    KVM_SYNC_PFAULT;
1405 	if (test_kvm_facility(vcpu->kvm, 64))
1406 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1407 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
1408 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1409 	 */
1410 	if (MACHINE_HAS_VX)
1411 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1412 	else
1413 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
1414 
1415 	if (kvm_is_ucontrol(vcpu->kvm))
1416 		return __kvm_ucontrol_vcpu_init(vcpu);
1417 
1418 	return 0;
1419 }
1420 
1421 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1422 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1423 {
1424 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
1425 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1426 	vcpu->arch.cputm_start = get_tod_clock_fast();
1427 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1428 }
1429 
1430 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1431 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1432 {
1433 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
1434 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1435 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1436 	vcpu->arch.cputm_start = 0;
1437 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1438 }
1439 
1440 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1441 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1442 {
1443 	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
1444 	vcpu->arch.cputm_enabled = true;
1445 	__start_cpu_timer_accounting(vcpu);
1446 }
1447 
1448 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1449 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1450 {
1451 	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
1452 	__stop_cpu_timer_accounting(vcpu);
1453 	vcpu->arch.cputm_enabled = false;
1454 }
1455 
1456 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1457 {
1458 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1459 	__enable_cpu_timer_accounting(vcpu);
1460 	preempt_enable();
1461 }
1462 
1463 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1464 {
1465 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1466 	__disable_cpu_timer_accounting(vcpu);
1467 	preempt_enable();
1468 }
1469 
1470 /* set the cpu timer - may only be called from the VCPU thread itself */
1471 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
1472 {
1473 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1474 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1475 	if (vcpu->arch.cputm_enabled)
1476 		vcpu->arch.cputm_start = get_tod_clock_fast();
1477 	vcpu->arch.sie_block->cputm = cputm;
1478 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1479 	preempt_enable();
1480 }
1481 
1482 /* update and get the cpu timer - can also be called from other VCPU threads */
1483 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
1484 {
1485 	unsigned int seq;
1486 	__u64 value;
1487 
1488 	if (unlikely(!vcpu->arch.cputm_enabled))
1489 		return vcpu->arch.sie_block->cputm;
1490 
1491 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1492 	do {
1493 		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
1494 		/*
1495 		 * If the writer would ever execute a read in the critical
1496 		 * section, e.g. in irq context, we have a deadlock.
1497 		 */
1498 		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
1499 		value = vcpu->arch.sie_block->cputm;
1500 		/* if cputm_start is 0, accounting is being started/stopped */
1501 		if (likely(vcpu->arch.cputm_start))
1502 			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1503 	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
1504 	preempt_enable();
1505 	return value;
1506 }
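
/*
 * CPU timer accounting outside of SIE works lazily:
 * __start_cpu_timer_accounting() only records the current TOD in
 * cputm_start and the elapsed time is folded into the SIE block's cputm
 * when accounting stops.  A reader therefore subtracts the TOD delta
 * since cputm_start itself; the seqcount guards it against a concurrent
 * start/stop or kvm_s390_set_cpu_timer().
 */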
1507 
1508 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1509 {
1510 	/* Save host register state */
1511 	save_fpu_regs();
1512 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
1513 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
1514 
1515 	if (MACHINE_HAS_VX)
1516 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
1517 	else
1518 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
1519 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
1520 	if (test_fp_ctl(current->thread.fpu.fpc))
1521 		/* User space provided an invalid FPC, let's clear it */
1522 		current->thread.fpu.fpc = 0;
1523 
1524 	save_access_regs(vcpu->arch.host_acrs);
1525 	restore_access_regs(vcpu->run->s.regs.acrs);
1526 	gmap_enable(vcpu->arch.gmap);
1527 	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1528 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1529 		__start_cpu_timer_accounting(vcpu);
1530 	vcpu->cpu = cpu;
1531 }
1532 
1533 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1534 {
1535 	vcpu->cpu = -1;
1536 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1537 		__stop_cpu_timer_accounting(vcpu);
1538 	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1539 	gmap_disable(vcpu->arch.gmap);
1540 
1541 	/* Save guest register state */
1542 	save_fpu_regs();
1543 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
1544 
1545 	/* Restore host register state */
1546 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
1547 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
1548 
1549 	save_access_regs(vcpu->run->s.regs.acrs);
1550 	restore_access_regs(vcpu->arch.host_acrs);
1551 }
1552 
1553 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1554 {
1555 	/* this equals initial cpu reset in the PoP, but we don't switch to ESA */
1556 	vcpu->arch.sie_block->gpsw.mask = 0UL;
1557 	vcpu->arch.sie_block->gpsw.addr = 0UL;
1558 	kvm_s390_set_prefix(vcpu, 0);
1559 	kvm_s390_set_cpu_timer(vcpu, 0);
1560 	vcpu->arch.sie_block->ckc       = 0UL;
1561 	vcpu->arch.sie_block->todpr     = 0;
1562 	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1563 	vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
1564 	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1565 	/* make sure the new fpc will be lazily loaded */
1566 	save_fpu_regs();
1567 	current->thread.fpu.fpc = 0;
1568 	vcpu->arch.sie_block->gbea = 1;
1569 	vcpu->arch.sie_block->pp = 0;
1570 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1571 	kvm_clear_async_pf_completion_queue(vcpu);
1572 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1573 		kvm_s390_vcpu_stop(vcpu);
1574 	kvm_s390_clear_local_irqs(vcpu);
1575 }
1576 
1577 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1578 {
1579 	mutex_lock(&vcpu->kvm->lock);
1580 	preempt_disable();
1581 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1582 	preempt_enable();
1583 	mutex_unlock(&vcpu->kvm->lock);
1584 	if (!kvm_is_ucontrol(vcpu->kvm)) {
1585 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1586 		sca_add_vcpu(vcpu);
1587 	}
1588 
1589 }
1590 
1591 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1592 {
1593 	if (!test_kvm_facility(vcpu->kvm, 76))
1594 		return;
1595 
1596 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1597 
1598 	if (vcpu->kvm->arch.crypto.aes_kw)
1599 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1600 	if (vcpu->kvm->arch.crypto.dea_kw)
1601 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1602 
1603 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1604 }
1605 
1606 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1607 {
1608 	free_page(vcpu->arch.sie_block->cbrlo);
1609 	vcpu->arch.sie_block->cbrlo = 0;
1610 }
1611 
1612 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1613 {
1614 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1615 	if (!vcpu->arch.sie_block->cbrlo)
1616 		return -ENOMEM;
1617 
1618 	vcpu->arch.sie_block->ecb2 |= 0x80;
1619 	vcpu->arch.sie_block->ecb2 &= ~0x08;
1620 	return 0;
1621 }
1622 
1623 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1624 {
1625 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1626 
1627 	vcpu->arch.cpu_id = model->cpu_id;
1628 	vcpu->arch.sie_block->ibc = model->ibc;
1629 	if (test_kvm_facility(vcpu->kvm, 7))
1630 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
1631 }
1632 
1633 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1634 {
1635 	int rc = 0;
1636 
1637 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1638 						    CPUSTAT_SM |
1639 						    CPUSTAT_STOPPED);
1640 
1641 	if (test_kvm_facility(vcpu->kvm, 78))
1642 		atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1643 	else if (test_kvm_facility(vcpu->kvm, 8))
1644 		atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1645 
1646 	kvm_s390_vcpu_setup_model(vcpu);
1647 
1648 	vcpu->arch.sie_block->ecb   = 6;
1649 	if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
1650 		vcpu->arch.sie_block->ecb |= 0x10;
1651 
1652 	vcpu->arch.sie_block->ecb2  = 8;
1653 	vcpu->arch.sie_block->eca   = 0xC1002000U;
1654 	if (sclp.has_siif)
1655 		vcpu->arch.sie_block->eca |= 1;
1656 	if (sclp.has_sigpif)
1657 		vcpu->arch.sie_block->eca |= 0x10000000U;
1658 	if (test_kvm_facility(vcpu->kvm, 64))
1659 		vcpu->arch.sie_block->ecb3 |= 0x01;
1660 	if (test_kvm_facility(vcpu->kvm, 129)) {
1661 		vcpu->arch.sie_block->eca |= 0x00020000;
1662 		vcpu->arch.sie_block->ecd |= 0x20000000;
1663 	}
1664 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
1665 	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1666 
1667 	if (vcpu->kvm->arch.use_cmma) {
1668 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
1669 		if (rc)
1670 			return rc;
1671 	}
1672 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1673 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1674 
1675 	kvm_s390_vcpu_crypto_setup(vcpu);
1676 
1677 	return rc;
1678 }
1679 
1680 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1681 				      unsigned int id)
1682 {
1683 	struct kvm_vcpu *vcpu;
1684 	struct sie_page *sie_page;
1685 	int rc = -EINVAL;
1686 
1687 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
1688 		goto out;
1689 
1690 	rc = -ENOMEM;
1691 
1692 	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1693 	if (!vcpu)
1694 		goto out;
1695 
1696 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1697 	if (!sie_page)
1698 		goto out_free_cpu;
1699 
1700 	vcpu->arch.sie_block = &sie_page->sie_block;
1701 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1702 
1703 	vcpu->arch.sie_block->icpua = id;
1704 	spin_lock_init(&vcpu->arch.local_int.lock);
1705 	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
1706 	vcpu->arch.local_int.wq = &vcpu->wq;
1707 	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
1708 	seqcount_init(&vcpu->arch.cputm_seqcount);
1709 
1710 	rc = kvm_vcpu_init(vcpu, kvm, id);
1711 	if (rc)
1712 		goto out_free_sie_block;
1713 	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
1714 		 vcpu->arch.sie_block);
1715 	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
1716 
1717 	return vcpu;
1718 out_free_sie_block:
1719 	free_page((unsigned long)(vcpu->arch.sie_block));
1720 out_free_cpu:
1721 	kmem_cache_free(kvm_vcpu_cache, vcpu);
1722 out:
1723 	return ERR_PTR(rc);
1724 }
1725 
1726 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
1727 {
1728 	return kvm_s390_vcpu_has_irq(vcpu, 0);
1729 }
1730 
1731 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
1732 {
1733 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1734 	exit_sie(vcpu);
1735 }
1736 
1737 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
1738 {
1739 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1740 }
1741 
1742 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
1743 {
1744 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1745 	exit_sie(vcpu);
1746 }
1747 
1748 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
1749 {
1750 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1751 }
1752 
1753 /*
1754  * Kick a guest cpu out of SIE and wait until SIE is not running.
1755  * If the CPU is not running (e.g. waiting as idle) the function will
1756  * return immediately. */
1757 void exit_sie(struct kvm_vcpu *vcpu)
1758 {
1759 	atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
1760 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
1761 		cpu_relax();
1762 }
1763 
1764 /* Kick a guest cpu out of SIE to process a request synchronously */
1765 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
1766 {
1767 	kvm_make_request(req, vcpu);
1768 	kvm_s390_vcpu_request(vcpu);
1769 }
1770 
1771 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
1772 {
1773 	int i;
1774 	struct kvm *kvm = gmap->private;
1775 	struct kvm_vcpu *vcpu;
1776 
1777 	kvm_for_each_vcpu(i, vcpu, kvm) {
1778 		/* match against both prefix pages */
1779 		if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
1780 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
1781 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
1782 		}
1783 	}
1784 }
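
/*
 * The guest prefix area spans two consecutive 4K pages, which is why the
 * notifier above masks off bit 0x1000 before comparing: an invalidation of
 * either page forces a MMU reload so that the prefix pages are mapped
 * again before the vcpu reenters SIE.
 */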
1785 
1786 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
1787 {
1788 	/* kvm common code refers to this, but never calls it */
1789 	BUG();
1790 	return 0;
1791 }
1792 
1793 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
1794 					   struct kvm_one_reg *reg)
1795 {
1796 	int r = -EINVAL;
1797 
1798 	switch (reg->id) {
1799 	case KVM_REG_S390_TODPR:
1800 		r = put_user(vcpu->arch.sie_block->todpr,
1801 			     (u32 __user *)reg->addr);
1802 		break;
1803 	case KVM_REG_S390_EPOCHDIFF:
1804 		r = put_user(vcpu->arch.sie_block->epoch,
1805 			     (u64 __user *)reg->addr);
1806 		break;
1807 	case KVM_REG_S390_CPU_TIMER:
1808 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
1809 			     (u64 __user *)reg->addr);
1810 		break;
1811 	case KVM_REG_S390_CLOCK_COMP:
1812 		r = put_user(vcpu->arch.sie_block->ckc,
1813 			     (u64 __user *)reg->addr);
1814 		break;
1815 	case KVM_REG_S390_PFTOKEN:
1816 		r = put_user(vcpu->arch.pfault_token,
1817 			     (u64 __user *)reg->addr);
1818 		break;
1819 	case KVM_REG_S390_PFCOMPARE:
1820 		r = put_user(vcpu->arch.pfault_compare,
1821 			     (u64 __user *)reg->addr);
1822 		break;
1823 	case KVM_REG_S390_PFSELECT:
1824 		r = put_user(vcpu->arch.pfault_select,
1825 			     (u64 __user *)reg->addr);
1826 		break;
1827 	case KVM_REG_S390_PP:
1828 		r = put_user(vcpu->arch.sie_block->pp,
1829 			     (u64 __user *)reg->addr);
1830 		break;
1831 	case KVM_REG_S390_GBEA:
1832 		r = put_user(vcpu->arch.sie_block->gbea,
1833 			     (u64 __user *)reg->addr);
1834 		break;
1835 	default:
1836 		break;
1837 	}
1838 
1839 	return r;
1840 }
1841 
1842 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
1843 					   struct kvm_one_reg *reg)
1844 {
1845 	int r = -EINVAL;
1846 	__u64 val;
1847 
1848 	switch (reg->id) {
1849 	case KVM_REG_S390_TODPR:
1850 		r = get_user(vcpu->arch.sie_block->todpr,
1851 			     (u32 __user *)reg->addr);
1852 		break;
1853 	case KVM_REG_S390_EPOCHDIFF:
1854 		r = get_user(vcpu->arch.sie_block->epoch,
1855 			     (u64 __user *)reg->addr);
1856 		break;
1857 	case KVM_REG_S390_CPU_TIMER:
1858 		r = get_user(val, (u64 __user *)reg->addr);
1859 		if (!r)
1860 			kvm_s390_set_cpu_timer(vcpu, val);
1861 		break;
1862 	case KVM_REG_S390_CLOCK_COMP:
1863 		r = get_user(vcpu->arch.sie_block->ckc,
1864 			     (u64 __user *)reg->addr);
1865 		break;
1866 	case KVM_REG_S390_PFTOKEN:
1867 		r = get_user(vcpu->arch.pfault_token,
1868 			     (u64 __user *)reg->addr);
1869 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1870 			kvm_clear_async_pf_completion_queue(vcpu);
1871 		break;
1872 	case KVM_REG_S390_PFCOMPARE:
1873 		r = get_user(vcpu->arch.pfault_compare,
1874 			     (u64 __user *)reg->addr);
1875 		break;
1876 	case KVM_REG_S390_PFSELECT:
1877 		r = get_user(vcpu->arch.pfault_select,
1878 			     (u64 __user *)reg->addr);
1879 		break;
1880 	case KVM_REG_S390_PP:
1881 		r = get_user(vcpu->arch.sie_block->pp,
1882 			     (u64 __user *)reg->addr);
1883 		break;
1884 	case KVM_REG_S390_GBEA:
1885 		r = get_user(vcpu->arch.sie_block->gbea,
1886 			     (u64 __user *)reg->addr);
1887 		break;
1888 	default:
1889 		break;
1890 	}
1891 
1892 	return r;
1893 }
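
/*
 * Illustrative userspace usage (a sketch, not part of this file): the
 * one-reg handlers above are reached through the generic KVM_GET_ONE_REG /
 * KVM_SET_ONE_REG vcpu ioctls, roughly:
 *
 *	__u64 epoch = 0;
 *	struct kvm_one_reg reg = {
 *		.id   = KVM_REG_S390_EPOCHDIFF,
 *		.addr = (__u64)&epoch,
 *	};
 *	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
 *
 * (vcpu_fd is an assumed, already open vcpu file descriptor.)
 */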
1894 
1895 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
1896 {
1897 	kvm_s390_vcpu_initial_reset(vcpu);
1898 	return 0;
1899 }
1900 
1901 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1902 {
1903 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
1904 	return 0;
1905 }
1906 
1907 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1908 {
1909 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
1910 	return 0;
1911 }
1912 
1913 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
1914 				  struct kvm_sregs *sregs)
1915 {
1916 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
1917 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
1918 	restore_access_regs(vcpu->run->s.regs.acrs);
1919 	return 0;
1920 }
1921 
1922 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
1923 				  struct kvm_sregs *sregs)
1924 {
1925 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
1926 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
1927 	return 0;
1928 }
1929 
1930 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1931 {
1932 	/* make sure the new values will be lazily loaded */
1933 	save_fpu_regs();
1934 	if (test_fp_ctl(fpu->fpc))
1935 		return -EINVAL;
1936 	current->thread.fpu.fpc = fpu->fpc;
1937 	if (MACHINE_HAS_VX)
1938 		convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
1939 	else
1940 		memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));
1941 	return 0;
1942 }
1943 
1944 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1945 {
1946 	/* make sure we have the latest values */
1947 	save_fpu_regs();
1948 	if (MACHINE_HAS_VX)
1949 		convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
1950 	else
1951 		memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
1952 	fpu->fpc = current->thread.fpu.fpc;
1953 	return 0;
1954 }
1955 
1956 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
1957 {
1958 	int rc = 0;
1959 
1960 	if (!is_vcpu_stopped(vcpu))
1961 		rc = -EBUSY;
1962 	else {
1963 		vcpu->run->psw_mask = psw.mask;
1964 		vcpu->run->psw_addr = psw.addr;
1965 	}
1966 	return rc;
1967 }
1968 
1969 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
1970 				  struct kvm_translation *tr)
1971 {
1972 	return -EINVAL; /* not implemented yet */
1973 }
1974 
1975 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
1976 			      KVM_GUESTDBG_USE_HW_BP | \
1977 			      KVM_GUESTDBG_ENABLE)
1978 
1979 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
1980 					struct kvm_guest_debug *dbg)
1981 {
1982 	int rc = 0;
1983 
1984 	vcpu->guest_debug = 0;
1985 	kvm_s390_clear_bp_data(vcpu);
1986 
1987 	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
1988 		return -EINVAL;
1989 
1990 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
1991 		vcpu->guest_debug = dbg->control;
1992 		/* enforce guest PER */
1993 		atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1994 
1995 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
1996 			rc = kvm_s390_import_bp_data(vcpu, dbg);
1997 	} else {
1998 		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1999 		vcpu->arch.guestdbg.last_bp = 0;
2000 	}
2001 
2002 	if (rc) {
2003 		vcpu->guest_debug = 0;
2004 		kvm_s390_clear_bp_data(vcpu);
2005 		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2006 	}
2007 
2008 	return rc;
2009 }
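
/*
 * Illustrative userspace usage (a sketch, not part of this file):
 * single-stepping is requested through KVM_SET_GUEST_DEBUG, e.g.:
 *
 *	struct kvm_guest_debug dbg = {
 *		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
 *	};
 *	ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
 *
 * (vcpu_fd is an assumed, already open vcpu file descriptor.)
 */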
2010 
2011 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2012 				    struct kvm_mp_state *mp_state)
2013 {
2014 	/* CHECK_STOP and LOAD are not supported yet */
2015 	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2016 				       KVM_MP_STATE_OPERATING;
2017 }
2018 
2019 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2020 				    struct kvm_mp_state *mp_state)
2021 {
2022 	int rc = 0;
2023 
2024 	/* user space knows about this interface - let it control the state */
2025 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2026 
2027 	switch (mp_state->mp_state) {
2028 	case KVM_MP_STATE_STOPPED:
2029 		kvm_s390_vcpu_stop(vcpu);
2030 		break;
2031 	case KVM_MP_STATE_OPERATING:
2032 		kvm_s390_vcpu_start(vcpu);
2033 		break;
2034 	case KVM_MP_STATE_LOAD:
2035 	case KVM_MP_STATE_CHECK_STOP:
2036 		/* fall through - CHECK_STOP and LOAD are not supported yet */
2037 	default:
2038 		rc = -ENXIO;
2039 	}
2040 
2041 	return rc;
2042 }
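
/*
 * Illustrative userspace usage (a sketch, not part of this file): the
 * mp_state handlers above are driven by KVM_GET_MP_STATE / KVM_SET_MP_STATE,
 * e.g. to stop a vcpu:
 *
 *	struct kvm_mp_state mp = { .mp_state = KVM_MP_STATE_STOPPED };
 *	ioctl(vcpu_fd, KVM_SET_MP_STATE, &mp);
 */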
2043 
2044 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2045 {
2046 	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2047 }
2048 
2049 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2050 {
2051 retry:
2052 	kvm_s390_vcpu_request_handled(vcpu);
2053 	if (!vcpu->requests)
2054 		return 0;
2055 	/*
2056 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2057 	 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
2058 	 * This ensures that the ipte instruction for this request has
2059 	 * already finished. We might race against a second unmapper that
2060 	 * wants to set the blocking bit. Let's just retry the request loop.
2061 	 */
2062 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2063 		int rc;
2064 		rc = gmap_ipte_notify(vcpu->arch.gmap,
2065 				      kvm_s390_get_prefix(vcpu),
2066 				      PAGE_SIZE * 2);
2067 		if (rc)
2068 			return rc;
2069 		goto retry;
2070 	}
2071 
2072 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2073 		vcpu->arch.sie_block->ihcpu = 0xffff;
2074 		goto retry;
2075 	}
2076 
2077 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2078 		if (!ibs_enabled(vcpu)) {
2079 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2080 			atomic_or(CPUSTAT_IBS,
2081 					&vcpu->arch.sie_block->cpuflags);
2082 		}
2083 		goto retry;
2084 	}
2085 
2086 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2087 		if (ibs_enabled(vcpu)) {
2088 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2089 			atomic_andnot(CPUSTAT_IBS,
2090 					  &vcpu->arch.sie_block->cpuflags);
2091 		}
2092 		goto retry;
2093 	}
2094 
2095 	/* nothing to do, just clear the request */
2096 	clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
2097 
2098 	return 0;
2099 }
2100 
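/*
 * Set the guest TOD clock for the whole VM: the epoch (guest TOD minus
 * host TOD) is recomputed and, while all vcpus are blocked from
 * (re)entering SIE, copied into every SIE control block so the guests
 * see a consistent TOD clock.
 */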
2101 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2102 {
2103 	struct kvm_vcpu *vcpu;
2104 	int i;
2105 
2106 	mutex_lock(&kvm->lock);
2107 	preempt_disable();
2108 	kvm->arch.epoch = tod - get_tod_clock();
2109 	kvm_s390_vcpu_block_all(kvm);
2110 	kvm_for_each_vcpu(i, vcpu, kvm)
2111 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2112 	kvm_s390_vcpu_unblock_all(kvm);
2113 	preempt_enable();
2114 	mutex_unlock(&kvm->lock);
2115 }
2116 
2117 /**
2118  * kvm_arch_fault_in_page - fault-in guest page if necessary
2119  * @vcpu: The corresponding virtual cpu
2120  * @gpa: Guest physical address
2121  * @writable: Whether the page should be writable or not
2122  *
2123  * Make sure that a guest page has been faulted-in on the host.
2124  *
2125  * Return: Zero on success, negative error code otherwise.
2126  */
2127 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2128 {
2129 	return gmap_fault(vcpu->arch.gmap, gpa,
2130 			  writable ? FAULT_FLAG_WRITE : 0);
2131 }
2132 
2133 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2134 				      unsigned long token)
2135 {
2136 	struct kvm_s390_interrupt inti;
2137 	struct kvm_s390_irq irq;
2138 
2139 	if (start_token) {
2140 		irq.u.ext.ext_params2 = token;
2141 		irq.type = KVM_S390_INT_PFAULT_INIT;
2142 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2143 	} else {
2144 		inti.type = KVM_S390_INT_PFAULT_DONE;
2145 		inti.parm64 = token;
2146 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2147 	}
2148 }
2149 
2150 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2151 				     struct kvm_async_pf *work)
2152 {
2153 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2154 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2155 }
2156 
2157 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2158 				 struct kvm_async_pf *work)
2159 {
2160 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2161 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2162 }
2163 
2164 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
2165 			       struct kvm_async_pf *work)
2166 {
2167 	/* s390 will always inject the page directly */
2168 }
2169 
2170 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2171 {
2172 	/*
2173 	 * s390 will always inject the page directly,
2174 	 * but we still want kvm_check_async_pf_completion() to clean up
2175 	 */
2176 	return true;
2177 }
2178 
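/*
 * Check whether the current host fault may be handled asynchronously:
 * the guest must have armed the pfault handshake (token/compare/select),
 * external interrupts and the corresponding CR0 subclass must be enabled,
 * no interrupt may be pending for the vcpu, and pfault must be enabled on
 * the gmap.  Returns non-zero if an async pfault was queued, zero if the
 * caller has to resolve the fault synchronously.
 */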
2179 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2180 {
2181 	hva_t hva;
2182 	struct kvm_arch_async_pf arch;
2183 	int rc;
2184 
2185 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2186 		return 0;
2187 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2188 	    vcpu->arch.pfault_compare)
2189 		return 0;
2190 	if (psw_extint_disabled(vcpu))
2191 		return 0;
2192 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
2193 		return 0;
2194 	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2195 		return 0;
2196 	if (!vcpu->arch.gmap->pfault_enabled)
2197 		return 0;
2198 
2199 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2200 	hva += current->thread.gmap_addr & ~PAGE_MASK;
2201 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2202 		return 0;
2203 
2204 	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2205 	return rc;
2206 }
2207 
2208 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2209 {
2210 	int rc, cpuflags;
2211 
2212 	/*
2213 	 * On s390 notifications for arriving pages will be delivered directly
2214 	 * to the guest, but the housekeeping for completed pfaults is
2215 	 * handled outside the worker.
2216 	 */
2217 	kvm_check_async_pf_completion(vcpu);
2218 
2219 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2220 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2221 
2222 	if (need_resched())
2223 		schedule();
2224 
2225 	if (test_cpu_flag(CIF_MCCK_PENDING))
2226 		s390_handle_mcck();
2227 
2228 	if (!kvm_is_ucontrol(vcpu->kvm)) {
2229 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
2230 		if (rc)
2231 			return rc;
2232 	}
2233 
2234 	rc = kvm_s390_handle_requests(vcpu);
2235 	if (rc)
2236 		return rc;
2237 
2238 	if (guestdbg_enabled(vcpu)) {
2239 		kvm_s390_backup_guest_per_regs(vcpu);
2240 		kvm_s390_patch_guest_per_regs(vcpu);
2241 	}
2242 
2243 	vcpu->arch.sie_block->icptcode = 0;
2244 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2245 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2246 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
2247 
2248 	return 0;
2249 }
2250 
2251 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2252 {
2253 	struct kvm_s390_pgm_info pgm_info = {
2254 		.code = PGM_ADDRESSING,
2255 	};
2256 	u8 opcode, ilen;
2257 	int rc;
2258 
2259 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2260 	trace_kvm_s390_sie_fault(vcpu);
2261 
2262 	/*
2263 	 * We want to inject an addressing exception, which is defined as a
2264 	 * suppressing or terminating exception. However, since we came here
2265 	 * by a DAT access exception, the PSW still points to the faulting
2266 	 * instruction since DAT exceptions are nullifying. So we've got
2267 	 * to look up the current opcode to get the length of the instruction
2268 	 * to be able to forward the PSW.
2269 	 */
2270 	rc = read_guest_instr(vcpu, &opcode, 1);
2271 	ilen = insn_length(opcode);
2272 	if (rc < 0) {
2273 		return rc;
2274 	} else if (rc) {
2275 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
2276 		 * Forward by arbitrary ilc, injection will take care of
2277 		 * nullification if necessary.
2278 		 */
2279 		pgm_info = vcpu->arch.pgm;
2280 		ilen = 4;
2281 	}
2282 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
2283 	kvm_s390_forward_psw(vcpu, ilen);
2284 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
2285 }
2286 
2287 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2288 {
2289 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2290 		   vcpu->arch.sie_block->icptcode);
2291 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2292 
2293 	if (guestdbg_enabled(vcpu))
2294 		kvm_s390_restore_guest_per_regs(vcpu);
2295 
2296 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2297 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2298 
2299 	if (vcpu->arch.sie_block->icptcode > 0) {
2300 		int rc = kvm_handle_sie_intercept(vcpu);
2301 
2302 		if (rc != -EOPNOTSUPP)
2303 			return rc;
2304 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2305 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2306 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2307 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2308 		return -EREMOTE;
2309 	} else if (exit_reason != -EFAULT) {
2310 		vcpu->stat.exit_null++;
2311 		return 0;
2312 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
2313 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2314 		vcpu->run->s390_ucontrol.trans_exc_code =
2315 						current->thread.gmap_addr;
2316 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
2317 		return -EREMOTE;
2318 	} else if (current->thread.gmap_pfault) {
2319 		trace_kvm_s390_major_guest_pfault(vcpu);
2320 		current->thread.gmap_pfault = 0;
2321 		if (kvm_arch_setup_async_pf(vcpu))
2322 			return 0;
2323 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2324 	}
2325 	return vcpu_post_run_fault_in_sie(vcpu);
2326 }
2327 
2328 static int __vcpu_run(struct kvm_vcpu *vcpu)
2329 {
2330 	int rc, exit_reason;
2331 
2332 	/*
2333 	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
2334 	 * ning the guest), so that memslots (and other stuff) are protected
2335 	 */
2336 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2337 
2338 	do {
2339 		rc = vcpu_pre_run(vcpu);
2340 		if (rc)
2341 			break;
2342 
2343 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2344 		/*
2345 		 * As PF_VCPU will be used in the fault handler, there must be
2346 		 * no uaccess between guest_enter and guest_exit.
2347 		 */
2348 		local_irq_disable();
2349 		__kvm_guest_enter();
2350 		__disable_cpu_timer_accounting(vcpu);
2351 		local_irq_enable();
2352 		exit_reason = sie64a(vcpu->arch.sie_block,
2353 				     vcpu->run->s.regs.gprs);
2354 		local_irq_disable();
2355 		__enable_cpu_timer_accounting(vcpu);
2356 		__kvm_guest_exit();
2357 		local_irq_enable();
2358 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2359 
2360 		rc = vcpu_post_run(vcpu, exit_reason);
2361 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2362 
2363 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2364 	return rc;
2365 }
2366 
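/*
 * Copy the guest state that userspace may have modified in kvm_run (the
 * PSW always, the other register blocks as flagged in kvm_dirty_regs)
 * into the vcpu and its SIE control block before entering the guest;
 * store_regs() below does the reverse on the way back to userspace.
 */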
2367 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2368 {
2369 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2370 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2371 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2372 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2373 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2374 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2375 		/* some control register changes require a tlb flush */
2376 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2377 	}
2378 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2379 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
2380 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2381 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2382 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2383 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2384 	}
2385 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2386 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2387 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2388 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2389 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2390 			kvm_clear_async_pf_completion_queue(vcpu);
2391 	}
2392 	kvm_run->kvm_dirty_regs = 0;
2393 }
2394 
2395 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2396 {
2397 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2398 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2399 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2400 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2401 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
2402 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2403 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2404 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2405 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2406 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2407 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2408 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2409 }
2410 
2411 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2412 {
2413 	int rc;
2414 	sigset_t sigsaved;
2415 
2416 	if (guestdbg_exit_pending(vcpu)) {
2417 		kvm_s390_prepare_debug_exit(vcpu);
2418 		return 0;
2419 	}
2420 
2421 	if (vcpu->sigset_active)
2422 		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2423 
2424 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2425 		kvm_s390_vcpu_start(vcpu);
2426 	} else if (is_vcpu_stopped(vcpu)) {
2427 		pr_err_ratelimited("can't run stopped vcpu %d\n",
2428 				   vcpu->vcpu_id);
2429 		return -EINVAL;
2430 	}
2431 
2432 	sync_regs(vcpu, kvm_run);
2433 	enable_cpu_timer_accounting(vcpu);
2434 
2435 	might_fault();
2436 	rc = __vcpu_run(vcpu);
2437 
2438 	if (signal_pending(current) && !rc) {
2439 		kvm_run->exit_reason = KVM_EXIT_INTR;
2440 		rc = -EINTR;
2441 	}
2442 
2443 	if (guestdbg_exit_pending(vcpu) && !rc)  {
2444 		kvm_s390_prepare_debug_exit(vcpu);
2445 		rc = 0;
2446 	}
2447 
2448 	if (rc == -EREMOTE) {
2449 		/* userspace support is needed, kvm_run has been prepared */
2450 		rc = 0;
2451 	}
2452 
2453 	disable_cpu_timer_accounting(vcpu);
2454 	store_regs(vcpu, kvm_run);
2455 
2456 	if (vcpu->sigset_active)
2457 		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2458 
2459 	vcpu->stat.exit_userspace++;
2460 	return rc;
2461 }
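
/*
 * Illustrative userspace usage (a sketch, not part of this file): the run
 * path above is entered through the generic KVM_RUN ioctl, typically in a
 * loop that inspects the mmap()ed kvm_run area after each exit:
 *
 *	while (ioctl(vcpu_fd, KVM_RUN, 0) == 0) {
 *		if (run->exit_reason == KVM_EXIT_S390_SIEIC)
 *			break;		(handle the intercept in userspace)
 *	}
 *
 * (vcpu_fd and run are assumed to be set up beforehand.)
 */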
2462 
2463 /*
2464  * store status at address
2465  * we have two special cases:
2466  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2467  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2468  */
2469 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2470 {
2471 	unsigned char archmode = 1;
2472 	freg_t fprs[NUM_FPRS];
2473 	unsigned int px;
2474 	u64 clkcomp, cputm;
2475 	int rc;
2476 
2477 	px = kvm_s390_get_prefix(vcpu);
2478 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2479 		if (write_guest_abs(vcpu, 163, &archmode, 1))
2480 			return -EFAULT;
2481 		gpa = 0;
2482 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2483 		if (write_guest_real(vcpu, 163, &archmode, 1))
2484 			return -EFAULT;
2485 		gpa = px;
2486 	} else
2487 		gpa -= __LC_FPREGS_SAVE_AREA;
2488 
2489 	/* manually convert vector registers if necessary */
2490 	if (MACHINE_HAS_VX) {
2491 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
2492 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2493 				     fprs, 128);
2494 	} else {
2495 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2496 				     vcpu->run->s.regs.fprs, 128);
2497 	}
2498 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2499 			      vcpu->run->s.regs.gprs, 128);
2500 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2501 			      &vcpu->arch.sie_block->gpsw, 16);
2502 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2503 			      &px, 4);
2504 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2505 			      &vcpu->run->s.regs.fpc, 4);
2506 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2507 			      &vcpu->arch.sie_block->todpr, 4);
2508 	cputm = kvm_s390_get_cpu_timer(vcpu);
2509 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2510 			      &cputm, 8);
2511 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
2512 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2513 			      &clkcomp, 8);
2514 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
2515 			      &vcpu->run->s.regs.acrs, 64);
2516 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
2517 			      &vcpu->arch.sie_block->gcr, 128);
2518 	return rc ? -EFAULT : 0;
2519 }
2520 
2521 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2522 {
2523 	/*
2524 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2525 	 * copying in vcpu load/put. Let's update our copies before we save
2526 	 * them into the save area.
2527 	 */
2528 	save_fpu_regs();
2529 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2530 	save_access_regs(vcpu->run->s.regs.acrs);
2531 
2532 	return kvm_s390_store_status_unloaded(vcpu, addr);
2533 }
2534 
2535 /*
2536  * store additional status at address
2537  */
2538 int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
2539 					unsigned long gpa)
2540 {
2541 	/* Only bits 0-53 are used for address formation */
2542 	if (!(gpa & ~0x3ff))
2543 		return 0;
2544 
2545 	return write_guest_abs(vcpu, gpa & ~0x3ff,
2546 			       (void *)&vcpu->run->s.regs.vrs, 512);
2547 }
2548 
2549 int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
2550 {
2551 	if (!test_kvm_facility(vcpu->kvm, 129))
2552 		return 0;
2553 
2554 	/*
2555 	 * The guest VXRS are in the host VXRS due to the lazy
2556 	 * copying in vcpu load/put. We can simply call save_fpu_regs()
2557 	 * to save the current register state because we are in the
2558 	 * middle of a load/put cycle.
2559 	 *
2560 	 * Let's update our copies before we save them into the save area.
2561 	 */
2562 	save_fpu_regs();
2563 
2564 	return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
2565 }
2566 
2567 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2568 {
2569 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2570 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2571 }
2572 
2573 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2574 {
2575 	unsigned int i;
2576 	struct kvm_vcpu *vcpu;
2577 
2578 	kvm_for_each_vcpu(i, vcpu, kvm) {
2579 		__disable_ibs_on_vcpu(vcpu);
2580 	}
2581 }
2582 
2583 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2584 {
2585 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2586 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2587 }
2588 
2589 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2590 {
2591 	int i, online_vcpus, started_vcpus = 0;
2592 
2593 	if (!is_vcpu_stopped(vcpu))
2594 		return;
2595 
2596 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2597 	/* Only one cpu at a time may enter/leave the STOPPED state. */
2598 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
2599 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2600 
2601 	for (i = 0; i < online_vcpus; i++) {
2602 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2603 			started_vcpus++;
2604 	}
2605 
2606 	if (started_vcpus == 0) {
2607 		/* we're the only active VCPU -> speed it up */
2608 		__enable_ibs_on_vcpu(vcpu);
2609 	} else if (started_vcpus == 1) {
2610 		/*
2611 		 * As we are starting a second VCPU, we have to disable
2612 		 * the IBS facility on all VCPUs to remove potentially
2613 	 * outstanding ENABLE requests.
2614 		 */
2615 		__disable_ibs_on_all_vcpus(vcpu->kvm);
2616 	}
2617 
2618 	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2619 	/*
2620 	 * Another VCPU might have used IBS while we were offline.
2621 	 * Let's play safe and flush the VCPU at startup.
2622 	 */
2623 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2624 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2625 	return;
2626 }
2627 
2628 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2629 {
2630 	int i, online_vcpus, started_vcpus = 0;
2631 	struct kvm_vcpu *started_vcpu = NULL;
2632 
2633 	if (is_vcpu_stopped(vcpu))
2634 		return;
2635 
2636 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2637 	/* Only one cpu at a time may enter/leave the STOPPED state. */
2638 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
2639 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2640 
2641 	/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
2642 	kvm_s390_clear_stop_irq(vcpu);
2643 
2644 	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2645 	__disable_ibs_on_vcpu(vcpu);
2646 
2647 	for (i = 0; i < online_vcpus; i++) {
2648 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2649 			started_vcpus++;
2650 			started_vcpu = vcpu->kvm->vcpus[i];
2651 		}
2652 	}
2653 
2654 	if (started_vcpus == 1) {
2655 		/*
2656 		 * As we only have one VCPU left, we want to enable the
2657 		 * IBS facility for that VCPU to speed it up.
2658 		 */
2659 		__enable_ibs_on_vcpu(started_vcpu);
2660 	}
2661 
2662 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2663 	return;
2664 }
2665 
2666 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2667 				     struct kvm_enable_cap *cap)
2668 {
2669 	int r;
2670 
2671 	if (cap->flags)
2672 		return -EINVAL;
2673 
2674 	switch (cap->cap) {
2675 	case KVM_CAP_S390_CSS_SUPPORT:
2676 		if (!vcpu->kvm->arch.css_support) {
2677 			vcpu->kvm->arch.css_support = 1;
2678 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
2679 			trace_kvm_s390_enable_css(vcpu->kvm);
2680 		}
2681 		r = 0;
2682 		break;
2683 	default:
2684 		r = -EINVAL;
2685 		break;
2686 	}
2687 	return r;
2688 }
2689 
2690 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
2691 				  struct kvm_s390_mem_op *mop)
2692 {
2693 	void __user *uaddr = (void __user *)mop->buf;
2694 	void *tmpbuf = NULL;
2695 	int r, srcu_idx;
2696 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
2697 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
2698 
2699 	if (mop->flags & ~supported_flags)
2700 		return -EINVAL;
2701 
2702 	if (mop->size > MEM_OP_MAX_SIZE)
2703 		return -E2BIG;
2704 
2705 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2706 		tmpbuf = vmalloc(mop->size);
2707 		if (!tmpbuf)
2708 			return -ENOMEM;
2709 	}
2710 
2711 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2712 
2713 	switch (mop->op) {
2714 	case KVM_S390_MEMOP_LOGICAL_READ:
2715 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2716 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
2717 					    mop->size, GACC_FETCH);
2718 			break;
2719 		}
2720 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2721 		if (r == 0) {
2722 			if (copy_to_user(uaddr, tmpbuf, mop->size))
2723 				r = -EFAULT;
2724 		}
2725 		break;
2726 	case KVM_S390_MEMOP_LOGICAL_WRITE:
2727 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2728 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
2729 					    mop->size, GACC_STORE);
2730 			break;
2731 		}
2732 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2733 			r = -EFAULT;
2734 			break;
2735 		}
2736 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2737 		break;
2738 	default:
2739 		r = -EINVAL;
2740 	}
2741 
2742 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
2743 
2744 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
2745 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
2746 
2747 	vfree(tmpbuf);
2748 	return r;
2749 }
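
/*
 * Illustrative userspace usage (a sketch, not part of this file): a
 * logical read through the KVM_S390_MEM_OP vcpu ioctl could look like:
 *
 *	char buf[256];
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = guest_addr,
 *		.size  = sizeof(buf),
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)buf,
 *		.ar    = 0,
 *	};
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 *
 * (guest_addr and vcpu_fd are assumed to exist in the caller.)
 */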
2750 
2751 long kvm_arch_vcpu_ioctl(struct file *filp,
2752 			 unsigned int ioctl, unsigned long arg)
2753 {
2754 	struct kvm_vcpu *vcpu = filp->private_data;
2755 	void __user *argp = (void __user *)arg;
2756 	int idx;
2757 	long r;
2758 
2759 	switch (ioctl) {
2760 	case KVM_S390_IRQ: {
2761 		struct kvm_s390_irq s390irq;
2762 
2763 		r = -EFAULT;
2764 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
2765 			break;
2766 		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2767 		break;
2768 	}
2769 	case KVM_S390_INTERRUPT: {
2770 		struct kvm_s390_interrupt s390int;
2771 		struct kvm_s390_irq s390irq;
2772 
2773 		r = -EFAULT;
2774 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
2775 			break;
2776 		if (s390int_to_s390irq(&s390int, &s390irq))
2777 			return -EINVAL;
2778 		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2779 		break;
2780 	}
2781 	case KVM_S390_STORE_STATUS:
2782 		idx = srcu_read_lock(&vcpu->kvm->srcu);
2783 		r = kvm_s390_vcpu_store_status(vcpu, arg);
2784 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
2785 		break;
2786 	case KVM_S390_SET_INITIAL_PSW: {
2787 		psw_t psw;
2788 
2789 		r = -EFAULT;
2790 		if (copy_from_user(&psw, argp, sizeof(psw)))
2791 			break;
2792 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
2793 		break;
2794 	}
2795 	case KVM_S390_INITIAL_RESET:
2796 		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
2797 		break;
2798 	case KVM_SET_ONE_REG:
2799 	case KVM_GET_ONE_REG: {
2800 		struct kvm_one_reg reg;
2801 		r = -EFAULT;
2802 		if (copy_from_user(&reg, argp, sizeof(reg)))
2803 			break;
2804 		if (ioctl == KVM_SET_ONE_REG)
2805 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
2806 		else
2807 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
2808 		break;
2809 	}
2810 #ifdef CONFIG_KVM_S390_UCONTROL
2811 	case KVM_S390_UCAS_MAP: {
2812 		struct kvm_s390_ucas_mapping ucasmap;
2813 
2814 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2815 			r = -EFAULT;
2816 			break;
2817 		}
2818 
2819 		if (!kvm_is_ucontrol(vcpu->kvm)) {
2820 			r = -EINVAL;
2821 			break;
2822 		}
2823 
2824 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
2825 				     ucasmap.vcpu_addr, ucasmap.length);
2826 		break;
2827 	}
2828 	case KVM_S390_UCAS_UNMAP: {
2829 		struct kvm_s390_ucas_mapping ucasmap;
2830 
2831 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2832 			r = -EFAULT;
2833 			break;
2834 		}
2835 
2836 		if (!kvm_is_ucontrol(vcpu->kvm)) {
2837 			r = -EINVAL;
2838 			break;
2839 		}
2840 
2841 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
2842 			ucasmap.length);
2843 		break;
2844 	}
2845 #endif
2846 	case KVM_S390_VCPU_FAULT: {
2847 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
2848 		break;
2849 	}
2850 	case KVM_ENABLE_CAP:
2851 	{
2852 		struct kvm_enable_cap cap;
2853 		r = -EFAULT;
2854 		if (copy_from_user(&cap, argp, sizeof(cap)))
2855 			break;
2856 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
2857 		break;
2858 	}
2859 	case KVM_S390_MEM_OP: {
2860 		struct kvm_s390_mem_op mem_op;
2861 
2862 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
2863 			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
2864 		else
2865 			r = -EFAULT;
2866 		break;
2867 	}
2868 	case KVM_S390_SET_IRQ_STATE: {
2869 		struct kvm_s390_irq_state irq_state;
2870 
2871 		r = -EFAULT;
2872 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2873 			break;
2874 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
2875 		    irq_state.len == 0 ||
2876 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
2877 			r = -EINVAL;
2878 			break;
2879 		}
2880 		r = kvm_s390_set_irq_state(vcpu,
2881 					   (void __user *) irq_state.buf,
2882 					   irq_state.len);
2883 		break;
2884 	}
2885 	case KVM_S390_GET_IRQ_STATE: {
2886 		struct kvm_s390_irq_state irq_state;
2887 
2888 		r = -EFAULT;
2889 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2890 			break;
2891 		if (irq_state.len == 0) {
2892 			r = -EINVAL;
2893 			break;
2894 		}
2895 		r = kvm_s390_get_irq_state(vcpu,
2896 					   (__u8 __user *)  irq_state.buf,
2897 					   irq_state.len);
2898 		break;
2899 	}
2900 	default:
2901 		r = -ENOTTY;
2902 	}
2903 	return r;
2904 }
2905 
2906 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
2907 {
2908 #ifdef CONFIG_KVM_S390_UCONTROL
2909 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
2910 		 && (kvm_is_ucontrol(vcpu->kvm))) {
2911 		vmf->page = virt_to_page(vcpu->arch.sie_block);
2912 		get_page(vmf->page);
2913 		return 0;
2914 	}
2915 #endif
2916 	return VM_FAULT_SIGBUS;
2917 }
2918 
2919 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
2920 			    unsigned long npages)
2921 {
2922 	return 0;
2923 }
2924 
2925 /* Section: memory related */
2926 int kvm_arch_prepare_memory_region(struct kvm *kvm,
2927 				   struct kvm_memory_slot *memslot,
2928 				   const struct kvm_userspace_memory_region *mem,
2929 				   enum kvm_mr_change change)
2930 {
2931 	/* A few sanity checks. Memory slots have to start and end at a
2932 	   segment boundary (1MB). The memory in userland may be fragmented
2933 	   into various different vmas. It is okay to mmap() and munmap()
2934 	   memory in this slot after this call, at any time. */
2935 
2936 	if (mem->userspace_addr & 0xffffful)
2937 		return -EINVAL;
2938 
2939 	if (mem->memory_size & 0xffffful)
2940 		return -EINVAL;
2941 
2942 	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
2943 		return -EINVAL;
2944 
2945 	return 0;
2946 }
2947 
2948 void kvm_arch_commit_memory_region(struct kvm *kvm,
2949 				const struct kvm_userspace_memory_region *mem,
2950 				const struct kvm_memory_slot *old,
2951 				const struct kvm_memory_slot *new,
2952 				enum kvm_mr_change change)
2953 {
2954 	int rc;
2955 
2956 	/* If the basics of the memslot do not change, we do not want
2957 	 * to update the gmap. Every update causes several unnecessary
2958 	 * segment translation exceptions. This is usually handled just
2959 	 * fine by the normal fault handler + gmap, but it will also
2960 	 * cause faults on the prefix page of running guest CPUs.
2961 	 */
2962 	if (old->userspace_addr == mem->userspace_addr &&
2963 	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
2964 	    old->npages * PAGE_SIZE == mem->memory_size)
2965 		return;
2966 
2967 	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
2968 		mem->guest_phys_addr, mem->memory_size);
2969 	if (rc)
2970 		pr_warn("failed to commit memory region\n");
2971 	return;
2972 }
2973 
2974 static int __init kvm_s390_init(void)
2975 {
2976 	if (!sclp.has_sief2) {
2977 		pr_info("SIE not available\n");
2978 		return -ENODEV;
2979 	}
2980 
2981 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
2982 }
2983 
2984 static void __exit kvm_s390_exit(void)
2985 {
2986 	kvm_exit();
2987 }
2988 
2989 module_init(kvm_s390_init);
2990 module_exit(kvm_s390_exit);
2991 
2992 /*
2993  * Enable autoloading of the kvm module.
2994  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
2995  * since x86 takes a different approach.
2996  */
2997 #include <linux/miscdevice.h>
2998 MODULE_ALIAS_MISCDEV(KVM_MINOR);
2999 MODULE_ALIAS("devname:kvm");
3000