xref: /openbmc/linux/arch/s390/kvm/kvm-s390.c (revision a4a4f1916abbbc3148d79a37cf3fe4f3f6c604d9)
1 /*
2  * hosting zSeries kernel virtual machines
3  *
4  * Copyright IBM Corp. 2008, 2009
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License (version 2 only)
8  * as published by the Free Software Foundation.
9  *
10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
11  *               Christian Borntraeger <borntraeger@de.ibm.com>
12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
14  *               Jason J. Herne <jjherne@us.ibm.com>
15  */
16 
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/module.h>
25 #include <linux/random.h>
26 #include <linux/slab.h>
27 #include <linux/timer.h>
28 #include <linux/vmalloc.h>
29 #include <asm/asm-offsets.h>
30 #include <asm/lowcore.h>
31 #include <asm/pgtable.h>
32 #include <asm/nmi.h>
33 #include <asm/switch_to.h>
34 #include <asm/isc.h>
35 #include <asm/sclp.h>
36 #include "kvm-s390.h"
37 #include "gaccess.h"
38 
39 #define CREATE_TRACE_POINTS
40 #include "trace.h"
41 #include "trace-s390.h"
42 
43 #define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
44 #define LOCAL_IRQS 32
45 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
46 			   (KVM_MAX_VCPUS + LOCAL_IRQS))
47 
48 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
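/*
 * For illustration only: with this macro an entry below such as
 *	{ "exit_null", VCPU_STAT(exit_null) }
 * expands to
 *	{ "exit_null", offsetof(struct kvm_vcpu, stat.exit_null), KVM_STAT_VCPU }
 * i.e. the debugfs name plus the offset of the per-vcpu counter it exposes.
 */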
49 
50 struct kvm_stats_debugfs_item debugfs_entries[] = {
51 	{ "userspace_handled", VCPU_STAT(exit_userspace) },
52 	{ "exit_null", VCPU_STAT(exit_null) },
53 	{ "exit_validity", VCPU_STAT(exit_validity) },
54 	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
55 	{ "exit_external_request", VCPU_STAT(exit_external_request) },
56 	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
57 	{ "exit_instruction", VCPU_STAT(exit_instruction) },
58 	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
59 	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
60 	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
61 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
62 	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
63 	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
64 	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
65 	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
66 	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
67 	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
68 	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
69 	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
70 	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
71 	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
72 	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
73 	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
74 	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
75 	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
76 	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
77 	{ "instruction_spx", VCPU_STAT(instruction_spx) },
78 	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
79 	{ "instruction_stap", VCPU_STAT(instruction_stap) },
80 	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
81 	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
82 	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
83 	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
84 	{ "instruction_essa", VCPU_STAT(instruction_essa) },
85 	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
86 	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
87 	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
88 	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
89 	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
90 	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
91 	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
92 	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
93 	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
94 	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
95 	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
96 	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
97 	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
98 	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
99 	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
100 	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
101 	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
102 	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
103 	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
104 	{ "diagnose_10", VCPU_STAT(diagnose_10) },
105 	{ "diagnose_44", VCPU_STAT(diagnose_44) },
106 	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
107 	{ NULL }
108 };
109 
110 /* upper facilities limit for kvm */
111 unsigned long kvm_s390_fac_list_mask[] = {
112 	0xffe6fffbfcfdfc40UL,
113 	0x005c800000000000UL,
114 };
115 
116 unsigned long kvm_s390_fac_list_mask_size(void)
117 {
118 	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
119 	return ARRAY_SIZE(kvm_s390_fac_list_mask);
120 }
121 
122 static struct gmap_notifier gmap_notifier;
123 
124 /* Section: not file related */
125 int kvm_arch_hardware_enable(void)
126 {
127 	/* every s390 is virtualization enabled ;-) */
128 	return 0;
129 }
130 
131 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
132 
133 int kvm_arch_hardware_setup(void)
134 {
135 	gmap_notifier.notifier_call = kvm_gmap_notifier;
136 	gmap_register_ipte_notifier(&gmap_notifier);
137 	return 0;
138 }
139 
140 void kvm_arch_hardware_unsetup(void)
141 {
142 	gmap_unregister_ipte_notifier(&gmap_notifier);
143 }
144 
145 int kvm_arch_init(void *opaque)
146 {
147 	/* Register floating interrupt controller interface. */
148 	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
149 }
150 
151 /* Section: device related */
152 long kvm_arch_dev_ioctl(struct file *filp,
153 			unsigned int ioctl, unsigned long arg)
154 {
155 	if (ioctl == KVM_S390_ENABLE_SIE)
156 		return s390_enable_sie();
157 	return -EINVAL;
158 }
159 
160 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
161 {
162 	int r;
163 
164 	switch (ext) {
165 	case KVM_CAP_S390_PSW:
166 	case KVM_CAP_S390_GMAP:
167 	case KVM_CAP_SYNC_MMU:
168 #ifdef CONFIG_KVM_S390_UCONTROL
169 	case KVM_CAP_S390_UCONTROL:
170 #endif
171 	case KVM_CAP_ASYNC_PF:
172 	case KVM_CAP_SYNC_REGS:
173 	case KVM_CAP_ONE_REG:
174 	case KVM_CAP_ENABLE_CAP:
175 	case KVM_CAP_S390_CSS_SUPPORT:
176 	case KVM_CAP_IOEVENTFD:
177 	case KVM_CAP_DEVICE_CTRL:
178 	case KVM_CAP_ENABLE_CAP_VM:
179 	case KVM_CAP_S390_IRQCHIP:
180 	case KVM_CAP_VM_ATTRIBUTES:
181 	case KVM_CAP_MP_STATE:
182 	case KVM_CAP_S390_INJECT_IRQ:
183 	case KVM_CAP_S390_USER_SIGP:
184 	case KVM_CAP_S390_USER_STSI:
185 	case KVM_CAP_S390_SKEYS:
186 	case KVM_CAP_S390_IRQ_STATE:
187 		r = 1;
188 		break;
189 	case KVM_CAP_S390_MEM_OP:
190 		r = MEM_OP_MAX_SIZE;
191 		break;
192 	case KVM_CAP_NR_VCPUS:
193 	case KVM_CAP_MAX_VCPUS:
194 		r = KVM_MAX_VCPUS;
195 		break;
196 	case KVM_CAP_NR_MEMSLOTS:
197 		r = KVM_USER_MEM_SLOTS;
198 		break;
199 	case KVM_CAP_S390_COW:
200 		r = MACHINE_HAS_ESOP;
201 		break;
202 	case KVM_CAP_S390_VECTOR_REGISTERS:
203 		r = MACHINE_HAS_VX;
204 		break;
205 	default:
206 		r = 0;
207 	}
208 	return r;
209 }
210 
211 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
212 					struct kvm_memory_slot *memslot)
213 {
214 	gfn_t cur_gfn, last_gfn;
215 	unsigned long address;
216 	struct gmap *gmap = kvm->arch.gmap;
217 
218 	down_read(&gmap->mm->mmap_sem);
219 	/* Loop over all guest pages */
220 	last_gfn = memslot->base_gfn + memslot->npages;
221 	for (cur_gfn = memslot->base_gfn; cur_gfn < last_gfn; cur_gfn++) {
222 		address = gfn_to_hva_memslot(memslot, cur_gfn);
223 
224 		if (gmap_test_and_clear_dirty(address, gmap))
225 			mark_page_dirty(kvm, cur_gfn);
226 	}
227 	up_read(&gmap->mm->mmap_sem);
228 }
229 
230 /* Section: vm related */
231 /*
232  * Get (and clear) the dirty memory log for a memory slot.
233  */
234 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
235 			       struct kvm_dirty_log *log)
236 {
237 	int r;
238 	unsigned long n;
239 	struct kvm_memory_slot *memslot;
240 	int is_dirty = 0;
241 
242 	mutex_lock(&kvm->slots_lock);
243 
244 	r = -EINVAL;
245 	if (log->slot >= KVM_USER_MEM_SLOTS)
246 		goto out;
247 
248 	memslot = id_to_memslot(kvm->memslots, log->slot);
249 	r = -ENOENT;
250 	if (!memslot->dirty_bitmap)
251 		goto out;
252 
253 	kvm_s390_sync_dirty_log(kvm, memslot);
254 	r = kvm_get_dirty_log(kvm, log, &is_dirty);
255 	if (r)
256 		goto out;
257 
258 	/* Clear the dirty log */
259 	if (is_dirty) {
260 		n = kvm_dirty_bitmap_bytes(memslot);
261 		memset(memslot->dirty_bitmap, 0, n);
262 	}
263 	r = 0;
264 out:
265 	mutex_unlock(&kvm->slots_lock);
266 	return r;
267 }
268 
269 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
270 {
271 	int r;
272 
273 	if (cap->flags)
274 		return -EINVAL;
275 
276 	switch (cap->cap) {
277 	case KVM_CAP_S390_IRQCHIP:
278 		kvm->arch.use_irqchip = 1;
279 		r = 0;
280 		break;
281 	case KVM_CAP_S390_USER_SIGP:
282 		kvm->arch.user_sigp = 1;
283 		r = 0;
284 		break;
285 	case KVM_CAP_S390_VECTOR_REGISTERS:
286 		if (MACHINE_HAS_VX) {
287 			set_kvm_facility(kvm->arch.model.fac->mask, 129);
288 			set_kvm_facility(kvm->arch.model.fac->list, 129);
289 			r = 0;
290 		} else
291 			r = -EINVAL;
292 		break;
293 	case KVM_CAP_S390_USER_STSI:
294 		kvm->arch.user_stsi = 1;
295 		r = 0;
296 		break;
297 	default:
298 		r = -EINVAL;
299 		break;
300 	}
301 	return r;
302 }
303 
304 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
305 {
306 	int ret;
307 
308 	switch (attr->attr) {
309 	case KVM_S390_VM_MEM_LIMIT_SIZE:
310 		ret = 0;
311 		if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr))
312 			ret = -EFAULT;
313 		break;
314 	default:
315 		ret = -ENXIO;
316 		break;
317 	}
318 	return ret;
319 }
320 
321 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
322 {
323 	int ret;
324 	unsigned int idx;
325 	switch (attr->attr) {
326 	case KVM_S390_VM_MEM_ENABLE_CMMA:
327 		ret = -EBUSY;
328 		mutex_lock(&kvm->lock);
329 		if (atomic_read(&kvm->online_vcpus) == 0) {
330 			kvm->arch.use_cmma = 1;
331 			ret = 0;
332 		}
333 		mutex_unlock(&kvm->lock);
334 		break;
335 	case KVM_S390_VM_MEM_CLR_CMMA:
336 		mutex_lock(&kvm->lock);
337 		idx = srcu_read_lock(&kvm->srcu);
338 		s390_reset_cmma(kvm->arch.gmap->mm);
339 		srcu_read_unlock(&kvm->srcu, idx);
340 		mutex_unlock(&kvm->lock);
341 		ret = 0;
342 		break;
343 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
344 		unsigned long new_limit;
345 
346 		if (kvm_is_ucontrol(kvm))
347 			return -EINVAL;
348 
349 		if (get_user(new_limit, (u64 __user *)attr->addr))
350 			return -EFAULT;
351 
352 		if (new_limit > kvm->arch.gmap->asce_end)
353 			return -E2BIG;
354 
355 		ret = -EBUSY;
356 		mutex_lock(&kvm->lock);
357 		if (atomic_read(&kvm->online_vcpus) == 0) {
358 			/* gmap_alloc will round the limit up */
359 			struct gmap *new = gmap_alloc(current->mm, new_limit);
360 
361 			if (!new) {
362 				ret = -ENOMEM;
363 			} else {
364 				gmap_free(kvm->arch.gmap);
365 				new->private = kvm;
366 				kvm->arch.gmap = new;
367 				ret = 0;
368 			}
369 		}
370 		mutex_unlock(&kvm->lock);
371 		break;
372 	}
373 	default:
374 		ret = -ENXIO;
375 		break;
376 	}
377 	return ret;
378 }
379 
380 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
381 
382 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
383 {
384 	struct kvm_vcpu *vcpu;
385 	int i;
386 
387 	if (!test_kvm_facility(kvm, 76))
388 		return -EINVAL;
389 
390 	mutex_lock(&kvm->lock);
391 	switch (attr->attr) {
392 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
393 		get_random_bytes(
394 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
395 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
396 		kvm->arch.crypto.aes_kw = 1;
397 		break;
398 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
399 		get_random_bytes(
400 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
401 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
402 		kvm->arch.crypto.dea_kw = 1;
403 		break;
404 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
405 		kvm->arch.crypto.aes_kw = 0;
406 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
407 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
408 		break;
409 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
410 		kvm->arch.crypto.dea_kw = 0;
411 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
412 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
413 		break;
414 	default:
415 		mutex_unlock(&kvm->lock);
416 		return -ENXIO;
417 	}
418 
419 	kvm_for_each_vcpu(i, vcpu, kvm) {
420 		kvm_s390_vcpu_crypto_setup(vcpu);
421 		exit_sie(vcpu);
422 	}
423 	mutex_unlock(&kvm->lock);
424 	return 0;
425 }
426 
427 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
428 {
429 	u8 gtod_high;
430 
431 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
432 					   sizeof(gtod_high)))
433 		return -EFAULT;
434 
435 	if (gtod_high != 0)
436 		return -EINVAL;
437 
438 	return 0;
439 }
440 
441 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
442 {
443 	struct kvm_vcpu *cur_vcpu;
444 	unsigned int vcpu_idx;
445 	u64 host_tod, gtod;
446 	int r;
447 
448 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
449 		return -EFAULT;
450 
451 	r = store_tod_clock(&host_tod);
452 	if (r)
453 		return r;
454 
455 	mutex_lock(&kvm->lock);
456 	kvm->arch.epoch = gtod - host_tod;
457 	kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm) {
458 		cur_vcpu->arch.sie_block->epoch = kvm->arch.epoch;
459 		exit_sie(cur_vcpu);
460 	}
461 	mutex_unlock(&kvm->lock);
462 	return 0;
463 }
464 
465 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
466 {
467 	int ret;
468 
469 	if (attr->flags)
470 		return -EINVAL;
471 
472 	switch (attr->attr) {
473 	case KVM_S390_VM_TOD_HIGH:
474 		ret = kvm_s390_set_tod_high(kvm, attr);
475 		break;
476 	case KVM_S390_VM_TOD_LOW:
477 		ret = kvm_s390_set_tod_low(kvm, attr);
478 		break;
479 	default:
480 		ret = -ENXIO;
481 		break;
482 	}
483 	return ret;
484 }
485 
486 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
487 {
488 	u8 gtod_high = 0;
489 
490 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
491 					 sizeof(gtod_high)))
492 		return -EFAULT;
493 
494 	return 0;
495 }
496 
497 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
498 {
499 	u64 host_tod, gtod;
500 	int r;
501 
502 	r = store_tod_clock(&host_tod);
503 	if (r)
504 		return r;
505 
506 	gtod = host_tod + kvm->arch.epoch;
507 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
508 		return -EFAULT;
509 
510 	return 0;
511 }
512 
513 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
514 {
515 	int ret;
516 
517 	if (attr->flags)
518 		return -EINVAL;
519 
520 	switch (attr->attr) {
521 	case KVM_S390_VM_TOD_HIGH:
522 		ret = kvm_s390_get_tod_high(kvm, attr);
523 		break;
524 	case KVM_S390_VM_TOD_LOW:
525 		ret = kvm_s390_get_tod_low(kvm, attr);
526 		break;
527 	default:
528 		ret = -ENXIO;
529 		break;
530 	}
531 	return ret;
532 }
533 
534 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
535 {
536 	struct kvm_s390_vm_cpu_processor *proc;
537 	int ret = 0;
538 
539 	mutex_lock(&kvm->lock);
540 	if (atomic_read(&kvm->online_vcpus)) {
541 		ret = -EBUSY;
542 		goto out;
543 	}
544 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
545 	if (!proc) {
546 		ret = -ENOMEM;
547 		goto out;
548 	}
549 	if (!copy_from_user(proc, (void __user *)attr->addr,
550 			    sizeof(*proc))) {
551 		memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
552 		       sizeof(struct cpuid));
553 		kvm->arch.model.ibc = proc->ibc;
554 		memcpy(kvm->arch.model.fac->list, proc->fac_list,
555 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
556 	} else
557 		ret = -EFAULT;
558 	kfree(proc);
559 out:
560 	mutex_unlock(&kvm->lock);
561 	return ret;
562 }
563 
564 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
565 {
566 	int ret = -ENXIO;
567 
568 	switch (attr->attr) {
569 	case KVM_S390_VM_CPU_PROCESSOR:
570 		ret = kvm_s390_set_processor(kvm, attr);
571 		break;
572 	}
573 	return ret;
574 }
575 
576 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
577 {
578 	struct kvm_s390_vm_cpu_processor *proc;
579 	int ret = 0;
580 
581 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
582 	if (!proc) {
583 		ret = -ENOMEM;
584 		goto out;
585 	}
586 	memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
587 	proc->ibc = kvm->arch.model.ibc;
588 	memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE);
589 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
590 		ret = -EFAULT;
591 	kfree(proc);
592 out:
593 	return ret;
594 }
595 
596 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
597 {
598 	struct kvm_s390_vm_cpu_machine *mach;
599 	int ret = 0;
600 
601 	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
602 	if (!mach) {
603 		ret = -ENOMEM;
604 		goto out;
605 	}
606 	get_cpu_id((struct cpuid *) &mach->cpuid);
607 	mach->ibc = sclp_get_ibc();
608 	memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
609 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
610 	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
611 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
612 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
613 		ret = -EFAULT;
614 	kfree(mach);
615 out:
616 	return ret;
617 }
618 
619 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
620 {
621 	int ret = -ENXIO;
622 
623 	switch (attr->attr) {
624 	case KVM_S390_VM_CPU_PROCESSOR:
625 		ret = kvm_s390_get_processor(kvm, attr);
626 		break;
627 	case KVM_S390_VM_CPU_MACHINE:
628 		ret = kvm_s390_get_machine(kvm, attr);
629 		break;
630 	}
631 	return ret;
632 }
633 
634 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
635 {
636 	int ret;
637 
638 	switch (attr->group) {
639 	case KVM_S390_VM_MEM_CTRL:
640 		ret = kvm_s390_set_mem_control(kvm, attr);
641 		break;
642 	case KVM_S390_VM_TOD:
643 		ret = kvm_s390_set_tod(kvm, attr);
644 		break;
645 	case KVM_S390_VM_CPU_MODEL:
646 		ret = kvm_s390_set_cpu_model(kvm, attr);
647 		break;
648 	case KVM_S390_VM_CRYPTO:
649 		ret = kvm_s390_vm_set_crypto(kvm, attr);
650 		break;
651 	default:
652 		ret = -ENXIO;
653 		break;
654 	}
655 
656 	return ret;
657 }
658 
659 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
660 {
661 	int ret;
662 
663 	switch (attr->group) {
664 	case KVM_S390_VM_MEM_CTRL:
665 		ret = kvm_s390_get_mem_control(kvm, attr);
666 		break;
667 	case KVM_S390_VM_TOD:
668 		ret = kvm_s390_get_tod(kvm, attr);
669 		break;
670 	case KVM_S390_VM_CPU_MODEL:
671 		ret = kvm_s390_get_cpu_model(kvm, attr);
672 		break;
673 	default:
674 		ret = -ENXIO;
675 		break;
676 	}
677 
678 	return ret;
679 }
680 
681 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
682 {
683 	int ret;
684 
685 	switch (attr->group) {
686 	case KVM_S390_VM_MEM_CTRL:
687 		switch (attr->attr) {
688 		case KVM_S390_VM_MEM_ENABLE_CMMA:
689 		case KVM_S390_VM_MEM_CLR_CMMA:
690 		case KVM_S390_VM_MEM_LIMIT_SIZE:
691 			ret = 0;
692 			break;
693 		default:
694 			ret = -ENXIO;
695 			break;
696 		}
697 		break;
698 	case KVM_S390_VM_TOD:
699 		switch (attr->attr) {
700 		case KVM_S390_VM_TOD_LOW:
701 		case KVM_S390_VM_TOD_HIGH:
702 			ret = 0;
703 			break;
704 		default:
705 			ret = -ENXIO;
706 			break;
707 		}
708 		break;
709 	case KVM_S390_VM_CPU_MODEL:
710 		switch (attr->attr) {
711 		case KVM_S390_VM_CPU_PROCESSOR:
712 		case KVM_S390_VM_CPU_MACHINE:
713 			ret = 0;
714 			break;
715 		default:
716 			ret = -ENXIO;
717 			break;
718 		}
719 		break;
720 	case KVM_S390_VM_CRYPTO:
721 		switch (attr->attr) {
722 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
723 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
724 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
725 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
726 			ret = 0;
727 			break;
728 		default:
729 			ret = -ENXIO;
730 			break;
731 		}
732 		break;
733 	default:
734 		ret = -ENXIO;
735 		break;
736 	}
737 
738 	return ret;
739 }
740 
741 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
742 {
743 	uint8_t *keys;
744 	uint64_t hva;
745 	unsigned long curkey;
746 	int i, r = 0;
747 
748 	if (args->flags != 0)
749 		return -EINVAL;
750 
751 	/* Is this guest using storage keys? */
752 	if (!mm_use_skey(current->mm))
753 		return KVM_S390_GET_SKEYS_NONE;
754 
755 	/* Enforce sane limit on memory allocation */
756 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
757 		return -EINVAL;
758 
759 	keys = kmalloc_array(args->count, sizeof(uint8_t),
760 			     GFP_KERNEL | __GFP_NOWARN);
761 	if (!keys)
762 		keys = vmalloc(sizeof(uint8_t) * args->count);
763 	if (!keys)
764 		return -ENOMEM;
765 
766 	for (i = 0; i < args->count; i++) {
767 		hva = gfn_to_hva(kvm, args->start_gfn + i);
768 		if (kvm_is_error_hva(hva)) {
769 			r = -EFAULT;
770 			goto out;
771 		}
772 
773 		curkey = get_guest_storage_key(current->mm, hva);
774 		if (IS_ERR_VALUE(curkey)) {
775 			r = curkey;
776 			goto out;
777 		}
778 		keys[i] = curkey;
779 	}
780 
781 	r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
782 			 sizeof(uint8_t) * args->count);
783 	if (r)
784 		r = -EFAULT;
785 out:
786 	kvfree(keys);
787 	return r;
788 }
789 
790 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
791 {
792 	uint8_t *keys;
793 	uint64_t hva;
794 	int i, r = 0;
795 
796 	if (args->flags != 0)
797 		return -EINVAL;
798 
799 	/* Enforce sane limit on memory allocation */
800 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
801 		return -EINVAL;
802 
803 	keys = kmalloc_array(args->count, sizeof(uint8_t),
804 			     GFP_KERNEL | __GFP_NOWARN);
805 	if (!keys)
806 		keys = vmalloc(sizeof(uint8_t) * args->count);
807 	if (!keys)
808 		return -ENOMEM;
809 
810 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
811 			   sizeof(uint8_t) * args->count);
812 	if (r) {
813 		r = -EFAULT;
814 		goto out;
815 	}
816 
817 	/* Enable storage key handling for the guest */
818 	s390_enable_skey();
819 
820 	for (i = 0; i < args->count; i++) {
821 		hva = gfn_to_hva(kvm, args->start_gfn + i);
822 		if (kvm_is_error_hva(hva)) {
823 			r = -EFAULT;
824 			goto out;
825 		}
826 
827 		/* Lowest order bit is reserved */
828 		if (keys[i] & 0x01) {
829 			r = -EINVAL;
830 			goto out;
831 		}
832 
833 		r = set_guest_storage_key(current->mm, hva,
834 					  (unsigned long)keys[i], 0);
835 		if (r)
836 			goto out;
837 	}
838 out:
839 	kvfree(keys);
840 	return r;
841 }
842 
843 long kvm_arch_vm_ioctl(struct file *filp,
844 		       unsigned int ioctl, unsigned long arg)
845 {
846 	struct kvm *kvm = filp->private_data;
847 	void __user *argp = (void __user *)arg;
848 	struct kvm_device_attr attr;
849 	int r;
850 
851 	switch (ioctl) {
852 	case KVM_S390_INTERRUPT: {
853 		struct kvm_s390_interrupt s390int;
854 
855 		r = -EFAULT;
856 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
857 			break;
858 		r = kvm_s390_inject_vm(kvm, &s390int);
859 		break;
860 	}
861 	case KVM_ENABLE_CAP: {
862 		struct kvm_enable_cap cap;
863 		r = -EFAULT;
864 		if (copy_from_user(&cap, argp, sizeof(cap)))
865 			break;
866 		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
867 		break;
868 	}
869 	case KVM_CREATE_IRQCHIP: {
870 		struct kvm_irq_routing_entry routing;
871 
872 		r = -EINVAL;
873 		if (kvm->arch.use_irqchip) {
874 			/* Set up dummy routing. */
875 			memset(&routing, 0, sizeof(routing));
876 			kvm_set_irq_routing(kvm, &routing, 0, 0);
877 			r = 0;
878 		}
879 		break;
880 	}
881 	case KVM_SET_DEVICE_ATTR: {
882 		r = -EFAULT;
883 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
884 			break;
885 		r = kvm_s390_vm_set_attr(kvm, &attr);
886 		break;
887 	}
888 	case KVM_GET_DEVICE_ATTR: {
889 		r = -EFAULT;
890 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
891 			break;
892 		r = kvm_s390_vm_get_attr(kvm, &attr);
893 		break;
894 	}
895 	case KVM_HAS_DEVICE_ATTR: {
896 		r = -EFAULT;
897 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
898 			break;
899 		r = kvm_s390_vm_has_attr(kvm, &attr);
900 		break;
901 	}
902 	case KVM_S390_GET_SKEYS: {
903 		struct kvm_s390_skeys args;
904 
905 		r = -EFAULT;
906 		if (copy_from_user(&args, argp,
907 				   sizeof(struct kvm_s390_skeys)))
908 			break;
909 		r = kvm_s390_get_skeys(kvm, &args);
910 		break;
911 	}
912 	case KVM_S390_SET_SKEYS: {
913 		struct kvm_s390_skeys args;
914 
915 		r = -EFAULT;
916 		if (copy_from_user(&args, argp,
917 				   sizeof(struct kvm_s390_skeys)))
918 			break;
919 		r = kvm_s390_set_skeys(kvm, &args);
920 		break;
921 	}
922 	default:
923 		r = -ENOTTY;
924 	}
925 
926 	return r;
927 }
928 
929 static int kvm_s390_query_ap_config(u8 *config)
930 {
931 	u32 fcn_code = 0x04000000UL;
932 	u32 cc = 0;
933 
934 	memset(config, 0, 128);
935 	asm volatile(
936 		"lgr 0,%1\n"
937 		"lgr 2,%2\n"
938 		".long 0xb2af0000\n"		/* PQAP(QCI) */
939 		"0: ipm %0\n"
940 		"srl %0,28\n"
941 		"1:\n"
942 		EX_TABLE(0b, 1b)
943 		: "+r" (cc)
944 		: "r" (fcn_code), "r" (config)
945 		: "cc", "0", "2", "memory"
946 	);
947 
948 	return cc;
949 }
950 
951 static int kvm_s390_apxa_installed(void)
952 {
953 	u8 config[128];
954 	int cc;
955 
956 	if (test_facility(2) && test_facility(12)) {
957 		cc = kvm_s390_query_ap_config(config);
958 
959 		if (cc)
960 			pr_err("PQAP(QCI) failed with cc=%d\n", cc);
961 		else
962 			return config[0] & 0x40;
963 	}
964 
965 	return 0;
966 }
967 
968 static void kvm_s390_set_crycb_format(struct kvm *kvm)
969 {
970 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
971 
972 	if (kvm_s390_apxa_installed())
973 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
974 	else
975 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
976 }
977 
978 static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
979 {
980 	get_cpu_id(cpu_id);
981 	cpu_id->version = 0xff;
982 }
983 
984 static int kvm_s390_crypto_init(struct kvm *kvm)
985 {
986 	if (!test_kvm_facility(kvm, 76))
987 		return 0;
988 
989 	kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
990 					 GFP_KERNEL | GFP_DMA);
991 	if (!kvm->arch.crypto.crycb)
992 		return -ENOMEM;
993 
994 	kvm_s390_set_crycb_format(kvm);
995 
996 	/* Enable AES/DEA protected key functions by default */
997 	kvm->arch.crypto.aes_kw = 1;
998 	kvm->arch.crypto.dea_kw = 1;
999 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1000 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1001 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1002 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1003 
1004 	return 0;
1005 }
1006 
1007 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1008 {
1009 	int i, rc;
1010 	char debug_name[16];
1011 	static unsigned long sca_offset;
1012 
1013 	rc = -EINVAL;
1014 #ifdef CONFIG_KVM_S390_UCONTROL
1015 	if (type & ~KVM_VM_S390_UCONTROL)
1016 		goto out_err;
1017 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1018 		goto out_err;
1019 #else
1020 	if (type)
1021 		goto out_err;
1022 #endif
1023 
1024 	rc = s390_enable_sie();
1025 	if (rc)
1026 		goto out_err;
1027 
1028 	rc = -ENOMEM;
1029 
1030 	kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
1031 	if (!kvm->arch.sca)
1032 		goto out_err;
1033 	spin_lock(&kvm_lock);
1034 	sca_offset = (sca_offset + 16) & 0x7f0;
1035 	kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
1036 	spin_unlock(&kvm_lock);
1037 
1038 	sprintf(debug_name, "kvm-%u", current->pid);
1039 
1040 	kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
1041 	if (!kvm->arch.dbf)
1042 		goto out_err;
1043 
1044 	/*
1045 	 * The architectural maximum number of facility bits is 16 kbit. Storing
1046 	 * them takes 2 kbyte of memory, so we need a full page to hold both the
1047 	 * guest facility list (arch.model.fac->list) and the facility mask
1048 	 * (arch.model.fac->mask). The page address has to fit into 31 bits and
1049 	 * be word aligned.
1050 	 */
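	/*
	 * Sizing sketch (plain arithmetic, no values assumed beyond the
	 * comment above):
	 *   16384 facility bits / 8 bits per byte   = 2048 bytes per array
	 *   2048 bytes (list) + 2048 bytes (mask)   = 4096 bytes = one page
	 */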
1051 	kvm->arch.model.fac =
1052 		(struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1053 	if (!kvm->arch.model.fac)
1054 		goto out_err;
1055 
1056 	/* Populate the facility mask initially. */
1057 	memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
1058 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1059 	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1060 		if (i < kvm_s390_fac_list_mask_size())
1061 			kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i];
1062 		else
1063 			kvm->arch.model.fac->mask[i] = 0UL;
1064 	}
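	/*
	 * In other words, a facility bit is only offered to the guest if the
	 * host reports it via STFLE *and* it is whitelisted in
	 * kvm_s390_fac_list_mask[] above.
	 */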
1065 
1066 	/* Populate the facility list initially. */
1067 	memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask,
1068 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1069 
1070 	kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
1071 	kvm->arch.model.ibc = sclp_get_ibc() & 0x0fff;
1072 
1073 	if (kvm_s390_crypto_init(kvm) < 0)
1074 		goto out_err;
1075 
1076 	spin_lock_init(&kvm->arch.float_int.lock);
1077 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
1078 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1079 	init_waitqueue_head(&kvm->arch.ipte_wq);
1080 	mutex_init(&kvm->arch.ipte_mutex);
1081 
1082 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1083 	VM_EVENT(kvm, 3, "%s", "vm created");
1084 
1085 	if (type & KVM_VM_S390_UCONTROL) {
1086 		kvm->arch.gmap = NULL;
1087 	} else {
1088 		kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1);
1089 		if (!kvm->arch.gmap)
1090 			goto out_err;
1091 		kvm->arch.gmap->private = kvm;
1092 		kvm->arch.gmap->pfault_enabled = 0;
1093 	}
1094 
1095 	kvm->arch.css_support = 0;
1096 	kvm->arch.use_irqchip = 0;
1097 	kvm->arch.epoch = 0;
1098 
1099 	spin_lock_init(&kvm->arch.start_stop_lock);
1100 
1101 	return 0;
1102 out_err:
1103 	kfree(kvm->arch.crypto.crycb);
1104 	free_page((unsigned long)kvm->arch.model.fac);
1105 	debug_unregister(kvm->arch.dbf);
1106 	free_page((unsigned long)(kvm->arch.sca));
1107 	return rc;
1108 }
1109 
1110 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1111 {
1112 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1113 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1114 	kvm_s390_clear_local_irqs(vcpu);
1115 	kvm_clear_async_pf_completion_queue(vcpu);
1116 	if (!kvm_is_ucontrol(vcpu->kvm)) {
1117 		clear_bit(63 - vcpu->vcpu_id,
1118 			  (unsigned long *) &vcpu->kvm->arch.sca->mcn);
1119 		if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
1120 		    (__u64) vcpu->arch.sie_block)
1121 			vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
1122 	}
1123 	smp_mb();
1124 
1125 	if (kvm_is_ucontrol(vcpu->kvm))
1126 		gmap_free(vcpu->arch.gmap);
1127 
1128 	if (kvm_s390_cmma_enabled(vcpu->kvm))
1129 		kvm_s390_vcpu_unsetup_cmma(vcpu);
1130 	free_page((unsigned long)(vcpu->arch.sie_block));
1131 
1132 	kvm_vcpu_uninit(vcpu);
1133 	kmem_cache_free(kvm_vcpu_cache, vcpu);
1134 }
1135 
1136 static void kvm_free_vcpus(struct kvm *kvm)
1137 {
1138 	unsigned int i;
1139 	struct kvm_vcpu *vcpu;
1140 
1141 	kvm_for_each_vcpu(i, vcpu, kvm)
1142 		kvm_arch_vcpu_destroy(vcpu);
1143 
1144 	mutex_lock(&kvm->lock);
1145 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1146 		kvm->vcpus[i] = NULL;
1147 
1148 	atomic_set(&kvm->online_vcpus, 0);
1149 	mutex_unlock(&kvm->lock);
1150 }
1151 
1152 void kvm_arch_destroy_vm(struct kvm *kvm)
1153 {
1154 	kvm_free_vcpus(kvm);
1155 	free_page((unsigned long)kvm->arch.model.fac);
1156 	free_page((unsigned long)(kvm->arch.sca));
1157 	debug_unregister(kvm->arch.dbf);
1158 	kfree(kvm->arch.crypto.crycb);
1159 	if (!kvm_is_ucontrol(kvm))
1160 		gmap_free(kvm->arch.gmap);
1161 	kvm_s390_destroy_adapters(kvm);
1162 	kvm_s390_clear_float_irqs(kvm);
1163 }
1164 
1165 /* Section: vcpu related */
1166 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1167 {
1168 	vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
1169 	if (!vcpu->arch.gmap)
1170 		return -ENOMEM;
1171 	vcpu->arch.gmap->private = vcpu->kvm;
1172 
1173 	return 0;
1174 }
1175 
1176 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1177 {
1178 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1179 	kvm_clear_async_pf_completion_queue(vcpu);
1180 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1181 				    KVM_SYNC_GPRS |
1182 				    KVM_SYNC_ACRS |
1183 				    KVM_SYNC_CRS |
1184 				    KVM_SYNC_ARCH0 |
1185 				    KVM_SYNC_PFAULT;
1186 	if (test_kvm_facility(vcpu->kvm, 129))
1187 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1188 
1189 	if (kvm_is_ucontrol(vcpu->kvm))
1190 		return __kvm_ucontrol_vcpu_init(vcpu);
1191 
1192 	return 0;
1193 }
1194 
1195 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1196 {
1197 	save_fp_ctl(&vcpu->arch.host_fpregs.fpc);
1198 	if (test_kvm_facility(vcpu->kvm, 129))
1199 		save_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs);
1200 	else
1201 		save_fp_regs(vcpu->arch.host_fpregs.fprs);
1202 	save_access_regs(vcpu->arch.host_acrs);
1203 	if (test_kvm_facility(vcpu->kvm, 129)) {
1204 		restore_fp_ctl(&vcpu->run->s.regs.fpc);
1205 		restore_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
1206 	} else {
1207 		restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
1208 		restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
1209 	}
1210 	restore_access_regs(vcpu->run->s.regs.acrs);
1211 	gmap_enable(vcpu->arch.gmap);
1212 	atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1213 }
1214 
1215 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1216 {
1217 	atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1218 	gmap_disable(vcpu->arch.gmap);
1219 	if (test_kvm_facility(vcpu->kvm, 129)) {
1220 		save_fp_ctl(&vcpu->run->s.regs.fpc);
1221 		save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
1222 	} else {
1223 		save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
1224 		save_fp_regs(vcpu->arch.guest_fpregs.fprs);
1225 	}
1226 	save_access_regs(vcpu->run->s.regs.acrs);
1227 	restore_fp_ctl(&vcpu->arch.host_fpregs.fpc);
1228 	if (test_kvm_facility(vcpu->kvm, 129))
1229 		restore_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs);
1230 	else
1231 		restore_fp_regs(vcpu->arch.host_fpregs.fprs);
1232 	restore_access_regs(vcpu->arch.host_acrs);
1233 }
1234 
1235 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1236 {
1237 	/* this equals the initial cpu reset in POP, but we don't switch to ESA */
1238 	vcpu->arch.sie_block->gpsw.mask = 0UL;
1239 	vcpu->arch.sie_block->gpsw.addr = 0UL;
1240 	kvm_s390_set_prefix(vcpu, 0);
1241 	vcpu->arch.sie_block->cputm     = 0UL;
1242 	vcpu->arch.sie_block->ckc       = 0UL;
1243 	vcpu->arch.sie_block->todpr     = 0;
1244 	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1245 	vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
1246 	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1247 	vcpu->arch.guest_fpregs.fpc = 0;
1248 	asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
1249 	vcpu->arch.sie_block->gbea = 1;
1250 	vcpu->arch.sie_block->pp = 0;
1251 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1252 	kvm_clear_async_pf_completion_queue(vcpu);
1253 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1254 		kvm_s390_vcpu_stop(vcpu);
1255 	kvm_s390_clear_local_irqs(vcpu);
1256 }
1257 
1258 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1259 {
1260 	mutex_lock(&vcpu->kvm->lock);
1261 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1262 	mutex_unlock(&vcpu->kvm->lock);
1263 	if (!kvm_is_ucontrol(vcpu->kvm))
1264 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1265 }
1266 
1267 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1268 {
1269 	if (!test_kvm_facility(vcpu->kvm, 76))
1270 		return;
1271 
1272 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1273 
1274 	if (vcpu->kvm->arch.crypto.aes_kw)
1275 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1276 	if (vcpu->kvm->arch.crypto.dea_kw)
1277 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1278 
1279 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1280 }
1281 
1282 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1283 {
1284 	free_page(vcpu->arch.sie_block->cbrlo);
1285 	vcpu->arch.sie_block->cbrlo = 0;
1286 }
1287 
1288 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1289 {
1290 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1291 	if (!vcpu->arch.sie_block->cbrlo)
1292 		return -ENOMEM;
1293 
1294 	vcpu->arch.sie_block->ecb2 |= 0x80;
1295 	vcpu->arch.sie_block->ecb2 &= ~0x08;
1296 	return 0;
1297 }
1298 
1299 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1300 {
1301 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1302 
1303 	vcpu->arch.cpu_id = model->cpu_id;
1304 	vcpu->arch.sie_block->ibc = model->ibc;
1305 	vcpu->arch.sie_block->fac = (int) (long) model->fac->list;
1306 }
1307 
1308 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1309 {
1310 	int rc = 0;
1311 
1312 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1313 						    CPUSTAT_SM |
1314 						    CPUSTAT_STOPPED);
1315 
1316 	if (test_kvm_facility(vcpu->kvm, 8))
1317 		atomic_set_mask(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1318 
1319 	kvm_s390_vcpu_setup_model(vcpu);
1320 
1321 	vcpu->arch.sie_block->ecb   = 6;
1322 	if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
1323 		vcpu->arch.sie_block->ecb |= 0x10;
1324 
1325 	vcpu->arch.sie_block->ecb2  = 8;
1326 	vcpu->arch.sie_block->eca   = 0xC1002000U;
1327 	if (sclp_has_siif())
1328 		vcpu->arch.sie_block->eca |= 1;
1329 	if (sclp_has_sigpif())
1330 		vcpu->arch.sie_block->eca |= 0x10000000U;
1331 	if (test_kvm_facility(vcpu->kvm, 129)) {
1332 		vcpu->arch.sie_block->eca |= 0x00020000;
1333 		vcpu->arch.sie_block->ecd |= 0x20000000;
1334 	}
1335 	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1336 
1337 	if (kvm_s390_cmma_enabled(vcpu->kvm)) {
1338 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
1339 		if (rc)
1340 			return rc;
1341 	}
1342 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1343 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1344 
1345 	kvm_s390_vcpu_crypto_setup(vcpu);
1346 
1347 	return rc;
1348 }
1349 
1350 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1351 				      unsigned int id)
1352 {
1353 	struct kvm_vcpu *vcpu;
1354 	struct sie_page *sie_page;
1355 	int rc = -EINVAL;
1356 
1357 	if (id >= KVM_MAX_VCPUS)
1358 		goto out;
1359 
1360 	rc = -ENOMEM;
1361 
1362 	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1363 	if (!vcpu)
1364 		goto out;
1365 
1366 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1367 	if (!sie_page)
1368 		goto out_free_cpu;
1369 
1370 	vcpu->arch.sie_block = &sie_page->sie_block;
1371 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1372 	vcpu->arch.host_vregs = &sie_page->vregs;
1373 
1374 	vcpu->arch.sie_block->icpua = id;
1375 	if (!kvm_is_ucontrol(kvm)) {
1376 		if (!kvm->arch.sca) {
1377 			WARN_ON_ONCE(1);
1378 			goto out_free_cpu;
1379 		}
1380 		if (!kvm->arch.sca->cpu[id].sda)
1381 			kvm->arch.sca->cpu[id].sda =
1382 				(__u64) vcpu->arch.sie_block;
1383 		vcpu->arch.sie_block->scaoh =
1384 			(__u32)(((__u64)kvm->arch.sca) >> 32);
1385 		vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
1386 		set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
1387 	}
1388 
1389 	spin_lock_init(&vcpu->arch.local_int.lock);
1390 	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
1391 	vcpu->arch.local_int.wq = &vcpu->wq;
1392 	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
1393 
1394 	rc = kvm_vcpu_init(vcpu, kvm, id);
1395 	if (rc)
1396 		goto out_free_sie_block;
1397 	VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
1398 		 vcpu->arch.sie_block);
1399 	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
1400 
1401 	return vcpu;
1402 out_free_sie_block:
1403 	free_page((unsigned long)(vcpu->arch.sie_block));
1404 out_free_cpu:
1405 	kmem_cache_free(kvm_vcpu_cache, vcpu);
1406 out:
1407 	return ERR_PTR(rc);
1408 }
1409 
1410 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
1411 {
1412 	return kvm_s390_vcpu_has_irq(vcpu, 0);
1413 }
1414 
1415 void s390_vcpu_block(struct kvm_vcpu *vcpu)
1416 {
1417 	atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1418 }
1419 
1420 void s390_vcpu_unblock(struct kvm_vcpu *vcpu)
1421 {
1422 	atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1423 }
1424 
1425 /*
1426  * Kick a guest cpu out of SIE and wait until SIE is not running.
1427  * If the CPU is not running (e.g. waiting as idle) the function will
1428  * return immediately. */
1429 void exit_sie(struct kvm_vcpu *vcpu)
1430 {
1431 	atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
1432 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
1433 		cpu_relax();
1434 }
1435 
1436 /* Kick a guest cpu out of SIE and prevent SIE-reentry */
1437 void exit_sie_sync(struct kvm_vcpu *vcpu)
1438 {
1439 	s390_vcpu_block(vcpu);
1440 	exit_sie(vcpu);
1441 }
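/*
 * Typical usage sketch: queue a request for the vcpu, then force it out of
 * SIE so the request is noticed before guest execution resumes, e.g.
 *	kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
 *	exit_sie_sync(vcpu);
 * (this is exactly what kvm_gmap_notifier() below does)
 */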
1442 
1443 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
1444 {
1445 	int i;
1446 	struct kvm *kvm = gmap->private;
1447 	struct kvm_vcpu *vcpu;
1448 
1449 	kvm_for_each_vcpu(i, vcpu, kvm) {
1450 		/* match against both prefix pages */
1451 		if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
1452 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
1453 			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
1454 			exit_sie_sync(vcpu);
1455 		}
1456 	}
1457 }
1458 
1459 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
1460 {
1461 	/* kvm common code refers to this, but never calls it */
1462 	BUG();
1463 	return 0;
1464 }
1465 
1466 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
1467 					   struct kvm_one_reg *reg)
1468 {
1469 	int r = -EINVAL;
1470 
1471 	switch (reg->id) {
1472 	case KVM_REG_S390_TODPR:
1473 		r = put_user(vcpu->arch.sie_block->todpr,
1474 			     (u32 __user *)reg->addr);
1475 		break;
1476 	case KVM_REG_S390_EPOCHDIFF:
1477 		r = put_user(vcpu->arch.sie_block->epoch,
1478 			     (u64 __user *)reg->addr);
1479 		break;
1480 	case KVM_REG_S390_CPU_TIMER:
1481 		r = put_user(vcpu->arch.sie_block->cputm,
1482 			     (u64 __user *)reg->addr);
1483 		break;
1484 	case KVM_REG_S390_CLOCK_COMP:
1485 		r = put_user(vcpu->arch.sie_block->ckc,
1486 			     (u64 __user *)reg->addr);
1487 		break;
1488 	case KVM_REG_S390_PFTOKEN:
1489 		r = put_user(vcpu->arch.pfault_token,
1490 			     (u64 __user *)reg->addr);
1491 		break;
1492 	case KVM_REG_S390_PFCOMPARE:
1493 		r = put_user(vcpu->arch.pfault_compare,
1494 			     (u64 __user *)reg->addr);
1495 		break;
1496 	case KVM_REG_S390_PFSELECT:
1497 		r = put_user(vcpu->arch.pfault_select,
1498 			     (u64 __user *)reg->addr);
1499 		break;
1500 	case KVM_REG_S390_PP:
1501 		r = put_user(vcpu->arch.sie_block->pp,
1502 			     (u64 __user *)reg->addr);
1503 		break;
1504 	case KVM_REG_S390_GBEA:
1505 		r = put_user(vcpu->arch.sie_block->gbea,
1506 			     (u64 __user *)reg->addr);
1507 		break;
1508 	default:
1509 		break;
1510 	}
1511 
1512 	return r;
1513 }
1514 
1515 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
1516 					   struct kvm_one_reg *reg)
1517 {
1518 	int r = -EINVAL;
1519 
1520 	switch (reg->id) {
1521 	case KVM_REG_S390_TODPR:
1522 		r = get_user(vcpu->arch.sie_block->todpr,
1523 			     (u32 __user *)reg->addr);
1524 		break;
1525 	case KVM_REG_S390_EPOCHDIFF:
1526 		r = get_user(vcpu->arch.sie_block->epoch,
1527 			     (u64 __user *)reg->addr);
1528 		break;
1529 	case KVM_REG_S390_CPU_TIMER:
1530 		r = get_user(vcpu->arch.sie_block->cputm,
1531 			     (u64 __user *)reg->addr);
1532 		break;
1533 	case KVM_REG_S390_CLOCK_COMP:
1534 		r = get_user(vcpu->arch.sie_block->ckc,
1535 			     (u64 __user *)reg->addr);
1536 		break;
1537 	case KVM_REG_S390_PFTOKEN:
1538 		r = get_user(vcpu->arch.pfault_token,
1539 			     (u64 __user *)reg->addr);
1540 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1541 			kvm_clear_async_pf_completion_queue(vcpu);
1542 		break;
1543 	case KVM_REG_S390_PFCOMPARE:
1544 		r = get_user(vcpu->arch.pfault_compare,
1545 			     (u64 __user *)reg->addr);
1546 		break;
1547 	case KVM_REG_S390_PFSELECT:
1548 		r = get_user(vcpu->arch.pfault_select,
1549 			     (u64 __user *)reg->addr);
1550 		break;
1551 	case KVM_REG_S390_PP:
1552 		r = get_user(vcpu->arch.sie_block->pp,
1553 			     (u64 __user *)reg->addr);
1554 		break;
1555 	case KVM_REG_S390_GBEA:
1556 		r = get_user(vcpu->arch.sie_block->gbea,
1557 			     (u64 __user *)reg->addr);
1558 		break;
1559 	default:
1560 		break;
1561 	}
1562 
1563 	return r;
1564 }
1565 
1566 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
1567 {
1568 	kvm_s390_vcpu_initial_reset(vcpu);
1569 	return 0;
1570 }
1571 
1572 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1573 {
1574 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
1575 	return 0;
1576 }
1577 
1578 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1579 {
1580 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
1581 	return 0;
1582 }
1583 
1584 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
1585 				  struct kvm_sregs *sregs)
1586 {
1587 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
1588 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
1589 	restore_access_regs(vcpu->run->s.regs.acrs);
1590 	return 0;
1591 }
1592 
1593 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
1594 				  struct kvm_sregs *sregs)
1595 {
1596 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
1597 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
1598 	return 0;
1599 }
1600 
1601 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1602 {
1603 	if (test_fp_ctl(fpu->fpc))
1604 		return -EINVAL;
1605 	memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
1606 	vcpu->arch.guest_fpregs.fpc = fpu->fpc;
1607 	restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
1608 	restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
1609 	return 0;
1610 }
1611 
1612 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1613 {
1614 	memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
1615 	fpu->fpc = vcpu->arch.guest_fpregs.fpc;
1616 	return 0;
1617 }
1618 
1619 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
1620 {
1621 	int rc = 0;
1622 
1623 	if (!is_vcpu_stopped(vcpu))
1624 		rc = -EBUSY;
1625 	else {
1626 		vcpu->run->psw_mask = psw.mask;
1627 		vcpu->run->psw_addr = psw.addr;
1628 	}
1629 	return rc;
1630 }
1631 
1632 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
1633 				  struct kvm_translation *tr)
1634 {
1635 	return -EINVAL; /* not implemented yet */
1636 }
1637 
1638 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
1639 			      KVM_GUESTDBG_USE_HW_BP | \
1640 			      KVM_GUESTDBG_ENABLE)
1641 
1642 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
1643 					struct kvm_guest_debug *dbg)
1644 {
1645 	int rc = 0;
1646 
1647 	vcpu->guest_debug = 0;
1648 	kvm_s390_clear_bp_data(vcpu);
1649 
1650 	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
1651 		return -EINVAL;
1652 
1653 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
1654 		vcpu->guest_debug = dbg->control;
1655 		/* enforce guest PER */
1656 		atomic_set_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1657 
1658 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
1659 			rc = kvm_s390_import_bp_data(vcpu, dbg);
1660 	} else {
1661 		atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1662 		vcpu->arch.guestdbg.last_bp = 0;
1663 	}
1664 
1665 	if (rc) {
1666 		vcpu->guest_debug = 0;
1667 		kvm_s390_clear_bp_data(vcpu);
1668 		atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1669 	}
1670 
1671 	return rc;
1672 }
1673 
1674 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
1675 				    struct kvm_mp_state *mp_state)
1676 {
1677 	/* CHECK_STOP and LOAD are not supported yet */
1678 	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
1679 				       KVM_MP_STATE_OPERATING;
1680 }
1681 
1682 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
1683 				    struct kvm_mp_state *mp_state)
1684 {
1685 	int rc = 0;
1686 
1687 	/* user space knows about this interface - let it control the state */
1688 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
1689 
1690 	switch (mp_state->mp_state) {
1691 	case KVM_MP_STATE_STOPPED:
1692 		kvm_s390_vcpu_stop(vcpu);
1693 		break;
1694 	case KVM_MP_STATE_OPERATING:
1695 		kvm_s390_vcpu_start(vcpu);
1696 		break;
1697 	case KVM_MP_STATE_LOAD:
1698 	case KVM_MP_STATE_CHECK_STOP:
1699 		/* fall through - CHECK_STOP and LOAD are not supported yet */
1700 	default:
1701 		rc = -ENXIO;
1702 	}
1703 
1704 	return rc;
1705 }
1706 
1707 bool kvm_s390_cmma_enabled(struct kvm *kvm)
1708 {
1709 	if (!MACHINE_IS_LPAR)
1710 		return false;
1711 	/* only enable for z10 and later */
1712 	if (!MACHINE_HAS_EDAT1)
1713 		return false;
1714 	if (!kvm->arch.use_cmma)
1715 		return false;
1716 	return true;
1717 }
1718 
1719 static bool ibs_enabled(struct kvm_vcpu *vcpu)
1720 {
1721 	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
1722 }
1723 
1724 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
1725 {
1726 	if (!vcpu->requests)
1727 		return 0;
1728 retry:
1729 	s390_vcpu_unblock(vcpu);
1730 	/*
1731 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
1732 	 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
1733 	 * This ensures that the ipte instruction for this request has
1734 	 * already finished. We might race against a second unmapper that
1735 	 * wants to set the blocking bit. Let's just retry the request loop.
1736 	 */
1737 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
1738 		int rc;
1739 		rc = gmap_ipte_notify(vcpu->arch.gmap,
1740 				      kvm_s390_get_prefix(vcpu),
1741 				      PAGE_SIZE * 2);
1742 		if (rc)
1743 			return rc;
1744 		goto retry;
1745 	}
1746 
1747 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
1748 		vcpu->arch.sie_block->ihcpu = 0xffff;
1749 		goto retry;
1750 	}
1751 
1752 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
1753 		if (!ibs_enabled(vcpu)) {
1754 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
1755 			atomic_set_mask(CPUSTAT_IBS,
1756 					&vcpu->arch.sie_block->cpuflags);
1757 		}
1758 		goto retry;
1759 	}
1760 
1761 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
1762 		if (ibs_enabled(vcpu)) {
1763 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
1764 			atomic_clear_mask(CPUSTAT_IBS,
1765 					  &vcpu->arch.sie_block->cpuflags);
1766 		}
1767 		goto retry;
1768 	}
1769 
1770 	/* nothing to do, just clear the request */
1771 	clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
1772 
1773 	return 0;
1774 }
1775 
1776 /**
1777  * kvm_arch_fault_in_page - fault-in guest page if necessary
1778  * @vcpu: The corresponding virtual cpu
1779  * @gpa: Guest physical address
1780  * @writable: Whether the page should be writable or not
1781  *
1782  * Make sure that a guest page has been faulted-in on the host.
1783  *
1784  * Return: Zero on success, negative error code otherwise.
1785  */
1786 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
1787 {
1788 	return gmap_fault(vcpu->arch.gmap, gpa,
1789 			  writable ? FAULT_FLAG_WRITE : 0);
1790 }
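/*
 * Usage note (see vcpu_post_run() below): when a guest page fault cannot be
 * handled asynchronously, it is resolved synchronously via
 *	rc = kvm_arch_fault_in_page(vcpu, gpa, 1);
 * with gpa taken from current->thread.gmap_addr.
 */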
1791 
1792 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
1793 				      unsigned long token)
1794 {
1795 	struct kvm_s390_interrupt inti;
1796 	struct kvm_s390_irq irq;
1797 
1798 	if (start_token) {
1799 		irq.u.ext.ext_params2 = token;
1800 		irq.type = KVM_S390_INT_PFAULT_INIT;
1801 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
1802 	} else {
1803 		inti.type = KVM_S390_INT_PFAULT_DONE;
1804 		inti.parm64 = token;
1805 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
1806 	}
1807 }
1808 
1809 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
1810 				     struct kvm_async_pf *work)
1811 {
1812 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
1813 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
1814 }
1815 
1816 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
1817 				 struct kvm_async_pf *work)
1818 {
1819 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
1820 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
1821 }
1822 
1823 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
1824 			       struct kvm_async_pf *work)
1825 {
1826 	/* s390 will always inject the page directly */
1827 }
1828 
1829 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
1830 {
1831 	/*
1832 	 * s390 will always inject the page directly, but we still want
1833 	 * kvm_check_async_pf_completion() to clean up.
1834 	 */
1835 	return true;
1836 }
1837 
1838 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
1839 {
1840 	hva_t hva;
1841 	struct kvm_arch_async_pf arch;
1842 	int rc;
1843 
1844 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1845 		return 0;
1846 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
1847 	    vcpu->arch.pfault_compare)
1848 		return 0;
1849 	if (psw_extint_disabled(vcpu))
1850 		return 0;
1851 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
1852 		return 0;
1853 	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
1854 		return 0;
1855 	if (!vcpu->arch.gmap->pfault_enabled)
1856 		return 0;
1857 
1858 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
1859 	hva += current->thread.gmap_addr & ~PAGE_MASK;
1860 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
1861 		return 0;
1862 
1863 	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
1864 	return rc;
1865 }
1866 
1867 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
1868 {
1869 	int rc, cpuflags;
1870 
1871 	/*
1872 	 * On s390, notifications for arriving pages will be delivered directly
1873 	 * to the guest, but the housekeeping for completed pfaults is
1874 	 * handled outside the worker.
1875 	 */
1876 	kvm_check_async_pf_completion(vcpu);
1877 
1878 	memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);
1879 
1880 	if (need_resched())
1881 		schedule();
1882 
1883 	if (test_cpu_flag(CIF_MCCK_PENDING))
1884 		s390_handle_mcck();
1885 
1886 	if (!kvm_is_ucontrol(vcpu->kvm)) {
1887 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
1888 		if (rc)
1889 			return rc;
1890 	}
1891 
1892 	rc = kvm_s390_handle_requests(vcpu);
1893 	if (rc)
1894 		return rc;
1895 
1896 	if (guestdbg_enabled(vcpu)) {
1897 		kvm_s390_backup_guest_per_regs(vcpu);
1898 		kvm_s390_patch_guest_per_regs(vcpu);
1899 	}
1900 
1901 	vcpu->arch.sie_block->icptcode = 0;
1902 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
1903 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
1904 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
1905 
1906 	return 0;
1907 }
1908 
1909 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
1910 {
1911 	psw_t *psw = &vcpu->arch.sie_block->gpsw;
1912 	u8 opcode;
1913 	int rc;
1914 
1915 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
1916 	trace_kvm_s390_sie_fault(vcpu);
1917 
1918 	/*
1919 	 * We want to inject an addressing exception, which is defined as a
1920 	 * suppressing or terminating exception. However, since we came here
1921 	 * by a DAT access exception, the PSW still points to the faulting
1922 	 * instruction since DAT exceptions are nullifying. So we've got
1923 	 * to look up the current opcode to get the length of the instruction
1924 	 * to be able to forward the PSW.
1925 	 */
1926 	rc = read_guest(vcpu, psw->addr, 0, &opcode, 1);
1927 	if (rc)
1928 		return kvm_s390_inject_prog_cond(vcpu, rc);
1929 	psw->addr = __rewind_psw(*psw, -insn_length(opcode));
1930 
1931 	return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
1932 }
1933 
1934 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
1935 {
1936 	int rc = -1;
1937 
1938 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
1939 		   vcpu->arch.sie_block->icptcode);
1940 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
1941 
1942 	if (guestdbg_enabled(vcpu))
1943 		kvm_s390_restore_guest_per_regs(vcpu);
1944 
1945 	if (exit_reason >= 0) {
1946 		rc = 0;
1947 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
1948 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
1949 		vcpu->run->s390_ucontrol.trans_exc_code =
1950 						current->thread.gmap_addr;
1951 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
1952 		rc = -EREMOTE;
1953 
1954 	} else if (current->thread.gmap_pfault) {
1955 		trace_kvm_s390_major_guest_pfault(vcpu);
1956 		current->thread.gmap_pfault = 0;
1957 		if (kvm_arch_setup_async_pf(vcpu)) {
1958 			rc = 0;
1959 		} else {
1960 			gpa_t gpa = current->thread.gmap_addr;
1961 			rc = kvm_arch_fault_in_page(vcpu, gpa, 1);
1962 		}
1963 	}
1964 
1965 	if (rc == -1)
1966 		rc = vcpu_post_run_fault_in_sie(vcpu);
1967 
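	/* sync the shadowed gprs 14 and 15 back from the SIE block */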
1968 	memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
1969 
1970 	if (rc == 0) {
1971 		if (kvm_is_ucontrol(vcpu->kvm))
1972 			/* Don't exit for host interrupts. */
1973 			rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0;
1974 		else
1975 			rc = kvm_handle_sie_intercept(vcpu);
1976 	}
1977 
1978 	return rc;
1979 }
1980 
1981 static int __vcpu_run(struct kvm_vcpu *vcpu)
1982 {
1983 	int rc, exit_reason;
1984 
1985 	/*
1986 	 * We try to hold kvm->srcu during most of vcpu_run (except when
1987 	 * running the guest), so that memslots (and other stuff) are protected.
1988 	 */
1989 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
1990 
1991 	do {
1992 		rc = vcpu_pre_run(vcpu);
1993 		if (rc)
1994 			break;
1995 
1996 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
1997 		/*
1998 		 * As PF_VCPU will be used in the fault handler, there must be
1999 		 * no uaccess between guest_enter and guest_exit.
2000 		 */
2001 		preempt_disable();
2002 		kvm_guest_enter();
2003 		preempt_enable();
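		/*
		 * Run the guest via SIE. A negative exit_reason means SIE
		 * did not complete normally (e.g. a host fault on guest
		 * memory); vcpu_post_run() sorts this out.
		 */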
2004 		exit_reason = sie64a(vcpu->arch.sie_block,
2005 				     vcpu->run->s.regs.gprs);
2006 		kvm_guest_exit();
2007 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2008 
2009 		rc = vcpu_post_run(vcpu, exit_reason);
2010 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2011 
2012 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2013 	return rc;
2014 }
2015 
2016 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2017 {
2018 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2019 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2020 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2021 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2022 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2023 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2024 		/* some control register changes require a TLB flush */
2025 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2026 	}
2027 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2028 		vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
2029 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2030 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2031 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2032 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2033 	}
2034 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2035 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2036 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2037 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2038 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2039 			kvm_clear_async_pf_completion_queue(vcpu);
2040 	}
2041 	kvm_run->kvm_dirty_regs = 0;
2042 }
2043 
2044 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2045 {
2046 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2047 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2048 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2049 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2050 	kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
2051 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2052 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2053 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2054 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2055 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2056 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2057 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2058 }
2059 
2060 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2061 {
2062 	int rc;
2063 	sigset_t sigsaved;
2064 
2065 	if (guestdbg_exit_pending(vcpu)) {
2066 		kvm_s390_prepare_debug_exit(vcpu);
2067 		return 0;
2068 	}
2069 
2070 	if (vcpu->sigset_active)
2071 		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2072 
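	/*
	 * Unless userspace controls the CPU state, implicitly start the
	 * VCPU here; with user control, running a stopped VCPU is an error.
	 */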
2073 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2074 		kvm_s390_vcpu_start(vcpu);
2075 	} else if (is_vcpu_stopped(vcpu)) {
2076 		pr_err_ratelimited("kvm-s390: can't run stopped vcpu %d\n",
2077 				   vcpu->vcpu_id);
2078 		return -EINVAL;
2079 	}
2080 
2081 	sync_regs(vcpu, kvm_run);
2082 
2083 	might_fault();
2084 	rc = __vcpu_run(vcpu);
2085 
2086 	if (signal_pending(current) && !rc) {
2087 		kvm_run->exit_reason = KVM_EXIT_INTR;
2088 		rc = -EINTR;
2089 	}
2090 
2091 	if (guestdbg_exit_pending(vcpu) && !rc)  {
2092 		kvm_s390_prepare_debug_exit(vcpu);
2093 		rc = 0;
2094 	}
2095 
2096 	if (rc == -EOPNOTSUPP) {
2097 		/* intercept cannot be handled in-kernel, prepare kvm_run */
2098 		kvm_run->exit_reason         = KVM_EXIT_S390_SIEIC;
2099 		kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2100 		kvm_run->s390_sieic.ipa      = vcpu->arch.sie_block->ipa;
2101 		kvm_run->s390_sieic.ipb      = vcpu->arch.sie_block->ipb;
2102 		rc = 0;
2103 	}
2104 
2105 	if (rc == -EREMOTE) {
2106 		/* intercept was handled, but userspace support is needed;
2107 		 * kvm_run has been prepared by the handler */
2108 		rc = 0;
2109 	}
2110 
2111 	store_regs(vcpu, kvm_run);
2112 
2113 	if (vcpu->sigset_active)
2114 		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2115 
2116 	vcpu->stat.exit_userspace++;
2117 	return rc;
2118 }
2119 
2120 /*
2121  * store status at address
2122  * we have two special cases:
2123  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2124  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2125  */
2126 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2127 {
2128 	unsigned char archmode = 1;
2129 	unsigned int px;
2130 	u64 clkcomp;
2131 	int rc;
2132 
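	/*
	 * For the two magic addresses, first set the architectural-mode
	 * byte at location 163 to 1 and then store into the save area
	 * at its architected base address.
	 */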
2133 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2134 		if (write_guest_abs(vcpu, 163, &archmode, 1))
2135 			return -EFAULT;
2136 		gpa = SAVE_AREA_BASE;
2137 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2138 		if (write_guest_real(vcpu, 163, &archmode, 1))
2139 			return -EFAULT;
2140 		gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE);
2141 	}
2142 	rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs),
2143 			     vcpu->arch.guest_fpregs.fprs, 128);
2144 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs),
2145 			      vcpu->run->s.regs.gprs, 128);
2146 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw),
2147 			      &vcpu->arch.sie_block->gpsw, 16);
2148 	px = kvm_s390_get_prefix(vcpu);
2149 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg),
2150 			      &px, 4);
2151 	rc |= write_guest_abs(vcpu,
2152 			      gpa + offsetof(struct save_area, fp_ctrl_reg),
2153 			      &vcpu->arch.guest_fpregs.fpc, 4);
2154 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg),
2155 			      &vcpu->arch.sie_block->todpr, 4);
2156 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer),
2157 			      &vcpu->arch.sie_block->cputm, 8);
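	/* the save area holds the clock comparator shifted right by 8 bits */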
2158 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
2159 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp),
2160 			      &clkcomp, 8);
2161 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs),
2162 			      &vcpu->run->s.regs.acrs, 64);
2163 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs),
2164 			      &vcpu->arch.sie_block->gcr, 128);
2165 	return rc ? -EFAULT : 0;
2166 }
2167 
2168 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2169 {
2170 	/*
2171 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2172 	 * copying in vcpu load/put. Let's update our copies before we save
2173 	 * them into the save area.
2174 	 */
2175 	save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
2176 	save_fp_regs(vcpu->arch.guest_fpregs.fprs);
2177 	save_access_regs(vcpu->run->s.regs.acrs);
2178 
2179 	return kvm_s390_store_status_unloaded(vcpu, addr);
2180 }
2181 
2182 /*
2183  * store additional status at address
2184  */
2185 int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
2186 					unsigned long gpa)
2187 {
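	/*
	 * The additional status consists of the 32 vector registers
	 * (512 bytes), stored at a 1024-byte aligned address.
	 */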
2188 	/* Only bits 0-53 are used for address formation */
2189 	if (!(gpa & ~0x3ff))
2190 		return 0;
2191 
2192 	return write_guest_abs(vcpu, gpa & ~0x3ff,
2193 			       (void *)&vcpu->run->s.regs.vrs, 512);
2194 }
2195 
2196 int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
2197 {
2198 	if (!test_kvm_facility(vcpu->kvm, 129))
2199 		return 0;
2200 
2201 	/*
2202 	 * The guest VXRS are in the host VXRS due to the lazy
2203 	 * copying in vcpu load/put. Let's update our copies before we save
2204 	 * them into the save area.
2205 	 */
2206 	save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
2207 
2208 	return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
2209 }
2210 
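/*
 * IBS is toggled via the request mechanism: clear a possibly pending
 * opposite request, queue the new one and kick the VCPU out of SIE so
 * the request is processed before the next guest entry.
 */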
2211 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2212 {
2213 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2214 	kvm_make_request(KVM_REQ_DISABLE_IBS, vcpu);
2215 	exit_sie_sync(vcpu);
2216 }
2217 
2218 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2219 {
2220 	unsigned int i;
2221 	struct kvm_vcpu *vcpu;
2222 
2223 	kvm_for_each_vcpu(i, vcpu, kvm) {
2224 		__disable_ibs_on_vcpu(vcpu);
2225 	}
2226 }
2227 
2228 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2229 {
2230 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2231 	kvm_make_request(KVM_REQ_ENABLE_IBS, vcpu);
2232 	exit_sie_sync(vcpu);
2233 }
2234 
2235 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2236 {
2237 	int i, online_vcpus, started_vcpus = 0;
2238 
2239 	if (!is_vcpu_stopped(vcpu))
2240 		return;
2241 
2242 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2243 	/* Only one cpu at a time may enter/leave the STOPPED state. */
2244 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
2245 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2246 
2247 	for (i = 0; i < online_vcpus; i++) {
2248 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2249 			started_vcpus++;
2250 	}
2251 
2252 	if (started_vcpus == 0) {
2253 		/* we're the only active VCPU -> speed it up */
2254 		__enable_ibs_on_vcpu(vcpu);
2255 	} else if (started_vcpus == 1) {
2256 		/*
2257 		 * As we are starting a second VCPU, we have to disable
2258 		 * the IBS facility on all VCPUs to remove potentially
2259 		 * outstanding ENABLE requests.
2260 		 */
2261 		__disable_ibs_on_all_vcpus(vcpu->kvm);
2262 	}
2263 
2264 	atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2265 	/*
2266 	 * Another VCPU might have used IBS while we were offline.
2267 	 * Let's play safe and flush the VCPU at startup.
2268 	 */
2269 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2270 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2271 	return;
2272 }
2273 
2274 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2275 {
2276 	int i, online_vcpus, started_vcpus = 0;
2277 	struct kvm_vcpu *started_vcpu = NULL;
2278 
2279 	if (is_vcpu_stopped(vcpu))
2280 		return;
2281 
2282 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2283 	/* Only one cpu at a time may enter/leave the STOPPED state. */
2284 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
2285 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2286 
2287 	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
2288 	kvm_s390_clear_stop_irq(vcpu);
2289 
2290 	atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2291 	__disable_ibs_on_vcpu(vcpu);
2292 
2293 	for (i = 0; i < online_vcpus; i++) {
2294 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2295 			started_vcpus++;
2296 			started_vcpu = vcpu->kvm->vcpus[i];
2297 		}
2298 	}
2299 
2300 	if (started_vcpus == 1) {
2301 		/*
2302 		 * As we only have one VCPU left, we want to enable the
2303 		 * IBS facility for that VCPU to speed it up.
2304 		 */
2305 		__enable_ibs_on_vcpu(started_vcpu);
2306 	}
2307 
2308 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2309 	return;
2310 }
2311 
2312 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2313 				     struct kvm_enable_cap *cap)
2314 {
2315 	int r;
2316 
2317 	if (cap->flags)
2318 		return -EINVAL;
2319 
2320 	switch (cap->cap) {
2321 	case KVM_CAP_S390_CSS_SUPPORT:
2322 		if (!vcpu->kvm->arch.css_support) {
2323 			vcpu->kvm->arch.css_support = 1;
2324 			trace_kvm_s390_enable_css(vcpu->kvm);
2325 		}
2326 		r = 0;
2327 		break;
2328 	default:
2329 		r = -EINVAL;
2330 		break;
2331 	}
2332 	return r;
2333 }
2334 
2335 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
2336 				  struct kvm_s390_mem_op *mop)
2337 {
2338 	void __user *uaddr = (void __user *)mop->buf;
2339 	void *tmpbuf = NULL;
2340 	int r, srcu_idx;
2341 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
2342 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
2343 
2344 	if (mop->flags & ~supported_flags)
2345 		return -EINVAL;
2346 
2347 	if (mop->size > MEM_OP_MAX_SIZE)
2348 		return -E2BIG;
2349 
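	/* CHECK_ONLY operations only probe accessibility, no bounce buffer needed */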
2350 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2351 		tmpbuf = vmalloc(mop->size);
2352 		if (!tmpbuf)
2353 			return -ENOMEM;
2354 	}
2355 
2356 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2357 
2358 	switch (mop->op) {
2359 	case KVM_S390_MEMOP_LOGICAL_READ:
2360 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2361 			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false);
2362 			break;
2363 		}
2364 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2365 		if (r == 0) {
2366 			if (copy_to_user(uaddr, tmpbuf, mop->size))
2367 				r = -EFAULT;
2368 		}
2369 		break;
2370 	case KVM_S390_MEMOP_LOGICAL_WRITE:
2371 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2372 			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true);
2373 			break;
2374 		}
2375 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2376 			r = -EFAULT;
2377 			break;
2378 		}
2379 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2380 		break;
2381 	default:
2382 		r = -EINVAL;
2383 	}
2384 
2385 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
2386 
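	/*
	 * A positive return value from the guest access functions is a
	 * program interruption code; inject it if the caller requested
	 * exception injection.
	 */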
2387 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
2388 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
2389 
2390 	vfree(tmpbuf);
2391 	return r;
2392 }
2393 
2394 long kvm_arch_vcpu_ioctl(struct file *filp,
2395 			 unsigned int ioctl, unsigned long arg)
2396 {
2397 	struct kvm_vcpu *vcpu = filp->private_data;
2398 	void __user *argp = (void __user *)arg;
2399 	int idx;
2400 	long r;
2401 
2402 	switch (ioctl) {
2403 	case KVM_S390_IRQ: {
2404 		struct kvm_s390_irq s390irq;
2405 
2406 		r = -EFAULT;
2407 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
2408 			break;
2409 		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2410 		break;
2411 	}
2412 	case KVM_S390_INTERRUPT: {
2413 		struct kvm_s390_interrupt s390int;
2414 		struct kvm_s390_irq s390irq;
2415 
2416 		r = -EFAULT;
2417 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
2418 			break;
2419 		if (s390int_to_s390irq(&s390int, &s390irq))
2420 			return -EINVAL;
2421 		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2422 		break;
2423 	}
2424 	case KVM_S390_STORE_STATUS:
2425 		idx = srcu_read_lock(&vcpu->kvm->srcu);
2426 		r = kvm_s390_vcpu_store_status(vcpu, arg);
2427 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
2428 		break;
2429 	case KVM_S390_SET_INITIAL_PSW: {
2430 		psw_t psw;
2431 
2432 		r = -EFAULT;
2433 		if (copy_from_user(&psw, argp, sizeof(psw)))
2434 			break;
2435 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
2436 		break;
2437 	}
2438 	case KVM_S390_INITIAL_RESET:
2439 		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
2440 		break;
2441 	case KVM_SET_ONE_REG:
2442 	case KVM_GET_ONE_REG: {
2443 		struct kvm_one_reg reg;
2444 		r = -EFAULT;
2445 		if (copy_from_user(&reg, argp, sizeof(reg)))
2446 			break;
2447 		if (ioctl == KVM_SET_ONE_REG)
2448 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
2449 		else
2450 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
2451 		break;
2452 	}
2453 #ifdef CONFIG_KVM_S390_UCONTROL
2454 	case KVM_S390_UCAS_MAP: {
2455 		struct kvm_s390_ucas_mapping ucasmap;
2456 
2457 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2458 			r = -EFAULT;
2459 			break;
2460 		}
2461 
2462 		if (!kvm_is_ucontrol(vcpu->kvm)) {
2463 			r = -EINVAL;
2464 			break;
2465 		}
2466 
2467 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
2468 				     ucasmap.vcpu_addr, ucasmap.length);
2469 		break;
2470 	}
2471 	case KVM_S390_UCAS_UNMAP: {
2472 		struct kvm_s390_ucas_mapping ucasmap;
2473 
2474 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2475 			r = -EFAULT;
2476 			break;
2477 		}
2478 
2479 		if (!kvm_is_ucontrol(vcpu->kvm)) {
2480 			r = -EINVAL;
2481 			break;
2482 		}
2483 
2484 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
2485 			ucasmap.length);
2486 		break;
2487 	}
2488 #endif
2489 	case KVM_S390_VCPU_FAULT: {
2490 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
2491 		break;
2492 	}
2493 	case KVM_ENABLE_CAP:
2494 	{
2495 		struct kvm_enable_cap cap;
2496 		r = -EFAULT;
2497 		if (copy_from_user(&cap, argp, sizeof(cap)))
2498 			break;
2499 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
2500 		break;
2501 	}
2502 	case KVM_S390_MEM_OP: {
2503 		struct kvm_s390_mem_op mem_op;
2504 
2505 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
2506 			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
2507 		else
2508 			r = -EFAULT;
2509 		break;
2510 	}
2511 	case KVM_S390_SET_IRQ_STATE: {
2512 		struct kvm_s390_irq_state irq_state;
2513 
2514 		r = -EFAULT;
2515 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2516 			break;
2517 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
2518 		    irq_state.len == 0 ||
2519 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
2520 			r = -EINVAL;
2521 			break;
2522 		}
2523 		r = kvm_s390_set_irq_state(vcpu,
2524 					   (void __user *) irq_state.buf,
2525 					   irq_state.len);
2526 		break;
2527 	}
2528 	case KVM_S390_GET_IRQ_STATE: {
2529 		struct kvm_s390_irq_state irq_state;
2530 
2531 		r = -EFAULT;
2532 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2533 			break;
2534 		if (irq_state.len == 0) {
2535 			r = -EINVAL;
2536 			break;
2537 		}
2538 		r = kvm_s390_get_irq_state(vcpu,
2539 					   (__u8 __user *)  irq_state.buf,
2540 					   irq_state.len);
2541 		break;
2542 	}
2543 	default:
2544 		r = -ENOTTY;
2545 	}
2546 	return r;
2547 }
2548 
2549 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
2550 {
2551 #ifdef CONFIG_KVM_S390_UCONTROL
2552 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
2553 		 && (kvm_is_ucontrol(vcpu->kvm))) {
2554 		vmf->page = virt_to_page(vcpu->arch.sie_block);
2555 		get_page(vmf->page);
2556 		return 0;
2557 	}
2558 #endif
2559 	return VM_FAULT_SIGBUS;
2560 }
2561 
2562 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
2563 			    unsigned long npages)
2564 {
2565 	return 0;
2566 }
2567 
2568 /* Section: memory related */
2569 int kvm_arch_prepare_memory_region(struct kvm *kvm,
2570 				   struct kvm_memory_slot *memslot,
2571 				   struct kvm_userspace_memory_region *mem,
2572 				   enum kvm_mr_change change)
2573 {
2574 	/* A few sanity checks. Memory slots have to start and end at a
2575 	   segment boundary (1 MB). The memory in userland may be fragmented
2576 	   into various different vmas. It is okay to mmap() and munmap()
2577 	   memory in this slot at any time after doing this call. */
2578 
2579 	if (mem->userspace_addr & 0xffffful)
2580 		return -EINVAL;
2581 
2582 	if (mem->memory_size & 0xffffful)
2583 		return -EINVAL;
2584 
2585 	return 0;
2586 }
2587 
2588 void kvm_arch_commit_memory_region(struct kvm *kvm,
2589 				struct kvm_userspace_memory_region *mem,
2590 				const struct kvm_memory_slot *old,
2591 				enum kvm_mr_change change)
2592 {
2593 	int rc;
2594 
2595 	/* If the basics of the memslot do not change, we do not want
2596 	 * to update the gmap. Every update causes several unnecessary
2597 	 * segment translation exceptions. This is usually handled just
2598 	 * fine by the normal fault handler + gmap, but it will also
2599 	 * cause faults on the prefix page of running guest CPUs.
2600 	 */
2601 	if (old->userspace_addr == mem->userspace_addr &&
2602 	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
2603 	    old->npages * PAGE_SIZE == mem->memory_size)
2604 		return;
2605 
2606 	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
2607 		mem->guest_phys_addr, mem->memory_size);
2608 	if (rc)
2609 		printk(KERN_WARNING "kvm-s390: failed to commit memory region\n");
2610 	return;
2611 }
2612 
2613 static int __init kvm_s390_init(void)
2614 {
2615 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
2616 }
2617 
2618 static void __exit kvm_s390_exit(void)
2619 {
2620 	kvm_exit();
2621 }
2622 
2623 module_init(kvm_s390_init);
2624 module_exit(kvm_s390_exit);
2625 
2626 /*
2627  * Enable autoloading of the kvm module.
2628  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
2629  * since x86 takes a different approach.
2630  */
2631 #include <linux/miscdevice.h>
2632 MODULE_ALIAS_MISCDEV(KVM_MINOR);
2633 MODULE_ALIAS("devname:kvm");
2634