xref: /openbmc/linux/arch/s390/kvm/kvm-s390.c (revision 94cdda6b)
1 /*
2  * hosting zSeries kernel virtual machines
3  *
4  * Copyright IBM Corp. 2008, 2009
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License (version 2 only)
8  * as published by the Free Software Foundation.
9  *
10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
11  *               Christian Borntraeger <borntraeger@de.ibm.com>
12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
14  *               Jason J. Herne <jjherne@us.ibm.com>
15  */
16 
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/module.h>
25 #include <linux/random.h>
26 #include <linux/slab.h>
27 #include <linux/timer.h>
28 #include <linux/vmalloc.h>
29 #include <asm/asm-offsets.h>
30 #include <asm/lowcore.h>
31 #include <asm/pgtable.h>
32 #include <asm/nmi.h>
33 #include <asm/switch_to.h>
34 #include <asm/isc.h>
35 #include <asm/sclp.h>
36 #include "kvm-s390.h"
37 #include "gaccess.h"
38 
39 #define CREATE_TRACE_POINTS
40 #include "trace.h"
41 #include "trace-s390.h"
42 
43 #define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
44 #define LOCAL_IRQS 32
45 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
46 			   (KVM_MAX_VCPUS + LOCAL_IRQS))
47 
48 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
49 
50 struct kvm_stats_debugfs_item debugfs_entries[] = {
51 	{ "userspace_handled", VCPU_STAT(exit_userspace) },
52 	{ "exit_null", VCPU_STAT(exit_null) },
53 	{ "exit_validity", VCPU_STAT(exit_validity) },
54 	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
55 	{ "exit_external_request", VCPU_STAT(exit_external_request) },
56 	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
57 	{ "exit_instruction", VCPU_STAT(exit_instruction) },
58 	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
59 	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
60 	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
61 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
62 	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
63 	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
64 	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
65 	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
66 	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
67 	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
68 	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
69 	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
70 	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
71 	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
72 	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
73 	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
74 	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
75 	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
76 	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
77 	{ "instruction_spx", VCPU_STAT(instruction_spx) },
78 	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
79 	{ "instruction_stap", VCPU_STAT(instruction_stap) },
80 	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
81 	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
82 	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
83 	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
84 	{ "instruction_essa", VCPU_STAT(instruction_essa) },
85 	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
86 	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
87 	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
88 	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
89 	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
90 	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
91 	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
92 	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
93 	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
94 	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
95 	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
96 	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
97 	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
98 	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
99 	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
100 	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
101 	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
102 	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
103 	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
104 	{ "diagnose_10", VCPU_STAT(diagnose_10) },
105 	{ "diagnose_44", VCPU_STAT(diagnose_44) },
106 	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
107 	{ NULL }
108 };
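/*
 * These counters are exported by the generic KVM debugfs code; on a host
 * with debugfs mounted they typically appear as individual files under
 * /sys/kernel/debug/kvm/.  A minimal (hypothetical) way to inspect one of
 * them from the shell:
 *
 *	# cat /sys/kernel/debug/kvm/halt_wakeup
 */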
109 
110 /* upper facilities limit for kvm */
111 unsigned long kvm_s390_fac_list_mask[] = {
112 	0xffe6fffbfcfdfc40UL,
113 	0x005c800000000000UL,
114 };
115 
116 unsigned long kvm_s390_fac_list_mask_size(void)
117 {
118 	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
119 	return ARRAY_SIZE(kvm_s390_fac_list_mask);
120 }
121 
122 static struct gmap_notifier gmap_notifier;
123 
124 /* Section: not file related */
125 int kvm_arch_hardware_enable(void)
126 {
127 	/* every s390 is virtualization enabled ;-) */
128 	return 0;
129 }
130 
131 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
132 
133 int kvm_arch_hardware_setup(void)
134 {
135 	gmap_notifier.notifier_call = kvm_gmap_notifier;
136 	gmap_register_ipte_notifier(&gmap_notifier);
137 	return 0;
138 }
139 
140 void kvm_arch_hardware_unsetup(void)
141 {
142 	gmap_unregister_ipte_notifier(&gmap_notifier);
143 }
144 
145 int kvm_arch_init(void *opaque)
146 {
147 	/* Register floating interrupt controller interface. */
148 	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
149 }
150 
151 /* Section: device related */
152 long kvm_arch_dev_ioctl(struct file *filp,
153 			unsigned int ioctl, unsigned long arg)
154 {
155 	if (ioctl == KVM_S390_ENABLE_SIE)
156 		return s390_enable_sie();
157 	return -EINVAL;
158 }
159 
160 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
161 {
162 	int r;
163 
164 	switch (ext) {
165 	case KVM_CAP_S390_PSW:
166 	case KVM_CAP_S390_GMAP:
167 	case KVM_CAP_SYNC_MMU:
168 #ifdef CONFIG_KVM_S390_UCONTROL
169 	case KVM_CAP_S390_UCONTROL:
170 #endif
171 	case KVM_CAP_ASYNC_PF:
172 	case KVM_CAP_SYNC_REGS:
173 	case KVM_CAP_ONE_REG:
174 	case KVM_CAP_ENABLE_CAP:
175 	case KVM_CAP_S390_CSS_SUPPORT:
176 	case KVM_CAP_IOEVENTFD:
177 	case KVM_CAP_DEVICE_CTRL:
178 	case KVM_CAP_ENABLE_CAP_VM:
179 	case KVM_CAP_S390_IRQCHIP:
180 	case KVM_CAP_VM_ATTRIBUTES:
181 	case KVM_CAP_MP_STATE:
182 	case KVM_CAP_S390_INJECT_IRQ:
183 	case KVM_CAP_S390_USER_SIGP:
184 	case KVM_CAP_S390_USER_STSI:
185 	case KVM_CAP_S390_SKEYS:
186 	case KVM_CAP_S390_IRQ_STATE:
187 		r = 1;
188 		break;
189 	case KVM_CAP_S390_MEM_OP:
190 		r = MEM_OP_MAX_SIZE;
191 		break;
192 	case KVM_CAP_NR_VCPUS:
193 	case KVM_CAP_MAX_VCPUS:
194 		r = KVM_MAX_VCPUS;
195 		break;
196 	case KVM_CAP_NR_MEMSLOTS:
197 		r = KVM_USER_MEM_SLOTS;
198 		break;
199 	case KVM_CAP_S390_COW:
200 		r = MACHINE_HAS_ESOP;
201 		break;
202 	case KVM_CAP_S390_VECTOR_REGISTERS:
203 		r = MACHINE_HAS_VX;
204 		break;
205 	default:
206 		r = 0;
207 	}
208 	return r;
209 }
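/*
 * Userspace probes these capabilities with the KVM_CHECK_EXTENSION ioctl.
 * A hedged sketch (vm_fd is assumed to be an open VM file descriptor
 * obtained via KVM_CREATE_VM; the includes are illustrative):
 *
 *	#include <linux/kvm.h>
 *	#include <sys/ioctl.h>
 *
 *	int max_mem_op = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *
 * A positive return value is the largest transfer size that KVM_S390_MEM_OP
 * will accept (MEM_OP_MAX_SIZE above).
 */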
210 
211 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
212 					struct kvm_memory_slot *memslot)
213 {
214 	gfn_t cur_gfn, last_gfn;
215 	unsigned long address;
216 	struct gmap *gmap = kvm->arch.gmap;
217 
218 	down_read(&gmap->mm->mmap_sem);
219 	/* Loop over all guest pages */
220 	last_gfn = memslot->base_gfn + memslot->npages;
221 	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
222 		address = gfn_to_hva_memslot(memslot, cur_gfn);
223 
224 		if (gmap_test_and_clear_dirty(address, gmap))
225 			mark_page_dirty(kvm, cur_gfn);
226 	}
227 	up_read(&gmap->mm->mmap_sem);
228 }
229 
230 /* Section: vm related */
231 /*
232  * Get (and clear) the dirty memory log for a memory slot.
233  */
234 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
235 			       struct kvm_dirty_log *log)
236 {
237 	int r;
238 	unsigned long n;
239 	struct kvm_memory_slot *memslot;
240 	int is_dirty = 0;
241 
242 	mutex_lock(&kvm->slots_lock);
243 
244 	r = -EINVAL;
245 	if (log->slot >= KVM_USER_MEM_SLOTS)
246 		goto out;
247 
248 	memslot = id_to_memslot(kvm->memslots, log->slot);
249 	r = -ENOENT;
250 	if (!memslot->dirty_bitmap)
251 		goto out;
252 
253 	kvm_s390_sync_dirty_log(kvm, memslot);
254 	r = kvm_get_dirty_log(kvm, log, &is_dirty);
255 	if (r)
256 		goto out;
257 
258 	/* Clear the dirty log */
259 	if (is_dirty) {
260 		n = kvm_dirty_bitmap_bytes(memslot);
261 		memset(memslot->dirty_bitmap, 0, n);
262 	}
263 	r = 0;
264 out:
265 	mutex_unlock(&kvm->slots_lock);
266 	return r;
267 }
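/*
 * A hedged userspace sketch of fetching the dirty log for slot 0.  The
 * bitmap must cover the whole slot (one bit per 4K page); vm_fd and
 * slot_pages are assumptions, not defined in this file:
 *
 *	struct kvm_dirty_log log = { .slot = 0 };
 *
 *	log.dirty_bitmap = calloc(slot_pages / 8 + 1, 1);
 *	if (ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log) < 0)
 *		perror("KVM_GET_DIRTY_LOG");
 */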
268 
269 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
270 {
271 	int r;
272 
273 	if (cap->flags)
274 		return -EINVAL;
275 
276 	switch (cap->cap) {
277 	case KVM_CAP_S390_IRQCHIP:
278 		kvm->arch.use_irqchip = 1;
279 		r = 0;
280 		break;
281 	case KVM_CAP_S390_USER_SIGP:
282 		kvm->arch.user_sigp = 1;
283 		r = 0;
284 		break;
285 	case KVM_CAP_S390_VECTOR_REGISTERS:
286 		if (MACHINE_HAS_VX) {
287 			set_kvm_facility(kvm->arch.model.fac->mask, 129);
288 			set_kvm_facility(kvm->arch.model.fac->list, 129);
289 			r = 0;
290 		} else
291 			r = -EINVAL;
292 		break;
293 	case KVM_CAP_S390_USER_STSI:
294 		kvm->arch.user_stsi = 1;
295 		r = 0;
296 		break;
297 	default:
298 		r = -EINVAL;
299 		break;
300 	}
301 	return r;
302 }
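/*
 * The VM capabilities above are switched on with KVM_ENABLE_CAP on the VM
 * file descriptor.  Hypothetical sketch enabling user-space SIGP handling
 * (vm_fd is an assumption):
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *
 *	if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap) < 0)
 *		perror("KVM_ENABLE_CAP");
 */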
303 
304 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
305 {
306 	int ret;
307 
308 	switch (attr->attr) {
309 	case KVM_S390_VM_MEM_LIMIT_SIZE:
310 		ret = 0;
311 		if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr))
312 			ret = -EFAULT;
313 		break;
314 	default:
315 		ret = -ENXIO;
316 		break;
317 	}
318 	return ret;
319 }
320 
321 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
322 {
323 	int ret;
324 	unsigned int idx;
325 	switch (attr->attr) {
326 	case KVM_S390_VM_MEM_ENABLE_CMMA:
327 		ret = -EBUSY;
328 		mutex_lock(&kvm->lock);
329 		if (atomic_read(&kvm->online_vcpus) == 0) {
330 			kvm->arch.use_cmma = 1;
331 			ret = 0;
332 		}
333 		mutex_unlock(&kvm->lock);
334 		break;
335 	case KVM_S390_VM_MEM_CLR_CMMA:
336 		mutex_lock(&kvm->lock);
337 		idx = srcu_read_lock(&kvm->srcu);
338 		s390_reset_cmma(kvm->arch.gmap->mm);
339 		srcu_read_unlock(&kvm->srcu, idx);
340 		mutex_unlock(&kvm->lock);
341 		ret = 0;
342 		break;
343 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
344 		unsigned long new_limit;
345 
346 		if (kvm_is_ucontrol(kvm))
347 			return -EINVAL;
348 
349 		if (get_user(new_limit, (u64 __user *)attr->addr))
350 			return -EFAULT;
351 
352 		if (new_limit > kvm->arch.gmap->asce_end)
353 			return -E2BIG;
354 
355 		ret = -EBUSY;
356 		mutex_lock(&kvm->lock);
357 		if (atomic_read(&kvm->online_vcpus) == 0) {
358 			/* gmap_alloc will round the limit up */
359 			struct gmap *new = gmap_alloc(current->mm, new_limit);
360 
361 			if (!new) {
362 				ret = -ENOMEM;
363 			} else {
364 				gmap_free(kvm->arch.gmap);
365 				new->private = kvm;
366 				kvm->arch.gmap = new;
367 				ret = 0;
368 			}
369 		}
370 		mutex_unlock(&kvm->lock);
371 		break;
372 	}
373 	default:
374 		ret = -ENXIO;
375 		break;
376 	}
377 	return ret;
378 }
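/*
 * The memory-control attributes are reached through the VM device-attr
 * ioctls.  A sketch enabling CMMA (it must be issued before any vcpu is
 * created, as enforced above; vm_fd is an assumption):
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MEM_CTRL,
 *		.attr  = KVM_S390_VM_MEM_ENABLE_CMMA,
 *	};
 *
 *	if (ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr) < 0)
 *		perror("KVM_SET_DEVICE_ATTR");
 */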
379 
380 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
381 
382 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
383 {
384 	struct kvm_vcpu *vcpu;
385 	int i;
386 
387 	if (!test_kvm_facility(kvm, 76))
388 		return -EINVAL;
389 
390 	mutex_lock(&kvm->lock);
391 	switch (attr->attr) {
392 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
393 		get_random_bytes(
394 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
395 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
396 		kvm->arch.crypto.aes_kw = 1;
397 		break;
398 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
399 		get_random_bytes(
400 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
401 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
402 		kvm->arch.crypto.dea_kw = 1;
403 		break;
404 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
405 		kvm->arch.crypto.aes_kw = 0;
406 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
407 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
408 		break;
409 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
410 		kvm->arch.crypto.dea_kw = 0;
411 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
412 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
413 		break;
414 	default:
415 		mutex_unlock(&kvm->lock);
416 		return -ENXIO;
417 	}
418 
419 	kvm_for_each_vcpu(i, vcpu, kvm) {
420 		kvm_s390_vcpu_crypto_setup(vcpu);
421 		exit_sie(vcpu);
422 	}
423 	mutex_unlock(&kvm->lock);
424 	return 0;
425 }
426 
427 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
428 {
429 	u8 gtod_high;
430 
431 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
432 					   sizeof(gtod_high)))
433 		return -EFAULT;
434 
435 	if (gtod_high != 0)
436 		return -EINVAL;
437 
438 	return 0;
439 }
440 
441 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
442 {
443 	struct kvm_vcpu *cur_vcpu;
444 	unsigned int vcpu_idx;
445 	u64 host_tod, gtod;
446 	int r;
447 
448 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
449 		return -EFAULT;
450 
451 	r = store_tod_clock(&host_tod);
452 	if (r)
453 		return r;
454 
455 	mutex_lock(&kvm->lock);
456 	kvm->arch.epoch = gtod - host_tod;
457 	kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm) {
458 		cur_vcpu->arch.sie_block->epoch = kvm->arch.epoch;
459 		exit_sie(cur_vcpu);
460 	}
461 	mutex_unlock(&kvm->lock);
462 	return 0;
463 }
464 
465 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
466 {
467 	int ret;
468 
469 	if (attr->flags)
470 		return -EINVAL;
471 
472 	switch (attr->attr) {
473 	case KVM_S390_VM_TOD_HIGH:
474 		ret = kvm_s390_set_tod_high(kvm, attr);
475 		break;
476 	case KVM_S390_VM_TOD_LOW:
477 		ret = kvm_s390_set_tod_low(kvm, attr);
478 		break;
479 	default:
480 		ret = -ENXIO;
481 		break;
482 	}
483 	return ret;
484 }
485 
486 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
487 {
488 	u8 gtod_high = 0;
489 
490 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
491 					 sizeof(gtod_high)))
492 		return -EFAULT;
493 
494 	return 0;
495 }
496 
497 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
498 {
499 	u64 host_tod, gtod;
500 	int r;
501 
502 	r = store_tod_clock(&host_tod);
503 	if (r)
504 		return r;
505 
506 	gtod = host_tod + kvm->arch.epoch;
507 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
508 		return -EFAULT;
509 
510 	return 0;
511 }
512 
513 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
514 {
515 	int ret;
516 
517 	if (attr->flags)
518 		return -EINVAL;
519 
520 	switch (attr->attr) {
521 	case KVM_S390_VM_TOD_HIGH:
522 		ret = kvm_s390_get_tod_high(kvm, attr);
523 		break;
524 	case KVM_S390_VM_TOD_LOW:
525 		ret = kvm_s390_get_tod_low(kvm, attr);
526 		break;
527 	default:
528 		ret = -ENXIO;
529 		break;
530 	}
531 	return ret;
532 }
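/*
 * Reading the guest TOD from userspace goes through the same device-attr
 * interface; a hedged sketch (vm_fd is an assumption):
 *
 *	__u64 gtod;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_TOD,
 *		.attr  = KVM_S390_VM_TOD_LOW,
 *		.addr  = (__u64)&gtod,
 *	};
 *
 *	if (ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr) == 0)
 *		printf("guest TOD low: 0x%llx\n", (unsigned long long)gtod);
 */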
533 
534 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
535 {
536 	struct kvm_s390_vm_cpu_processor *proc;
537 	int ret = 0;
538 
539 	mutex_lock(&kvm->lock);
540 	if (atomic_read(&kvm->online_vcpus)) {
541 		ret = -EBUSY;
542 		goto out;
543 	}
544 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
545 	if (!proc) {
546 		ret = -ENOMEM;
547 		goto out;
548 	}
549 	if (!copy_from_user(proc, (void __user *)attr->addr,
550 			    sizeof(*proc))) {
551 		memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
552 		       sizeof(struct cpuid));
553 		kvm->arch.model.ibc = proc->ibc;
554 		memcpy(kvm->arch.model.fac->list, proc->fac_list,
555 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
556 	} else
557 		ret = -EFAULT;
558 	kfree(proc);
559 out:
560 	mutex_unlock(&kvm->lock);
561 	return ret;
562 }
563 
564 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
565 {
566 	int ret = -ENXIO;
567 
568 	switch (attr->attr) {
569 	case KVM_S390_VM_CPU_PROCESSOR:
570 		ret = kvm_s390_set_processor(kvm, attr);
571 		break;
572 	}
573 	return ret;
574 }
575 
576 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
577 {
578 	struct kvm_s390_vm_cpu_processor *proc;
579 	int ret = 0;
580 
581 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
582 	if (!proc) {
583 		ret = -ENOMEM;
584 		goto out;
585 	}
586 	memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
587 	proc->ibc = kvm->arch.model.ibc;
588 	memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE);
589 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
590 		ret = -EFAULT;
591 	kfree(proc);
592 out:
593 	return ret;
594 }
595 
596 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
597 {
598 	struct kvm_s390_vm_cpu_machine *mach;
599 	int ret = 0;
600 
601 	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
602 	if (!mach) {
603 		ret = -ENOMEM;
604 		goto out;
605 	}
606 	get_cpu_id((struct cpuid *) &mach->cpuid);
607 	mach->ibc = sclp_get_ibc();
608 	memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
609 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
610 	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
611 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
612 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
613 		ret = -EFAULT;
614 	kfree(mach);
615 out:
616 	return ret;
617 }
618 
619 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
620 {
621 	int ret = -ENXIO;
622 
623 	switch (attr->attr) {
624 	case KVM_S390_VM_CPU_PROCESSOR:
625 		ret = kvm_s390_get_processor(kvm, attr);
626 		break;
627 	case KVM_S390_VM_CPU_MACHINE:
628 		ret = kvm_s390_get_machine(kvm, attr);
629 		break;
630 	}
631 	return ret;
632 }
633 
634 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
635 {
636 	int ret;
637 
638 	switch (attr->group) {
639 	case KVM_S390_VM_MEM_CTRL:
640 		ret = kvm_s390_set_mem_control(kvm, attr);
641 		break;
642 	case KVM_S390_VM_TOD:
643 		ret = kvm_s390_set_tod(kvm, attr);
644 		break;
645 	case KVM_S390_VM_CPU_MODEL:
646 		ret = kvm_s390_set_cpu_model(kvm, attr);
647 		break;
648 	case KVM_S390_VM_CRYPTO:
649 		ret = kvm_s390_vm_set_crypto(kvm, attr);
650 		break;
651 	default:
652 		ret = -ENXIO;
653 		break;
654 	}
655 
656 	return ret;
657 }
658 
659 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
660 {
661 	int ret;
662 
663 	switch (attr->group) {
664 	case KVM_S390_VM_MEM_CTRL:
665 		ret = kvm_s390_get_mem_control(kvm, attr);
666 		break;
667 	case KVM_S390_VM_TOD:
668 		ret = kvm_s390_get_tod(kvm, attr);
669 		break;
670 	case KVM_S390_VM_CPU_MODEL:
671 		ret = kvm_s390_get_cpu_model(kvm, attr);
672 		break;
673 	default:
674 		ret = -ENXIO;
675 		break;
676 	}
677 
678 	return ret;
679 }
680 
681 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
682 {
683 	int ret;
684 
685 	switch (attr->group) {
686 	case KVM_S390_VM_MEM_CTRL:
687 		switch (attr->attr) {
688 		case KVM_S390_VM_MEM_ENABLE_CMMA:
689 		case KVM_S390_VM_MEM_CLR_CMMA:
690 		case KVM_S390_VM_MEM_LIMIT_SIZE:
691 			ret = 0;
692 			break;
693 		default:
694 			ret = -ENXIO;
695 			break;
696 		}
697 		break;
698 	case KVM_S390_VM_TOD:
699 		switch (attr->attr) {
700 		case KVM_S390_VM_TOD_LOW:
701 		case KVM_S390_VM_TOD_HIGH:
702 			ret = 0;
703 			break;
704 		default:
705 			ret = -ENXIO;
706 			break;
707 		}
708 		break;
709 	case KVM_S390_VM_CPU_MODEL:
710 		switch (attr->attr) {
711 		case KVM_S390_VM_CPU_PROCESSOR:
712 		case KVM_S390_VM_CPU_MACHINE:
713 			ret = 0;
714 			break;
715 		default:
716 			ret = -ENXIO;
717 			break;
718 		}
719 		break;
720 	case KVM_S390_VM_CRYPTO:
721 		switch (attr->attr) {
722 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
723 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
724 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
725 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
726 			ret = 0;
727 			break;
728 		default:
729 			ret = -ENXIO;
730 			break;
731 		}
732 		break;
733 	default:
734 		ret = -ENXIO;
735 		break;
736 	}
737 
738 	return ret;
739 }
740 
741 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
742 {
743 	uint8_t *keys;
744 	uint64_t hva;
745 	unsigned long curkey;
746 	int i, r = 0;
747 
748 	if (args->flags != 0)
749 		return -EINVAL;
750 
751 	/* Is this guest using storage keys? */
752 	if (!mm_use_skey(current->mm))
753 		return KVM_S390_GET_SKEYS_NONE;
754 
755 	/* Enforce sane limit on memory allocation */
756 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
757 		return -EINVAL;
758 
759 	keys = kmalloc_array(args->count, sizeof(uint8_t),
760 			     GFP_KERNEL | __GFP_NOWARN);
761 	if (!keys)
762 		keys = vmalloc(sizeof(uint8_t) * args->count);
763 	if (!keys)
764 		return -ENOMEM;
765 
766 	for (i = 0; i < args->count; i++) {
767 		hva = gfn_to_hva(kvm, args->start_gfn + i);
768 		if (kvm_is_error_hva(hva)) {
769 			r = -EFAULT;
770 			goto out;
771 		}
772 
773 		curkey = get_guest_storage_key(current->mm, hva);
774 		if (IS_ERR_VALUE(curkey)) {
775 			r = curkey;
776 			goto out;
777 		}
778 		keys[i] = curkey;
779 	}
780 
781 	r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
782 			 sizeof(uint8_t) * args->count);
783 	if (r)
784 		r = -EFAULT;
785 out:
786 	kvfree(keys);
787 	return r;
788 }
789 
790 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
791 {
792 	uint8_t *keys;
793 	uint64_t hva;
794 	int i, r = 0;
795 
796 	if (args->flags != 0)
797 		return -EINVAL;
798 
799 	/* Enforce sane limit on memory allocation */
800 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
801 		return -EINVAL;
802 
803 	keys = kmalloc_array(args->count, sizeof(uint8_t),
804 			     GFP_KERNEL | __GFP_NOWARN);
805 	if (!keys)
806 		keys = vmalloc(sizeof(uint8_t) * args->count);
807 	if (!keys)
808 		return -ENOMEM;
809 
810 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
811 			   sizeof(uint8_t) * args->count);
812 	if (r) {
813 		r = -EFAULT;
814 		goto out;
815 	}
816 
817 	/* Enable storage key handling for the guest */
818 	s390_enable_skey();
819 
820 	for (i = 0; i < args->count; i++) {
821 		hva = gfn_to_hva(kvm, args->start_gfn + i);
822 		if (kvm_is_error_hva(hva)) {
823 			r = -EFAULT;
824 			goto out;
825 		}
826 
827 		/* Lowest order bit is reserved */
828 		if (keys[i] & 0x01) {
829 			r = -EINVAL;
830 			goto out;
831 		}
832 
833 		r = set_guest_storage_key(current->mm, hva,
834 					  (unsigned long)keys[i], 0);
835 		if (r)
836 			goto out;
837 	}
838 out:
839 	kvfree(keys);
840 	return r;
841 }
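/*
 * Storage keys are transferred in bulk via KVM_S390_GET_SKEYS and
 * KVM_S390_SET_SKEYS.  Hedged sketch reading the keys of the first 256
 * guest frames (vm_fd and the buffer name are assumptions):
 *
 *	uint8_t skeys[256];
 *	struct kvm_s390_skeys args = {
 *		.start_gfn     = 0,
 *		.count         = 256,
 *		.skeydata_addr = (__u64)skeys,
 *	};
 *
 *	if (ioctl(vm_fd, KVM_S390_GET_SKEYS, &args) < 0)
 *		perror("KVM_S390_GET_SKEYS");
 *
 * A return value of KVM_S390_GET_SKEYS_NONE indicates that the guest is not
 * using storage keys (see the mm_use_skey() check above).
 */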
842 
843 long kvm_arch_vm_ioctl(struct file *filp,
844 		       unsigned int ioctl, unsigned long arg)
845 {
846 	struct kvm *kvm = filp->private_data;
847 	void __user *argp = (void __user *)arg;
848 	struct kvm_device_attr attr;
849 	int r;
850 
851 	switch (ioctl) {
852 	case KVM_S390_INTERRUPT: {
853 		struct kvm_s390_interrupt s390int;
854 
855 		r = -EFAULT;
856 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
857 			break;
858 		r = kvm_s390_inject_vm(kvm, &s390int);
859 		break;
860 	}
861 	case KVM_ENABLE_CAP: {
862 		struct kvm_enable_cap cap;
863 		r = -EFAULT;
864 		if (copy_from_user(&cap, argp, sizeof(cap)))
865 			break;
866 		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
867 		break;
868 	}
869 	case KVM_CREATE_IRQCHIP: {
870 		struct kvm_irq_routing_entry routing;
871 
872 		r = -EINVAL;
873 		if (kvm->arch.use_irqchip) {
874 			/* Set up dummy routing. */
875 			memset(&routing, 0, sizeof(routing));
876 			kvm_set_irq_routing(kvm, &routing, 0, 0);
877 			r = 0;
878 		}
879 		break;
880 	}
881 	case KVM_SET_DEVICE_ATTR: {
882 		r = -EFAULT;
883 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
884 			break;
885 		r = kvm_s390_vm_set_attr(kvm, &attr);
886 		break;
887 	}
888 	case KVM_GET_DEVICE_ATTR: {
889 		r = -EFAULT;
890 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
891 			break;
892 		r = kvm_s390_vm_get_attr(kvm, &attr);
893 		break;
894 	}
895 	case KVM_HAS_DEVICE_ATTR: {
896 		r = -EFAULT;
897 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
898 			break;
899 		r = kvm_s390_vm_has_attr(kvm, &attr);
900 		break;
901 	}
902 	case KVM_S390_GET_SKEYS: {
903 		struct kvm_s390_skeys args;
904 
905 		r = -EFAULT;
906 		if (copy_from_user(&args, argp,
907 				   sizeof(struct kvm_s390_skeys)))
908 			break;
909 		r = kvm_s390_get_skeys(kvm, &args);
910 		break;
911 	}
912 	case KVM_S390_SET_SKEYS: {
913 		struct kvm_s390_skeys args;
914 
915 		r = -EFAULT;
916 		if (copy_from_user(&args, argp,
917 				   sizeof(struct kvm_s390_skeys)))
918 			break;
919 		r = kvm_s390_set_skeys(kvm, &args);
920 		break;
921 	}
922 	default:
923 		r = -ENOTTY;
924 	}
925 
926 	return r;
927 }
928 
929 static int kvm_s390_query_ap_config(u8 *config)
930 {
931 	u32 fcn_code = 0x04000000UL;
932 	u32 cc = 0;
933 
934 	memset(config, 0, 128);
935 	asm volatile(
936 		"lgr 0,%1\n"
937 		"lgr 2,%2\n"
938 		".long 0xb2af0000\n"		/* PQAP(QCI) */
939 		"0: ipm %0\n"
940 		"srl %0,28\n"
941 		"1:\n"
942 		EX_TABLE(0b, 1b)
943 		: "+r" (cc)
944 		: "r" (fcn_code), "r" (config)
945 		: "cc", "0", "2", "memory"
946 	);
947 
948 	return cc;
949 }
950 
951 static int kvm_s390_apxa_installed(void)
952 {
953 	u8 config[128];
954 	int cc;
955 
956 	if (test_facility(2) && test_facility(12)) {
957 		cc = kvm_s390_query_ap_config(config);
958 
959 		if (cc)
960 			pr_err("PQAP(QCI) failed with cc=%d", cc);
961 		else
962 			return config[0] & 0x40;
963 	}
964 
965 	return 0;
966 }
967 
968 static void kvm_s390_set_crycb_format(struct kvm *kvm)
969 {
970 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
971 
972 	if (kvm_s390_apxa_installed())
973 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
974 	else
975 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
976 }
977 
978 static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
979 {
980 	get_cpu_id(cpu_id);
981 	cpu_id->version = 0xff;
982 }
983 
984 static int kvm_s390_crypto_init(struct kvm *kvm)
985 {
986 	if (!test_kvm_facility(kvm, 76))
987 		return 0;
988 
989 	kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
990 					 GFP_KERNEL | GFP_DMA);
991 	if (!kvm->arch.crypto.crycb)
992 		return -ENOMEM;
993 
994 	kvm_s390_set_crycb_format(kvm);
995 
996 	/* Enable AES/DEA protected key functions by default */
997 	kvm->arch.crypto.aes_kw = 1;
998 	kvm->arch.crypto.dea_kw = 1;
999 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1000 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1001 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1002 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1003 
1004 	return 0;
1005 }
1006 
1007 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1008 {
1009 	int i, rc;
1010 	char debug_name[16];
1011 	static unsigned long sca_offset;
1012 
1013 	rc = -EINVAL;
1014 #ifdef CONFIG_KVM_S390_UCONTROL
1015 	if (type & ~KVM_VM_S390_UCONTROL)
1016 		goto out_err;
1017 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1018 		goto out_err;
1019 #else
1020 	if (type)
1021 		goto out_err;
1022 #endif
1023 
1024 	rc = s390_enable_sie();
1025 	if (rc)
1026 		goto out_err;
1027 
1028 	rc = -ENOMEM;
1029 
1030 	kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
1031 	if (!kvm->arch.sca)
1032 		goto out_err;
1033 	spin_lock(&kvm_lock);
1034 	sca_offset = (sca_offset + 16) & 0x7f0;
1035 	kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
1036 	spin_unlock(&kvm_lock);
1037 
1038 	sprintf(debug_name, "kvm-%u", current->pid);
1039 
1040 	kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
1041 	if (!kvm->arch.dbf)
1042 		goto out_err;
1043 
1044 	/*
1045 	 * The architectural maximum number of facility bits is 16 kbit, which
1046 	 * takes 2 kbyte of memory to store. We therefore need a full 4 kbyte
1047 	 * page to hold both the guest facility list (arch.model.fac->list)
1048 	 * and the facility mask (arch.model.fac->mask), 2 kbyte each. The
1049 	 * page address has to fit into 31 bits and be word aligned.
1050 	 */
1051 	kvm->arch.model.fac =
1052 		(struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1053 	if (!kvm->arch.model.fac)
1054 		goto out_err;
1055 
1056 	/* Populate the facility mask initially. */
1057 	memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
1058 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1059 	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1060 		if (i < kvm_s390_fac_list_mask_size())
1061 			kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i];
1062 		else
1063 			kvm->arch.model.fac->mask[i] = 0UL;
1064 	}
1065 
1066 	/* Populate the facility list initially. */
1067 	memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask,
1068 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1069 
1070 	kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
1071 	kvm->arch.model.ibc = sclp_get_ibc() & 0x0fff;
1072 
1073 	if (kvm_s390_crypto_init(kvm) < 0)
1074 		goto out_err;
1075 
1076 	spin_lock_init(&kvm->arch.float_int.lock);
1077 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
1078 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1079 	init_waitqueue_head(&kvm->arch.ipte_wq);
1080 	mutex_init(&kvm->arch.ipte_mutex);
1081 
1082 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1083 	VM_EVENT(kvm, 3, "%s", "vm created");
1084 
1085 	if (type & KVM_VM_S390_UCONTROL) {
1086 		kvm->arch.gmap = NULL;
1087 	} else {
1088 		kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1);
1089 		if (!kvm->arch.gmap)
1090 			goto out_err;
1091 		kvm->arch.gmap->private = kvm;
1092 		kvm->arch.gmap->pfault_enabled = 0;
1093 	}
1094 
1095 	kvm->arch.css_support = 0;
1096 	kvm->arch.use_irqchip = 0;
1097 	kvm->arch.epoch = 0;
1098 
1099 	spin_lock_init(&kvm->arch.start_stop_lock);
1100 
1101 	return 0;
1102 out_err:
1103 	kfree(kvm->arch.crypto.crycb);
1104 	free_page((unsigned long)kvm->arch.model.fac);
1105 	debug_unregister(kvm->arch.dbf);
1106 	free_page((unsigned long)(kvm->arch.sca));
1107 	return rc;
1108 }
1109 
1110 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1111 {
1112 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1113 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1114 	kvm_s390_clear_local_irqs(vcpu);
1115 	kvm_clear_async_pf_completion_queue(vcpu);
1116 	if (!kvm_is_ucontrol(vcpu->kvm)) {
1117 		clear_bit(63 - vcpu->vcpu_id,
1118 			  (unsigned long *) &vcpu->kvm->arch.sca->mcn);
1119 		if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
1120 		    (__u64) vcpu->arch.sie_block)
1121 			vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
1122 	}
1123 	smp_mb();
1124 
1125 	if (kvm_is_ucontrol(vcpu->kvm))
1126 		gmap_free(vcpu->arch.gmap);
1127 
1128 	if (kvm_s390_cmma_enabled(vcpu->kvm))
1129 		kvm_s390_vcpu_unsetup_cmma(vcpu);
1130 	free_page((unsigned long)(vcpu->arch.sie_block));
1131 
1132 	kvm_vcpu_uninit(vcpu);
1133 	kmem_cache_free(kvm_vcpu_cache, vcpu);
1134 }
1135 
1136 static void kvm_free_vcpus(struct kvm *kvm)
1137 {
1138 	unsigned int i;
1139 	struct kvm_vcpu *vcpu;
1140 
1141 	kvm_for_each_vcpu(i, vcpu, kvm)
1142 		kvm_arch_vcpu_destroy(vcpu);
1143 
1144 	mutex_lock(&kvm->lock);
1145 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1146 		kvm->vcpus[i] = NULL;
1147 
1148 	atomic_set(&kvm->online_vcpus, 0);
1149 	mutex_unlock(&kvm->lock);
1150 }
1151 
1152 void kvm_arch_destroy_vm(struct kvm *kvm)
1153 {
1154 	kvm_free_vcpus(kvm);
1155 	free_page((unsigned long)kvm->arch.model.fac);
1156 	free_page((unsigned long)(kvm->arch.sca));
1157 	debug_unregister(kvm->arch.dbf);
1158 	kfree(kvm->arch.crypto.crycb);
1159 	if (!kvm_is_ucontrol(kvm))
1160 		gmap_free(kvm->arch.gmap);
1161 	kvm_s390_destroy_adapters(kvm);
1162 	kvm_s390_clear_float_irqs(kvm);
1163 }
1164 
1165 /* Section: vcpu related */
1166 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1167 {
1168 	vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
1169 	if (!vcpu->arch.gmap)
1170 		return -ENOMEM;
1171 	vcpu->arch.gmap->private = vcpu->kvm;
1172 
1173 	return 0;
1174 }
1175 
1176 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1177 {
1178 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1179 	kvm_clear_async_pf_completion_queue(vcpu);
1180 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1181 				    KVM_SYNC_GPRS |
1182 				    KVM_SYNC_ACRS |
1183 				    KVM_SYNC_CRS |
1184 				    KVM_SYNC_ARCH0 |
1185 				    KVM_SYNC_PFAULT;
1186 	if (test_kvm_facility(vcpu->kvm, 129))
1187 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1188 
1189 	if (kvm_is_ucontrol(vcpu->kvm))
1190 		return __kvm_ucontrol_vcpu_init(vcpu);
1191 
1192 	return 0;
1193 }
1194 
1195 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1196 {
1197 	save_fp_ctl(&vcpu->arch.host_fpregs.fpc);
1198 	if (test_kvm_facility(vcpu->kvm, 129))
1199 		save_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs);
1200 	else
1201 		save_fp_regs(vcpu->arch.host_fpregs.fprs);
1202 	save_access_regs(vcpu->arch.host_acrs);
1203 	if (test_kvm_facility(vcpu->kvm, 129)) {
1204 		restore_fp_ctl(&vcpu->run->s.regs.fpc);
1205 		restore_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
1206 	} else {
1207 		restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
1208 		restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
1209 	}
1210 	restore_access_regs(vcpu->run->s.regs.acrs);
1211 	gmap_enable(vcpu->arch.gmap);
1212 	atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1213 }
1214 
1215 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1216 {
1217 	atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1218 	gmap_disable(vcpu->arch.gmap);
1219 	if (test_kvm_facility(vcpu->kvm, 129)) {
1220 		save_fp_ctl(&vcpu->run->s.regs.fpc);
1221 		save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
1222 	} else {
1223 		save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
1224 		save_fp_regs(vcpu->arch.guest_fpregs.fprs);
1225 	}
1226 	save_access_regs(vcpu->run->s.regs.acrs);
1227 	restore_fp_ctl(&vcpu->arch.host_fpregs.fpc);
1228 	if (test_kvm_facility(vcpu->kvm, 129))
1229 		restore_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs);
1230 	else
1231 		restore_fp_regs(vcpu->arch.host_fpregs.fprs);
1232 	restore_access_regs(vcpu->arch.host_acrs);
1233 }
1234 
1235 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1236 {
1237 	/* this equals initial cpu reset in pop, but we don't switch to ESA */
1238 	vcpu->arch.sie_block->gpsw.mask = 0UL;
1239 	vcpu->arch.sie_block->gpsw.addr = 0UL;
1240 	kvm_s390_set_prefix(vcpu, 0);
1241 	vcpu->arch.sie_block->cputm     = 0UL;
1242 	vcpu->arch.sie_block->ckc       = 0UL;
1243 	vcpu->arch.sie_block->todpr     = 0;
1244 	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1245 	vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
1246 	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1247 	vcpu->arch.guest_fpregs.fpc = 0;
1248 	asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
1249 	vcpu->arch.sie_block->gbea = 1;
1250 	vcpu->arch.sie_block->pp = 0;
1251 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1252 	kvm_clear_async_pf_completion_queue(vcpu);
1253 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1254 		kvm_s390_vcpu_stop(vcpu);
1255 	kvm_s390_clear_local_irqs(vcpu);
1256 }
1257 
1258 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1259 {
1260 	mutex_lock(&vcpu->kvm->lock);
1261 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1262 	mutex_unlock(&vcpu->kvm->lock);
1263 	if (!kvm_is_ucontrol(vcpu->kvm))
1264 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1265 }
1266 
1267 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1268 {
1269 	if (!test_kvm_facility(vcpu->kvm, 76))
1270 		return;
1271 
1272 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1273 
1274 	if (vcpu->kvm->arch.crypto.aes_kw)
1275 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1276 	if (vcpu->kvm->arch.crypto.dea_kw)
1277 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1278 
1279 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1280 }
1281 
1282 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1283 {
1284 	free_page(vcpu->arch.sie_block->cbrlo);
1285 	vcpu->arch.sie_block->cbrlo = 0;
1286 }
1287 
1288 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1289 {
1290 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1291 	if (!vcpu->arch.sie_block->cbrlo)
1292 		return -ENOMEM;
1293 
1294 	vcpu->arch.sie_block->ecb2 |= 0x80;
1295 	vcpu->arch.sie_block->ecb2 &= ~0x08;
1296 	return 0;
1297 }
1298 
1299 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1300 {
1301 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1302 
1303 	vcpu->arch.cpu_id = model->cpu_id;
1304 	vcpu->arch.sie_block->ibc = model->ibc;
1305 	vcpu->arch.sie_block->fac = (int) (long) model->fac->list;
1306 }
1307 
1308 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1309 {
1310 	int rc = 0;
1311 
1312 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1313 						    CPUSTAT_SM |
1314 						    CPUSTAT_STOPPED |
1315 						    CPUSTAT_GED);
1316 	kvm_s390_vcpu_setup_model(vcpu);
1317 
1318 	vcpu->arch.sie_block->ecb   = 6;
1319 	if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
1320 		vcpu->arch.sie_block->ecb |= 0x10;
1321 
1322 	vcpu->arch.sie_block->ecb2  = 8;
1323 	vcpu->arch.sie_block->eca   = 0xC1002000U;
1324 	if (sclp_has_siif())
1325 		vcpu->arch.sie_block->eca |= 1;
1326 	if (sclp_has_sigpif())
1327 		vcpu->arch.sie_block->eca |= 0x10000000U;
1328 	if (test_kvm_facility(vcpu->kvm, 129)) {
1329 		vcpu->arch.sie_block->eca |= 0x00020000;
1330 		vcpu->arch.sie_block->ecd |= 0x20000000;
1331 	}
1332 	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1333 
1334 	if (kvm_s390_cmma_enabled(vcpu->kvm)) {
1335 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
1336 		if (rc)
1337 			return rc;
1338 	}
1339 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1340 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1341 
1342 	kvm_s390_vcpu_crypto_setup(vcpu);
1343 
1344 	return rc;
1345 }
1346 
1347 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1348 				      unsigned int id)
1349 {
1350 	struct kvm_vcpu *vcpu;
1351 	struct sie_page *sie_page;
1352 	int rc = -EINVAL;
1353 
1354 	if (id >= KVM_MAX_VCPUS)
1355 		goto out;
1356 
1357 	rc = -ENOMEM;
1358 
1359 	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1360 	if (!vcpu)
1361 		goto out;
1362 
1363 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1364 	if (!sie_page)
1365 		goto out_free_cpu;
1366 
1367 	vcpu->arch.sie_block = &sie_page->sie_block;
1368 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1369 	vcpu->arch.host_vregs = &sie_page->vregs;
1370 
1371 	vcpu->arch.sie_block->icpua = id;
1372 	if (!kvm_is_ucontrol(kvm)) {
1373 		if (!kvm->arch.sca) {
1374 			WARN_ON_ONCE(1);
1375 			goto out_free_cpu;
1376 		}
1377 		if (!kvm->arch.sca->cpu[id].sda)
1378 			kvm->arch.sca->cpu[id].sda =
1379 				(__u64) vcpu->arch.sie_block;
1380 		vcpu->arch.sie_block->scaoh =
1381 			(__u32)(((__u64)kvm->arch.sca) >> 32);
1382 		vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
1383 		set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
1384 	}
1385 
1386 	spin_lock_init(&vcpu->arch.local_int.lock);
1387 	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
1388 	vcpu->arch.local_int.wq = &vcpu->wq;
1389 	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
1390 
1391 	rc = kvm_vcpu_init(vcpu, kvm, id);
1392 	if (rc)
1393 		goto out_free_sie_block;
1394 	VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
1395 		 vcpu->arch.sie_block);
1396 	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
1397 
1398 	return vcpu;
1399 out_free_sie_block:
1400 	free_page((unsigned long)(vcpu->arch.sie_block));
1401 out_free_cpu:
1402 	kmem_cache_free(kvm_vcpu_cache, vcpu);
1403 out:
1404 	return ERR_PTR(rc);
1405 }
1406 
1407 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
1408 {
1409 	return kvm_s390_vcpu_has_irq(vcpu, 0);
1410 }
1411 
1412 void s390_vcpu_block(struct kvm_vcpu *vcpu)
1413 {
1414 	atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1415 }
1416 
1417 void s390_vcpu_unblock(struct kvm_vcpu *vcpu)
1418 {
1419 	atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1420 }
1421 
1422 /*
1423  * Kick a guest cpu out of SIE and wait until SIE is not running.
1424  * If the CPU is not running (e.g. waiting as idle), the function will
1425  * return immediately. */
1426 void exit_sie(struct kvm_vcpu *vcpu)
1427 {
1428 	atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
1429 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
1430 		cpu_relax();
1431 }
1432 
1433 /* Kick a guest cpu out of SIE and prevent SIE-reentry */
1434 void exit_sie_sync(struct kvm_vcpu *vcpu)
1435 {
1436 	s390_vcpu_block(vcpu);
1437 	exit_sie(vcpu);
1438 }
1439 
1440 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
1441 {
1442 	int i;
1443 	struct kvm *kvm = gmap->private;
1444 	struct kvm_vcpu *vcpu;
1445 
1446 	kvm_for_each_vcpu(i, vcpu, kvm) {
1447 		/* match against both prefix pages */
1448 		if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
1449 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
1450 			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
1451 			exit_sie_sync(vcpu);
1452 		}
1453 	}
1454 }
1455 
1456 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
1457 {
1458 	/* kvm common code refers to this, but never calls it */
1459 	BUG();
1460 	return 0;
1461 }
1462 
1463 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
1464 					   struct kvm_one_reg *reg)
1465 {
1466 	int r = -EINVAL;
1467 
1468 	switch (reg->id) {
1469 	case KVM_REG_S390_TODPR:
1470 		r = put_user(vcpu->arch.sie_block->todpr,
1471 			     (u32 __user *)reg->addr);
1472 		break;
1473 	case KVM_REG_S390_EPOCHDIFF:
1474 		r = put_user(vcpu->arch.sie_block->epoch,
1475 			     (u64 __user *)reg->addr);
1476 		break;
1477 	case KVM_REG_S390_CPU_TIMER:
1478 		r = put_user(vcpu->arch.sie_block->cputm,
1479 			     (u64 __user *)reg->addr);
1480 		break;
1481 	case KVM_REG_S390_CLOCK_COMP:
1482 		r = put_user(vcpu->arch.sie_block->ckc,
1483 			     (u64 __user *)reg->addr);
1484 		break;
1485 	case KVM_REG_S390_PFTOKEN:
1486 		r = put_user(vcpu->arch.pfault_token,
1487 			     (u64 __user *)reg->addr);
1488 		break;
1489 	case KVM_REG_S390_PFCOMPARE:
1490 		r = put_user(vcpu->arch.pfault_compare,
1491 			     (u64 __user *)reg->addr);
1492 		break;
1493 	case KVM_REG_S390_PFSELECT:
1494 		r = put_user(vcpu->arch.pfault_select,
1495 			     (u64 __user *)reg->addr);
1496 		break;
1497 	case KVM_REG_S390_PP:
1498 		r = put_user(vcpu->arch.sie_block->pp,
1499 			     (u64 __user *)reg->addr);
1500 		break;
1501 	case KVM_REG_S390_GBEA:
1502 		r = put_user(vcpu->arch.sie_block->gbea,
1503 			     (u64 __user *)reg->addr);
1504 		break;
1505 	default:
1506 		break;
1507 	}
1508 
1509 	return r;
1510 }
1511 
1512 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
1513 					   struct kvm_one_reg *reg)
1514 {
1515 	int r = -EINVAL;
1516 
1517 	switch (reg->id) {
1518 	case KVM_REG_S390_TODPR:
1519 		r = get_user(vcpu->arch.sie_block->todpr,
1520 			     (u32 __user *)reg->addr);
1521 		break;
1522 	case KVM_REG_S390_EPOCHDIFF:
1523 		r = get_user(vcpu->arch.sie_block->epoch,
1524 			     (u64 __user *)reg->addr);
1525 		break;
1526 	case KVM_REG_S390_CPU_TIMER:
1527 		r = get_user(vcpu->arch.sie_block->cputm,
1528 			     (u64 __user *)reg->addr);
1529 		break;
1530 	case KVM_REG_S390_CLOCK_COMP:
1531 		r = get_user(vcpu->arch.sie_block->ckc,
1532 			     (u64 __user *)reg->addr);
1533 		break;
1534 	case KVM_REG_S390_PFTOKEN:
1535 		r = get_user(vcpu->arch.pfault_token,
1536 			     (u64 __user *)reg->addr);
1537 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1538 			kvm_clear_async_pf_completion_queue(vcpu);
1539 		break;
1540 	case KVM_REG_S390_PFCOMPARE:
1541 		r = get_user(vcpu->arch.pfault_compare,
1542 			     (u64 __user *)reg->addr);
1543 		break;
1544 	case KVM_REG_S390_PFSELECT:
1545 		r = get_user(vcpu->arch.pfault_select,
1546 			     (u64 __user *)reg->addr);
1547 		break;
1548 	case KVM_REG_S390_PP:
1549 		r = get_user(vcpu->arch.sie_block->pp,
1550 			     (u64 __user *)reg->addr);
1551 		break;
1552 	case KVM_REG_S390_GBEA:
1553 		r = get_user(vcpu->arch.sie_block->gbea,
1554 			     (u64 __user *)reg->addr);
1555 		break;
1556 	default:
1557 		break;
1558 	}
1559 
1560 	return r;
1561 }
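/*
 * These registers are also reachable through the generic ONE_REG API on the
 * vcpu file descriptor.  Hedged sketch reading the TOD programmable register
 * (vcpu_fd is an assumption):
 *
 *	__u32 todpr;
 *	struct kvm_one_reg reg = {
 *		.id   = KVM_REG_S390_TODPR,
 *		.addr = (__u64)&todpr,
 *	};
 *
 *	if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
 *		perror("KVM_GET_ONE_REG");
 */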
1562 
1563 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
1564 {
1565 	kvm_s390_vcpu_initial_reset(vcpu);
1566 	return 0;
1567 }
1568 
1569 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1570 {
1571 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
1572 	return 0;
1573 }
1574 
1575 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1576 {
1577 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
1578 	return 0;
1579 }
1580 
1581 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
1582 				  struct kvm_sregs *sregs)
1583 {
1584 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
1585 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
1586 	restore_access_regs(vcpu->run->s.regs.acrs);
1587 	return 0;
1588 }
1589 
1590 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
1591 				  struct kvm_sregs *sregs)
1592 {
1593 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
1594 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
1595 	return 0;
1596 }
1597 
1598 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1599 {
1600 	if (test_fp_ctl(fpu->fpc))
1601 		return -EINVAL;
1602 	memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
1603 	vcpu->arch.guest_fpregs.fpc = fpu->fpc;
1604 	restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
1605 	restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
1606 	return 0;
1607 }
1608 
1609 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1610 {
1611 	memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
1612 	fpu->fpc = vcpu->arch.guest_fpregs.fpc;
1613 	return 0;
1614 }
1615 
1616 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
1617 {
1618 	int rc = 0;
1619 
1620 	if (!is_vcpu_stopped(vcpu))
1621 		rc = -EBUSY;
1622 	else {
1623 		vcpu->run->psw_mask = psw.mask;
1624 		vcpu->run->psw_addr = psw.addr;
1625 	}
1626 	return rc;
1627 }
1628 
1629 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
1630 				  struct kvm_translation *tr)
1631 {
1632 	return -EINVAL; /* not implemented yet */
1633 }
1634 
1635 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
1636 			      KVM_GUESTDBG_USE_HW_BP | \
1637 			      KVM_GUESTDBG_ENABLE)
1638 
1639 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
1640 					struct kvm_guest_debug *dbg)
1641 {
1642 	int rc = 0;
1643 
1644 	vcpu->guest_debug = 0;
1645 	kvm_s390_clear_bp_data(vcpu);
1646 
1647 	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
1648 		return -EINVAL;
1649 
1650 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
1651 		vcpu->guest_debug = dbg->control;
1652 		/* enforce guest PER */
1653 		atomic_set_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1654 
1655 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
1656 			rc = kvm_s390_import_bp_data(vcpu, dbg);
1657 	} else {
1658 		atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1659 		vcpu->arch.guestdbg.last_bp = 0;
1660 	}
1661 
1662 	if (rc) {
1663 		vcpu->guest_debug = 0;
1664 		kvm_s390_clear_bp_data(vcpu);
1665 		atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1666 	}
1667 
1668 	return rc;
1669 }
1670 
1671 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
1672 				    struct kvm_mp_state *mp_state)
1673 {
1674 	/* CHECK_STOP and LOAD are not supported yet */
1675 	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
1676 				       KVM_MP_STATE_OPERATING;
1677 }
1678 
1679 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
1680 				    struct kvm_mp_state *mp_state)
1681 {
1682 	int rc = 0;
1683 
1684 	/* user space knows about this interface - let it control the state */
1685 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
1686 
1687 	switch (mp_state->mp_state) {
1688 	case KVM_MP_STATE_STOPPED:
1689 		kvm_s390_vcpu_stop(vcpu);
1690 		break;
1691 	case KVM_MP_STATE_OPERATING:
1692 		kvm_s390_vcpu_start(vcpu);
1693 		break;
1694 	case KVM_MP_STATE_LOAD:
1695 	case KVM_MP_STATE_CHECK_STOP:
1696 		/* fall through - CHECK_STOP and LOAD are not supported yet */
1697 	default:
1698 		rc = -ENXIO;
1699 	}
1700 
1701 	return rc;
1702 }
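/*
 * Sketch of userspace stopping a vcpu through the MP-state interface (which,
 * as noted above, also hands cpu state control to userspace); vcpu_fd is an
 * assumption:
 *
 *	struct kvm_mp_state mp_state = { .mp_state = KVM_MP_STATE_STOPPED };
 *
 *	if (ioctl(vcpu_fd, KVM_SET_MP_STATE, &mp_state) < 0)
 *		perror("KVM_SET_MP_STATE");
 */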
1703 
1704 bool kvm_s390_cmma_enabled(struct kvm *kvm)
1705 {
1706 	if (!MACHINE_IS_LPAR)
1707 		return false;
1708 	/* only enable for z10 and later */
1709 	if (!MACHINE_HAS_EDAT1)
1710 		return false;
1711 	if (!kvm->arch.use_cmma)
1712 		return false;
1713 	return true;
1714 }
1715 
1716 static bool ibs_enabled(struct kvm_vcpu *vcpu)
1717 {
1718 	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
1719 }
1720 
1721 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
1722 {
1723 retry:
1724 	s390_vcpu_unblock(vcpu);
1725 	/*
1726 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
1727 	 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
1728 	 * This ensures that the ipte instruction for this request has
1729 	 * already finished. We might race against a second unmapper that
1730 	 * wants to set the blocking bit. Let's just retry the request loop.
1731 	 */
1732 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
1733 		int rc;
1734 		rc = gmap_ipte_notify(vcpu->arch.gmap,
1735 				      kvm_s390_get_prefix(vcpu),
1736 				      PAGE_SIZE * 2);
1737 		if (rc)
1738 			return rc;
1739 		goto retry;
1740 	}
1741 
1742 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
1743 		vcpu->arch.sie_block->ihcpu = 0xffff;
1744 		goto retry;
1745 	}
1746 
1747 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
1748 		if (!ibs_enabled(vcpu)) {
1749 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
1750 			atomic_set_mask(CPUSTAT_IBS,
1751 					&vcpu->arch.sie_block->cpuflags);
1752 		}
1753 		goto retry;
1754 	}
1755 
1756 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
1757 		if (ibs_enabled(vcpu)) {
1758 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
1759 			atomic_clear_mask(CPUSTAT_IBS,
1760 					  &vcpu->arch.sie_block->cpuflags);
1761 		}
1762 		goto retry;
1763 	}
1764 
1765 	/* nothing to do, just clear the request */
1766 	clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
1767 
1768 	return 0;
1769 }
1770 
1771 /**
1772  * kvm_arch_fault_in_page - fault-in guest page if necessary
1773  * @vcpu: The corresponding virtual cpu
1774  * @gpa: Guest physical address
1775  * @writable: Whether the page should be writable or not
1776  *
1777  * Make sure that a guest page has been faulted-in on the host.
1778  *
1779  * Return: Zero on success, negative error code otherwise.
1780  */
1781 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
1782 {
1783 	return gmap_fault(vcpu->arch.gmap, gpa,
1784 			  writable ? FAULT_FLAG_WRITE : 0);
1785 }
1786 
1787 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
1788 				      unsigned long token)
1789 {
1790 	struct kvm_s390_interrupt inti;
1791 	struct kvm_s390_irq irq;
1792 
1793 	if (start_token) {
1794 		irq.u.ext.ext_params2 = token;
1795 		irq.type = KVM_S390_INT_PFAULT_INIT;
1796 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
1797 	} else {
1798 		inti.type = KVM_S390_INT_PFAULT_DONE;
1799 		inti.parm64 = token;
1800 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
1801 	}
1802 }
1803 
1804 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
1805 				     struct kvm_async_pf *work)
1806 {
1807 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
1808 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
1809 }
1810 
1811 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
1812 				 struct kvm_async_pf *work)
1813 {
1814 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
1815 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
1816 }
1817 
1818 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
1819 			       struct kvm_async_pf *work)
1820 {
1821 	/* s390 will always inject the page directly */
1822 }
1823 
1824 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
1825 {
1826 	/*
1827 	 * s390 will always inject the page directly,
1828 	 * but we still want check_async_completion to clean up
1829 	 */
1830 	return true;
1831 }
1832 
1833 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
1834 {
1835 	hva_t hva;
1836 	struct kvm_arch_async_pf arch;
1837 	int rc;
1838 
1839 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1840 		return 0;
1841 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
1842 	    vcpu->arch.pfault_compare)
1843 		return 0;
1844 	if (psw_extint_disabled(vcpu))
1845 		return 0;
1846 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
1847 		return 0;
1848 	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
1849 		return 0;
1850 	if (!vcpu->arch.gmap->pfault_enabled)
1851 		return 0;
1852 
1853 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
1854 	hva += current->thread.gmap_addr & ~PAGE_MASK;
1855 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
1856 		return 0;
1857 
1858 	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
1859 	return rc;
1860 }
1861 
1862 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
1863 {
1864 	int rc, cpuflags;
1865 
1866 	/*
1867 	 * On s390, notifications for arriving pages will be delivered directly
1868 	 * to the guest, but the housekeeping for completed pfaults is
1869 	 * handled outside the worker.
1870 	 */
1871 	kvm_check_async_pf_completion(vcpu);
1872 
1873 	memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);
1874 
1875 	if (need_resched())
1876 		schedule();
1877 
1878 	if (test_cpu_flag(CIF_MCCK_PENDING))
1879 		s390_handle_mcck();
1880 
1881 	if (!kvm_is_ucontrol(vcpu->kvm)) {
1882 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
1883 		if (rc)
1884 			return rc;
1885 	}
1886 
1887 	rc = kvm_s390_handle_requests(vcpu);
1888 	if (rc)
1889 		return rc;
1890 
1891 	if (guestdbg_enabled(vcpu)) {
1892 		kvm_s390_backup_guest_per_regs(vcpu);
1893 		kvm_s390_patch_guest_per_regs(vcpu);
1894 	}
1895 
1896 	vcpu->arch.sie_block->icptcode = 0;
1897 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
1898 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
1899 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
1900 
1901 	return 0;
1902 }
1903 
1904 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
1905 {
1906 	psw_t *psw = &vcpu->arch.sie_block->gpsw;
1907 	u8 opcode;
1908 	int rc;
1909 
1910 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
1911 	trace_kvm_s390_sie_fault(vcpu);
1912 
1913 	/*
1914 	 * We want to inject an addressing exception, which is defined as a
1915 	 * suppressing or terminating exception. However, since we came here
1916 	 * by a DAT access exception, the PSW still points to the faulting
1917 	 * instruction since DAT exceptions are nullifying. So we've got
1918 	 * to look up the current opcode to get the length of the instruction
1919 	 * to be able to forward the PSW.
1920 	 */
1921 	rc = read_guest(vcpu, psw->addr, 0, &opcode, 1);
1922 	if (rc)
1923 		return kvm_s390_inject_prog_cond(vcpu, rc);
1924 	psw->addr = __rewind_psw(*psw, -insn_length(opcode));
1925 
1926 	return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
1927 }
1928 
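/*
 * Post-process a SIE exit.  exit_reason >= 0 means a regular intercept.
 * For ucontrol guests a host translation fault is reported to userspace
 * via KVM_EXIT_S390_UCONTROL; other guest page faults are handled through
 * the async pfault mechanism or resolved synchronously.  Anything else
 * that faulted inside SIE gets an addressing exception injected.
 */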
1929 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
1930 {
1931 	int rc = -1;
1932 
1933 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
1934 		   vcpu->arch.sie_block->icptcode);
1935 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
1936 
1937 	if (guestdbg_enabled(vcpu))
1938 		kvm_s390_restore_guest_per_regs(vcpu);
1939 
1940 	if (exit_reason >= 0) {
1941 		rc = 0;
1942 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
1943 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
1944 		vcpu->run->s390_ucontrol.trans_exc_code =
1945 						current->thread.gmap_addr;
1946 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
1947 		rc = -EREMOTE;
1948 
1949 	} else if (current->thread.gmap_pfault) {
1950 		trace_kvm_s390_major_guest_pfault(vcpu);
1951 		current->thread.gmap_pfault = 0;
1952 		if (kvm_arch_setup_async_pf(vcpu)) {
1953 			rc = 0;
1954 		} else {
1955 			gpa_t gpa = current->thread.gmap_addr;
1956 			rc = kvm_arch_fault_in_page(vcpu, gpa, 1);
1957 		}
1958 	}
1959 
1960 	if (rc == -1)
1961 		rc = vcpu_post_run_fault_in_sie(vcpu);
1962 
1963 	memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
1964 
1965 	if (rc == 0) {
1966 		if (kvm_is_ucontrol(vcpu->kvm))
1967 			/* Don't exit for host interrupts. */
1968 			rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0;
1969 		else
1970 			rc = kvm_handle_sie_intercept(vcpu);
1971 	}
1972 
1973 	return rc;
1974 }
1975 
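/*
 * The main run loop: alternate between vcpu_pre_run(), running the guest
 * via sie64a() outside of the srcu read-side critical section, and
 * vcpu_post_run(), until a signal or guest-debug exit is pending or an
 * exit to userspace is required.
 */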
1976 static int __vcpu_run(struct kvm_vcpu *vcpu)
1977 {
1978 	int rc, exit_reason;
1979 
1980 	/*
1981 	 * We try to hold kvm->srcu during most of vcpu_run (except when
1982 	 * running the guest), so that memslots (and other data) are protected.
1983 	 */
1984 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
1985 
1986 	do {
1987 		rc = vcpu_pre_run(vcpu);
1988 		if (rc)
1989 			break;
1990 
1991 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
1992 		/*
1993 		 * As PF_VCPU will be used in the fault handler, there must be
1994 		 * no uaccess between guest_enter and guest_exit.
1995 		 */
1996 		preempt_disable();
1997 		kvm_guest_enter();
1998 		preempt_enable();
1999 		exit_reason = sie64a(vcpu->arch.sie_block,
2000 				     vcpu->run->s.regs.gprs);
2001 		kvm_guest_exit();
2002 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2003 
2004 		rc = vcpu_post_run(vcpu, exit_reason);
2005 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2006 
2007 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2008 	return rc;
2009 }
2010 
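/* Copy the register state that userspace marked dirty in kvm_run into the vcpu. */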
2011 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2012 {
2013 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2014 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2015 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2016 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2017 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2018 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2019 		/* some control register changes require a tlb flush */
2020 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2021 	}
2022 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2023 		vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
2024 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2025 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2026 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2027 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2028 	}
2029 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2030 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2031 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2032 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2033 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2034 			kvm_clear_async_pf_completion_queue(vcpu);
2035 	}
2036 	kvm_run->kvm_dirty_regs = 0;
2037 }
2038 
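/* Mirror of sync_regs(): copy the current vcpu state back into kvm_run. */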
2039 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2040 {
2041 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2042 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2043 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2044 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2045 	kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
2046 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2047 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2048 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2049 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2050 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2051 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2052 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2053 }
2054 
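/*
 * The KVM_RUN ioctl: sync registers from kvm_run, run the vcpu and map
 * internal return codes (-EOPNOTSUPP, -EREMOTE, pending signals, guest
 * debug exits) to the exit reasons reported back to userspace.
 */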
2055 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2056 {
2057 	int rc;
2058 	sigset_t sigsaved;
2059 
2060 	if (guestdbg_exit_pending(vcpu)) {
2061 		kvm_s390_prepare_debug_exit(vcpu);
2062 		return 0;
2063 	}
2064 
2065 	if (vcpu->sigset_active)
2066 		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2067 
2068 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2069 		kvm_s390_vcpu_start(vcpu);
2070 	} else if (is_vcpu_stopped(vcpu)) {
2071 		pr_err_ratelimited("kvm-s390: can't run stopped vcpu %d\n",
2072 				   vcpu->vcpu_id);
2073 		return -EINVAL;
2074 	}
2075 
2076 	sync_regs(vcpu, kvm_run);
2077 
2078 	might_fault();
2079 	rc = __vcpu_run(vcpu);
2080 
2081 	if (signal_pending(current) && !rc) {
2082 		kvm_run->exit_reason = KVM_EXIT_INTR;
2083 		rc = -EINTR;
2084 	}
2085 
2086 	if (guestdbg_exit_pending(vcpu) && !rc)  {
2087 		kvm_s390_prepare_debug_exit(vcpu);
2088 		rc = 0;
2089 	}
2090 
2091 	if (rc == -EOPNOTSUPP) {
2092 		/* intercept cannot be handled in-kernel, prepare kvm_run */
2093 		kvm_run->exit_reason         = KVM_EXIT_S390_SIEIC;
2094 		kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2095 		kvm_run->s390_sieic.ipa      = vcpu->arch.sie_block->ipa;
2096 		kvm_run->s390_sieic.ipb      = vcpu->arch.sie_block->ipb;
2097 		rc = 0;
2098 	}
2099 
2100 	if (rc == -EREMOTE) {
2101 		/* intercept was handled, but userspace support is needed;
2102 		 * kvm_run has been prepared by the handler */
2103 		rc = 0;
2104 	}
2105 
2106 	store_regs(vcpu, kvm_run);
2107 
2108 	if (vcpu->sigset_active)
2109 		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2110 
2111 	vcpu->stat.exit_userspace++;
2112 	return rc;
2113 }
2114 
2115 /*
2116  * store status at address
2117  * we have two special cases:
2118  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2119  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2120  */
2121 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2122 {
2123 	unsigned char archmode = 1;
2124 	unsigned int px;
2125 	u64 clkcomp;
2126 	int rc;
2127 
2128 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2129 		if (write_guest_abs(vcpu, 163, &archmode, 1))
2130 			return -EFAULT;
2131 		gpa = SAVE_AREA_BASE;
2132 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2133 		if (write_guest_real(vcpu, 163, &archmode, 1))
2134 			return -EFAULT;
2135 		gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE);
2136 	}
2137 	rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs),
2138 			     vcpu->arch.guest_fpregs.fprs, 128);
2139 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs),
2140 			      vcpu->run->s.regs.gprs, 128);
2141 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw),
2142 			      &vcpu->arch.sie_block->gpsw, 16);
2143 	px = kvm_s390_get_prefix(vcpu);
2144 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg),
2145 			      &px, 4);
2146 	rc |= write_guest_abs(vcpu,
2147 			      gpa + offsetof(struct save_area, fp_ctrl_reg),
2148 			      &vcpu->arch.guest_fpregs.fpc, 4);
2149 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg),
2150 			      &vcpu->arch.sie_block->todpr, 4);
2151 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer),
2152 			      &vcpu->arch.sie_block->cputm, 8);
2153 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
2154 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp),
2155 			      &clkcomp, 8);
2156 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs),
2157 			      &vcpu->run->s.regs.acrs, 64);
2158 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs),
2159 			      &vcpu->arch.sie_block->gcr, 128);
2160 	return rc ? -EFAULT : 0;
2161 }
2162 
2163 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2164 {
2165 	/*
2166 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2167 	 * copying in vcpu load/put. Let's update our copies before we save
2168 	 * them into the save area.
2169 	 */
2170 	save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
2171 	save_fp_regs(vcpu->arch.guest_fpregs.fprs);
2172 	save_access_regs(vcpu->run->s.regs.acrs);
2173 
2174 	return kvm_s390_store_status_unloaded(vcpu, addr);
2175 }
2176 
2177 /*
2178  * store additional status at address
2179  */
2180 int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
2181 					unsigned long gpa)
2182 {
2183 	/* Only bits 0-53 are used for address formation */
2184 	if (!(gpa & ~0x3ff))
2185 		return 0;
2186 
2187 	return write_guest_abs(vcpu, gpa & ~0x3ff,
2188 			       (void *)&vcpu->run->s.regs.vrs, 512);
2189 }
2190 
2191 int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
2192 {
2193 	if (!test_kvm_facility(vcpu->kvm, 129))
2194 		return 0;
2195 
2196 	/*
2197 	 * The guest VXRS are in the host VXRS due to the lazy
2198 	 * copying in vcpu load/put. Let's update our copies before we save
2199 	 * them into the save area.
2200 	 */
2201 	save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
2202 
2203 	return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
2204 }
2205 
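/*
 * Helpers to toggle the IBS facility on a vcpu: queue the ENABLE/DISABLE
 * request and kick the vcpu out of SIE so the request gets processed.  IBS
 * is only kept enabled while a single vcpu is running (see
 * kvm_s390_vcpu_start/stop below).
 */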
2206 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2207 {
2208 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2209 	kvm_make_request(KVM_REQ_DISABLE_IBS, vcpu);
2210 	exit_sie_sync(vcpu);
2211 }
2212 
2213 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2214 {
2215 	unsigned int i;
2216 	struct kvm_vcpu *vcpu;
2217 
2218 	kvm_for_each_vcpu(i, vcpu, kvm) {
2219 		__disable_ibs_on_vcpu(vcpu);
2220 	}
2221 }
2222 
2223 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2224 {
2225 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2226 	kvm_make_request(KVM_REQ_ENABLE_IBS, vcpu);
2227 	exit_sie_sync(vcpu);
2228 }
2229 
2230 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2231 {
2232 	int i, online_vcpus, started_vcpus = 0;
2233 
2234 	if (!is_vcpu_stopped(vcpu))
2235 		return;
2236 
2237 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2238 	/* Only one cpu at a time may enter/leave the STOPPED state. */
2239 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
2240 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2241 
2242 	for (i = 0; i < online_vcpus; i++) {
2243 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2244 			started_vcpus++;
2245 	}
2246 
2247 	if (started_vcpus == 0) {
2248 		/* we're the only active VCPU -> speed it up */
2249 		__enable_ibs_on_vcpu(vcpu);
2250 	} else if (started_vcpus == 1) {
2251 		/*
2252 		 * As we are starting a second VCPU, we have to disable
2253 		 * the IBS facility on all VCPUs to remove potentially
2254 		 * outstanding ENABLE requests.
2255 		 */
2256 		__disable_ibs_on_all_vcpus(vcpu->kvm);
2257 	}
2258 
2259 	atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2260 	/*
2261 	 * Another VCPU might have used IBS while we were offline.
2262 	 * Let's play safe and flush the VCPU at startup.
2263 	 */
2264 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2265 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2266 	return;
2267 }
2268 
2269 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2270 {
2271 	int i, online_vcpus, started_vcpus = 0;
2272 	struct kvm_vcpu *started_vcpu = NULL;
2273 
2274 	if (is_vcpu_stopped(vcpu))
2275 		return;
2276 
2277 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2278 	/* Only one cpu at a time may enter/leave the STOPPED state. */
2279 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
2280 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2281 
2282 	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
2283 	kvm_s390_clear_stop_irq(vcpu);
2284 
2285 	atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2286 	__disable_ibs_on_vcpu(vcpu);
2287 
2288 	for (i = 0; i < online_vcpus; i++) {
2289 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2290 			started_vcpus++;
2291 			started_vcpu = vcpu->kvm->vcpus[i];
2292 		}
2293 	}
2294 
2295 	if (started_vcpus == 1) {
2296 		/*
2297 		 * As we only have one VCPU left, we want to enable the
2298 		 * IBS facility for that VCPU to speed it up.
2299 		 */
2300 		__enable_ibs_on_vcpu(started_vcpu);
2301 	}
2302 
2303 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2304 	return;
2305 }
2306 
2307 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2308 				     struct kvm_enable_cap *cap)
2309 {
2310 	int r;
2311 
2312 	if (cap->flags)
2313 		return -EINVAL;
2314 
2315 	switch (cap->cap) {
2316 	case KVM_CAP_S390_CSS_SUPPORT:
2317 		if (!vcpu->kvm->arch.css_support) {
2318 			vcpu->kvm->arch.css_support = 1;
2319 			trace_kvm_s390_enable_css(vcpu->kvm);
2320 		}
2321 		r = 0;
2322 		break;
2323 	default:
2324 		r = -EINVAL;
2325 		break;
2326 	}
2327 	return r;
2328 }
2329 
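/*
 * Handle the KVM_S390_MEM_OP vcpu ioctl: read or write guest logical memory
 * through a temporary kernel buffer, or only check accessibility when
 * KVM_S390_MEMOP_F_CHECK_ONLY is set.  A positive return value is a program
 * exception code from the guest access, which is injected into the guest if
 * KVM_S390_MEMOP_F_INJECT_EXCEPTION was requested.
 */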
2330 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
2331 				  struct kvm_s390_mem_op *mop)
2332 {
2333 	void __user *uaddr = (void __user *)mop->buf;
2334 	void *tmpbuf = NULL;
2335 	int r, srcu_idx;
2336 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
2337 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
2338 
2339 	if (mop->flags & ~supported_flags)
2340 		return -EINVAL;
2341 
2342 	if (mop->size > MEM_OP_MAX_SIZE)
2343 		return -E2BIG;
2344 
2345 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2346 		tmpbuf = vmalloc(mop->size);
2347 		if (!tmpbuf)
2348 			return -ENOMEM;
2349 	}
2350 
2351 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2352 
2353 	switch (mop->op) {
2354 	case KVM_S390_MEMOP_LOGICAL_READ:
2355 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2356 			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false);
2357 			break;
2358 		}
2359 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2360 		if (r == 0) {
2361 			if (copy_to_user(uaddr, tmpbuf, mop->size))
2362 				r = -EFAULT;
2363 		}
2364 		break;
2365 	case KVM_S390_MEMOP_LOGICAL_WRITE:
2366 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2367 			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true);
2368 			break;
2369 		}
2370 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2371 			r = -EFAULT;
2372 			break;
2373 		}
2374 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2375 		break;
2376 	default:
2377 		r = -EINVAL;
2378 	}
2379 
2380 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
2381 
2382 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
2383 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
2384 
2385 	vfree(tmpbuf);
2386 	return r;
2387 }
2388 
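/* Dispatcher for the s390 specific vcpu ioctls. */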
2389 long kvm_arch_vcpu_ioctl(struct file *filp,
2390 			 unsigned int ioctl, unsigned long arg)
2391 {
2392 	struct kvm_vcpu *vcpu = filp->private_data;
2393 	void __user *argp = (void __user *)arg;
2394 	int idx;
2395 	long r;
2396 
2397 	switch (ioctl) {
2398 	case KVM_S390_IRQ: {
2399 		struct kvm_s390_irq s390irq;
2400 
2401 		r = -EFAULT;
2402 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
2403 			break;
2404 		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2405 		break;
2406 	}
2407 	case KVM_S390_INTERRUPT: {
2408 		struct kvm_s390_interrupt s390int;
2409 		struct kvm_s390_irq s390irq;
2410 
2411 		r = -EFAULT;
2412 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
2413 			break;
2414 		if (s390int_to_s390irq(&s390int, &s390irq))
2415 			return -EINVAL;
2416 		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2417 		break;
2418 	}
2419 	case KVM_S390_STORE_STATUS:
2420 		idx = srcu_read_lock(&vcpu->kvm->srcu);
2421 		r = kvm_s390_vcpu_store_status(vcpu, arg);
2422 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
2423 		break;
2424 	case KVM_S390_SET_INITIAL_PSW: {
2425 		psw_t psw;
2426 
2427 		r = -EFAULT;
2428 		if (copy_from_user(&psw, argp, sizeof(psw)))
2429 			break;
2430 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
2431 		break;
2432 	}
2433 	case KVM_S390_INITIAL_RESET:
2434 		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
2435 		break;
2436 	case KVM_SET_ONE_REG:
2437 	case KVM_GET_ONE_REG: {
2438 		struct kvm_one_reg reg;
2439 		r = -EFAULT;
2440 		if (copy_from_user(&reg, argp, sizeof(reg)))
2441 			break;
2442 		if (ioctl == KVM_SET_ONE_REG)
2443 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
2444 		else
2445 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
2446 		break;
2447 	}
2448 #ifdef CONFIG_KVM_S390_UCONTROL
2449 	case KVM_S390_UCAS_MAP: {
2450 		struct kvm_s390_ucas_mapping ucasmap;
2451 
2452 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2453 			r = -EFAULT;
2454 			break;
2455 		}
2456 
2457 		if (!kvm_is_ucontrol(vcpu->kvm)) {
2458 			r = -EINVAL;
2459 			break;
2460 		}
2461 
2462 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
2463 				     ucasmap.vcpu_addr, ucasmap.length);
2464 		break;
2465 	}
2466 	case KVM_S390_UCAS_UNMAP: {
2467 		struct kvm_s390_ucas_mapping ucasmap;
2468 
2469 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2470 			r = -EFAULT;
2471 			break;
2472 		}
2473 
2474 		if (!kvm_is_ucontrol(vcpu->kvm)) {
2475 			r = -EINVAL;
2476 			break;
2477 		}
2478 
2479 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
2480 			ucasmap.length);
2481 		break;
2482 	}
2483 #endif
2484 	case KVM_S390_VCPU_FAULT: {
2485 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
2486 		break;
2487 	}
2488 	case KVM_ENABLE_CAP:
2489 	{
2490 		struct kvm_enable_cap cap;
2491 		r = -EFAULT;
2492 		if (copy_from_user(&cap, argp, sizeof(cap)))
2493 			break;
2494 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
2495 		break;
2496 	}
2497 	case KVM_S390_MEM_OP: {
2498 		struct kvm_s390_mem_op mem_op;
2499 
2500 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
2501 			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
2502 		else
2503 			r = -EFAULT;
2504 		break;
2505 	}
2506 	case KVM_S390_SET_IRQ_STATE: {
2507 		struct kvm_s390_irq_state irq_state;
2508 
2509 		r = -EFAULT;
2510 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2511 			break;
2512 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
2513 		    irq_state.len == 0 ||
2514 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
2515 			r = -EINVAL;
2516 			break;
2517 		}
2518 		r = kvm_s390_set_irq_state(vcpu,
2519 					   (void __user *) irq_state.buf,
2520 					   irq_state.len);
2521 		break;
2522 	}
2523 	case KVM_S390_GET_IRQ_STATE: {
2524 		struct kvm_s390_irq_state irq_state;
2525 
2526 		r = -EFAULT;
2527 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2528 			break;
2529 		if (irq_state.len == 0) {
2530 			r = -EINVAL;
2531 			break;
2532 		}
2533 		r = kvm_s390_get_irq_state(vcpu,
2534 					   (__u8 __user *)  irq_state.buf,
2535 					   irq_state.len);
2536 		break;
2537 	}
2538 	default:
2539 		r = -ENOTTY;
2540 	}
2541 	return r;
2542 }
2543 
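/*
 * For ucontrol guests the SIE control block is exposed to userspace by
 * mmap()ing the vcpu fd at KVM_S390_SIE_PAGE_OFFSET; all other faults
 * get SIGBUS.
 */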
2544 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
2545 {
2546 #ifdef CONFIG_KVM_S390_UCONTROL
2547 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
2548 		 && (kvm_is_ucontrol(vcpu->kvm))) {
2549 		vmf->page = virt_to_page(vcpu->arch.sie_block);
2550 		get_page(vmf->page);
2551 		return 0;
2552 	}
2553 #endif
2554 	return VM_FAULT_SIGBUS;
2555 }
2556 
2557 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
2558 			    unsigned long npages)
2559 {
2560 	return 0;
2561 }
2562 
2563 /* Section: memory related */
2564 int kvm_arch_prepare_memory_region(struct kvm *kvm,
2565 				   struct kvm_memory_slot *memslot,
2566 				   struct kvm_userspace_memory_region *mem,
2567 				   enum kvm_mr_change change)
2568 {
2569 	/* A few sanity checks. Memory slots have to start and end at a
2570 	   segment boundary (1MB). The memory in userland may be fragmented
2571 	   into various different vmas. It is okay to mmap() and munmap()
2572 	   memory in this slot after doing this call at any time */
2573 
2574 	if (mem->userspace_addr & 0xffffful)
2575 		return -EINVAL;
2576 
2577 	if (mem->memory_size & 0xffffful)
2578 		return -EINVAL;
2579 
2580 	return 0;
2581 }
2582 
2583 void kvm_arch_commit_memory_region(struct kvm *kvm,
2584 				struct kvm_userspace_memory_region *mem,
2585 				const struct kvm_memory_slot *old,
2586 				enum kvm_mr_change change)
2587 {
2588 	int rc;
2589 
2590 	/* If the basics of the memslot do not change, we do not want
2591 	 * to update the gmap. Every update causes several unnecessary
2592 	 * segment translation exceptions. This is usually handled just
2593 	 * fine by the normal fault handler + gmap, but it will also
2594 	 * cause faults on the prefix page of running guest CPUs.
2595 	 */
2596 	if (old->userspace_addr == mem->userspace_addr &&
2597 	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
2598 	    old->npages * PAGE_SIZE == mem->memory_size)
2599 		return;
2600 
2601 	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
2602 		mem->guest_phys_addr, mem->memory_size);
2603 	if (rc)
2604 		printk(KERN_WARNING "kvm-s390: failed to commit memory region\n");
2605 	return;
2606 }
2607 
2608 static int __init kvm_s390_init(void)
2609 {
2610 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
2611 }
2612 
2613 static void __exit kvm_s390_exit(void)
2614 {
2615 	kvm_exit();
2616 }
2617 
2618 module_init(kvm_s390_init);
2619 module_exit(kvm_s390_exit);
2620 
2621 /*
2622  * Enable autoloading of the kvm module.
2623  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
2624  * since x86 takes a different approach.
2625  */
2626 #include <linux/miscdevice.h>
2627 MODULE_ALIAS_MISCDEV(KVM_MINOR);
2628 MODULE_ALIAS("devname:kvm");
2629