xref: /openbmc/linux/arch/s390/kvm/kvm-s390.c (revision e23feb16)
/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 */

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/pgtable.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/facility.h>
#include <asm/sclp.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU

struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "diagnose_10", VCPU_STAT(diagnose_10) },
	{ "diagnose_44", VCPU_STAT(diagnose_44) },
	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
	{ NULL }
};

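/*
 * Facility list reported to guests: a masked copy of the host STFLE
 * facility list, allocated and initialized in kvm_s390_init() below.
 */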
unsigned long *vfacilities;
static struct gmap_notifier gmap_notifier;

/* test availability of vfacility */
static inline int test_vfacility(unsigned long nr)
{
	return __test_facility(nr, (void *) vfacilities);
}

/* Section: not file related */
int kvm_arch_hardware_enable(void *garbage)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

void kvm_arch_hardware_disable(void *garbage)
{
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);

int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_ipte_notifier(&gmap_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_ipte_notifier(&gmap_notifier);
}

void kvm_arch_check_processor_compat(void *rtn)
{
}

int kvm_arch_init(void *opaque)
{
	return 0;
}

void kvm_arch_exit(void)
{
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}

int kvm_dev_ioctl_check_extension(long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
		r = 1;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_MAX_VCPUS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	default:
		r = 0;
	}
	return r;
}

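/*
 * Userspace queries the extensions advertised above through the
 * KVM_CHECK_EXTENSION ioctl on the /dev/kvm file descriptor. A minimal
 * sketch (illustrative only, error handling omitted):
 *
 *	#include <fcntl.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/kvm.h>
 *
 *	int kvm_fd = open("/dev/kvm", O_RDWR);
 *	int max_vcpus = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPUS);
 *	int has_css = ioctl(kvm_fd, KVM_CHECK_EXTENSION,
 *			    KVM_CAP_S390_CSS_SUPPORT);
 */
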
/* Section: vm related */
/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	return 0;
}

long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}

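/*
 * KVM_S390_INTERRUPT on the VM file descriptor (handled above) injects a
 * floating interrupt. A minimal userspace sketch, illustrative only; the
 * VM fd is assumed to come from KVM_CREATE_VM and "param" is a payload
 * chosen by the caller:
 *
 *	struct kvm_s390_interrupt s390int = {
 *		.type   = KVM_S390_INT_VIRTIO,
 *		.parm   = 0,
 *		.parm64 = param,
 *	};
 *	ioctl(vm_fd, KVM_S390_INTERRUPT, &s390int);
 */
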
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	int rc;
	char debug_name[16];

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	rc = -ENOMEM;

	kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
	if (!kvm->arch.sca)
		goto out_err;

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_nodbf;

	spin_lock_init(&kvm->arch.float_int.lock);
	INIT_LIST_HEAD(&kvm->arch.float_int.list);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "%s", "vm created");

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
	} else {
		kvm->arch.gmap = gmap_alloc(current->mm);
		if (!kvm->arch.gmap)
			goto out_nogmap;
		kvm->arch.gmap->private = kvm;
	}

	kvm->arch.css_support = 0;

	return 0;
out_nogmap:
	debug_unregister(kvm->arch.dbf);
out_nodbf:
	free_page((unsigned long)(kvm->arch.sca));
out_err:
	return rc;
}

void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		clear_bit(63 - vcpu->vcpu_id,
			  (unsigned long *) &vcpu->kvm->arch.sca->mcn);
		if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
		    (__u64) vcpu->arch.sie_block)
			vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
	}
	smp_mb();

	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_free(vcpu->arch.gmap);

	free_page((unsigned long)(vcpu->arch.sie_block));
	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}

static void kvm_free_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}

void kvm_arch_sync_events(struct kvm *kvm)
{
}

void kvm_arch_destroy_vm(struct kvm *kvm)
{
	kvm_free_vcpus(kvm);
	free_page((unsigned long)(kvm->arch.sca));
	debug_unregister(kvm->arch.dbf);
	if (!kvm_is_ucontrol(kvm))
		gmap_free(kvm->arch.gmap);
}

/* Section: vcpu related */
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->arch.gmap = gmap_alloc(current->mm);
		if (!vcpu->arch.gmap)
			return -ENOMEM;
		vcpu->arch.gmap->private = vcpu->kvm;
		return 0;
	}

	vcpu->arch.gmap = vcpu->kvm->arch.gmap;
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
				    KVM_SYNC_GPRS |
				    KVM_SYNC_ACRS |
				    KVM_SYNC_CRS;
	return 0;
}

void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
	/* Nothing to do */
}

void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	save_fp_regs(&vcpu->arch.host_fpregs);
	save_access_regs(vcpu->arch.host_acrs);
	vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK;
	restore_fp_regs(&vcpu->arch.guest_fpregs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	gmap_enable(vcpu->arch.gmap);
	atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	gmap_disable(vcpu->arch.gmap);
	save_fp_regs(&vcpu->arch.guest_fpregs);
	save_access_regs(vcpu->run->s.regs.acrs);
	restore_fp_regs(&vcpu->arch.host_fpregs);
	restore_access_regs(vcpu->arch.host_acrs);
}

static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
	/* this equals initial cpu reset in the POP (Principles of Operation), but we don't switch to ESA */
	vcpu->arch.sie_block->gpsw.mask = 0UL;
	vcpu->arch.sie_block->gpsw.addr = 0UL;
	kvm_s390_set_prefix(vcpu, 0);
	vcpu->arch.sie_block->cputm     = 0UL;
	vcpu->arch.sie_block->ckc       = 0UL;
	vcpu->arch.sie_block->todpr     = 0;
	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
	vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
	vcpu->arch.guest_fpregs.fpc = 0;
	asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
	vcpu->arch.sie_block->gbea = 1;
	atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
}

int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	return 0;
}

int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
						    CPUSTAT_SM |
						    CPUSTAT_STOPPED |
						    CPUSTAT_GED);
	vcpu->arch.sie_block->ecb   = 6;
	vcpu->arch.sie_block->ecb2  = 8;
	vcpu->arch.sie_block->eca   = 0xC1002001U;
	vcpu->arch.sie_block->fac   = (int) (long) vfacilities;
	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
	tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet,
		     (unsigned long) vcpu);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
	get_cpu_id(&vcpu->arch.cpu_id);
	vcpu->arch.cpu_id.version = 0xff;
	return 0;
}

struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
				      unsigned int id)
{
	struct kvm_vcpu *vcpu;
	int rc = -EINVAL;

	if (id >= KVM_MAX_VCPUS)
		goto out;

	rc = -ENOMEM;

	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu)
		goto out;

	vcpu->arch.sie_block = (struct kvm_s390_sie_block *)
					get_zeroed_page(GFP_KERNEL);

	if (!vcpu->arch.sie_block)
		goto out_free_cpu;

	vcpu->arch.sie_block->icpua = id;
	if (!kvm_is_ucontrol(kvm)) {
		if (!kvm->arch.sca) {
			WARN_ON_ONCE(1);
			goto out_free_cpu;
		}
		if (!kvm->arch.sca->cpu[id].sda)
			kvm->arch.sca->cpu[id].sda =
				(__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh =
			(__u32)(((__u64)kvm->arch.sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
		set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
	}

	spin_lock_init(&vcpu->arch.local_int.lock);
	INIT_LIST_HEAD(&vcpu->arch.local_int.list);
	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
	spin_lock(&kvm->arch.float_int.lock);
	kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
	vcpu->arch.local_int.wq = &vcpu->wq;
	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
	spin_unlock(&kvm->arch.float_int.lock);

	rc = kvm_vcpu_init(vcpu, kvm, id);
	if (rc)
		goto out_free_sie_block;
	VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
		 vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);

	return vcpu;
out_free_sie_block:
	free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(rc);
}

int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}

void s390_vcpu_block(struct kvm_vcpu *vcpu)
{
	atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

void s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
	atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

/*
 * Kick a guest cpu out of SIE and wait until SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately. */
void exit_sie(struct kvm_vcpu *vcpu)
{
	atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}

/* Kick a guest cpu out of SIE and prevent SIE-reentry */
void exit_sie_sync(struct kvm_vcpu *vcpu)
{
	s390_vcpu_block(vcpu);
	exit_sie(vcpu);
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
{
	int i;
	struct kvm *kvm = gmap->private;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		/* match against both prefix pages */
		if (vcpu->arch.sie_block->prefix == (address & ~0x1000UL)) {
			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
			exit_sie_sync(vcpu);
		}
	}
}

int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}

static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = put_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = put_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = put_user(vcpu->arch.sie_block->cputm,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = put_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}

static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = get_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = get_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = get_user(vcpu->arch.sie_block->cputm,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = get_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}

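/*
 * Userspace reads and writes the registers handled by the two functions
 * above through the generic ONE_REG interface on the vcpu file descriptor.
 * A minimal sketch (illustrative only, error handling omitted):
 *
 *	__u64 cputm;
 *	struct kvm_one_reg reg = {
 *		.id   = KVM_REG_S390_CPU_TIMER,
 *		.addr = (__u64)&cputm,
 *	};
 *	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);     (read the CPU timer)
 *	ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);     (write it back)
 */
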
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
	restore_access_regs(vcpu->run->s.regs.acrs);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
	return 0;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
	vcpu->arch.guest_fpregs.fpc = fpu->fpc & FPC_VALID_MASK;
	restore_fp_regs(&vcpu->arch.guest_fpregs);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->arch.guest_fpregs.fpc;
	return 0;
}

static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	return -EINVAL; /* not implemented yet */
}

int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	return -EINVAL; /* not implemented yet */
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	return -EINVAL; /* not implemented yet */
}

static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Let's just retry the request loop.
	 */
	while (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;
		rc = gmap_ipte_notify(vcpu->arch.gmap,
				      vcpu->arch.sie_block->prefix,
				      PAGE_SIZE * 2);
		if (rc)
			return rc;
		s390_vcpu_unblock(vcpu);
	}
	return 0;
}

static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc;

	memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);

	if (need_resched())
		schedule();

	if (test_thread_flag(TIF_MCCK_PENDING))
		s390_handle_mcck();

	if (!kvm_is_ucontrol(vcpu->kvm))
		kvm_s390_deliver_pending_interrupts(vcpu);

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	vcpu->arch.sie_block->icptcode = 0;
	VCPU_EVENT(vcpu, 6, "entering sie flags %x",
		   atomic_read(&vcpu->arch.sie_block->cpuflags));
	trace_kvm_s390_sie_enter(vcpu,
				 atomic_read(&vcpu->arch.sie_block->cpuflags));

	/*
	 * As PF_VCPU is used in the fault handler, there must be no
	 * uaccess between guest_enter and guest_exit.
	 */
	preempt_disable();
	kvm_guest_enter();
	preempt_enable();
	rc = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs);
	kvm_guest_exit();

	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

	if (rc > 0)
		rc = 0;
	if (rc < 0) {
		if (kvm_is_ucontrol(vcpu->kvm)) {
			rc = SIE_INTERCEPT_UCONTROL;
		} else {
			VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
			trace_kvm_s390_sie_fault(vcpu);
			rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
		}
	}

	memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
	return rc;
}

int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int rc;
	sigset_t sigsaved;

rerun_vcpu:
	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

	atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);

	BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL);

	switch (kvm_run->exit_reason) {
	case KVM_EXIT_S390_SIEIC:
	case KVM_EXIT_UNKNOWN:
	case KVM_EXIT_INTR:
	case KVM_EXIT_S390_RESET:
	case KVM_EXIT_S390_UCONTROL:
	case KVM_EXIT_S390_TSCH:
		break;
	default:
		BUG();
	}

	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) {
		kvm_run->kvm_dirty_regs &= ~KVM_SYNC_PREFIX;
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		kvm_run->kvm_dirty_regs &= ~KVM_SYNC_CRS;
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	}

	might_fault();

	do {
		rc = __vcpu_run(vcpu);
		if (rc)
			break;
		if (kvm_is_ucontrol(vcpu->kvm))
			rc = -EOPNOTSUPP;
		else
			rc = kvm_handle_sie_intercept(vcpu);
	} while (!signal_pending(current) && !rc);

	if (rc == SIE_INTERCEPT_RERUNVCPU)
		goto rerun_vcpu;

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

#ifdef CONFIG_KVM_S390_UCONTROL
	if (rc == SIE_INTERCEPT_UCONTROL) {
		kvm_run->exit_reason = KVM_EXIT_S390_UCONTROL;
		kvm_run->s390_ucontrol.trans_exc_code =
			current->thread.gmap_addr;
		kvm_run->s390_ucontrol.pgm_code = 0x10;
		rc = 0;
	}
#endif

	if (rc == -EOPNOTSUPP) {
		/* intercept cannot be handled in-kernel, prepare kvm-run */
		kvm_run->exit_reason         = KVM_EXIT_S390_SIEIC;
		kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		kvm_run->s390_sieic.ipa      = vcpu->arch.sie_block->ipa;
		kvm_run->s390_sieic.ipb      = vcpu->arch.sie_block->ipb;
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* intercept was handled, but userspace support is needed
		 * kvm_run has been prepared by the handler */
		rc = 0;
	}

	kvm_run->psw_mask     = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr     = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = vcpu->arch.sie_block->prefix;
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);

	vcpu->stat.exit_userspace++;
	return rc;
}

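/*
 * The usual userspace flow that drives kvm_arch_vcpu_ioctl_run() above:
 * create a VM and a vcpu, mmap the shared kvm_run structure, then call
 * KVM_RUN in a loop and inspect kvm_run->exit_reason (KVM_EXIT_S390_SIEIC
 * carries the intercept data filled in above). A minimal sketch with
 * error handling and guest setup omitted:
 *
 *	int kvm_fd  = open("/dev/kvm", O_RDWR);
 *	int vm_fd   = ioctl(kvm_fd, KVM_CREATE_VM, 0);
 *	int vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);
 *	long sz     = ioctl(kvm_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
 *	struct kvm_run *run = mmap(NULL, sz, PROT_READ | PROT_WRITE,
 *				   MAP_SHARED, vcpu_fd, 0);
 *
 *	for (;;) {
 *		ioctl(vcpu_fd, KVM_RUN, 0);
 *		if (run->exit_reason != KVM_EXIT_INTR)
 *			break;
 *	}
 */
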
static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, void *from,
		       unsigned long n, int prefix)
{
	if (prefix)
		return copy_to_guest(vcpu, guestdest, from, n);
	else
		return copy_to_guest_absolute(vcpu, guestdest, from, n);
}

/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	unsigned char archmode = 1;
	int prefix;

	if (addr == KVM_S390_STORE_STATUS_NOADDR) {
		if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1))
			return -EFAULT;
		addr = SAVE_AREA_BASE;
		prefix = 0;
	} else if (addr == KVM_S390_STORE_STATUS_PREFIXED) {
		if (copy_to_guest(vcpu, 163ul, &archmode, 1))
			return -EFAULT;
		addr = SAVE_AREA_BASE;
		prefix = 1;
	} else
		prefix = 0;

	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * copying in vcpu load/put. Let's update our copies before we save
	 * them into the save area.
	 */
	save_fp_regs(&vcpu->arch.guest_fpregs);
	save_access_regs(vcpu->run->s.regs.acrs);

	if (__guestcopy(vcpu, addr + offsetof(struct save_area, fp_regs),
			vcpu->arch.guest_fpregs.fprs, 128, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu, addr + offsetof(struct save_area, gp_regs),
			vcpu->run->s.regs.gprs, 128, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu, addr + offsetof(struct save_area, psw),
			&vcpu->arch.sie_block->gpsw, 16, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu, addr + offsetof(struct save_area, pref_reg),
			&vcpu->arch.sie_block->prefix, 4, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu,
			addr + offsetof(struct save_area, fp_ctrl_reg),
			&vcpu->arch.guest_fpregs.fpc, 4, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu, addr + offsetof(struct save_area, tod_reg),
			&vcpu->arch.sie_block->todpr, 4, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu, addr + offsetof(struct save_area, timer),
			&vcpu->arch.sie_block->cputm, 8, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu, addr + offsetof(struct save_area, clk_cmp),
			&vcpu->arch.sie_block->ckc, 8, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu, addr + offsetof(struct save_area, acc_regs),
			&vcpu->run->s.regs.acrs, 64, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu,
			addr + offsetof(struct save_area, ctrl_regs),
			&vcpu->arch.sie_block->gcr, 128, prefix))
		return -EFAULT;
	return 0;
}

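/*
 * kvm_s390_vcpu_store_status() above is reached through the
 * KVM_S390_STORE_STATUS vcpu ioctl, which passes the store address (or one
 * of the two special values handled above) directly as the ioctl argument.
 * Illustrative call:
 *
 *	ioctl(vcpu_fd, KVM_S390_STORE_STATUS, KVM_S390_STORE_STATUS_NOADDR);
 */
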
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

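/*
 * The capability handled above is turned on from userspace with the
 * KVM_ENABLE_CAP vcpu ioctl. A minimal sketch (illustrative only):
 *
 *	struct kvm_enable_cap cap = {
 *		.cap = KVM_CAP_S390_CSS_SUPPORT,
 *	};
 *	ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
 */
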
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	long r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vcpu(vcpu, &s390int);
		break;
	}
	case KVM_S390_STORE_STATUS:
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;
		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
			ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(arg, vcpu->arch.gmap);
		if (!IS_ERR_VALUE(r))
			r = 0;
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}

int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

void kvm_arch_free_memslot(struct kvm_memory_slot *free,
			   struct kvm_memory_slot *dont)
{
}

int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
{
	return 0;
}

void kvm_arch_memslots_updated(struct kvm *kvm)
{
}

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. Memory slots have to start and end on a
	   segment boundary (1 MB). The memory in userland may be fragmented
	   across several different vmas, and it is fine to mmap() and
	   munmap() parts of this slot at any time after this call. */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	return 0;
}

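/*
 * Memory slots are registered from userspace with the
 * KVM_SET_USER_MEMORY_REGION vm ioctl; per the checks above, both the
 * userspace address and the size must be segment (1 MB) aligned on s390.
 * A minimal sketch (illustrative only; "mem" is assumed to be a suitably
 * aligned allocation of ram_size bytes):
 *
 *	struct kvm_userspace_memory_region region = {
 *		.slot            = 0,
 *		.guest_phys_addr = 0,
 *		.memory_size     = ram_size,
 *		.userspace_addr  = (__u64)mem,
 *	};
 *	ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
 */
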
void kvm_arch_commit_memory_region(struct kvm *kvm,
				struct kvm_userspace_memory_region *mem,
				const struct kvm_memory_slot *old,
				enum kvm_mr_change change)
{
	int rc;

	/* If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
		mem->guest_phys_addr, mem->memory_size);
	if (rc)
		printk(KERN_WARNING "kvm-s390: failed to commit memory region\n");
	return;
}

void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
}

void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
				   struct kvm_memory_slot *slot)
{
}

static int __init kvm_s390_init(void)
{
	int ret;
	ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
	if (ret)
		return ret;

	/*
	 * guests can ask for up to 255+1 double words, we need a full page
	 * to hold the maximum amount of facilities. On the other hand, we
	 * only set facilities that are known to work in KVM.
	 */
	vfacilities = (unsigned long *) get_zeroed_page(GFP_KERNEL|GFP_DMA);
	if (!vfacilities) {
		kvm_exit();
		return -ENOMEM;
	}
	memcpy(vfacilities, S390_lowcore.stfle_fac_list, 16);
	vfacilities[0] &= 0xff82fff3f47c0000UL;
	vfacilities[1] &= 0x001c000000000000UL;
	return 0;
}

static void __exit kvm_s390_exit(void)
{
	free_page((unsigned long) vfacilities);
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");