xref: /openbmc/linux/arch/s390/kvm/kvm-s390.c (revision ee8a99bd)
/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 */

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/pgtable.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/sclp.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU

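/* per-vcpu exit, interrupt delivery and instruction counters exported via debugfs */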
struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "diagnose_10", VCPU_STAT(diagnose_10) },
	{ "diagnose_44", VCPU_STAT(diagnose_44) },
	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
	{ NULL }
};

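/* facility bits advertised to guests; allocated and masked in kvm_s390_init() */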
static unsigned long long *facilities;
static struct gmap_notifier gmap_notifier;

/* Section: not file related */
int kvm_arch_hardware_enable(void *garbage)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

void kvm_arch_hardware_disable(void *garbage)
{
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);

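/*
 * Register the IPTE notifier so that vcpus can be kicked out of SIE when a
 * watched guest page (e.g. the prefix page) is invalidated.
 */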
int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_ipte_notifier(&gmap_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_ipte_notifier(&gmap_notifier);
}

void kvm_arch_check_processor_compat(void *rtn)
{
}

int kvm_arch_init(void *opaque)
{
	return 0;
}

void kvm_arch_exit(void)
{
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}

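/* Report which optional KVM capabilities are supported on s390. */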
int kvm_dev_ioctl_check_extension(long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
		r = 1;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_MAX_VCPUS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	default:
		r = 0;
	}
	return r;
}

/* Section: vm related */
/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	return 0;
}

long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}

int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	int rc;
	char debug_name[16];

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	rc = -ENOMEM;

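	/* the system control area (SCA) holds the SIE descriptors of all vcpus */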
	kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
	if (!kvm->arch.sca)
		goto out_err;

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_nodbf;

	spin_lock_init(&kvm->arch.float_int.lock);
	INIT_LIST_HEAD(&kvm->arch.float_int.list);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "%s", "vm created");

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
	} else {
		kvm->arch.gmap = gmap_alloc(current->mm);
		if (!kvm->arch.gmap)
			goto out_nogmap;
		kvm->arch.gmap->private = kvm;
	}

	kvm->arch.css_support = 0;

	return 0;
out_nogmap:
	debug_unregister(kvm->arch.dbf);
out_nodbf:
	free_page((unsigned long)(kvm->arch.sca));
out_err:
	return rc;
}

void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		clear_bit(63 - vcpu->vcpu_id,
			  (unsigned long *) &vcpu->kvm->arch.sca->mcn);
		if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
		    (__u64) vcpu->arch.sie_block)
			vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
	}
	smp_mb();

	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_free(vcpu->arch.gmap);

	free_page((unsigned long)(vcpu->arch.sie_block));
	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}

static void kvm_free_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}

void kvm_arch_sync_events(struct kvm *kvm)
{
}

void kvm_arch_destroy_vm(struct kvm *kvm)
{
	kvm_free_vcpus(kvm);
	free_page((unsigned long)(kvm->arch.sca));
	debug_unregister(kvm->arch.dbf);
	if (!kvm_is_ucontrol(kvm))
		gmap_free(kvm->arch.gmap);
}

/* Section: vcpu related */
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->arch.gmap = gmap_alloc(current->mm);
		if (!vcpu->arch.gmap)
			return -ENOMEM;
		vcpu->arch.gmap->private = vcpu->kvm;
		return 0;
	}

	vcpu->arch.gmap = vcpu->kvm->arch.gmap;
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
				    KVM_SYNC_GPRS |
				    KVM_SYNC_ACRS |
				    KVM_SYNC_CRS;
	return 0;
}

void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
	/* Nothing to do */
}

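/*
 * Called when the vcpu thread is scheduled in: save the host FPU and access
 * registers, load the guest's, and activate the guest address space.
 */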
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	save_fp_regs(&vcpu->arch.host_fpregs);
	save_access_regs(vcpu->arch.host_acrs);
	vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK;
	restore_fp_regs(&vcpu->arch.guest_fpregs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	gmap_enable(vcpu->arch.gmap);
	atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	gmap_disable(vcpu->arch.gmap);
	save_fp_regs(&vcpu->arch.guest_fpregs);
	save_access_regs(vcpu->run->s.regs.acrs);
	restore_fp_regs(&vcpu->arch.host_fpregs);
	restore_access_regs(vcpu->arch.host_acrs);
}

static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
	/* this equals initial cpu reset in pop, but we don't switch to ESA */
	vcpu->arch.sie_block->gpsw.mask = 0UL;
	vcpu->arch.sie_block->gpsw.addr = 0UL;
	kvm_s390_set_prefix(vcpu, 0);
	vcpu->arch.sie_block->cputm     = 0UL;
	vcpu->arch.sie_block->ckc       = 0UL;
	vcpu->arch.sie_block->todpr     = 0;
	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
	vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
	vcpu->arch.guest_fpregs.fpc = 0;
	asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
	vcpu->arch.sie_block->gbea = 1;
	atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
}

int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	return 0;
}

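/*
 * Initialize the SIE control block: CPU state flags, execution controls,
 * the facility list and the clock comparator wakeup timer.
 */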
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
						    CPUSTAT_SM |
						    CPUSTAT_STOPPED |
						    CPUSTAT_GED);
	vcpu->arch.sie_block->ecb   = 6;
	vcpu->arch.sie_block->ecb2  = 8;
	vcpu->arch.sie_block->eca   = 0xC1002001U;
	vcpu->arch.sie_block->fac   = (int) (long) facilities;
	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
	tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet,
		     (unsigned long) vcpu);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
	get_cpu_id(&vcpu->arch.cpu_id);
	vcpu->arch.cpu_id.version = 0xff;
	return 0;
}

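/*
 * Allocate a vcpu and its SIE control block and link it into the SCA and
 * the floating interrupt structures of the VM.
 */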
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
				      unsigned int id)
{
	struct kvm_vcpu *vcpu;
	int rc = -EINVAL;

	if (id >= KVM_MAX_VCPUS)
		goto out;

	rc = -ENOMEM;

	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu)
		goto out;

	vcpu->arch.sie_block = (struct kvm_s390_sie_block *)
					get_zeroed_page(GFP_KERNEL);

	if (!vcpu->arch.sie_block)
		goto out_free_cpu;

	vcpu->arch.sie_block->icpua = id;
	if (!kvm_is_ucontrol(kvm)) {
		if (!kvm->arch.sca) {
			WARN_ON_ONCE(1);
			goto out_free_cpu;
		}
		if (!kvm->arch.sca->cpu[id].sda)
			kvm->arch.sca->cpu[id].sda =
				(__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh =
			(__u32)(((__u64)kvm->arch.sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
		set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
	}

	spin_lock_init(&vcpu->arch.local_int.lock);
	INIT_LIST_HEAD(&vcpu->arch.local_int.list);
	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
	spin_lock(&kvm->arch.float_int.lock);
	kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
	vcpu->arch.local_int.wq = &vcpu->wq;
	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
	spin_unlock(&kvm->arch.float_int.lock);

	rc = kvm_vcpu_init(vcpu, kvm, id);
	if (rc)
		goto out_free_sie_block;
	VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
		 vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);

	return vcpu;
out_free_sie_block:
	free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(rc);
}

int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}

void s390_vcpu_block(struct kvm_vcpu *vcpu)
{
	atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

void s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
	atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

/*
 * Kick a guest cpu out of SIE and wait until SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately. */
void exit_sie(struct kvm_vcpu *vcpu)
{
	atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}

/* Kick a guest cpu out of SIE and prevent SIE-reentry */
void exit_sie_sync(struct kvm_vcpu *vcpu)
{
	s390_vcpu_block(vcpu);
	exit_sie(vcpu);
}

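/*
 * IPTE notifier callback: if the invalidated page belongs to a vcpu's prefix
 * area, request a reload and kick that vcpu out of SIE.
 */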
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
{
	int i;
	struct kvm *kvm = gmap->private;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		/* match against both prefix pages */
		if (vcpu->arch.sie_block->prefix == (address & ~0x1000UL)) {
			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
			exit_sie_sync(vcpu);
		}
	}
}

int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}

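/* KVM_GET_ONE_REG: copy a single SIE register value to user space. */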
static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = put_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = put_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = put_user(vcpu->arch.sie_block->cputm,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = put_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}

static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = get_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = get_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = get_user(vcpu->arch.sie_block->cputm,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = get_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}

static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
	restore_access_regs(vcpu->run->s.regs.acrs);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
	return 0;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
	vcpu->arch.guest_fpregs.fpc = fpu->fpc & FPC_VALID_MASK;
	restore_fp_regs(&vcpu->arch.guest_fpregs);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->arch.guest_fpregs.fpc;
	return 0;
}

static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	return -EINVAL; /* not implemented yet */
}

int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	return -EINVAL; /* not implemented yet */
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	return -EINVAL; /* not implemented yet */
}

static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Let's just retry the request loop.
	 */
	while (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;
		rc = gmap_ipte_notify(vcpu->arch.gmap,
				      vcpu->arch.sie_block->prefix,
				      PAGE_SIZE * 2);
		if (rc)
			return rc;
		s390_vcpu_unblock(vcpu);
	}
	return 0;
}

static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc;

	memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);

	if (need_resched())
		schedule();

	if (test_thread_flag(TIF_MCCK_PENDING))
		s390_handle_mcck();

	if (!kvm_is_ucontrol(vcpu->kvm))
		kvm_s390_deliver_pending_interrupts(vcpu);

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	vcpu->arch.sie_block->icptcode = 0;
	VCPU_EVENT(vcpu, 6, "entering sie flags %x",
		   atomic_read(&vcpu->arch.sie_block->cpuflags));
	trace_kvm_s390_sie_enter(vcpu,
				 atomic_read(&vcpu->arch.sie_block->cpuflags));

	/*
	 * As PF_VCPU will be used in the fault handler, there must be no
	 * uaccess between guest_enter and guest_exit.
	 */
	preempt_disable();
	kvm_guest_enter();
	preempt_enable();
	rc = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs);
	kvm_guest_exit();

	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

	if (rc > 0)
		rc = 0;
	if (rc < 0) {
		if (kvm_is_ucontrol(vcpu->kvm)) {
			rc = SIE_INTERCEPT_UCONTROL;
		} else {
			VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
			trace_kvm_s390_sie_fault(vcpu);
			rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
		}
	}

	memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
	return rc;
}

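/*
 * Main KVM_RUN loop: sync register state from kvm_run, enter SIE repeatedly,
 * and translate intercepts that need user space into exit reasons.
 */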
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int rc;
	sigset_t sigsaved;

rerun_vcpu:
	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

	atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);

	BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL);

	switch (kvm_run->exit_reason) {
	case KVM_EXIT_S390_SIEIC:
	case KVM_EXIT_UNKNOWN:
	case KVM_EXIT_INTR:
	case KVM_EXIT_S390_RESET:
	case KVM_EXIT_S390_UCONTROL:
	case KVM_EXIT_S390_TSCH:
		break;
	default:
		BUG();
	}

	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) {
		kvm_run->kvm_dirty_regs &= ~KVM_SYNC_PREFIX;
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		kvm_run->kvm_dirty_regs &= ~KVM_SYNC_CRS;
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	}

	might_fault();

	do {
		rc = __vcpu_run(vcpu);
		if (rc)
			break;
		if (kvm_is_ucontrol(vcpu->kvm))
			rc = -EOPNOTSUPP;
		else
			rc = kvm_handle_sie_intercept(vcpu);
	} while (!signal_pending(current) && !rc);

	if (rc == SIE_INTERCEPT_RERUNVCPU)
		goto rerun_vcpu;

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

#ifdef CONFIG_KVM_S390_UCONTROL
	if (rc == SIE_INTERCEPT_UCONTROL) {
		kvm_run->exit_reason = KVM_EXIT_S390_UCONTROL;
		kvm_run->s390_ucontrol.trans_exc_code =
			current->thread.gmap_addr;
		kvm_run->s390_ucontrol.pgm_code = 0x10;
		rc = 0;
	}
#endif

	if (rc == -EOPNOTSUPP) {
		/* intercept cannot be handled in-kernel, prepare kvm-run */
		kvm_run->exit_reason         = KVM_EXIT_S390_SIEIC;
		kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		kvm_run->s390_sieic.ipa      = vcpu->arch.sie_block->ipa;
		kvm_run->s390_sieic.ipb      = vcpu->arch.sie_block->ipb;
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* intercept was handled, but userspace support is needed
		 * kvm_run has been prepared by the handler */
		rc = 0;
	}

	kvm_run->psw_mask     = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr     = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = vcpu->arch.sie_block->prefix;
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);

	vcpu->stat.exit_userspace++;
	return rc;
}

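/* copy to guest memory, either via the prefixed or the absolute address space */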
static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, void *from,
		       unsigned long n, int prefix)
{
	if (prefix)
		return copy_to_guest(vcpu, guestdest, from, n);
	else
		return copy_to_guest_absolute(vcpu, guestdest, from, n);
}

/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	unsigned char archmode = 1;
	int prefix;

	if (addr == KVM_S390_STORE_STATUS_NOADDR) {
		if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1))
			return -EFAULT;
		addr = SAVE_AREA_BASE;
		prefix = 0;
	} else if (addr == KVM_S390_STORE_STATUS_PREFIXED) {
		if (copy_to_guest(vcpu, 163ul, &archmode, 1))
			return -EFAULT;
		addr = SAVE_AREA_BASE;
		prefix = 1;
	} else
		prefix = 0;

	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * copying in vcpu load/put. Let's update our copies before we save
	 * it into the save area
	 */
	save_fp_regs(&vcpu->arch.guest_fpregs);
	save_access_regs(vcpu->run->s.regs.acrs);

	if (__guestcopy(vcpu, addr + offsetof(struct save_area, fp_regs),
			vcpu->arch.guest_fpregs.fprs, 128, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu, addr + offsetof(struct save_area, gp_regs),
			vcpu->run->s.regs.gprs, 128, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu, addr + offsetof(struct save_area, psw),
			&vcpu->arch.sie_block->gpsw, 16, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu, addr + offsetof(struct save_area, pref_reg),
			&vcpu->arch.sie_block->prefix, 4, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu,
			addr + offsetof(struct save_area, fp_ctrl_reg),
			&vcpu->arch.guest_fpregs.fpc, 4, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu, addr + offsetof(struct save_area, tod_reg),
			&vcpu->arch.sie_block->todpr, 4, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu, addr + offsetof(struct save_area, timer),
			&vcpu->arch.sie_block->cputm, 8, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu, addr + offsetof(struct save_area, clk_cmp),
			&vcpu->arch.sie_block->ckc, 8, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu, addr + offsetof(struct save_area, acc_regs),
			&vcpu->run->s.regs.acrs, 64, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu,
			addr + offsetof(struct save_area, ctrl_regs),
			&vcpu->arch.sie_block->gcr, 128, prefix))
		return -EFAULT;
	return 0;
}

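/* KVM_ENABLE_CAP: enable optional capabilities, currently only KVM_CAP_S390_CSS_SUPPORT */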
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	long r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vcpu(vcpu, &s390int);
		break;
	}
	case KVM_S390_STORE_STATUS:
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;
		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
			ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(arg, vcpu->arch.gmap);
		if (!IS_ERR_VALUE(r))
			r = 0;
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}

int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

void kvm_arch_free_memslot(struct kvm_memory_slot *free,
			   struct kvm_memory_slot *dont)
{
}

int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
{
	return 0;
}

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. Memory slots have to start and end at a
	   segment boundary (1MB). The memory in userland may be fragmented
	   into various different vmas. It is okay to mmap() and munmap()
	   stuff in this slot after doing this call at any time */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	return 0;
}

void kvm_arch_commit_memory_region(struct kvm *kvm,
				struct kvm_userspace_memory_region *mem,
				const struct kvm_memory_slot *old,
				enum kvm_mr_change change)
{
	int rc;

	/* If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
		mem->guest_phys_addr, mem->memory_size);
	if (rc)
		printk(KERN_WARNING "kvm-s390: failed to commit memory region\n");
	return;
}

void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
}

void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
				   struct kvm_memory_slot *slot)
{
}

static int __init kvm_s390_init(void)
{
	int ret;
	ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
	if (ret)
		return ret;

	/*
	 * guests can ask for up to 255+1 double words, so we need a full page
	 * to hold the maximum amount of facilities. On the other hand, we
	 * only set facilities that are known to work in KVM.
	 */
	facilities = (unsigned long long *) get_zeroed_page(GFP_KERNEL|GFP_DMA);
	if (!facilities) {
		kvm_exit();
		return -ENOMEM;
	}
	memcpy(facilities, S390_lowcore.stfle_fac_list, 16);
	facilities[0] &= 0xff82fff3f47c0000ULL;
	facilities[1] &= 0x001c000000000000ULL;
	return 0;
}

static void __exit kvm_s390_exit(void)
{
	free_page((unsigned long) facilities);
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");