xref: /openbmc/linux/arch/s390/kvm/kvm-s390.c (revision 65417d9f)
/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define KMSG_COMPONENT "kvm-s390"
#undef pr_fmt
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU

struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_pei", VCPU_STAT(exit_pei) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
	{ "instruction_sie", VCPU_STAT(instruction_sie) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "diagnose_10", VCPU_STAT(diagnose_10) },
	{ "diagnose_44", VCPU_STAT(diagnose_44) },
	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
	{ "diagnose_258", VCPU_STAT(diagnose_258) },
	{ "diagnose_308", VCPU_STAT(diagnose_308) },
	{ "diagnose_500", VCPU_STAT(diagnose_500) },
	{ NULL }
};

struct kvm_s390_tod_clock_ext {
	__u8 epoch_idx;
	__u64 tod;
	__u8 reserved[7];
} __packed;

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };

unsigned long kvm_s390_fac_list_mask_size(void)
{
	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
	return ARRAY_SIZE(kvm_s390_fac_list_mask);
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm->arch.epoch -= *delta;
		kvm_for_each_vcpu(i, vcpu, kvm) {
			vcpu->arch.sie_block->epoch -= *delta;
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				vcpu->arch.vsie_block->epoch -= *delta;
		}
	}
	return NOTIFY_OK;
}
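
/*
 * Why the signs above differ: the guest TOD is derived from the host TOD as
 *
 *	guest_tod = host_tod + epoch
 *
 * so when STP steps the host clock by *delta, subtracting *delta from every
 * epoch keeps the guest-visible clock stable:
 *
 *	(host_tod + *delta) + (epoch - *delta) == host_tod + epoch
 *
 * cputm_start, in contrast, caches a raw host TOD timestamp, so it has to
 * follow the host clock and is therefore moved forward by *delta.
 */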

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

static inline int plo_test_bit(unsigned char nr)
{
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc;

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc == 0;
}

static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: A wrongly shadowed PTE.I bit would make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: A wrongly shadowed PTE.I bit would cause
	 * pages to be detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: A wrongly shadowed PTE.I bit would
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits would have to
	 * be correctly shadowed. We can do that for the PGSTE but not for
	 * PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}

int kvm_arch_init(void *opaque)
{
	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		debug_unregister(kvm_s390_dbf);
		return -ENOMEM;
	}

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}

void kvm_arch_exit(void)
{
	debug_unregister(kvm_s390_dbf);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
	case KVM_CAP_S390_AIS_MIGRATION:
		r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	default:
		r = 0;
	}
	return r;
}
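
/*
 * Example: user space queries these extensions with KVM_CHECK_EXTENSION,
 * either on the /dev/kvm fd or on a VM fd. A minimal sketch (assumes an open
 * VM fd vm_fd; error handling omitted):
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/kvm.h>
 *
 *	int max_mem_op = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *	// > 0: supported; the value is the maximum transfer size
 *	int max_vcpus = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPUS);
 */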

static void kvm_s390_sync_dirty_log(struct kvm *kvm,
					struct kvm_memory_slot *memslot)
{
	gfn_t cur_gfn, last_gfn;
	unsigned long address;
	struct gmap *gmap = kvm->arch.gmap;

	/* Loop over all guest pages; last_gfn is the first gfn past the slot */
	last_gfn = memslot->base_gfn + memslot->npages;
	for (cur_gfn = memslot->base_gfn; cur_gfn < last_gfn; cur_gfn++) {
		address = gfn_to_hva_memslot(memslot, cur_gfn);

		if (test_and_clear_guest_dirty(gmap->mm, address))
			mark_page_dirty(kvm, cur_gfn);
		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}

static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}

static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_AIS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else {
			set_kvm_facility(kvm->arch.model.fac_mask, 72);
			set_kvm_facility(kvm->arch.model.fac_list, 72);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_GS:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus)) {
			r = -EBUSY;
		} else if (test_facility(133)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 133);
			set_kvm_facility(kvm->arch.model.fac_list, 133);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
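
/*
 * Example: user space enables one of these VM capabilities via KVM_ENABLE_CAP
 * on the VM fd, before creating any VCPU where required. A minimal sketch
 * (assumes an open VM fd vm_fd; error handling omitted):
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *
 *	if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap) < 0)
 *		perror("KVM_ENABLE_CAP");
 */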

static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		ret = -EBUSY;
		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			kvm->arch.use_cmma = 1;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
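
/*
 * Example: the memory attributes above are driven through the device-attr
 * interface on the VM fd. A sketch for setting a 4 GiB limit before any VCPU
 * is created (assumes an open VM fd vm_fd; error handling omitted):
 *
 *	__u64 limit = 4ULL << 30;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MEM_CTRL,
 *		.attr  = KVM_S390_VM_MEM_LIMIT_SIZE,
 *		.addr  = (__u64)&limit,
 *	};
 *
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */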

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_vcpu *vcpu;
	int i;

	if (!test_kvm_facility(kvm, 76))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		exit_sie(vcpu);
	}
	mutex_unlock(&kvm->lock);
	return 0;
}

static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
	int cx;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}

/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_s390_migration_state *mgs;
	struct kvm_memory_slot *ms;
	/* should be the only one */
	struct kvm_memslots *slots;
	unsigned long ram_pages;
	int slotnr;

	/* migration mode already enabled */
	if (kvm->arch.migration_state)
		return 0;

	slots = kvm_memslots(kvm);
	if (!slots || !slots->used_slots)
		return -EINVAL;

	mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
	if (!mgs)
		return -ENOMEM;
	kvm->arch.migration_state = mgs;

	if (kvm->arch.use_cmma) {
		/*
		 * Get the last slot. They should be sorted by base_gfn, so the
		 * last slot is also the one at the end of the address space.
		 * We have verified above that at least one slot is present.
		 */
		ms = slots->memslots + slots->used_slots - 1;
		/* round up so we only use full longs */
		ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
		/*
		 * allocate enough zeroed bytes to store all the bits, so that
		 * holes between memslots read as clean
		 */
		mgs->pgste_bitmap = vzalloc(ram_pages / 8);
		if (!mgs->pgste_bitmap) {
			kfree(mgs);
			kvm->arch.migration_state = NULL;
			return -ENOMEM;
		}

		mgs->bitmap_size = ram_pages;
		atomic64_set(&mgs->dirty_pages, ram_pages);
		/* mark all the pages in active slots as dirty */
		for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
			ms = slots->memslots + slotnr;
			bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
		}

		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	}
	return 0;
}

/*
 * Must be called with kvm->lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	struct kvm_s390_migration_state *mgs;

	/* migration mode already disabled */
	if (!kvm->arch.migration_state)
		return 0;
	mgs = kvm->arch.migration_state;
	kvm->arch.migration_state = NULL;

	if (kvm->arch.use_cmma) {
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
		vfree(mgs->pgste_bitmap);
	}
	kfree(mgs);
	return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int idx, res = -ENXIO;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		idx = srcu_read_lock(&kvm->srcu);
		res = kvm_s390_vm_start_migration(kvm);
		srcu_read_unlock(&kvm->srcu, idx);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->lock);

	return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = (kvm->arch.migration_state != NULL);

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}
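
/*
 * Example: user space toggles and queries migration mode through the same
 * device-attr interface (assumes an open VM fd vm_fd; error handling
 * omitted):
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MIGRATION,
 *		.attr  = KVM_S390_VM_MIGRATION_START,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);	// start
 *
 *	__u64 mig;
 *	attr.attr = KVM_S390_VM_MIGRATION_STATUS;
 *	attr.addr = (__u64)&mig;
 *	ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);	// mig == 1 while enabled
 */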

static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (test_kvm_facility(kvm, 139))
		kvm_s390_set_tod_clock_ext(kvm, &gtod);
	else if (gtod.epoch_idx == 0)
		kvm_s390_set_tod_clock(kvm, gtod.tod);
	else
		return -EINVAL;

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		gtod.epoch_idx, gtod.tod);

	return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
					   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
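
/*
 * Example: setting the guest TOD. With facility 139 (multiple epochs) the
 * extended format carries an epoch index in addition to the 64 bit TOD.
 * A sketch (assumes an open VM fd vm_fd and a desired clock in tod_value):
 *
 *	struct kvm_s390_vm_tod_clock gtod = {
 *		.epoch_idx = 0,
 *		.tod	   = tod_value,
 *	};
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_TOD,
 *		.attr  = KVM_S390_VM_TOD_EXT,
 *		.addr  = (__u64)&gtod,
 *	};
 *
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */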

static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
					struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_s390_tod_clock_ext htod;

	preempt_disable();

	get_tod_clock_ext((char *)&htod);

	gtod->tod = htod.tod + kvm->arch.epoch;
	gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;

	if (gtod->tod < htod.tod)
		gtod->epoch_idx += 1;

	preempt_enable();
}

static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));

	if (test_kvm_facility(kvm, 139))
		kvm_s390_get_tod_clock_ext(kvm, &gtod);
	else
		gtod.tod = kvm_s390_get_tod_clock_fast(kvm);

	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		gtod.epoch_idx, gtod.tod);
	return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
					 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;
	int ret = -EBUSY;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (!atomic_read(&kvm->online_vcpus)) {
		bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
			    KVM_S390_VM_CPU_FEAT_NR_BITS);
		ret = 0;
	}
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once supported by kernel + hw, we have to store the subfunctions
	 * in kvm->arch and remember that user space configured them.
	 */
	return -ENXIO;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_list[0],
		 kvm->arch.model.fac_list[1],
		 kvm->arch.model.fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_mask[0],
		 mach->fac_mask[1],
		 mach->fac_mask[2]);
	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_list[0],
		 mach->fac_list[1],
		 mach->fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}

static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once we can actually configure subfunctions (kernel + hw support),
	 * we have to check if they were already set by user space, if so copy
	 * them from kvm->arch.
	 */
	return -ENXIO;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}
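
/*
 * Example: reading the host's CPU-model baseline, typically done once by
 * user space to compute a guest model. A sketch (assumes an open VM fd
 * vm_fd; error handling omitted):
 *
 *	struct kvm_s390_vm_cpu_machine mach;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CPU_MODEL,
 *		.attr  = KVM_S390_VM_CPU_MACHINE,
 *		.addr  = (__u64)&mach,
 *	};
 *
 *	ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);
 *	// mach.fac_mask/fac_list now hold the host facility bits
 */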

static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_set_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_get_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = sclp.has_cmma ? 0 : -ENXIO;
			break;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
			ret = 0;
			break;
		/* configuring subfunctions is not supported yet */
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_MIGRATION:
		ret = 0;
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_use_skey(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		r = get_guest_storage_key(current->mm, hva, &keys[i]);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);

	if (!r) {
		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
				 sizeof(uint8_t) * args->count);
		if (r)
			r = -EFAULT;
	}

	kvfree(keys);
	return r;
}
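
/*
 * Example: fetching storage keys for a range of guest pages (assumes an open
 * VM fd vm_fd; KVM_S390_GET_SKEYS_NONE means the guest never used keys):
 *
 *	__u8 keys[256];
 *	struct kvm_s390_skeys args = {
 *		.start_gfn	= 0,
 *		.count		= 256,
 *		.skeydata_addr	= (__u64)keys,
 *	};
 *
 *	int r = ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 */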

static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			break;
		}

		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);
out:
	kvfree(keys);
	return r;
}

/*
 * Base address and length must be sent at the start of each block, therefore
 * it's cheaper to send some clean data, as long as it's less than the size of
 * two longs.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/* for consistency */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)

/*
 * This function searches for the next page with dirty CMMA attributes, and
 * saves the attributes in the buffer up to either the end of the buffer or
 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
 * no trailing clean bytes are saved.
 * In case no dirty bits were found, or if CMMA was not enabled or used, the
 * output buffer will indicate 0 as length.
 */
static int kvm_s390_get_cmma_bits(struct kvm *kvm,
				  struct kvm_s390_cmma_log *args)
{
	struct kvm_s390_migration_state *s = kvm->arch.migration_state;
	unsigned long bufsize, hva, pgstev, i, next, cur;
	int srcu_idx, peek, r = 0, rr;
	u8 *res;

	cur = args->start_gfn;
	i = next = pgstev = 0;

	if (unlikely(!kvm->arch.use_cmma))
		return -ENXIO;
	/* Invalid/unsupported flags were specified */
	if (args->flags & ~KVM_S390_CMMA_PEEK)
		return -EINVAL;
	/* Migration mode query, and we are not doing a migration */
	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
	if (!peek && !s)
		return -EINVAL;
	/* CMMA is disabled or was not used, or the buffer has length zero */
	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
	if (!bufsize || !kvm->mm->context.use_cmma) {
		memset(args, 0, sizeof(*args));
		return 0;
	}

	if (!peek) {
		/* We are not peeking, and there are no dirty pages */
		if (!atomic64_read(&s->dirty_pages)) {
			memset(args, 0, sizeof(*args));
			return 0;
		}
		cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
				    args->start_gfn);
		if (cur >= s->bitmap_size)	/* nothing found, loop back */
			cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
		if (cur >= s->bitmap_size) {	/* again! (very unlikely) */
			memset(args, 0, sizeof(*args));
			return 0;
		}
		next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
	}

	res = vmalloc(bufsize);
	if (!res)
		return -ENOMEM;

	args->start_gfn = cur;

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	while (i < bufsize) {
		hva = gfn_to_hva(kvm, cur);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}
		/* decrement only if we actually flipped the bit to 0 */
		if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
			atomic64_dec(&s->dirty_pages);
		r = get_pgste(kvm->mm, hva, &pgstev);
		if (r < 0)
			pgstev = 0;
		/* save the value */
		res[i++] = (pgstev >> 24) & 0x43;
		/*
		 * if the next bit is too far away, stop.
		 * if we reached the previous "next", find the next one
		 */
		if (!peek) {
			if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
				break;
			if (cur == next)
				next = find_next_bit(s->pgste_bitmap,
						     s->bitmap_size, cur + 1);
			/* reached the end of the bitmap or of the buffer, stop */
			if ((next >= s->bitmap_size) ||
			    (next >= args->start_gfn + bufsize))
				break;
		}
		cur++;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);
	args->count = i;
	args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;

	rr = copy_to_user((void __user *)args->values, res, args->count);
	if (rr)
		r = -EFAULT;

	vfree(res);
	return r;
}
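
/*
 * Example: during migration user space typically drains the dirty CMMA
 * attributes in a loop until "remaining" reaches zero. A sketch (assumes an
 * open VM fd vm_fd; error handling omitted):
 *
 *	__u8 buf[4096];
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count	   = sizeof(buf),
 *		.flags	   = 0,			// no KVM_S390_CMMA_PEEK
 *		.values	   = (__u64)buf,
 *	};
 *
 *	do {
 *		if (ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log) < 0)
 *			break;
 *		// buf[0..log.count-1] holds one pgstev byte per page,
 *		// starting at the (possibly updated) log.start_gfn
 *		log.start_gfn += log.count;
 *	} while (log.remaining);
 */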

/*
 * This function sets the CMMA attributes for the given pages. If the input
 * buffer has zero length, no action is taken, otherwise the attributes are
 * set and the mm->context.use_cmma flag is set.
 */
static int kvm_s390_set_cmma_bits(struct kvm *kvm,
				  const struct kvm_s390_cmma_log *args)
{
	unsigned long hva, mask, pgstev, i;
	uint8_t *bits;
	int srcu_idx, r = 0;

	mask = args->mask;

	if (!kvm->arch.use_cmma)
		return -ENXIO;
	/* invalid/unsupported flags */
	if (args->flags != 0)
		return -EINVAL;
	/* Enforce sane limit on memory allocation */
	if (args->count > KVM_S390_CMMA_SIZE_MAX)
		return -EINVAL;
	/* Nothing to do */
	if (args->count == 0)
		return 0;

	bits = vmalloc(sizeof(*bits) * args->count);
	if (!bits)
		return -ENOMEM;

	r = copy_from_user(bits, (void __user *)args->values, args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		pgstev = bits[i];
		pgstev = pgstev << 24;
		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
		set_pgste_bits(kvm->mm, hva, mask, pgstev);
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);

	if (!kvm->mm->context.use_cmma) {
		down_write(&kvm->mm->mmap_sem);
		kvm->mm->context.use_cmma = 1;
		up_write(&kvm->mm->mmap_sem);
	}
out:
	vfree(bits);
	return r;
}

long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_get_skeys(kvm, &args);
		break;
	}
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	case KVM_S390_GET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		r = kvm_s390_get_cmma_bits(kvm, &args);
		if (!r) {
			r = copy_to_user(argp, &args, sizeof(args));
			if (r)
				r = -EFAULT;
		}
		break;
	}
	case KVM_S390_SET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		r = kvm_s390_set_cmma_bits(kvm, &args);
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}

static int kvm_s390_query_ap_config(u8 *config)
{
	u32 fcn_code = 0x04000000UL;
	u32 cc = 0;

	memset(config, 0, 128);
	asm volatile(
		"lgr 0,%1\n"
		"lgr 2,%2\n"
		".long 0xb2af0000\n"		/* PQAP(QCI) */
		"0: ipm %0\n"
		"srl %0,28\n"
		"1:\n"
		EX_TABLE(0b, 1b)
		: "+r" (cc)
		: "r" (fcn_code), "r" (config)
		: "cc", "0", "2", "memory"
	);

	return cc;
}

static int kvm_s390_apxa_installed(void)
{
	u8 config[128];
	int cc;

	if (test_facility(12)) {
		cc = kvm_s390_query_ap_config(config);

		if (cc)
			pr_err("PQAP(QCI) failed with cc=%d", cc);
		else
			return config[0] & 0x40;
	}

	return 0;
}

static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
	else
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}

static u64 kvm_s390_get_initial_cpuid(void)
{
	struct cpuid cpuid;

	get_cpu_id(&cpuid);
	cpuid.version = 0xff;
	return *((u64 *) &cpuid);
}

static void kvm_s390_crypto_init(struct kvm *kvm)
{
	if (!test_kvm_facility(kvm, 76))
		return;

	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
	kvm_s390_set_crycb_format(kvm);

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
}
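
/*
 * Example: user space can regenerate or clear the wrapping key masks at any
 * time via the crypto attribute group; kvm_s390_vm_set_crypto() then kicks
 * every VCPU out of SIE so it picks up the new CRYCB state. A sketch
 * (assumes an open VM fd vm_fd; error handling omitted):
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CRYPTO,
 *		.attr  = KVM_S390_VM_CRYPTO_ENABLE_AES_KW,
 *	};
 *
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */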
1854 
1855 static void sca_dispose(struct kvm *kvm)
1856 {
1857 	if (kvm->arch.use_esca)
1858 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1859 	else
1860 		free_page((unsigned long)(kvm->arch.sca));
1861 	kvm->arch.sca = NULL;
1862 }
1863 
1864 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1865 {
1866 	gfp_t alloc_flags = GFP_KERNEL;
1867 	int i, rc;
1868 	char debug_name[16];
1869 	static unsigned long sca_offset;
1870 
1871 	rc = -EINVAL;
1872 #ifdef CONFIG_KVM_S390_UCONTROL
1873 	if (type & ~KVM_VM_S390_UCONTROL)
1874 		goto out_err;
1875 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1876 		goto out_err;
1877 #else
1878 	if (type)
1879 		goto out_err;
1880 #endif
1881 
1882 	rc = s390_enable_sie();
1883 	if (rc)
1884 		goto out_err;
1885 
1886 	rc = -ENOMEM;
1887 
1888 	kvm->arch.use_esca = 0; /* start with basic SCA */
1889 	if (!sclp.has_64bscao)
1890 		alloc_flags |= GFP_DMA;
1891 	rwlock_init(&kvm->arch.sca_lock);
1892 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1893 	if (!kvm->arch.sca)
1894 		goto out_err;
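	/*
	 * Stagger the SCAs of different VMs within the page in 16-byte
	 * steps so that they do not all start on the same cache lines;
	 * wrap around before a basic SCA would cross the page boundary.
	 * sca_offset is shared between VMs, hence kvm_lock.
	 */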
1895 	spin_lock(&kvm_lock);
1896 	sca_offset += 16;
1897 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1898 		sca_offset = 0;
1899 	kvm->arch.sca = (struct bsca_block *)
1900 			((char *) kvm->arch.sca + sca_offset);
1901 	spin_unlock(&kvm_lock);
1902 
1903 	sprintf(debug_name, "kvm-%u", current->pid);
1904 
1905 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1906 	if (!kvm->arch.dbf)
1907 		goto out_err;
1908 
1909 	kvm->arch.sie_page2 =
1910 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1911 	if (!kvm->arch.sie_page2)
1912 		goto out_err;
1913 
1914 	/* Populate the facility mask initially. */
1915 	memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1916 	       sizeof(S390_lowcore.stfle_fac_list));
1917 	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1918 		if (i < kvm_s390_fac_list_mask_size())
1919 			kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1920 		else
1921 			kvm->arch.model.fac_mask[i] = 0UL;
1922 	}
1923 
1924 	/* Populate the facility list initially. */
1925 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1926 	memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1927 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1928 
1929 	/* we are always in czam mode - even on pre-z14 machines */
1930 	set_kvm_facility(kvm->arch.model.fac_mask, 138);
1931 	set_kvm_facility(kvm->arch.model.fac_list, 138);
1932 	/* we emulate STHYI in kvm */
1933 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
1934 	set_kvm_facility(kvm->arch.model.fac_list, 74);
1935 	if (MACHINE_HAS_TLB_GUEST) {
1936 		set_kvm_facility(kvm->arch.model.fac_mask, 147);
1937 		set_kvm_facility(kvm->arch.model.fac_list, 147);
1938 	}
1939 
1940 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1941 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1942 
1943 	kvm_s390_crypto_init(kvm);
1944 
1945 	mutex_init(&kvm->arch.float_int.ais_lock);
1946 	kvm->arch.float_int.simm = 0;
1947 	kvm->arch.float_int.nimm = 0;
1948 	spin_lock_init(&kvm->arch.float_int.lock);
1949 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
1950 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1951 	init_waitqueue_head(&kvm->arch.ipte_wq);
1952 	mutex_init(&kvm->arch.ipte_mutex);
1953 
1954 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1955 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
1956 
1957 	if (type & KVM_VM_S390_UCONTROL) {
1958 		kvm->arch.gmap = NULL;
1959 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1960 	} else {
1961 		if (sclp.hamax == U64_MAX)
1962 			kvm->arch.mem_limit = TASK_SIZE_MAX;
1963 		else
1964 			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
1965 						    sclp.hamax + 1);
1966 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1967 		if (!kvm->arch.gmap)
1968 			goto out_err;
1969 		kvm->arch.gmap->private = kvm;
1970 		kvm->arch.gmap->pfault_enabled = 0;
1971 	}
1972 
1973 	kvm->arch.css_support = 0;
1974 	kvm->arch.use_irqchip = 0;
1975 	kvm->arch.epoch = 0;
1976 
1977 	spin_lock_init(&kvm->arch.start_stop_lock);
1978 	kvm_s390_vsie_init(kvm);
1979 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1980 
1981 	return 0;
1982 out_err:
1983 	free_page((unsigned long)kvm->arch.sie_page2);
1984 	debug_unregister(kvm->arch.dbf);
1985 	sca_dispose(kvm);
1986 	KVM_EVENT(3, "creation of vm failed: %d", rc);
1987 	return rc;
1988 }
1989 
1990 bool kvm_arch_has_vcpu_debugfs(void)
1991 {
1992 	return false;
1993 }
1994 
1995 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
1996 {
1997 	return 0;
1998 }
1999 
2000 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2001 {
2002 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2003 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2004 	kvm_s390_clear_local_irqs(vcpu);
2005 	kvm_clear_async_pf_completion_queue(vcpu);
2006 	if (!kvm_is_ucontrol(vcpu->kvm))
2007 		sca_del_vcpu(vcpu);
2008 
2009 	if (kvm_is_ucontrol(vcpu->kvm))
2010 		gmap_remove(vcpu->arch.gmap);
2011 
2012 	if (vcpu->kvm->arch.use_cmma)
2013 		kvm_s390_vcpu_unsetup_cmma(vcpu);
2014 	free_page((unsigned long)(vcpu->arch.sie_block));
2015 
2016 	kvm_vcpu_uninit(vcpu);
2017 	kmem_cache_free(kvm_vcpu_cache, vcpu);
2018 }
2019 
2020 static void kvm_free_vcpus(struct kvm *kvm)
2021 {
2022 	unsigned int i;
2023 	struct kvm_vcpu *vcpu;
2024 
2025 	kvm_for_each_vcpu(i, vcpu, kvm)
2026 		kvm_arch_vcpu_destroy(vcpu);
2027 
2028 	mutex_lock(&kvm->lock);
2029 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2030 		kvm->vcpus[i] = NULL;
2031 
2032 	atomic_set(&kvm->online_vcpus, 0);
2033 	mutex_unlock(&kvm->lock);
2034 }
2035 
2036 void kvm_arch_destroy_vm(struct kvm *kvm)
2037 {
2038 	kvm_free_vcpus(kvm);
2039 	sca_dispose(kvm);
2040 	debug_unregister(kvm->arch.dbf);
2041 	free_page((unsigned long)kvm->arch.sie_page2);
2042 	if (!kvm_is_ucontrol(kvm))
2043 		gmap_remove(kvm->arch.gmap);
2044 	kvm_s390_destroy_adapters(kvm);
2045 	kvm_s390_clear_float_irqs(kvm);
2046 	kvm_s390_vsie_destroy(kvm);
2047 	if (kvm->arch.migration_state) {
2048 		vfree(kvm->arch.migration_state->pgste_bitmap);
2049 		kfree(kvm->arch.migration_state);
2050 	}
2051 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2052 }
2053 
2054 /* Section: vcpu related */
2055 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2056 {
2057 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2058 	if (!vcpu->arch.gmap)
2059 		return -ENOMEM;
2060 	vcpu->arch.gmap->private = vcpu->kvm;
2061 
2062 	return 0;
2063 }
2064 
2065 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2066 {
2067 	if (!kvm_s390_use_sca_entries())
2068 		return;
2069 	read_lock(&vcpu->kvm->arch.sca_lock);
2070 	if (vcpu->kvm->arch.use_esca) {
2071 		struct esca_block *sca = vcpu->kvm->arch.sca;
2072 
2073 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2074 		sca->cpu[vcpu->vcpu_id].sda = 0;
2075 	} else {
2076 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2077 
2078 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2079 		sca->cpu[vcpu->vcpu_id].sda = 0;
2080 	}
2081 	read_unlock(&vcpu->kvm->arch.sca_lock);
2082 }
2083 
2084 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2085 {
2086 	if (!kvm_s390_use_sca_entries()) {
2087 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2088 
2089 		/* we still need the basic sca for the ipte control */
2090 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2091 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2092 	}
2093 	read_lock(&vcpu->kvm->arch.sca_lock);
2094 	if (vcpu->kvm->arch.use_esca) {
2095 		struct esca_block *sca = vcpu->kvm->arch.sca;
2096 
2097 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2098 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2099 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2100 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2101 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2102 	} else {
2103 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2104 
2105 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2106 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2107 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2108 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2109 	}
2110 	read_unlock(&vcpu->kvm->arch.sca_lock);
2111 }
2112 
2113 /* Basic SCA to Extended SCA data copy routines */
2114 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2115 {
2116 	d->sda = s->sda;
2117 	d->sigp_ctrl.c = s->sigp_ctrl.c;
2118 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2119 }
2120 
2121 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2122 {
2123 	int i;
2124 
2125 	d->ipte_control = s->ipte_control;
2126 	d->mcn[0] = s->mcn;
2127 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2128 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2129 }
2130 
2131 static int sca_switch_to_extended(struct kvm *kvm)
2132 {
2133 	struct bsca_block *old_sca = kvm->arch.sca;
2134 	struct esca_block *new_sca;
2135 	struct kvm_vcpu *vcpu;
2136 	unsigned int vcpu_idx;
2137 	u32 scaol, scaoh;
2138 
2139 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2140 	if (!new_sca)
2141 		return -ENOMEM;
2142 
2143 	scaoh = (u32)((u64)(new_sca) >> 32);
2144 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
2145 
2146 	kvm_s390_vcpu_block_all(kvm);
2147 	write_lock(&kvm->arch.sca_lock);
2148 
2149 	sca_copy_b_to_e(new_sca, old_sca);
2150 
2151 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2152 		vcpu->arch.sie_block->scaoh = scaoh;
2153 		vcpu->arch.sie_block->scaol = scaol;
2154 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2155 	}
2156 	kvm->arch.sca = new_sca;
2157 	kvm->arch.use_esca = 1;
2158 
2159 	write_unlock(&kvm->arch.sca_lock);
2160 	kvm_s390_vcpu_unblock_all(kvm);
2161 
2162 	free_page((unsigned long)old_sca);
2163 
2164 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2165 		 old_sca, kvm->arch.sca);
2166 	return 0;
2167 }
2168 
2169 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2170 {
2171 	int rc;
2172 
2173 	if (!kvm_s390_use_sca_entries()) {
2174 		if (id < KVM_MAX_VCPUS)
2175 			return true;
2176 		return false;
2177 	}
2178 	if (id < KVM_S390_BSCA_CPU_SLOTS)
2179 		return true;
2180 	if (!sclp.has_esca || !sclp.has_64bscao)
2181 		return false;
2182 
2183 	mutex_lock(&kvm->lock);
2184 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2185 	mutex_unlock(&kvm->lock);
2186 
2187 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2188 }
2189 
2190 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2191 {
2192 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2193 	kvm_clear_async_pf_completion_queue(vcpu);
2194 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2195 				    KVM_SYNC_GPRS |
2196 				    KVM_SYNC_ACRS |
2197 				    KVM_SYNC_CRS |
2198 				    KVM_SYNC_ARCH0 |
2199 				    KVM_SYNC_PFAULT;
2200 	kvm_s390_set_prefix(vcpu, 0);
2201 	if (test_kvm_facility(vcpu->kvm, 64))
2202 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2203 	if (test_kvm_facility(vcpu->kvm, 133))
2204 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2205 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
2206 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2207 	 */
2208 	if (MACHINE_HAS_VX)
2209 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2210 	else
2211 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2212 
2213 	if (kvm_is_ucontrol(vcpu->kvm))
2214 		return __kvm_ucontrol_vcpu_init(vcpu);
2215 
2216 	return 0;
2217 }
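
/*
 * Usage sketch (hypothetical userspace code, not part of this file): the
 * kvm_valid_regs bits set above tell userspace which register sets KVM
 * synchronizes via the mmap()ed kvm_run area; userspace requests the
 * reverse direction through kvm_dirty_regs. vcpu_fd and run are assumed
 * to exist.
 */
#if 0
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int set_guest_prefix(int vcpu_fd, struct kvm_run *run, __u32 prefix)
{
	run->s.regs.prefix = prefix;
	run->kvm_dirty_regs |= KVM_SYNC_PREFIX;	/* consumed by sync_regs() */
	return ioctl(vcpu_fd, KVM_RUN, 0);
}
#endif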
2218 
2219 /* preemption must be disabled to protect from TOD sync and vcpu_load/put */
2220 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2221 {
2222 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2223 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2224 	vcpu->arch.cputm_start = get_tod_clock_fast();
2225 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2226 }
2227 
2228 /* preemption must be disabled to protect from TOD sync and vcpu_load/put */
2229 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2230 {
2231 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2232 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2233 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2234 	vcpu->arch.cputm_start = 0;
2235 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2236 }
2237 
2238 /* preemption must be disabled to protect from TOD sync and vcpu_load/put */
2239 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2240 {
2241 	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2242 	vcpu->arch.cputm_enabled = true;
2243 	__start_cpu_timer_accounting(vcpu);
2244 }
2245 
2246 /* preemption must be disabled to protect from TOD sync and vcpu_load/put */
2247 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2248 {
2249 	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2250 	__stop_cpu_timer_accounting(vcpu);
2251 	vcpu->arch.cputm_enabled = false;
2252 }
2253 
2254 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2255 {
2256 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2257 	__enable_cpu_timer_accounting(vcpu);
2258 	preempt_enable();
2259 }
2260 
2261 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2262 {
2263 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2264 	__disable_cpu_timer_accounting(vcpu);
2265 	preempt_enable();
2266 }
2267 
2268 /* set the cpu timer - may only be called from the VCPU thread itself */
2269 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2270 {
2271 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2272 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2273 	if (vcpu->arch.cputm_enabled)
2274 		vcpu->arch.cputm_start = get_tod_clock_fast();
2275 	vcpu->arch.sie_block->cputm = cputm;
2276 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2277 	preempt_enable();
2278 }
2279 
2280 /* update and get the cpu timer - can also be called from other VCPU threads */
2281 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2282 {
2283 	unsigned int seq;
2284 	__u64 value;
2285 
2286 	if (unlikely(!vcpu->arch.cputm_enabled))
2287 		return vcpu->arch.sie_block->cputm;
2288 
2289 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2290 	do {
2291 		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2292 		/*
2293 		 * If the writer would ever execute a read in the critical
2294 		 * section, e.g. in irq context, we have a deadlock.
2295 		 */
2296 		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2297 		value = vcpu->arch.sie_block->cputm;
2298 		/* if cputm_start is 0, accounting is being started/stopped */
2299 		if (likely(vcpu->arch.cputm_start))
2300 			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2301 	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2302 	preempt_enable();
2303 	return value;
2304 }
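
/*
 * The retry loop above follows the usual seqcount reader pattern; a
 * simplified, self-contained sketch (illustrative only, with simplified
 * memory ordering; the names below are local to the example):
 */
#if 0
struct timer_state {
	unsigned int seq;	/* odd while an update is in progress */
	u64 base;		/* last stored timer value */
	u64 start;		/* TOD at which accounting started, or 0 */
};

static u64 read_timer(struct timer_state *t)
{
	unsigned int seq;
	u64 value;

	do {
		seq = READ_ONCE(t->seq);
		smp_rmb();
		value = t->base;
		if (t->start)
			value -= get_tod_clock_fast() - t->start;
		smp_rmb();
	} while ((seq & 1) || READ_ONCE(t->seq) != seq);

	return value;
}
#endif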
2305 
2306 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2307 {
2308 
2309 	gmap_enable(vcpu->arch.enabled_gmap);
2310 	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2311 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2312 		__start_cpu_timer_accounting(vcpu);
2313 	vcpu->cpu = cpu;
2314 }
2315 
2316 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2317 {
2318 	vcpu->cpu = -1;
2319 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2320 		__stop_cpu_timer_accounting(vcpu);
2321 	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2322 	vcpu->arch.enabled_gmap = gmap_get_enabled();
2323 	gmap_disable(vcpu->arch.enabled_gmap);
2324 
2325 }
2326 
2327 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2328 {
2329 	/* this equals initial cpu reset in the PoP, but we don't switch to ESA */
2330 	vcpu->arch.sie_block->gpsw.mask = 0UL;
2331 	vcpu->arch.sie_block->gpsw.addr = 0UL;
2332 	kvm_s390_set_prefix(vcpu, 0);
2333 	kvm_s390_set_cpu_timer(vcpu, 0);
2334 	vcpu->arch.sie_block->ckc       = 0UL;
2335 	vcpu->arch.sie_block->todpr     = 0;
2336 	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2337 	vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
2338 	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2339 	/* make sure the new fpc will be lazily loaded */
2340 	save_fpu_regs();
2341 	current->thread.fpu.fpc = 0;
2342 	vcpu->arch.sie_block->gbea = 1;
2343 	vcpu->arch.sie_block->pp = 0;
2344 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2345 	kvm_clear_async_pf_completion_queue(vcpu);
2346 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2347 		kvm_s390_vcpu_stop(vcpu);
2348 	kvm_s390_clear_local_irqs(vcpu);
2349 }
2350 
2351 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2352 {
2353 	mutex_lock(&vcpu->kvm->lock);
2354 	preempt_disable();
2355 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2356 	preempt_enable();
2357 	mutex_unlock(&vcpu->kvm->lock);
2358 	if (!kvm_is_ucontrol(vcpu->kvm)) {
2359 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2360 		sca_add_vcpu(vcpu);
2361 	}
2362 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2363 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2364 	/* make vcpu_load load the right gmap on the first trigger */
2365 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2366 }
2367 
2368 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2369 {
2370 	if (!test_kvm_facility(vcpu->kvm, 76))
2371 		return;
2372 
2373 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2374 
2375 	if (vcpu->kvm->arch.crypto.aes_kw)
2376 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2377 	if (vcpu->kvm->arch.crypto.dea_kw)
2378 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2379 
2380 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2381 }
2382 
2383 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2384 {
2385 	free_page(vcpu->arch.sie_block->cbrlo);
2386 	vcpu->arch.sie_block->cbrlo = 0;
2387 }
2388 
2389 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2390 {
2391 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2392 	if (!vcpu->arch.sie_block->cbrlo)
2393 		return -ENOMEM;
2394 
2395 	vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
2396 	return 0;
2397 }
2398 
2399 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2400 {
2401 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2402 
2403 	vcpu->arch.sie_block->ibc = model->ibc;
2404 	if (test_kvm_facility(vcpu->kvm, 7))
2405 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2406 }
2407 
2408 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2409 {
2410 	int rc = 0;
2411 
2412 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2413 						    CPUSTAT_SM |
2414 						    CPUSTAT_STOPPED);
2415 
2416 	if (test_kvm_facility(vcpu->kvm, 78))
2417 		atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
2418 	else if (test_kvm_facility(vcpu->kvm, 8))
2419 		atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
2420 
2421 	kvm_s390_vcpu_setup_model(vcpu);
2422 
2423 	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2424 	if (MACHINE_HAS_ESOP)
2425 		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2426 	if (test_kvm_facility(vcpu->kvm, 9))
2427 		vcpu->arch.sie_block->ecb |= ECB_SRSI;
2428 	if (test_kvm_facility(vcpu->kvm, 73))
2429 		vcpu->arch.sie_block->ecb |= ECB_TE;
2430 
2431 	if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
2432 		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2433 	if (test_kvm_facility(vcpu->kvm, 130))
2434 		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2435 	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2436 	if (sclp.has_cei)
2437 		vcpu->arch.sie_block->eca |= ECA_CEI;
2438 	if (sclp.has_ib)
2439 		vcpu->arch.sie_block->eca |= ECA_IB;
2440 	if (sclp.has_siif)
2441 		vcpu->arch.sie_block->eca |= ECA_SII;
2442 	if (sclp.has_sigpif)
2443 		vcpu->arch.sie_block->eca |= ECA_SIGPI;
2444 	if (test_kvm_facility(vcpu->kvm, 129)) {
2445 		vcpu->arch.sie_block->eca |= ECA_VX;
2446 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2447 	}
2448 	if (test_kvm_facility(vcpu->kvm, 139))
2449 		vcpu->arch.sie_block->ecd |= ECD_MEF;
2450 
2451 	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2452 					| SDNXC;
2453 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2454 
2455 	if (sclp.has_kss)
2456 		atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
2457 	else
2458 		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2459 
2460 	if (vcpu->kvm->arch.use_cmma) {
2461 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
2462 		if (rc)
2463 			return rc;
2464 	}
2465 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2466 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2467 
2468 	kvm_s390_vcpu_crypto_setup(vcpu);
2469 
2470 	return rc;
2471 }
2472 
2473 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2474 				      unsigned int id)
2475 {
2476 	struct kvm_vcpu *vcpu;
2477 	struct sie_page *sie_page;
2478 	int rc = -EINVAL;
2479 
2480 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2481 		goto out;
2482 
2483 	rc = -ENOMEM;
2484 
2485 	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2486 	if (!vcpu)
2487 		goto out;
2488 
2489 	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2490 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2491 	if (!sie_page)
2492 		goto out_free_cpu;
2493 
2494 	vcpu->arch.sie_block = &sie_page->sie_block;
2495 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2496 
2497 	/* the real guest size will always be smaller than msl */
2498 	vcpu->arch.sie_block->mso = 0;
2499 	vcpu->arch.sie_block->msl = sclp.hamax;
2500 
2501 	vcpu->arch.sie_block->icpua = id;
2502 	spin_lock_init(&vcpu->arch.local_int.lock);
2503 	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2504 	vcpu->arch.local_int.wq = &vcpu->wq;
2505 	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2506 	seqcount_init(&vcpu->arch.cputm_seqcount);
2507 
2508 	rc = kvm_vcpu_init(vcpu, kvm, id);
2509 	if (rc)
2510 		goto out_free_sie_block;
2511 	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2512 		 vcpu->arch.sie_block);
2513 	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2514 
2515 	return vcpu;
2516 out_free_sie_block:
2517 	free_page((unsigned long)(vcpu->arch.sie_block));
2518 out_free_cpu:
2519 	kmem_cache_free(kvm_vcpu_cache, vcpu);
2520 out:
2521 	return ERR_PTR(rc);
2522 }
2523 
2524 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2525 {
2526 	return kvm_s390_vcpu_has_irq(vcpu, 0);
2527 }
2528 
2529 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2530 {
2531 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2532 }
2533 
2534 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2535 {
2536 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2537 	exit_sie(vcpu);
2538 }
2539 
2540 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2541 {
2542 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2543 }
2544 
2545 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2546 {
2547 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2548 	exit_sie(vcpu);
2549 }
2550 
2551 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2552 {
2553 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2554 }
2555 
2556 /*
2557  * Kick a guest cpu out of SIE and wait until SIE is not running.
2558  * If the CPU is not running (e.g. waiting while idle), the function
2559  * returns immediately. */
2560 void exit_sie(struct kvm_vcpu *vcpu)
2561 {
2562 	atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2563 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2564 		cpu_relax();
2565 }
2566 
2567 /* Kick a guest cpu out of SIE to process a request synchronously */
2568 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2569 {
2570 	kvm_make_request(req, vcpu);
2571 	kvm_s390_vcpu_request(vcpu);
2572 }
2573 
2574 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2575 			      unsigned long end)
2576 {
2577 	struct kvm *kvm = gmap->private;
2578 	struct kvm_vcpu *vcpu;
2579 	unsigned long prefix;
2580 	int i;
2581 
2582 	if (gmap_is_shadow(gmap))
2583 		return;
2584 	if (start >= 1UL << 31)
2585 		/* We are only interested in prefix pages */
2586 		return;
2587 	kvm_for_each_vcpu(i, vcpu, kvm) {
2588 		/* match against both prefix pages */
2589 		prefix = kvm_s390_get_prefix(vcpu);
2590 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2591 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2592 				   start, end);
2593 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2594 		}
2595 	}
2596 }
2597 
2598 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2599 {
2600 	/* kvm common code refers to this, but never calls it */
2601 	BUG();
2602 	return 0;
2603 }
2604 
2605 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2606 					   struct kvm_one_reg *reg)
2607 {
2608 	int r = -EINVAL;
2609 
2610 	switch (reg->id) {
2611 	case KVM_REG_S390_TODPR:
2612 		r = put_user(vcpu->arch.sie_block->todpr,
2613 			     (u32 __user *)reg->addr);
2614 		break;
2615 	case KVM_REG_S390_EPOCHDIFF:
2616 		r = put_user(vcpu->arch.sie_block->epoch,
2617 			     (u64 __user *)reg->addr);
2618 		break;
2619 	case KVM_REG_S390_CPU_TIMER:
2620 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
2621 			     (u64 __user *)reg->addr);
2622 		break;
2623 	case KVM_REG_S390_CLOCK_COMP:
2624 		r = put_user(vcpu->arch.sie_block->ckc,
2625 			     (u64 __user *)reg->addr);
2626 		break;
2627 	case KVM_REG_S390_PFTOKEN:
2628 		r = put_user(vcpu->arch.pfault_token,
2629 			     (u64 __user *)reg->addr);
2630 		break;
2631 	case KVM_REG_S390_PFCOMPARE:
2632 		r = put_user(vcpu->arch.pfault_compare,
2633 			     (u64 __user *)reg->addr);
2634 		break;
2635 	case KVM_REG_S390_PFSELECT:
2636 		r = put_user(vcpu->arch.pfault_select,
2637 			     (u64 __user *)reg->addr);
2638 		break;
2639 	case KVM_REG_S390_PP:
2640 		r = put_user(vcpu->arch.sie_block->pp,
2641 			     (u64 __user *)reg->addr);
2642 		break;
2643 	case KVM_REG_S390_GBEA:
2644 		r = put_user(vcpu->arch.sie_block->gbea,
2645 			     (u64 __user *)reg->addr);
2646 		break;
2647 	default:
2648 		break;
2649 	}
2650 
2651 	return r;
2652 }
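
/*
 * Usage sketch (hypothetical userspace code): reading one of the
 * registers handled above through the generic one_reg interface.
 * vcpu_fd is assumed to be an open vcpu file descriptor.
 */
#if 0
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int get_cpu_timer(int vcpu_fd, __u64 *cputm)
{
	struct kvm_one_reg reg = {
		.id   = KVM_REG_S390_CPU_TIMER,
		.addr = (__u64)(unsigned long)cputm,
	};

	return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
}
#endif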
2653 
2654 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2655 					   struct kvm_one_reg *reg)
2656 {
2657 	int r = -EINVAL;
2658 	__u64 val;
2659 
2660 	switch (reg->id) {
2661 	case KVM_REG_S390_TODPR:
2662 		r = get_user(vcpu->arch.sie_block->todpr,
2663 			     (u32 __user *)reg->addr);
2664 		break;
2665 	case KVM_REG_S390_EPOCHDIFF:
2666 		r = get_user(vcpu->arch.sie_block->epoch,
2667 			     (u64 __user *)reg->addr);
2668 		break;
2669 	case KVM_REG_S390_CPU_TIMER:
2670 		r = get_user(val, (u64 __user *)reg->addr);
2671 		if (!r)
2672 			kvm_s390_set_cpu_timer(vcpu, val);
2673 		break;
2674 	case KVM_REG_S390_CLOCK_COMP:
2675 		r = get_user(vcpu->arch.sie_block->ckc,
2676 			     (u64 __user *)reg->addr);
2677 		break;
2678 	case KVM_REG_S390_PFTOKEN:
2679 		r = get_user(vcpu->arch.pfault_token,
2680 			     (u64 __user *)reg->addr);
2681 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2682 			kvm_clear_async_pf_completion_queue(vcpu);
2683 		break;
2684 	case KVM_REG_S390_PFCOMPARE:
2685 		r = get_user(vcpu->arch.pfault_compare,
2686 			     (u64 __user *)reg->addr);
2687 		break;
2688 	case KVM_REG_S390_PFSELECT:
2689 		r = get_user(vcpu->arch.pfault_select,
2690 			     (u64 __user *)reg->addr);
2691 		break;
2692 	case KVM_REG_S390_PP:
2693 		r = get_user(vcpu->arch.sie_block->pp,
2694 			     (u64 __user *)reg->addr);
2695 		break;
2696 	case KVM_REG_S390_GBEA:
2697 		r = get_user(vcpu->arch.sie_block->gbea,
2698 			     (u64 __user *)reg->addr);
2699 		break;
2700 	default:
2701 		break;
2702 	}
2703 
2704 	return r;
2705 }
2706 
2707 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2708 {
2709 	kvm_s390_vcpu_initial_reset(vcpu);
2710 	return 0;
2711 }
2712 
2713 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2714 {
2715 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2716 	return 0;
2717 }
2718 
2719 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2720 {
2721 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2722 	return 0;
2723 }
2724 
2725 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2726 				  struct kvm_sregs *sregs)
2727 {
2728 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2729 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2730 	return 0;
2731 }
2732 
2733 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2734 				  struct kvm_sregs *sregs)
2735 {
2736 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2737 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2738 	return 0;
2739 }
2740 
2741 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2742 {
2743 	if (test_fp_ctl(fpu->fpc))
2744 		return -EINVAL;
2745 	vcpu->run->s.regs.fpc = fpu->fpc;
2746 	if (MACHINE_HAS_VX)
2747 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2748 				 (freg_t *) fpu->fprs);
2749 	else
2750 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2751 	return 0;
2752 }
2753 
2754 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2755 {
2756 	/* make sure we have the latest values */
2757 	save_fpu_regs();
2758 	if (MACHINE_HAS_VX)
2759 		convert_vx_to_fp((freg_t *) fpu->fprs,
2760 				 (__vector128 *) vcpu->run->s.regs.vrs);
2761 	else
2762 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2763 	fpu->fpc = vcpu->run->s.regs.fpc;
2764 	return 0;
2765 }
2766 
2767 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2768 {
2769 	int rc = 0;
2770 
2771 	if (!is_vcpu_stopped(vcpu))
2772 		rc = -EBUSY;
2773 	else {
2774 		vcpu->run->psw_mask = psw.mask;
2775 		vcpu->run->psw_addr = psw.addr;
2776 	}
2777 	return rc;
2778 }
2779 
2780 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2781 				  struct kvm_translation *tr)
2782 {
2783 	return -EINVAL; /* not implemented yet */
2784 }
2785 
2786 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2787 			      KVM_GUESTDBG_USE_HW_BP | \
2788 			      KVM_GUESTDBG_ENABLE)
2789 
2790 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2791 					struct kvm_guest_debug *dbg)
2792 {
2793 	int rc = 0;
2794 
2795 	vcpu->guest_debug = 0;
2796 	kvm_s390_clear_bp_data(vcpu);
2797 
2798 	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2799 		return -EINVAL;
2800 	if (!sclp.has_gpere)
2801 		return -EINVAL;
2802 
2803 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
2804 		vcpu->guest_debug = dbg->control;
2805 		/* enforce guest PER */
2806 		atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2807 
2808 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2809 			rc = kvm_s390_import_bp_data(vcpu, dbg);
2810 	} else {
2811 		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2812 		vcpu->arch.guestdbg.last_bp = 0;
2813 	}
2814 
2815 	if (rc) {
2816 		vcpu->guest_debug = 0;
2817 		kvm_s390_clear_bp_data(vcpu);
2818 		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2819 	}
2820 
2821 	return rc;
2822 }
2823 
2824 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2825 				    struct kvm_mp_state *mp_state)
2826 {
2827 	/* CHECK_STOP and LOAD are not supported yet */
2828 	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2829 				       KVM_MP_STATE_OPERATING;
2830 }
2831 
2832 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2833 				    struct kvm_mp_state *mp_state)
2834 {
2835 	int rc = 0;
2836 
2837 	/* user space knows about this interface - let it control the state */
2838 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2839 
2840 	switch (mp_state->mp_state) {
2841 	case KVM_MP_STATE_STOPPED:
2842 		kvm_s390_vcpu_stop(vcpu);
2843 		break;
2844 	case KVM_MP_STATE_OPERATING:
2845 		kvm_s390_vcpu_start(vcpu);
2846 		break;
2847 	case KVM_MP_STATE_LOAD:
2848 	case KVM_MP_STATE_CHECK_STOP:
2849 		/* fall through - CHECK_STOP and LOAD are not supported yet */
2850 	default:
2851 		rc = -ENXIO;
2852 	}
2853 
2854 	return rc;
2855 }
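
/*
 * Usage sketch (hypothetical userspace code): stopping a vcpu via the
 * mp_state interface. Note that the first KVM_SET_MP_STATE call switches
 * the VM to user-controlled cpu state handling, as implemented above.
 */
#if 0
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int stop_vcpu(int vcpu_fd)
{
	struct kvm_mp_state state = { .mp_state = KVM_MP_STATE_STOPPED };

	return ioctl(vcpu_fd, KVM_SET_MP_STATE, &state);
}
#endif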
2856 
2857 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2858 {
2859 	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2860 }
2861 
2862 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2863 {
2864 retry:
2865 	kvm_s390_vcpu_request_handled(vcpu);
2866 	if (!kvm_request_pending(vcpu))
2867 		return 0;
2868 	/*
2869 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2870 	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2871 	 * This ensures that the ipte instruction for this request has
2872 	 * already finished. We might race against a second unmapper that
2873 	 * wants to set the blocking bit. Let's just retry the request loop.
2874 	 */
2875 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2876 		int rc;
2877 		rc = gmap_mprotect_notify(vcpu->arch.gmap,
2878 					  kvm_s390_get_prefix(vcpu),
2879 					  PAGE_SIZE * 2, PROT_WRITE);
2880 		if (rc) {
2881 			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2882 			return rc;
2883 		}
2884 		goto retry;
2885 	}
2886 
2887 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2888 		vcpu->arch.sie_block->ihcpu = 0xffff;
2889 		goto retry;
2890 	}
2891 
2892 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2893 		if (!ibs_enabled(vcpu)) {
2894 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2895 			atomic_or(CPUSTAT_IBS,
2896 					&vcpu->arch.sie_block->cpuflags);
2897 		}
2898 		goto retry;
2899 	}
2900 
2901 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2902 		if (ibs_enabled(vcpu)) {
2903 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2904 			atomic_andnot(CPUSTAT_IBS,
2905 					  &vcpu->arch.sie_block->cpuflags);
2906 		}
2907 		goto retry;
2908 	}
2909 
2910 	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2911 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2912 		goto retry;
2913 	}
2914 
2915 	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
2916 		/*
2917 		 * Disable CMMA virtualization; we will emulate the ESSA
2918 		 * instruction manually, in order to provide additional
2919 		 * functionality needed for live migration.
2920 		 */
2921 		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
2922 		goto retry;
2923 	}
2924 
2925 	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
2926 		/*
2927 		 * Re-enable CMMA virtualization if CMMA is available and
2928 		 * was used.
2929 		 */
2930 		if ((vcpu->kvm->arch.use_cmma) &&
2931 		    (vcpu->kvm->mm->context.use_cmma))
2932 			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
2933 		goto retry;
2934 	}
2935 
2936 	/* nothing to do, just clear the request */
2937 	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
2938 
2939 	return 0;
2940 }
2941 
2942 void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
2943 				 const struct kvm_s390_vm_tod_clock *gtod)
2944 {
2945 	struct kvm_vcpu *vcpu;
2946 	struct kvm_s390_tod_clock_ext htod;
2947 	int i;
2948 
2949 	mutex_lock(&kvm->lock);
2950 	preempt_disable();
2951 
2952 	get_tod_clock_ext((char *)&htod);
2953 
2954 	kvm->arch.epoch = gtod->tod - htod.tod;
2955 	kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
2956 
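	/*
	 * The epoch is computed modulo 2^64. If the subtraction above
	 * wrapped (guest TOD behind host TOD), borrow one from the epoch
	 * index so that the extended difference stays consistent.
	 */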
2957 	if (kvm->arch.epoch > gtod->tod)
2958 		kvm->arch.epdx -= 1;
2959 
2960 	kvm_s390_vcpu_block_all(kvm);
2961 	kvm_for_each_vcpu(i, vcpu, kvm) {
2962 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2963 		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
2964 	}
2965 
2966 	kvm_s390_vcpu_unblock_all(kvm);
2967 	preempt_enable();
2968 	mutex_unlock(&kvm->lock);
2969 }
2970 
2971 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2972 {
2973 	struct kvm_vcpu *vcpu;
2974 	int i;
2975 
2976 	mutex_lock(&kvm->lock);
2977 	preempt_disable();
2978 	kvm->arch.epoch = tod - get_tod_clock();
2979 	kvm_s390_vcpu_block_all(kvm);
2980 	kvm_for_each_vcpu(i, vcpu, kvm)
2981 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2982 	kvm_s390_vcpu_unblock_all(kvm);
2983 	preempt_enable();
2984 	mutex_unlock(&kvm->lock);
2985 }
2986 
2987 /**
2988  * kvm_arch_fault_in_page - fault-in guest page if necessary
2989  * @vcpu: The corresponding virtual cpu
2990  * @gpa: Guest physical address
2991  * @writable: Whether the page should be writable or not
2992  *
2993  * Make sure that a guest page has been faulted-in on the host.
2994  *
2995  * Return: Zero on success, negative error code otherwise.
2996  */
2997 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2998 {
2999 	return gmap_fault(vcpu->arch.gmap, gpa,
3000 			  writable ? FAULT_FLAG_WRITE : 0);
3001 }
3002 
3003 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3004 				      unsigned long token)
3005 {
3006 	struct kvm_s390_interrupt inti;
3007 	struct kvm_s390_irq irq;
3008 
3009 	if (start_token) {
3010 		irq.u.ext.ext_params2 = token;
3011 		irq.type = KVM_S390_INT_PFAULT_INIT;
3012 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3013 	} else {
3014 		inti.type = KVM_S390_INT_PFAULT_DONE;
3015 		inti.parm64 = token;
3016 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3017 	}
3018 }
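
/*
 * Background (summary): with pfault handshaking, a cooperating guest
 * registers a token area via DIAG 0x258. On a host-side page fault KVM
 * injects PFAULT INIT with the token so the guest can schedule another
 * task, and PFAULT DONE once the page has been faulted in.
 */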
3019 
3020 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3021 				     struct kvm_async_pf *work)
3022 {
3023 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3024 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3025 }
3026 
3027 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3028 				 struct kvm_async_pf *work)
3029 {
3030 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3031 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3032 }
3033 
3034 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3035 			       struct kvm_async_pf *work)
3036 {
3037 	/* s390 will always inject the page directly */
3038 }
3039 
3040 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3041 {
3042 	/*
3043 	 * s390 will always inject the page directly,
3044 	 * but we still want check_async_completion to clean up
3045 	 */
3046 	return true;
3047 }
3048 
3049 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3050 {
3051 	hva_t hva;
3052 	struct kvm_arch_async_pf arch;
3053 	int rc;
3054 
3055 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3056 		return 0;
3057 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3058 	    vcpu->arch.pfault_compare)
3059 		return 0;
3060 	if (psw_extint_disabled(vcpu))
3061 		return 0;
3062 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
3063 		return 0;
3064 	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
3065 		return 0;
3066 	if (!vcpu->arch.gmap->pfault_enabled)
3067 		return 0;
3068 
3069 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3070 	hva += current->thread.gmap_addr & ~PAGE_MASK;
3071 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3072 		return 0;
3073 
3074 	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3075 	return rc;
3076 }
3077 
3078 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3079 {
3080 	int rc, cpuflags;
3081 
3082 	/*
3083 	 * On s390, notifications for arriving pages are delivered directly
3084 	 * to the guest, but the housekeeping for completed pfaults is
3085 	 * handled outside the worker.
3086 	 */
3087 	kvm_check_async_pf_completion(vcpu);
3088 
3089 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3090 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3091 
3092 	if (need_resched())
3093 		schedule();
3094 
3095 	if (test_cpu_flag(CIF_MCCK_PENDING))
3096 		s390_handle_mcck();
3097 
3098 	if (!kvm_is_ucontrol(vcpu->kvm)) {
3099 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
3100 		if (rc)
3101 			return rc;
3102 	}
3103 
3104 	rc = kvm_s390_handle_requests(vcpu);
3105 	if (rc)
3106 		return rc;
3107 
3108 	if (guestdbg_enabled(vcpu)) {
3109 		kvm_s390_backup_guest_per_regs(vcpu);
3110 		kvm_s390_patch_guest_per_regs(vcpu);
3111 	}
3112 
3113 	vcpu->arch.sie_block->icptcode = 0;
3114 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3115 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3116 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
3117 
3118 	return 0;
3119 }
3120 
3121 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3122 {
3123 	struct kvm_s390_pgm_info pgm_info = {
3124 		.code = PGM_ADDRESSING,
3125 	};
3126 	u8 opcode, ilen;
3127 	int rc;
3128 
3129 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3130 	trace_kvm_s390_sie_fault(vcpu);
3131 
3132 	/*
3133 	 * We want to inject an addressing exception, which is defined as a
3134 	 * suppressing or terminating exception. However, since we came here
3135 	 * by a DAT access exception, the PSW still points to the faulting
3136 	 * instruction, as DAT exceptions are nullifying. So we have to look
3137 	 * up the current opcode to get the instruction length, to be able
3138 	 * to forward the PSW.
3139 	 */
3140 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3141 	ilen = insn_length(opcode);
3142 	if (rc < 0) {
3143 		return rc;
3144 	} else if (rc) {
3145 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
3146 		 * Forward by arbitrary ilc, injection will take care of
3147 		 * nullification if necessary.
3148 		 */
3149 		pgm_info = vcpu->arch.pgm;
3150 		ilen = 4;
3151 	}
3152 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3153 	kvm_s390_forward_psw(vcpu, ilen);
3154 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3155 }
3156 
3157 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3158 {
3159 	struct mcck_volatile_info *mcck_info;
3160 	struct sie_page *sie_page;
3161 
3162 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3163 		   vcpu->arch.sie_block->icptcode);
3164 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3165 
3166 	if (guestdbg_enabled(vcpu))
3167 		kvm_s390_restore_guest_per_regs(vcpu);
3168 
3169 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3170 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3171 
3172 	if (exit_reason == -EINTR) {
3173 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
3174 		sie_page = container_of(vcpu->arch.sie_block,
3175 					struct sie_page, sie_block);
3176 		mcck_info = &sie_page->mcck_info;
3177 		kvm_s390_reinject_machine_check(vcpu, mcck_info);
3178 		return 0;
3179 	}
3180 
3181 	if (vcpu->arch.sie_block->icptcode > 0) {
3182 		int rc = kvm_handle_sie_intercept(vcpu);
3183 
3184 		if (rc != -EOPNOTSUPP)
3185 			return rc;
3186 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3187 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3188 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3189 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3190 		return -EREMOTE;
3191 	} else if (exit_reason != -EFAULT) {
3192 		vcpu->stat.exit_null++;
3193 		return 0;
3194 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
3195 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3196 		vcpu->run->s390_ucontrol.trans_exc_code =
3197 						current->thread.gmap_addr;
3198 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
3199 		return -EREMOTE;
3200 	} else if (current->thread.gmap_pfault) {
3201 		trace_kvm_s390_major_guest_pfault(vcpu);
3202 		current->thread.gmap_pfault = 0;
3203 		if (kvm_arch_setup_async_pf(vcpu))
3204 			return 0;
3205 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3206 	}
3207 	return vcpu_post_run_fault_in_sie(vcpu);
3208 }
3209 
3210 static int __vcpu_run(struct kvm_vcpu *vcpu)
3211 {
3212 	int rc, exit_reason;
3213 
3214 	/*
3215 	 * We try to hold kvm->srcu during most of vcpu_run (except when
3216 	 * running the guest), so that memslots (and other stuff) are protected
3217 	 */
3218 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3219 
3220 	do {
3221 		rc = vcpu_pre_run(vcpu);
3222 		if (rc)
3223 			break;
3224 
3225 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3226 		/*
3227 		 * As PF_VCPU will be used in the fault handler, there must be
3228 		 * no uaccess between guest_enter and guest_exit.
3229 		 */
3230 		local_irq_disable();
3231 		guest_enter_irqoff();
3232 		__disable_cpu_timer_accounting(vcpu);
3233 		local_irq_enable();
3234 		exit_reason = sie64a(vcpu->arch.sie_block,
3235 				     vcpu->run->s.regs.gprs);
3236 		local_irq_disable();
3237 		__enable_cpu_timer_accounting(vcpu);
3238 		guest_exit_irqoff();
3239 		local_irq_enable();
3240 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3241 
3242 		rc = vcpu_post_run(vcpu, exit_reason);
3243 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3244 
3245 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3246 	return rc;
3247 }
3248 
3249 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3250 {
3251 	struct runtime_instr_cb *riccb;
3252 	struct gs_cb *gscb;
3253 
3254 	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3255 	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3256 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3257 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3258 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3259 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3260 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3261 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3262 		/* some control register changes require a tlb flush */
3263 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3264 	}
3265 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3266 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3267 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3268 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3269 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3270 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3271 	}
3272 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3273 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3274 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3275 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3276 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3277 			kvm_clear_async_pf_completion_queue(vcpu);
3278 	}
3279 	/*
3280 	 * If userspace sets the riccb (e.g. after migration) to a valid state,
3281 	 * we should enable RI here instead of doing the lazy enablement.
3282 	 */
3283 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3284 	    test_kvm_facility(vcpu->kvm, 64) &&
3285 	    riccb->v &&
3286 	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3287 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3288 		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3289 	}
3290 	/*
3291 	 * If userspace sets the gscb (e.g. after migration) to non-zero,
3292 	 * we should enable GS here instead of doing the lazy enablement.
3293 	 */
3294 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3295 	    test_kvm_facility(vcpu->kvm, 133) &&
3296 	    gscb->gssm &&
3297 	    !vcpu->arch.gs_enabled) {
3298 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3299 		vcpu->arch.sie_block->ecb |= ECB_GS;
3300 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3301 		vcpu->arch.gs_enabled = 1;
3302 	}
3303 	save_access_regs(vcpu->arch.host_acrs);
3304 	restore_access_regs(vcpu->run->s.regs.acrs);
3305 	/* save host (userspace) fprs/vrs */
3306 	save_fpu_regs();
3307 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3308 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3309 	if (MACHINE_HAS_VX)
3310 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3311 	else
3312 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3313 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3314 	if (test_fp_ctl(current->thread.fpu.fpc))
3315 		/* User space provided an invalid FPC, let's clear it */
3316 		current->thread.fpu.fpc = 0;
3317 	if (MACHINE_HAS_GS) {
3318 		preempt_disable();
3319 		__ctl_set_bit(2, 4);
3320 		if (current->thread.gs_cb) {
3321 			vcpu->arch.host_gscb = current->thread.gs_cb;
3322 			save_gs_cb(vcpu->arch.host_gscb);
3323 		}
3324 		if (vcpu->arch.gs_enabled) {
3325 			current->thread.gs_cb = (struct gs_cb *)
3326 						&vcpu->run->s.regs.gscb;
3327 			restore_gs_cb(current->thread.gs_cb);
3328 		}
3329 		preempt_enable();
3330 	}
3331 
3332 	kvm_run->kvm_dirty_regs = 0;
3333 }
3334 
3335 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3336 {
3337 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3338 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3339 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3340 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3341 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3342 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3343 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3344 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3345 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3346 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3347 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3348 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3349 	save_access_regs(vcpu->run->s.regs.acrs);
3350 	restore_access_regs(vcpu->arch.host_acrs);
3351 	/* Save guest register state */
3352 	save_fpu_regs();
3353 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3354 	/* Restore will be done lazily at return */
3355 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3356 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3357 	if (MACHINE_HAS_GS) {
3358 		__ctl_set_bit(2, 4);
3359 		if (vcpu->arch.gs_enabled)
3360 			save_gs_cb(current->thread.gs_cb);
3361 		preempt_disable();
3362 		current->thread.gs_cb = vcpu->arch.host_gscb;
3363 		restore_gs_cb(vcpu->arch.host_gscb);
3364 		preempt_enable();
3365 		if (!vcpu->arch.host_gscb)
3366 			__ctl_clear_bit(2, 4);
3367 		vcpu->arch.host_gscb = NULL;
3368 	}
3369 
3370 }
3371 
3372 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3373 {
3374 	int rc;
3375 
3376 	if (kvm_run->immediate_exit)
3377 		return -EINTR;
3378 
3379 	if (guestdbg_exit_pending(vcpu)) {
3380 		kvm_s390_prepare_debug_exit(vcpu);
3381 		return 0;
3382 	}
3383 
3384 	kvm_sigset_activate(vcpu);
3385 
3386 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3387 		kvm_s390_vcpu_start(vcpu);
3388 	} else if (is_vcpu_stopped(vcpu)) {
3389 		pr_err_ratelimited("can't run stopped vcpu %d\n",
3390 				   vcpu->vcpu_id);
3391 		return -EINVAL;
3392 	}
3393 
3394 	sync_regs(vcpu, kvm_run);
3395 	enable_cpu_timer_accounting(vcpu);
3396 
3397 	might_fault();
3398 	rc = __vcpu_run(vcpu);
3399 
3400 	if (signal_pending(current) && !rc) {
3401 		kvm_run->exit_reason = KVM_EXIT_INTR;
3402 		rc = -EINTR;
3403 	}
3404 
3405 	if (guestdbg_exit_pending(vcpu) && !rc)  {
3406 		kvm_s390_prepare_debug_exit(vcpu);
3407 		rc = 0;
3408 	}
3409 
3410 	if (rc == -EREMOTE) {
3411 		/* userspace support is needed, kvm_run has been prepared */
3412 		rc = 0;
3413 	}
3414 
3415 	disable_cpu_timer_accounting(vcpu);
3416 	store_regs(vcpu, kvm_run);
3417 
3418 	kvm_sigset_deactivate(vcpu);
3419 
3420 	vcpu->stat.exit_userspace++;
3421 	return rc;
3422 }
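
/*
 * Usage sketch (hypothetical userspace code): a minimal dispatch loop
 * around the run ioctl implemented above. vcpu_fd, the mmap()ed run
 * structure and handle_sieic() are assumptions of this example.
 */
#if 0
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int run_vcpu(int vcpu_fd, struct kvm_run *run)
{
	for (;;) {
		if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
			return -1;		/* e.g. -EINTR on a signal */

		switch (run->exit_reason) {
		case KVM_EXIT_INTR:
			continue;		/* just try again */
		case KVM_EXIT_S390_SIEIC:
			return handle_sieic(run); /* emulate in userspace */
		default:
			return 0;		/* defer to a real handler */
		}
	}
}
#endif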
3423 
3424 /*
3425  * store status at address
3426  * we have two special cases:
3427  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3428  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3429  */
3430 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3431 {
3432 	unsigned char archmode = 1;
3433 	freg_t fprs[NUM_FPRS];
3434 	unsigned int px;
3435 	u64 clkcomp, cputm;
3436 	int rc;
3437 
3438 	px = kvm_s390_get_prefix(vcpu);
3439 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3440 		if (write_guest_abs(vcpu, 163, &archmode, 1))
3441 			return -EFAULT;
3442 		gpa = 0;
3443 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3444 		if (write_guest_real(vcpu, 163, &archmode, 1))
3445 			return -EFAULT;
3446 		gpa = px;
3447 	} else
3448 		gpa -= __LC_FPREGS_SAVE_AREA;
3449 
3450 	/* manually convert vector registers if necessary */
3451 	if (MACHINE_HAS_VX) {
3452 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3453 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3454 				     fprs, 128);
3455 	} else {
3456 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3457 				     vcpu->run->s.regs.fprs, 128);
3458 	}
3459 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3460 			      vcpu->run->s.regs.gprs, 128);
3461 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3462 			      &vcpu->arch.sie_block->gpsw, 16);
3463 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3464 			      &px, 4);
3465 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3466 			      &vcpu->run->s.regs.fpc, 4);
3467 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3468 			      &vcpu->arch.sie_block->todpr, 4);
3469 	cputm = kvm_s390_get_cpu_timer(vcpu);
3470 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3471 			      &cputm, 8);
3472 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
3473 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3474 			      &clkcomp, 8);
3475 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3476 			      &vcpu->run->s.regs.acrs, 64);
3477 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3478 			      &vcpu->arch.sie_block->gcr, 128);
3479 	return rc ? -EFAULT : 0;
3480 }
3481 
3482 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3483 {
3484 	/*
3485 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3486 	 * switch in the run ioctl. Let's update our copies before we save
3487 	 * them into the save area.
3488 	 */
3489 	save_fpu_regs();
3490 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3491 	save_access_regs(vcpu->run->s.regs.acrs);
3492 
3493 	return kvm_s390_store_status_unloaded(vcpu, addr);
3494 }
3495 
3496 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3497 {
3498 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3499 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3500 }
3501 
3502 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3503 {
3504 	unsigned int i;
3505 	struct kvm_vcpu *vcpu;
3506 
3507 	kvm_for_each_vcpu(i, vcpu, kvm) {
3508 		__disable_ibs_on_vcpu(vcpu);
3509 	}
3510 }
3511 
3512 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3513 {
3514 	if (!sclp.has_ibs)
3515 		return;
3516 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3517 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3518 }
3519 
3520 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3521 {
3522 	int i, online_vcpus, started_vcpus = 0;
3523 
3524 	if (!is_vcpu_stopped(vcpu))
3525 		return;
3526 
3527 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3528 	/* Only one cpu at a time may enter/leave the STOPPED state. */
3529 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
3530 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3531 
3532 	for (i = 0; i < online_vcpus; i++) {
3533 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3534 			started_vcpus++;
3535 	}
3536 
3537 	if (started_vcpus == 0) {
3538 		/* we're the only active VCPU -> speed it up */
3539 		__enable_ibs_on_vcpu(vcpu);
3540 	} else if (started_vcpus == 1) {
3541 		/*
3542 		 * As we are starting a second VCPU, we have to disable
3543 		 * the IBS facility on all VCPUs to remove potentially
3544 		 * outstanding ENABLE requests.
3545 		 */
3546 		__disable_ibs_on_all_vcpus(vcpu->kvm);
3547 	}
3548 
3549 	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3550 	/*
3551 	 * Another VCPU might have used IBS while we were offline.
3552 	 * Let's play safe and flush the VCPU at startup.
3553 	 */
3554 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3555 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3556 	return;
3557 }
3558 
3559 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3560 {
3561 	int i, online_vcpus, started_vcpus = 0;
3562 	struct kvm_vcpu *started_vcpu = NULL;
3563 
3564 	if (is_vcpu_stopped(vcpu))
3565 		return;
3566 
3567 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3568 	/* Only one cpu at a time may enter/leave the STOPPED state. */
3569 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
3570 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3571 
3572 	/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
3573 	kvm_s390_clear_stop_irq(vcpu);
3574 
3575 	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3576 	__disable_ibs_on_vcpu(vcpu);
3577 
3578 	for (i = 0; i < online_vcpus; i++) {
3579 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3580 			started_vcpus++;
3581 			started_vcpu = vcpu->kvm->vcpus[i];
3582 		}
3583 	}
3584 
3585 	if (started_vcpus == 1) {
3586 		/*
3587 		 * As we only have one VCPU left, we want to enable the
3588 		 * IBS facility for that VCPU to speed it up.
3589 		 */
3590 		__enable_ibs_on_vcpu(started_vcpu);
3591 	}
3592 
3593 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3594 	return;
3595 }
3596 
3597 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3598 				     struct kvm_enable_cap *cap)
3599 {
3600 	int r;
3601 
3602 	if (cap->flags)
3603 		return -EINVAL;
3604 
3605 	switch (cap->cap) {
3606 	case KVM_CAP_S390_CSS_SUPPORT:
3607 		if (!vcpu->kvm->arch.css_support) {
3608 			vcpu->kvm->arch.css_support = 1;
3609 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3610 			trace_kvm_s390_enable_css(vcpu->kvm);
3611 		}
3612 		r = 0;
3613 		break;
3614 	default:
3615 		r = -EINVAL;
3616 		break;
3617 	}
3618 	return r;
3619 }
3620 
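/*
 * Handler for KVM_S390_MEM_OP: read/write guest logical memory through
 * a vmalloc'ed bounce buffer, or merely check accessibility when
 * KVM_S390_MEMOP_F_CHECK_ONLY is set. A positive return value is a
 * program exception code; with KVM_S390_MEMOP_F_INJECT_EXCEPTION set,
 * that exception is also injected into the guest. A minimal userspace
 * sketch (vcpu_fd and buf are illustrative assumptions, not taken from
 * this file):
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr	= 0x10000,
 *		.size	= 256,
 *		.op	= KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf	= (__u64)(unsigned long)buf,
 *		.ar	= 0,
 *	};
 *	rc = ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 */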
3621 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3622 				  struct kvm_s390_mem_op *mop)
3623 {
3624 	void __user *uaddr = (void __user *)mop->buf;
3625 	void *tmpbuf = NULL;
3626 	int r, srcu_idx;
3627 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3628 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
3629 
3630 	if (mop->flags & ~supported_flags)
3631 		return -EINVAL;
3632 
3633 	if (mop->size > MEM_OP_MAX_SIZE)
3634 		return -E2BIG;
3635 
3636 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3637 		tmpbuf = vmalloc(mop->size);
3638 		if (!tmpbuf)
3639 			return -ENOMEM;
3640 	}
3641 
3642 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3643 
3644 	switch (mop->op) {
3645 	case KVM_S390_MEMOP_LOGICAL_READ:
3646 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3647 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3648 					    mop->size, GACC_FETCH);
3649 			break;
3650 		}
3651 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3652 		if (r == 0) {
3653 			if (copy_to_user(uaddr, tmpbuf, mop->size))
3654 				r = -EFAULT;
3655 		}
3656 		break;
3657 	case KVM_S390_MEMOP_LOGICAL_WRITE:
3658 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3659 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3660 					    mop->size, GACC_STORE);
3661 			break;
3662 		}
3663 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3664 			r = -EFAULT;
3665 			break;
3666 		}
3667 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3668 		break;
3669 	default:
3670 		r = -EINVAL;
3671 	}
3672 
3673 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3674 
3675 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3676 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3677 
3678 	vfree(tmpbuf);
3679 	return r;
3680 }
3681 
3682 long kvm_arch_vcpu_ioctl(struct file *filp,
3683 			 unsigned int ioctl, unsigned long arg)
3684 {
3685 	struct kvm_vcpu *vcpu = filp->private_data;
3686 	void __user *argp = (void __user *)arg;
3687 	int idx;
3688 	long r;
3689 
3690 	switch (ioctl) {
3691 	case KVM_S390_IRQ: {
3692 		struct kvm_s390_irq s390irq;
3693 
3694 		r = -EFAULT;
3695 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3696 			break;
3697 		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3698 		break;
3699 	}
3700 	case KVM_S390_INTERRUPT: {
3701 		struct kvm_s390_interrupt s390int;
3702 		struct kvm_s390_irq s390irq;
3703 
3704 		r = -EFAULT;
3705 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
3706 			break;
3707 		if (s390int_to_s390irq(&s390int, &s390irq))
3708 			return -EINVAL;
3709 		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3710 		break;
3711 	}
3712 	case KVM_S390_STORE_STATUS:
3713 		idx = srcu_read_lock(&vcpu->kvm->srcu);
3714 		r = kvm_s390_vcpu_store_status(vcpu, arg);
3715 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
3716 		break;
3717 	case KVM_S390_SET_INITIAL_PSW: {
3718 		psw_t psw;
3719 
3720 		r = -EFAULT;
3721 		if (copy_from_user(&psw, argp, sizeof(psw)))
3722 			break;
3723 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3724 		break;
3725 	}
3726 	case KVM_S390_INITIAL_RESET:
3727 		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3728 		break;
3729 	case KVM_SET_ONE_REG:
3730 	case KVM_GET_ONE_REG: {
3731 		struct kvm_one_reg reg;
3732 		r = -EFAULT;
3733 		if (copy_from_user(&reg, argp, sizeof(reg)))
3734 			break;
3735 		if (ioctl == KVM_SET_ONE_REG)
3736 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3737 		else
3738 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3739 		break;
3740 	}
3741 #ifdef CONFIG_KVM_S390_UCONTROL
3742 	case KVM_S390_UCAS_MAP: {
3743 		struct kvm_s390_ucas_mapping ucasmap;
3744 
3745 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3746 			r = -EFAULT;
3747 			break;
3748 		}
3749 
3750 		if (!kvm_is_ucontrol(vcpu->kvm)) {
3751 			r = -EINVAL;
3752 			break;
3753 		}
3754 
3755 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3756 				     ucasmap.vcpu_addr, ucasmap.length);
3757 		break;
3758 	}
3759 	case KVM_S390_UCAS_UNMAP: {
3760 		struct kvm_s390_ucas_mapping ucasmap;
3761 
3762 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3763 			r = -EFAULT;
3764 			break;
3765 		}
3766 
3767 		if (!kvm_is_ucontrol(vcpu->kvm)) {
3768 			r = -EINVAL;
3769 			break;
3770 		}
3771 
3772 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3773 			ucasmap.length);
3774 		break;
3775 	}
3776 #endif
3777 	case KVM_S390_VCPU_FAULT: {
3778 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
3779 		break;
3780 	}
3781 	case KVM_ENABLE_CAP:
3782 	{
3783 		struct kvm_enable_cap cap;
3784 		r = -EFAULT;
3785 		if (copy_from_user(&cap, argp, sizeof(cap)))
3786 			break;
3787 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3788 		break;
3789 	}
3790 	case KVM_S390_MEM_OP: {
3791 		struct kvm_s390_mem_op mem_op;
3792 
3793 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3794 			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3795 		else
3796 			r = -EFAULT;
3797 		break;
3798 	}
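	/*
	 * The buffer behind irq_state.buf holds an array of struct
	 * kvm_s390_irq, so a SET must pass a non-zero multiple of the
	 * struct size, capped at VCPU_IRQS_MAX_BUF.
	 */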
3799 	case KVM_S390_SET_IRQ_STATE: {
3800 		struct kvm_s390_irq_state irq_state;
3801 
3802 		r = -EFAULT;
3803 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3804 			break;
3805 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3806 		    irq_state.len == 0 ||
3807 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3808 			r = -EINVAL;
3809 			break;
3810 		}
3811 		r = kvm_s390_set_irq_state(vcpu,
3812 					   (void __user *) irq_state.buf,
3813 					   irq_state.len);
3814 		break;
3815 	}
3816 	case KVM_S390_GET_IRQ_STATE: {
3817 		struct kvm_s390_irq_state irq_state;
3818 
3819 		r = -EFAULT;
3820 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3821 			break;
3822 		if (irq_state.len == 0) {
3823 			r = -EINVAL;
3824 			break;
3825 		}
3826 		r = kvm_s390_get_irq_state(vcpu,
3827 					   (__u8 __user *)  irq_state.buf,
3828 					   irq_state.len);
3829 		break;
3830 	}
3831 	default:
3832 		r = -ENOTTY;
3833 	}
3834 	return r;
3835 }
3836 
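/*
 * For user-controlled VMs, userspace may mmap() the vcpu fd at page
 * offset KVM_S390_SIE_PAGE_OFFSET to get at the SIE control block;
 * every other offset (and any non-ucontrol VM) gets SIGBUS.
 */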
3837 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3838 {
3839 #ifdef CONFIG_KVM_S390_UCONTROL
3840 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3841 		 && (kvm_is_ucontrol(vcpu->kvm))) {
3842 		vmf->page = virt_to_page(vcpu->arch.sie_block);
3843 		get_page(vmf->page);
3844 		return 0;
3845 	}
3846 #endif
3847 	return VM_FAULT_SIGBUS;
3848 }
3849 
3850 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3851 			    unsigned long npages)
3852 {
3853 	return 0;
3854 }
3855 
3856 /* Section: memory related */
3857 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3858 				   struct kvm_memory_slot *memslot,
3859 				   const struct kvm_userspace_memory_region *mem,
3860 				   enum kvm_mr_change change)
3861 {
3862 	/* A few sanity checks. Memory slots have to start and end on a
3863 	   segment boundary (1MB). The backing memory in userland may be
3864 	   fragmented across several different VMAs, and it is fine to mmap()
3865 	   and munmap() within this slot at any time after this call. */
3866 
3867 	if (mem->userspace_addr & 0xffffful)
3868 		return -EINVAL;
3869 
3870 	if (mem->memory_size & 0xffffful)
3871 		return -EINVAL;
3872 
3873 	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3874 		return -EINVAL;
3875 
3876 	return 0;
3877 }
3878 
3879 void kvm_arch_commit_memory_region(struct kvm *kvm,
3880 				const struct kvm_userspace_memory_region *mem,
3881 				const struct kvm_memory_slot *old,
3882 				const struct kvm_memory_slot *new,
3883 				enum kvm_mr_change change)
3884 {
3885 	int rc;
3886 
3887 	/* If the basics of the memslot do not change, we do not want
3888 	 * to update the gmap. Every update causes several unnecessary
3889 	 * segment translation exceptions. This is usually handled just
3890 	 * fine by the normal fault handler + gmap, but it will also
3891 	 * cause faults on the prefix page of running guest CPUs.
3892 	 */
3893 	if (old->userspace_addr == mem->userspace_addr &&
3894 	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3895 	    old->npages * PAGE_SIZE == mem->memory_size)
3896 		return;
3897 
3898 	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3899 		mem->guest_phys_addr, mem->memory_size);
3900 	if (rc)
3901 		pr_warn("failed to commit memory region\n");
3902 	return;
3903 }
3904 
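/*
 * A hedged reading of the bit-fiddling below, going by the "hmfai"
 * name (presumably hypervisor-managed facility indications): sclp.hmfai
 * packs one 2-bit value per facility-list doubleword i. The shift pair
 * extracts field i as a value 0..3, and the 48-bit mask is then shifted
 * right by 16 bits per unit, hiding one further 16-bit block of
 * hypervisor-managed facility bits each time.
 */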
3905 static inline unsigned long nonhyp_mask(int i)
3906 {
3907 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3908 
3909 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3910 }
3911 
3912 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3913 {
3914 	vcpu->valid_wakeup = false;
3915 }
3916 
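/*
 * Module init: refuse to load when SIE is not available
 * (sclp.has_sief2), then fold the host's STFLE facility bits, minus
 * the blocks masked off by nonhyp_mask(), into the facility mask
 * offered to guests before registering with the generic KVM core.
 */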
3917 static int __init kvm_s390_init(void)
3918 {
3919 	int i;
3920 
3921 	if (!sclp.has_sief2) {
3922 		pr_info("SIE not available\n");
3923 		return -ENODEV;
3924 	}
3925 
3926 	for (i = 0; i < 16; i++)
3927 		kvm_s390_fac_list_mask[i] |=
3928 			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3929 
3930 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3931 }
3932 
3933 static void __exit kvm_s390_exit(void)
3934 {
3935 	kvm_exit();
3936 }
3937 
3938 module_init(kvm_s390_init);
3939 module_exit(kvm_s390_exit);
3940 
3941 /*
3942  * Enable autoloading of the kvm module.
3943  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3944  * since x86 takes a different approach.
3945  */
3946 #include <linux/miscdevice.h>
3947 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3948 MODULE_ALIAS("devname:kvm");
3949