xref: /openbmc/linux/arch/s390/kvm/kvm-s390.c (revision 4a3fad70)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2017
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13 
14 #include <linux/compiler.h>
15 #include <linux/err.h>
16 #include <linux/fs.h>
17 #include <linux/hrtimer.h>
18 #include <linux/init.h>
19 #include <linux/kvm.h>
20 #include <linux/kvm_host.h>
21 #include <linux/mman.h>
22 #include <linux/module.h>
23 #include <linux/moduleparam.h>
24 #include <linux/random.h>
25 #include <linux/slab.h>
26 #include <linux/timer.h>
27 #include <linux/vmalloc.h>
28 #include <linux/bitmap.h>
29 #include <linux/sched/signal.h>
30 #include <linux/string.h>
31 
32 #include <asm/asm-offsets.h>
33 #include <asm/lowcore.h>
34 #include <asm/stp.h>
35 #include <asm/pgtable.h>
36 #include <asm/gmap.h>
37 #include <asm/nmi.h>
38 #include <asm/switch_to.h>
39 #include <asm/isc.h>
40 #include <asm/sclp.h>
41 #include <asm/cpacf.h>
42 #include <asm/timex.h>
43 #include "kvm-s390.h"
44 #include "gaccess.h"
45 
46 #define KMSG_COMPONENT "kvm-s390"
47 #undef pr_fmt
48 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
49 
50 #define CREATE_TRACE_POINTS
51 #include "trace.h"
52 #include "trace-s390.h"
53 
54 #define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
55 #define LOCAL_IRQS 32
56 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
57 			   (KVM_MAX_VCPUS + LOCAL_IRQS))
58 
59 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
60 
61 struct kvm_stats_debugfs_item debugfs_entries[] = {
62 	{ "userspace_handled", VCPU_STAT(exit_userspace) },
63 	{ "exit_null", VCPU_STAT(exit_null) },
64 	{ "exit_validity", VCPU_STAT(exit_validity) },
65 	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
66 	{ "exit_external_request", VCPU_STAT(exit_external_request) },
67 	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
68 	{ "exit_instruction", VCPU_STAT(exit_instruction) },
69 	{ "exit_pei", VCPU_STAT(exit_pei) },
70 	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
71 	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
72 	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
73 	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
74 	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
75 	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
76 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
77 	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
78 	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
79 	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
80 	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
81 	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
82 	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
83 	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
84 	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
85 	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
86 	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
87 	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
88 	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
89 	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
90 	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
91 	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
92 	{ "instruction_spx", VCPU_STAT(instruction_spx) },
93 	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
94 	{ "instruction_stap", VCPU_STAT(instruction_stap) },
95 	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
96 	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
97 	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
98 	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
99 	{ "instruction_essa", VCPU_STAT(instruction_essa) },
100 	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
101 	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
102 	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
103 	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
104 	{ "instruction_sie", VCPU_STAT(instruction_sie) },
105 	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
106 	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
107 	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
108 	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
109 	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
110 	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
111 	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
112 	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
113 	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
114 	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
115 	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
116 	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
117 	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
118 	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
119 	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
120 	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
121 	{ "diagnose_10", VCPU_STAT(diagnose_10) },
122 	{ "diagnose_44", VCPU_STAT(diagnose_44) },
123 	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
124 	{ "diagnose_258", VCPU_STAT(diagnose_258) },
125 	{ "diagnose_308", VCPU_STAT(diagnose_308) },
126 	{ "diagnose_500", VCPU_STAT(diagnose_500) },
127 	{ NULL }
128 };
129 
130 struct kvm_s390_tod_clock_ext {
131 	__u8 epoch_idx;
132 	__u64 tod;
133 	__u8 reserved[7];
134 } __packed;
135 
136 /* allow nested virtualization in KVM (if enabled by user space) */
137 static int nested;
138 module_param(nested, int, S_IRUGO);
139 MODULE_PARM_DESC(nested, "Nested virtualization support");
140 
141 /* upper facilities limit for kvm */
142 unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
143 
144 unsigned long kvm_s390_fac_list_mask_size(void)
145 {
146 	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
147 	return ARRAY_SIZE(kvm_s390_fac_list_mask);
148 }
149 
150 /* available cpu features supported by kvm */
151 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
152 /* available subfunctions indicated via query / "test bit" */
153 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
154 
155 static struct gmap_notifier gmap_notifier;
156 static struct gmap_notifier vsie_gmap_notifier;
157 debug_info_t *kvm_s390_dbf;
158 
159 /* Section: not file related */
160 int kvm_arch_hardware_enable(void)
161 {
162 	/* every s390 is virtualization enabled ;-) */
163 	return 0;
164 }
165 
166 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
167 			      unsigned long end);
168 
169 /*
170  * This callback is executed during stop_machine(). All CPUs are therefore
171  * temporarily stopped. To avoid changing guest behavior, preemption must be
172  * disabled whenever the epoch of the kvm and its VCPUs is touched, so that
173  * no CPU is stopped in the middle of an epoch calculation.
174  */
175 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
176 			  void *v)
177 {
178 	struct kvm *kvm;
179 	struct kvm_vcpu *vcpu;
180 	int i;
181 	unsigned long long *delta = v;
182 
183 	list_for_each_entry(kvm, &vm_list, vm_list) {
184 		kvm->arch.epoch -= *delta;
185 		kvm_for_each_vcpu(i, vcpu, kvm) {
186 			vcpu->arch.sie_block->epoch -= *delta;
187 			if (vcpu->arch.cputm_enabled)
188 				vcpu->arch.cputm_start += *delta;
189 			if (vcpu->arch.vsie_block)
190 				vcpu->arch.vsie_block->epoch -= *delta;
191 		}
192 	}
193 	return NOTIFY_OK;
194 }
195 
196 static struct notifier_block kvm_clock_notifier = {
197 	.notifier_call = kvm_clock_sync,
198 };
199 
200 int kvm_arch_hardware_setup(void)
201 {
202 	gmap_notifier.notifier_call = kvm_gmap_notifier;
203 	gmap_register_pte_notifier(&gmap_notifier);
204 	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
205 	gmap_register_pte_notifier(&vsie_gmap_notifier);
206 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
207 				       &kvm_clock_notifier);
208 	return 0;
209 }
210 
211 void kvm_arch_hardware_unsetup(void)
212 {
213 	gmap_unregister_pte_notifier(&gmap_notifier);
214 	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
215 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
216 					 &kvm_clock_notifier);
217 }
218 
219 static void allow_cpu_feat(unsigned long nr)
220 {
221 	set_bit_inv(nr, kvm_s390_available_cpu_feat);
222 }
223 
224 static inline int plo_test_bit(unsigned char nr)
225 {
226 	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
227 	int cc;
228 
229 	asm volatile(
230 		/* Parameter registers are ignored for "test bit" */
231 		"	plo	0,0,0,0(0)\n"
232 		"	ipm	%0\n"
233 		"	srl	%0,28\n"
234 		: "=d" (cc)
235 		: "d" (r0)
236 		: "cc");
237 	return cc == 0;
238 }
239 
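/*
 * A note on the PLO probing in kvm_s390_cpu_feat_init() below (descriptive
 * only, an editorial aid rather than authoritative documentation):
 * plo_test_bit() runs PERFORM LOCKED OPERATION with the "test bit"
 * indication (function code | 0x100), so condition code 0 means function
 * code @nr is installed.  The result is packed MSB-first into the query
 * block, i.e. function code i ends up in byte i / 8 under bit mask
 * 0x80 >> (i % 8), mirroring the layout of the CPACF query blocks filled
 * in right afterwards.
 */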
240 static void kvm_s390_cpu_feat_init(void)
241 {
242 	int i;
243 
244 	for (i = 0; i < 256; ++i) {
245 		if (plo_test_bit(i))
246 			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
247 	}
248 
249 	if (test_facility(28)) /* TOD-clock steering */
250 		ptff(kvm_s390_available_subfunc.ptff,
251 		     sizeof(kvm_s390_available_subfunc.ptff),
252 		     PTFF_QAF);
253 
254 	if (test_facility(17)) { /* MSA */
255 		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
256 			      kvm_s390_available_subfunc.kmac);
257 		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
258 			      kvm_s390_available_subfunc.kmc);
259 		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
260 			      kvm_s390_available_subfunc.km);
261 		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
262 			      kvm_s390_available_subfunc.kimd);
263 		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
264 			      kvm_s390_available_subfunc.klmd);
265 	}
266 	if (test_facility(76)) /* MSA3 */
267 		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
268 			      kvm_s390_available_subfunc.pckmo);
269 	if (test_facility(77)) { /* MSA4 */
270 		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
271 			      kvm_s390_available_subfunc.kmctr);
272 		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
273 			      kvm_s390_available_subfunc.kmf);
274 		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
275 			      kvm_s390_available_subfunc.kmo);
276 		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
277 			      kvm_s390_available_subfunc.pcc);
278 	}
279 	if (test_facility(57)) /* MSA5 */
280 		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
281 			      kvm_s390_available_subfunc.ppno);
282 
283 	if (test_facility(146)) /* MSA8 */
284 		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
285 			      kvm_s390_available_subfunc.kma);
286 
287 	if (MACHINE_HAS_ESOP)
288 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
289 	/*
290 	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
291 	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
292 	 */
293 	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
294 	    !test_facility(3) || !nested)
295 		return;
296 	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
297 	if (sclp.has_64bscao)
298 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
299 	if (sclp.has_siif)
300 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
301 	if (sclp.has_gpere)
302 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
303 	if (sclp.has_gsls)
304 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
305 	if (sclp.has_ib)
306 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
307 	if (sclp.has_cei)
308 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
309 	if (sclp.has_ibs)
310 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
311 	if (sclp.has_kss)
312 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
313 	/*
314 	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
315 	 * all skey handling functions read/set the skey from the PGSTE
316 	 * instead of the real storage key.
317 	 *
318 	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will cause
319 	 * resident pages to be detected as preserved.
320 	 *
321 	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
322 	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
323 	 *
324 	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
325 	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
326 	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
327 	 *
328 	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
329 	 * cannot easily shadow the SCA because of the ipte lock.
330 	 */
331 }
332 
333 int kvm_arch_init(void *opaque)
334 {
335 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
336 	if (!kvm_s390_dbf)
337 		return -ENOMEM;
338 
339 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
340 		debug_unregister(kvm_s390_dbf);
341 		return -ENOMEM;
342 	}
343 
344 	kvm_s390_cpu_feat_init();
345 
346 	/* Register floating interrupt controller interface. */
347 	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
348 }
349 
350 void kvm_arch_exit(void)
351 {
352 	debug_unregister(kvm_s390_dbf);
353 }
354 
355 /* Section: device related */
356 long kvm_arch_dev_ioctl(struct file *filp,
357 			unsigned int ioctl, unsigned long arg)
358 {
359 	if (ioctl == KVM_S390_ENABLE_SIE)
360 		return s390_enable_sie();
361 	return -EINVAL;
362 }
363 
364 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
365 {
366 	int r;
367 
368 	switch (ext) {
369 	case KVM_CAP_S390_PSW:
370 	case KVM_CAP_S390_GMAP:
371 	case KVM_CAP_SYNC_MMU:
372 #ifdef CONFIG_KVM_S390_UCONTROL
373 	case KVM_CAP_S390_UCONTROL:
374 #endif
375 	case KVM_CAP_ASYNC_PF:
376 	case KVM_CAP_SYNC_REGS:
377 	case KVM_CAP_ONE_REG:
378 	case KVM_CAP_ENABLE_CAP:
379 	case KVM_CAP_S390_CSS_SUPPORT:
380 	case KVM_CAP_IOEVENTFD:
381 	case KVM_CAP_DEVICE_CTRL:
382 	case KVM_CAP_ENABLE_CAP_VM:
383 	case KVM_CAP_S390_IRQCHIP:
384 	case KVM_CAP_VM_ATTRIBUTES:
385 	case KVM_CAP_MP_STATE:
386 	case KVM_CAP_IMMEDIATE_EXIT:
387 	case KVM_CAP_S390_INJECT_IRQ:
388 	case KVM_CAP_S390_USER_SIGP:
389 	case KVM_CAP_S390_USER_STSI:
390 	case KVM_CAP_S390_SKEYS:
391 	case KVM_CAP_S390_IRQ_STATE:
392 	case KVM_CAP_S390_USER_INSTR0:
393 	case KVM_CAP_S390_CMMA_MIGRATION:
394 	case KVM_CAP_S390_AIS:
395 	case KVM_CAP_S390_AIS_MIGRATION:
396 		r = 1;
397 		break;
398 	case KVM_CAP_S390_MEM_OP:
399 		r = MEM_OP_MAX_SIZE;
400 		break;
401 	case KVM_CAP_NR_VCPUS:
402 	case KVM_CAP_MAX_VCPUS:
403 		r = KVM_S390_BSCA_CPU_SLOTS;
404 		if (!kvm_s390_use_sca_entries())
405 			r = KVM_MAX_VCPUS;
406 		else if (sclp.has_esca && sclp.has_64bscao)
407 			r = KVM_S390_ESCA_CPU_SLOTS;
408 		break;
409 	case KVM_CAP_NR_MEMSLOTS:
410 		r = KVM_USER_MEM_SLOTS;
411 		break;
412 	case KVM_CAP_S390_COW:
413 		r = MACHINE_HAS_ESOP;
414 		break;
415 	case KVM_CAP_S390_VECTOR_REGISTERS:
416 		r = MACHINE_HAS_VX;
417 		break;
418 	case KVM_CAP_S390_RI:
419 		r = test_facility(64);
420 		break;
421 	case KVM_CAP_S390_GS:
422 		r = test_facility(133);
423 		break;
424 	default:
425 		r = 0;
426 	}
427 	return r;
428 }
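
/*
 * Illustrative userspace sketch (not part of the kernel build): probing one
 * of the extensions reported above.  The device node, capability choice and
 * error handling are assumptions made for the example only; KVM_CHECK_EXTENSION
 * returns 1 if the capability is available and 0 if not.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/kvm.h>
 *
 *	static int has_user_sigp(void)
 *	{
 *		int kvm_fd = open("/dev/kvm", O_RDWR);
 *		int ret;
 *
 *		if (kvm_fd < 0)
 *			return -1;
 *		ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_USER_SIGP);
 *		close(kvm_fd);
 *		return ret;
 *	}
 */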
429 
430 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
431 					struct kvm_memory_slot *memslot)
432 {
433 	gfn_t cur_gfn, last_gfn;
434 	unsigned long address;
435 	struct gmap *gmap = kvm->arch.gmap;
436 
437 	/* Loop over all guest pages */
438 	last_gfn = memslot->base_gfn + memslot->npages;
439 	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
440 		address = gfn_to_hva_memslot(memslot, cur_gfn);
441 
442 		if (test_and_clear_guest_dirty(gmap->mm, address))
443 			mark_page_dirty(kvm, cur_gfn);
444 		if (fatal_signal_pending(current))
445 			return;
446 		cond_resched();
447 	}
448 }
449 
450 /* Section: vm related */
451 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
452 
453 /*
454  * Get (and clear) the dirty memory log for a memory slot.
455  */
456 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
457 			       struct kvm_dirty_log *log)
458 {
459 	int r;
460 	unsigned long n;
461 	struct kvm_memslots *slots;
462 	struct kvm_memory_slot *memslot;
463 	int is_dirty = 0;
464 
465 	if (kvm_is_ucontrol(kvm))
466 		return -EINVAL;
467 
468 	mutex_lock(&kvm->slots_lock);
469 
470 	r = -EINVAL;
471 	if (log->slot >= KVM_USER_MEM_SLOTS)
472 		goto out;
473 
474 	slots = kvm_memslots(kvm);
475 	memslot = id_to_memslot(slots, log->slot);
476 	r = -ENOENT;
477 	if (!memslot->dirty_bitmap)
478 		goto out;
479 
480 	kvm_s390_sync_dirty_log(kvm, memslot);
481 	r = kvm_get_dirty_log(kvm, log, &is_dirty);
482 	if (r)
483 		goto out;
484 
485 	/* Clear the dirty log */
486 	if (is_dirty) {
487 		n = kvm_dirty_bitmap_bytes(memslot);
488 		memset(memslot->dirty_bitmap, 0, n);
489 	}
490 	r = 0;
491 out:
492 	mutex_unlock(&kvm->slots_lock);
493 	return r;
494 }
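
/*
 * Illustrative userspace sketch (assumptions: vm_fd comes from KVM_CREATE_VM,
 * memory slot 0 was registered with dirty logging enabled, and the caller
 * allocated a bitmap with one bit per page of that slot):
 *
 *	#include <string.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/kvm.h>
 *
 *	static int fetch_dirty_log(int vm_fd, void *bitmap)
 *	{
 *		struct kvm_dirty_log log;
 *
 *		memset(&log, 0, sizeof(log));
 *		log.slot = 0;
 *		log.dirty_bitmap = bitmap;
 *		return ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);
 *	}
 */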
495 
496 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
497 {
498 	unsigned int i;
499 	struct kvm_vcpu *vcpu;
500 
501 	kvm_for_each_vcpu(i, vcpu, kvm) {
502 		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
503 	}
504 }
505 
506 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
507 {
508 	int r;
509 
510 	if (cap->flags)
511 		return -EINVAL;
512 
513 	switch (cap->cap) {
514 	case KVM_CAP_S390_IRQCHIP:
515 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
516 		kvm->arch.use_irqchip = 1;
517 		r = 0;
518 		break;
519 	case KVM_CAP_S390_USER_SIGP:
520 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
521 		kvm->arch.user_sigp = 1;
522 		r = 0;
523 		break;
524 	case KVM_CAP_S390_VECTOR_REGISTERS:
525 		mutex_lock(&kvm->lock);
526 		if (kvm->created_vcpus) {
527 			r = -EBUSY;
528 		} else if (MACHINE_HAS_VX) {
529 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
530 			set_kvm_facility(kvm->arch.model.fac_list, 129);
531 			if (test_facility(134)) {
532 				set_kvm_facility(kvm->arch.model.fac_mask, 134);
533 				set_kvm_facility(kvm->arch.model.fac_list, 134);
534 			}
535 			if (test_facility(135)) {
536 				set_kvm_facility(kvm->arch.model.fac_mask, 135);
537 				set_kvm_facility(kvm->arch.model.fac_list, 135);
538 			}
539 			r = 0;
540 		} else
541 			r = -EINVAL;
542 		mutex_unlock(&kvm->lock);
543 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
544 			 r ? "(not available)" : "(success)");
545 		break;
546 	case KVM_CAP_S390_RI:
547 		r = -EINVAL;
548 		mutex_lock(&kvm->lock);
549 		if (kvm->created_vcpus) {
550 			r = -EBUSY;
551 		} else if (test_facility(64)) {
552 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
553 			set_kvm_facility(kvm->arch.model.fac_list, 64);
554 			r = 0;
555 		}
556 		mutex_unlock(&kvm->lock);
557 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
558 			 r ? "(not available)" : "(success)");
559 		break;
560 	case KVM_CAP_S390_AIS:
561 		mutex_lock(&kvm->lock);
562 		if (kvm->created_vcpus) {
563 			r = -EBUSY;
564 		} else {
565 			set_kvm_facility(kvm->arch.model.fac_mask, 72);
566 			set_kvm_facility(kvm->arch.model.fac_list, 72);
567 			r = 0;
568 		}
569 		mutex_unlock(&kvm->lock);
570 		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
571 			 r ? "(not available)" : "(success)");
572 		break;
573 	case KVM_CAP_S390_GS:
574 		r = -EINVAL;
575 		mutex_lock(&kvm->lock);
576 		if (atomic_read(&kvm->online_vcpus)) {
577 			r = -EBUSY;
578 		} else if (test_facility(133)) {
579 			set_kvm_facility(kvm->arch.model.fac_mask, 133);
580 			set_kvm_facility(kvm->arch.model.fac_list, 133);
581 			r = 0;
582 		}
583 		mutex_unlock(&kvm->lock);
584 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
585 			 r ? "(not available)" : "(success)");
586 		break;
587 	case KVM_CAP_S390_USER_STSI:
588 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
589 		kvm->arch.user_stsi = 1;
590 		r = 0;
591 		break;
592 	case KVM_CAP_S390_USER_INSTR0:
593 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
594 		kvm->arch.user_instr0 = 1;
595 		icpt_operexc_on_all_vcpus(kvm);
596 		r = 0;
597 		break;
598 	default:
599 		r = -EINVAL;
600 		break;
601 	}
602 	return r;
603 }
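
/*
 * Illustrative userspace sketch (not part of the kernel build): enabling one
 * of the capabilities handled above on a VM file descriptor.  vm_fd is
 * assumed to come from KVM_CREATE_VM; error handling is omitted.
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/kvm.h>
 *
 *	static int enable_user_sigp(int vm_fd)
 *	{
 *		struct kvm_enable_cap cap = {
 *			.cap = KVM_CAP_S390_USER_SIGP,
 *		};
 *
 *		return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 *	}
 */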
604 
605 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
606 {
607 	int ret;
608 
609 	switch (attr->attr) {
610 	case KVM_S390_VM_MEM_LIMIT_SIZE:
611 		ret = 0;
612 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
613 			 kvm->arch.mem_limit);
614 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
615 			ret = -EFAULT;
616 		break;
617 	default:
618 		ret = -ENXIO;
619 		break;
620 	}
621 	return ret;
622 }
623 
624 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
625 {
626 	int ret;
627 	unsigned int idx;
628 	switch (attr->attr) {
629 	case KVM_S390_VM_MEM_ENABLE_CMMA:
630 		ret = -ENXIO;
631 		if (!sclp.has_cmma)
632 			break;
633 
634 		ret = -EBUSY;
635 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
636 		mutex_lock(&kvm->lock);
637 		if (!kvm->created_vcpus) {
638 			kvm->arch.use_cmma = 1;
639 			ret = 0;
640 		}
641 		mutex_unlock(&kvm->lock);
642 		break;
643 	case KVM_S390_VM_MEM_CLR_CMMA:
644 		ret = -ENXIO;
645 		if (!sclp.has_cmma)
646 			break;
647 		ret = -EINVAL;
648 		if (!kvm->arch.use_cmma)
649 			break;
650 
651 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
652 		mutex_lock(&kvm->lock);
653 		idx = srcu_read_lock(&kvm->srcu);
654 		s390_reset_cmma(kvm->arch.gmap->mm);
655 		srcu_read_unlock(&kvm->srcu, idx);
656 		mutex_unlock(&kvm->lock);
657 		ret = 0;
658 		break;
659 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
660 		unsigned long new_limit;
661 
662 		if (kvm_is_ucontrol(kvm))
663 			return -EINVAL;
664 
665 		if (get_user(new_limit, (u64 __user *)attr->addr))
666 			return -EFAULT;
667 
668 		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
669 		    new_limit > kvm->arch.mem_limit)
670 			return -E2BIG;
671 
672 		if (!new_limit)
673 			return -EINVAL;
674 
675 		/* gmap_create takes last usable address */
676 		if (new_limit != KVM_S390_NO_MEM_LIMIT)
677 			new_limit -= 1;
678 
679 		ret = -EBUSY;
680 		mutex_lock(&kvm->lock);
681 		if (!kvm->created_vcpus) {
682 			/* gmap_create will round the limit up */
683 			struct gmap *new = gmap_create(current->mm, new_limit);
684 
685 			if (!new) {
686 				ret = -ENOMEM;
687 			} else {
688 				gmap_remove(kvm->arch.gmap);
689 				new->private = kvm;
690 				kvm->arch.gmap = new;
691 				ret = 0;
692 			}
693 		}
694 		mutex_unlock(&kvm->lock);
695 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
696 		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
697 			 (void *) kvm->arch.gmap->asce);
698 		break;
699 	}
700 	default:
701 		ret = -ENXIO;
702 		break;
703 	}
704 	return ret;
705 }
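
/*
 * Illustrative userspace sketch (assumptions: vm_fd from KVM_CREATE_VM and no
 * VCPUs created yet, since the handler above returns -EBUSY otherwise):
 * limiting guest memory to 16 GiB via the KVM_S390_VM_MEM_CTRL group.
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/kvm.h>
 *
 *	static int limit_guest_memory(int vm_fd)
 *	{
 *		__u64 limit = 16ULL << 30;
 *		struct kvm_device_attr attr = {
 *			.group = KVM_S390_VM_MEM_CTRL,
 *			.attr  = KVM_S390_VM_MEM_LIMIT_SIZE,
 *			.addr  = (unsigned long)&limit,
 *		};
 *
 *		return ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 *	}
 */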
706 
707 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
708 
709 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
710 {
711 	struct kvm_vcpu *vcpu;
712 	int i;
713 
714 	if (!test_kvm_facility(kvm, 76))
715 		return -EINVAL;
716 
717 	mutex_lock(&kvm->lock);
718 	switch (attr->attr) {
719 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
720 		get_random_bytes(
721 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
722 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
723 		kvm->arch.crypto.aes_kw = 1;
724 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
725 		break;
726 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
727 		get_random_bytes(
728 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
729 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
730 		kvm->arch.crypto.dea_kw = 1;
731 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
732 		break;
733 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
734 		kvm->arch.crypto.aes_kw = 0;
735 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
736 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
737 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
738 		break;
739 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
740 		kvm->arch.crypto.dea_kw = 0;
741 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
742 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
743 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
744 		break;
745 	default:
746 		mutex_unlock(&kvm->lock);
747 		return -ENXIO;
748 	}
749 
750 	kvm_for_each_vcpu(i, vcpu, kvm) {
751 		kvm_s390_vcpu_crypto_setup(vcpu);
752 		exit_sie(vcpu);
753 	}
754 	mutex_unlock(&kvm->lock);
755 	return 0;
756 }
757 
758 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
759 {
760 	int cx;
761 	struct kvm_vcpu *vcpu;
762 
763 	kvm_for_each_vcpu(cx, vcpu, kvm)
764 		kvm_s390_sync_request(req, vcpu);
765 }
766 
767 /*
768  * Must be called with kvm->srcu held to avoid races on memslots, and with
769  * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
770  */
771 static int kvm_s390_vm_start_migration(struct kvm *kvm)
772 {
773 	struct kvm_s390_migration_state *mgs;
774 	struct kvm_memory_slot *ms;
775 	/* s390 has a single KVM address space, so this is the only memslot set */
776 	struct kvm_memslots *slots;
777 	unsigned long ram_pages;
778 	int slotnr;
779 
780 	/* migration mode already enabled */
781 	if (kvm->arch.migration_state)
782 		return 0;
783 
784 	slots = kvm_memslots(kvm);
785 	if (!slots || !slots->used_slots)
786 		return -EINVAL;
787 
788 	mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
789 	if (!mgs)
790 		return -ENOMEM;
791 	kvm->arch.migration_state = mgs;
792 
793 	if (kvm->arch.use_cmma) {
794 		/*
795 		 * Get the last slot. They should be sorted by base_gfn, so the
796 		 * last slot is also the one at the end of the address space.
797 		 * We have verified above that at least one slot is present.
798 		 */
799 		ms = slots->memslots + slots->used_slots - 1;
800 		/* round up so we only use full longs */
801 		ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
802 		/* allocate enough bytes to store all the bits */
803 		mgs->pgste_bitmap = vmalloc(ram_pages / 8);
804 		if (!mgs->pgste_bitmap) {
805 			kfree(mgs);
806 			kvm->arch.migration_state = NULL;
807 			return -ENOMEM;
808 		}
809 
810 		mgs->bitmap_size = ram_pages;
811 		atomic64_set(&mgs->dirty_pages, ram_pages);
812 		/* mark all the pages in active slots as dirty */
813 		for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
814 			ms = slots->memslots + slotnr;
815 			bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
816 		}
817 
818 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
819 	}
820 	return 0;
821 }
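
/*
 * Sizing example for the PGSTE bitmap allocated above: if the last memslot
 * ends at 4 GiB, ms->base_gfn + ms->npages is 0x100000 pages.  That is
 * already a multiple of BITS_PER_LONG, so ram_pages stays at 0x100000 and
 * the vmalloc'ed bitmap occupies 0x100000 / 8 = 128 KiB, one bit per 4 KiB
 * guest page.
 */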
822 
823 /*
824  * Must be called with kvm->lock to avoid races with ourselves and
825  * kvm_s390_vm_start_migration.
826  */
827 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
828 {
829 	struct kvm_s390_migration_state *mgs;
830 
831 	/* migration mode already disabled */
832 	if (!kvm->arch.migration_state)
833 		return 0;
834 	mgs = kvm->arch.migration_state;
835 	kvm->arch.migration_state = NULL;
836 
837 	if (kvm->arch.use_cmma) {
838 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
839 		vfree(mgs->pgste_bitmap);
840 	}
841 	kfree(mgs);
842 	return 0;
843 }
844 
845 static int kvm_s390_vm_set_migration(struct kvm *kvm,
846 				     struct kvm_device_attr *attr)
847 {
848 	int idx, res = -ENXIO;
849 
850 	mutex_lock(&kvm->lock);
851 	switch (attr->attr) {
852 	case KVM_S390_VM_MIGRATION_START:
853 		idx = srcu_read_lock(&kvm->srcu);
854 		res = kvm_s390_vm_start_migration(kvm);
855 		srcu_read_unlock(&kvm->srcu, idx);
856 		break;
857 	case KVM_S390_VM_MIGRATION_STOP:
858 		res = kvm_s390_vm_stop_migration(kvm);
859 		break;
860 	default:
861 		break;
862 	}
863 	mutex_unlock(&kvm->lock);
864 
865 	return res;
866 }
867 
868 static int kvm_s390_vm_get_migration(struct kvm *kvm,
869 				     struct kvm_device_attr *attr)
870 {
871 	u64 mig = (kvm->arch.migration_state != NULL);
872 
873 	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
874 		return -ENXIO;
875 
876 	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
877 		return -EFAULT;
878 	return 0;
879 }
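
/*
 * Illustrative userspace sketch (vm_fd from KVM_CREATE_VM is an assumption):
 * switching migration mode on and reading it back through the attribute
 * interface implemented above.
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/kvm.h>
 *
 *	static int start_and_check_migration(int vm_fd, __u64 *status)
 *	{
 *		struct kvm_device_attr attr = {
 *			.group = KVM_S390_VM_MIGRATION,
 *			.attr  = KVM_S390_VM_MIGRATION_START,
 *		};
 *
 *		if (ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr))
 *			return -1;
 *		attr.attr = KVM_S390_VM_MIGRATION_STATUS;
 *		attr.addr = (unsigned long)status;
 *		return ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);
 *	}
 */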
880 
881 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
882 {
883 	struct kvm_s390_vm_tod_clock gtod;
884 
885 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
886 		return -EFAULT;
887 
888 	if (test_kvm_facility(kvm, 139))
889 		kvm_s390_set_tod_clock_ext(kvm, &gtod);
890 	else if (gtod.epoch_idx == 0)
891 		kvm_s390_set_tod_clock(kvm, gtod.tod);
892 	else
893 		return -EINVAL;
894 
895 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
896 		gtod.epoch_idx, gtod.tod);
897 
898 	return 0;
899 }
900 
901 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
902 {
903 	u8 gtod_high;
904 
905 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
906 					   sizeof(gtod_high)))
907 		return -EFAULT;
908 
909 	if (gtod_high != 0)
910 		return -EINVAL;
911 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
912 
913 	return 0;
914 }
915 
916 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
917 {
918 	u64 gtod;
919 
920 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
921 		return -EFAULT;
922 
923 	kvm_s390_set_tod_clock(kvm, gtod);
924 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
925 	return 0;
926 }
927 
928 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
929 {
930 	int ret;
931 
932 	if (attr->flags)
933 		return -EINVAL;
934 
935 	switch (attr->attr) {
936 	case KVM_S390_VM_TOD_EXT:
937 		ret = kvm_s390_set_tod_ext(kvm, attr);
938 		break;
939 	case KVM_S390_VM_TOD_HIGH:
940 		ret = kvm_s390_set_tod_high(kvm, attr);
941 		break;
942 	case KVM_S390_VM_TOD_LOW:
943 		ret = kvm_s390_set_tod_low(kvm, attr);
944 		break;
945 	default:
946 		ret = -ENXIO;
947 		break;
948 	}
949 	return ret;
950 }
951 
952 static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
953 					struct kvm_s390_vm_tod_clock *gtod)
954 {
955 	struct kvm_s390_tod_clock_ext htod;
956 
957 	preempt_disable();
958 
959 	get_tod_clock_ext((char *)&htod);
960 
961 	gtod->tod = htod.tod + kvm->arch.epoch;
962 	gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
963 
964 	if (gtod->tod < htod.tod)
965 		gtod->epoch_idx += 1;
966 
967 	preempt_enable();
968 }
969 
970 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
971 {
972 	struct kvm_s390_vm_tod_clock gtod;
973 
974 	memset(&gtod, 0, sizeof(gtod));
975 
976 	if (test_kvm_facility(kvm, 139))
977 		kvm_s390_get_tod_clock_ext(kvm, &gtod);
978 	else
979 		gtod.tod = kvm_s390_get_tod_clock_fast(kvm);
980 
981 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
982 		return -EFAULT;
983 
984 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
985 		gtod.epoch_idx, gtod.tod);
986 	return 0;
987 }
988 
989 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
990 {
991 	u8 gtod_high = 0;
992 
993 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
994 					 sizeof(gtod_high)))
995 		return -EFAULT;
996 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
997 
998 	return 0;
999 }
1000 
1001 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1002 {
1003 	u64 gtod;
1004 
1005 	gtod = kvm_s390_get_tod_clock_fast(kvm);
1006 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1007 		return -EFAULT;
1008 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1009 
1010 	return 0;
1011 }
1012 
1013 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1014 {
1015 	int ret;
1016 
1017 	if (attr->flags)
1018 		return -EINVAL;
1019 
1020 	switch (attr->attr) {
1021 	case KVM_S390_VM_TOD_EXT:
1022 		ret = kvm_s390_get_tod_ext(kvm, attr);
1023 		break;
1024 	case KVM_S390_VM_TOD_HIGH:
1025 		ret = kvm_s390_get_tod_high(kvm, attr);
1026 		break;
1027 	case KVM_S390_VM_TOD_LOW:
1028 		ret = kvm_s390_get_tod_low(kvm, attr);
1029 		break;
1030 	default:
1031 		ret = -ENXIO;
1032 		break;
1033 	}
1034 	return ret;
1035 }
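
/*
 * Illustrative userspace sketch (vm_fd is assumed): reading the guest TOD
 * clock through the KVM_S390_VM_TOD group handled above.
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/kvm.h>
 *
 *	static int read_guest_tod(int vm_fd, __u64 *tod)
 *	{
 *		struct kvm_device_attr attr = {
 *			.group = KVM_S390_VM_TOD,
 *			.attr  = KVM_S390_VM_TOD_LOW,
 *			.addr  = (unsigned long)tod,
 *		};
 *
 *		return ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);
 *	}
 */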
1036 
1037 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1038 {
1039 	struct kvm_s390_vm_cpu_processor *proc;
1040 	u16 lowest_ibc, unblocked_ibc;
1041 	int ret = 0;
1042 
1043 	mutex_lock(&kvm->lock);
1044 	if (kvm->created_vcpus) {
1045 		ret = -EBUSY;
1046 		goto out;
1047 	}
1048 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1049 	if (!proc) {
1050 		ret = -ENOMEM;
1051 		goto out;
1052 	}
1053 	if (!copy_from_user(proc, (void __user *)attr->addr,
1054 			    sizeof(*proc))) {
1055 		kvm->arch.model.cpuid = proc->cpuid;
1056 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
1057 		unblocked_ibc = sclp.ibc & 0xfff;
1058 		if (lowest_ibc && proc->ibc) {
1059 			if (proc->ibc > unblocked_ibc)
1060 				kvm->arch.model.ibc = unblocked_ibc;
1061 			else if (proc->ibc < lowest_ibc)
1062 				kvm->arch.model.ibc = lowest_ibc;
1063 			else
1064 				kvm->arch.model.ibc = proc->ibc;
1065 		}
1066 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
1067 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
1068 		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1069 			 kvm->arch.model.ibc,
1070 			 kvm->arch.model.cpuid);
1071 		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1072 			 kvm->arch.model.fac_list[0],
1073 			 kvm->arch.model.fac_list[1],
1074 			 kvm->arch.model.fac_list[2]);
1075 	} else
1076 		ret = -EFAULT;
1077 	kfree(proc);
1078 out:
1079 	mutex_unlock(&kvm->lock);
1080 	return ret;
1081 }
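
/*
 * IBC clamping example for the code above (values invented for
 * illustration): with lowest_ibc 0x090 and unblocked_ibc 0x0a2 reported by
 * SCLP, a requested ibc of 0x0b0 is lowered to 0x0a2, a request of 0x080 is
 * raised to 0x090, and anything in between is taken unchanged.
 */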
1082 
1083 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1084 				       struct kvm_device_attr *attr)
1085 {
1086 	struct kvm_s390_vm_cpu_feat data;
1087 	int ret = -EBUSY;
1088 
1089 	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1090 		return -EFAULT;
1091 	if (!bitmap_subset((unsigned long *) data.feat,
1092 			   kvm_s390_available_cpu_feat,
1093 			   KVM_S390_VM_CPU_FEAT_NR_BITS))
1094 		return -EINVAL;
1095 
1096 	mutex_lock(&kvm->lock);
1097 	if (!atomic_read(&kvm->online_vcpus)) {
1098 		bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1099 			    KVM_S390_VM_CPU_FEAT_NR_BITS);
1100 		ret = 0;
1101 	}
1102 	mutex_unlock(&kvm->lock);
1103 	return ret;
1104 }
1105 
1106 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1107 					  struct kvm_device_attr *attr)
1108 {
1109 	/*
1110 	 * Once supported by kernel + hw, we have to store the subfunctions
1111 	 * in kvm->arch and remember that user space configured them.
1112 	 */
1113 	return -ENXIO;
1114 }
1115 
1116 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1117 {
1118 	int ret = -ENXIO;
1119 
1120 	switch (attr->attr) {
1121 	case KVM_S390_VM_CPU_PROCESSOR:
1122 		ret = kvm_s390_set_processor(kvm, attr);
1123 		break;
1124 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1125 		ret = kvm_s390_set_processor_feat(kvm, attr);
1126 		break;
1127 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1128 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
1129 		break;
1130 	}
1131 	return ret;
1132 }
1133 
1134 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1135 {
1136 	struct kvm_s390_vm_cpu_processor *proc;
1137 	int ret = 0;
1138 
1139 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1140 	if (!proc) {
1141 		ret = -ENOMEM;
1142 		goto out;
1143 	}
1144 	proc->cpuid = kvm->arch.model.cpuid;
1145 	proc->ibc = kvm->arch.model.ibc;
1146 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1147 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1148 	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1149 		 kvm->arch.model.ibc,
1150 		 kvm->arch.model.cpuid);
1151 	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1152 		 kvm->arch.model.fac_list[0],
1153 		 kvm->arch.model.fac_list[1],
1154 		 kvm->arch.model.fac_list[2]);
1155 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1156 		ret = -EFAULT;
1157 	kfree(proc);
1158 out:
1159 	return ret;
1160 }
1161 
1162 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1163 {
1164 	struct kvm_s390_vm_cpu_machine *mach;
1165 	int ret = 0;
1166 
1167 	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1168 	if (!mach) {
1169 		ret = -ENOMEM;
1170 		goto out;
1171 	}
1172 	get_cpu_id((struct cpuid *) &mach->cpuid);
1173 	mach->ibc = sclp.ibc;
1174 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1175 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1176 	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1177 	       sizeof(S390_lowcore.stfle_fac_list));
1178 	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1179 		 kvm->arch.model.ibc,
1180 		 kvm->arch.model.cpuid);
1181 	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1182 		 mach->fac_mask[0],
1183 		 mach->fac_mask[1],
1184 		 mach->fac_mask[2]);
1185 	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1186 		 mach->fac_list[0],
1187 		 mach->fac_list[1],
1188 		 mach->fac_list[2]);
1189 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1190 		ret = -EFAULT;
1191 	kfree(mach);
1192 out:
1193 	return ret;
1194 }
1195 
1196 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1197 				       struct kvm_device_attr *attr)
1198 {
1199 	struct kvm_s390_vm_cpu_feat data;
1200 
1201 	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1202 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1203 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1204 		return -EFAULT;
1205 	return 0;
1206 }
1207 
1208 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1209 				     struct kvm_device_attr *attr)
1210 {
1211 	struct kvm_s390_vm_cpu_feat data;
1212 
1213 	bitmap_copy((unsigned long *) data.feat,
1214 		    kvm_s390_available_cpu_feat,
1215 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1216 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1217 		return -EFAULT;
1218 	return 0;
1219 }
1220 
1221 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1222 					  struct kvm_device_attr *attr)
1223 {
1224 	/*
1225 	 * Once we can actually configure subfunctions (kernel + hw support),
1226 	 * we have to check if they were already set by user space, if so copy
1227 	 * them from kvm->arch.
1228 	 */
1229 	return -ENXIO;
1230 }
1231 
1232 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1233 					struct kvm_device_attr *attr)
1234 {
1235 	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1236 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1237 		return -EFAULT;
1238 	return 0;
1239 }

1240 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1241 {
1242 	int ret = -ENXIO;
1243 
1244 	switch (attr->attr) {
1245 	case KVM_S390_VM_CPU_PROCESSOR:
1246 		ret = kvm_s390_get_processor(kvm, attr);
1247 		break;
1248 	case KVM_S390_VM_CPU_MACHINE:
1249 		ret = kvm_s390_get_machine(kvm, attr);
1250 		break;
1251 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1252 		ret = kvm_s390_get_processor_feat(kvm, attr);
1253 		break;
1254 	case KVM_S390_VM_CPU_MACHINE_FEAT:
1255 		ret = kvm_s390_get_machine_feat(kvm, attr);
1256 		break;
1257 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1258 		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1259 		break;
1260 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1261 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1262 		break;
1263 	}
1264 	return ret;
1265 }
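
/*
 * Illustrative userspace sketch (vm_fd is assumed): querying the host-side
 * machine model that the code above exposes.
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/kvm.h>
 *
 *	static int query_machine(int vm_fd, struct kvm_s390_vm_cpu_machine *mach)
 *	{
 *		struct kvm_device_attr attr = {
 *			.group = KVM_S390_VM_CPU_MODEL,
 *			.attr  = KVM_S390_VM_CPU_MACHINE,
 *			.addr  = (unsigned long)mach,
 *		};
 *
 *		return ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);
 *	}
 */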
1266 
1267 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1268 {
1269 	int ret;
1270 
1271 	switch (attr->group) {
1272 	case KVM_S390_VM_MEM_CTRL:
1273 		ret = kvm_s390_set_mem_control(kvm, attr);
1274 		break;
1275 	case KVM_S390_VM_TOD:
1276 		ret = kvm_s390_set_tod(kvm, attr);
1277 		break;
1278 	case KVM_S390_VM_CPU_MODEL:
1279 		ret = kvm_s390_set_cpu_model(kvm, attr);
1280 		break;
1281 	case KVM_S390_VM_CRYPTO:
1282 		ret = kvm_s390_vm_set_crypto(kvm, attr);
1283 		break;
1284 	case KVM_S390_VM_MIGRATION:
1285 		ret = kvm_s390_vm_set_migration(kvm, attr);
1286 		break;
1287 	default:
1288 		ret = -ENXIO;
1289 		break;
1290 	}
1291 
1292 	return ret;
1293 }
1294 
1295 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1296 {
1297 	int ret;
1298 
1299 	switch (attr->group) {
1300 	case KVM_S390_VM_MEM_CTRL:
1301 		ret = kvm_s390_get_mem_control(kvm, attr);
1302 		break;
1303 	case KVM_S390_VM_TOD:
1304 		ret = kvm_s390_get_tod(kvm, attr);
1305 		break;
1306 	case KVM_S390_VM_CPU_MODEL:
1307 		ret = kvm_s390_get_cpu_model(kvm, attr);
1308 		break;
1309 	case KVM_S390_VM_MIGRATION:
1310 		ret = kvm_s390_vm_get_migration(kvm, attr);
1311 		break;
1312 	default:
1313 		ret = -ENXIO;
1314 		break;
1315 	}
1316 
1317 	return ret;
1318 }
1319 
1320 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1321 {
1322 	int ret;
1323 
1324 	switch (attr->group) {
1325 	case KVM_S390_VM_MEM_CTRL:
1326 		switch (attr->attr) {
1327 		case KVM_S390_VM_MEM_ENABLE_CMMA:
1328 		case KVM_S390_VM_MEM_CLR_CMMA:
1329 			ret = sclp.has_cmma ? 0 : -ENXIO;
1330 			break;
1331 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1332 			ret = 0;
1333 			break;
1334 		default:
1335 			ret = -ENXIO;
1336 			break;
1337 		}
1338 		break;
1339 	case KVM_S390_VM_TOD:
1340 		switch (attr->attr) {
1341 		case KVM_S390_VM_TOD_LOW:
1342 		case KVM_S390_VM_TOD_HIGH:
1343 			ret = 0;
1344 			break;
1345 		default:
1346 			ret = -ENXIO;
1347 			break;
1348 		}
1349 		break;
1350 	case KVM_S390_VM_CPU_MODEL:
1351 		switch (attr->attr) {
1352 		case KVM_S390_VM_CPU_PROCESSOR:
1353 		case KVM_S390_VM_CPU_MACHINE:
1354 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1355 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1356 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1357 			ret = 0;
1358 			break;
1359 		/* configuring subfunctions is not supported yet */
1360 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1361 		default:
1362 			ret = -ENXIO;
1363 			break;
1364 		}
1365 		break;
1366 	case KVM_S390_VM_CRYPTO:
1367 		switch (attr->attr) {
1368 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1369 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1370 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1371 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1372 			ret = 0;
1373 			break;
1374 		default:
1375 			ret = -ENXIO;
1376 			break;
1377 		}
1378 		break;
1379 	case KVM_S390_VM_MIGRATION:
1380 		ret = 0;
1381 		break;
1382 	default:
1383 		ret = -ENXIO;
1384 		break;
1385 	}
1386 
1387 	return ret;
1388 }
1389 
1390 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1391 {
1392 	uint8_t *keys;
1393 	uint64_t hva;
1394 	int srcu_idx, i, r = 0;
1395 
1396 	if (args->flags != 0)
1397 		return -EINVAL;
1398 
1399 	/* Is this guest using storage keys? */
1400 	if (!mm_use_skey(current->mm))
1401 		return KVM_S390_GET_SKEYS_NONE;
1402 
1403 	/* Enforce sane limit on memory allocation */
1404 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1405 		return -EINVAL;
1406 
1407 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1408 	if (!keys)
1409 		return -ENOMEM;
1410 
1411 	down_read(&current->mm->mmap_sem);
1412 	srcu_idx = srcu_read_lock(&kvm->srcu);
1413 	for (i = 0; i < args->count; i++) {
1414 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1415 		if (kvm_is_error_hva(hva)) {
1416 			r = -EFAULT;
1417 			break;
1418 		}
1419 
1420 		r = get_guest_storage_key(current->mm, hva, &keys[i]);
1421 		if (r)
1422 			break;
1423 	}
1424 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1425 	up_read(&current->mm->mmap_sem);
1426 
1427 	if (!r) {
1428 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1429 				 sizeof(uint8_t) * args->count);
1430 		if (r)
1431 			r = -EFAULT;
1432 	}
1433 
1434 	kvfree(keys);
1435 	return r;
1436 }
1437 
1438 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1439 {
1440 	uint8_t *keys;
1441 	uint64_t hva;
1442 	int srcu_idx, i, r = 0;
1443 
1444 	if (args->flags != 0)
1445 		return -EINVAL;
1446 
1447 	/* Enforce sane limit on memory allocation */
1448 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1449 		return -EINVAL;
1450 
1451 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1452 	if (!keys)
1453 		return -ENOMEM;
1454 
1455 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1456 			   sizeof(uint8_t) * args->count);
1457 	if (r) {
1458 		r = -EFAULT;
1459 		goto out;
1460 	}
1461 
1462 	/* Enable storage key handling for the guest */
1463 	r = s390_enable_skey();
1464 	if (r)
1465 		goto out;
1466 
1467 	down_read(&current->mm->mmap_sem);
1468 	srcu_idx = srcu_read_lock(&kvm->srcu);
1469 	for (i = 0; i < args->count; i++) {
1470 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1471 		if (kvm_is_error_hva(hva)) {
1472 			r = -EFAULT;
1473 			break;
1474 		}
1475 
1476 		/* Lowest order bit is reserved */
1477 		if (keys[i] & 0x01) {
1478 			r = -EINVAL;
1479 			break;
1480 		}
1481 
1482 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1483 		if (r)
1484 			break;
1485 	}
1486 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1487 	up_read(&current->mm->mmap_sem);
1488 out:
1489 	kvfree(keys);
1490 	return r;
1491 }
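
/*
 * Illustrative userspace sketch (vm_fd and the gfn range are assumptions):
 * reading the first 128 guest storage keys.  A return value of
 * KVM_S390_GET_SKEYS_NONE indicates that the guest does not use storage
 * keys at all.
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/kvm.h>
 *
 *	static int read_skeys(int vm_fd, __u8 *keys)
 *	{
 *		struct kvm_s390_skeys args = {
 *			.start_gfn = 0,
 *			.count = 128,
 *			.skeydata_addr = (unsigned long)keys,
 *		};
 *
 *		return ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 *	}
 */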
1492 
1493 /*
1494  * Base address and length must be sent at the start of each block, so it is
1495  * cheaper to keep sending clean data than to start a new block, as long as
1496  * the clean run is smaller than two longs (see the worked example below).
1497  */
1498 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1499 /* keep the CMMA buffer size limit consistent with KVM_S390_SKEYS_MAX */
1500 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
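
/*
 * Worked example for the trade-off above, on a 64-bit kernel: the block
 * header (base address + length) amounts to 2 * 8 = 16 bytes, and
 * KVM_S390_MAX_BIT_DISTANCE is likewise 16 pages.  Bridging a gap of up to
 * 16 clean pages therefore costs at most 16 one-byte "clean" values, which
 * is never more expensive than terminating the block and starting a new one.
 */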
1501 
1502 /*
1503  * This function searches for the next page with dirty CMMA attributes, and
1504  * saves the attributes in the buffer up to either the end of the buffer or
1505  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1506  * no trailing clean bytes are saved.
1507  * In case no dirty bits were found, or if CMMA was not enabled or used, the
1508  * output buffer will indicate 0 as length.
1509  */
1510 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1511 				  struct kvm_s390_cmma_log *args)
1512 {
1513 	struct kvm_s390_migration_state *s = kvm->arch.migration_state;
1514 	unsigned long bufsize, hva, pgstev, i, next, cur;
1515 	int srcu_idx, peek, r = 0, rr;
1516 	u8 *res;
1517 
1518 	cur = args->start_gfn;
1519 	i = next = pgstev = 0;
1520 
1521 	if (unlikely(!kvm->arch.use_cmma))
1522 		return -ENXIO;
1523 	/* Invalid/unsupported flags were specified */
1524 	if (args->flags & ~KVM_S390_CMMA_PEEK)
1525 		return -EINVAL;
1526 	/* A non-peek query is only valid while migration mode is active */
1527 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1528 	if (!peek && !s)
1529 		return -EINVAL;
1530 	/* CMMA is disabled or was not used, or the buffer has length zero */
1531 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1532 	if (!bufsize || !kvm->mm->context.use_cmma) {
1533 		memset(args, 0, sizeof(*args));
1534 		return 0;
1535 	}
1536 
1537 	if (!peek) {
1538 		/* We are not peeking, and there are no dirty pages */
1539 		if (!atomic64_read(&s->dirty_pages)) {
1540 			memset(args, 0, sizeof(*args));
1541 			return 0;
1542 		}
1543 		cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
1544 				    args->start_gfn);
1545 		if (cur >= s->bitmap_size)	/* nothing found, loop back */
1546 			cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
1547 		if (cur >= s->bitmap_size) {	/* again! (very unlikely) */
1548 			memset(args, 0, sizeof(*args));
1549 			return 0;
1550 		}
1551 		next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
1552 	}
1553 
1554 	res = vmalloc(bufsize);
1555 	if (!res)
1556 		return -ENOMEM;
1557 
1558 	args->start_gfn = cur;
1559 
1560 	down_read(&kvm->mm->mmap_sem);
1561 	srcu_idx = srcu_read_lock(&kvm->srcu);
1562 	while (i < bufsize) {
1563 		hva = gfn_to_hva(kvm, cur);
1564 		if (kvm_is_error_hva(hva)) {
1565 			r = -EFAULT;
1566 			break;
1567 		}
1568 		/* decrement only if we actually flipped the bit to 0 */
1569 		if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
1570 			atomic64_dec(&s->dirty_pages);
1571 		r = get_pgste(kvm->mm, hva, &pgstev);
1572 		if (r < 0)
1573 			pgstev = 0;
1574 		/* save the value */
1575 		res[i++] = (pgstev >> 24) & 0x43;
1576 		/*
1577 		 * if the next bit is too far away, stop.
1578 		 * if we reached the previous "next", find the next one
1579 		 */
1580 		if (!peek) {
1581 			if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
1582 				break;
1583 			if (cur == next)
1584 				next = find_next_bit(s->pgste_bitmap,
1585 						     s->bitmap_size, cur + 1);
1586 			/* reached the end of the bitmap or of the buffer, stop */
1587 			if ((next >= s->bitmap_size) ||
1588 			    (next >= args->start_gfn + bufsize))
1589 				break;
1590 		}
1591 		cur++;
1592 	}
1593 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1594 	up_read(&kvm->mm->mmap_sem);
1595 	args->count = i;
1596 	args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
1597 
1598 	rr = copy_to_user((void __user *)args->values, res, args->count);
1599 	if (rr)
1600 		r = -EFAULT;
1601 
1602 	vfree(res);
1603 	return r;
1604 }
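
/*
 * Illustrative userspace sketch (vm_fd and the buffer size are assumptions):
 * peeking at the CMMA values of the first guest pages.  On return, log.count
 * holds the number of valid bytes and log.start_gfn the first gfn they
 * describe.
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/kvm.h>
 *
 *	static int peek_cmma(int vm_fd, __u8 *buf, __u32 len)
 *	{
 *		struct kvm_s390_cmma_log log = {
 *			.start_gfn = 0,
 *			.count = len,
 *			.flags = KVM_S390_CMMA_PEEK,
 *			.values = (unsigned long)buf,
 *		};
 *
 *		return ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log);
 *	}
 */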
1605 
1606 /*
1607  * This function sets the CMMA attributes for the given pages. If the input
1608  * buffer has zero length, no action is taken, otherwise the attributes are
1609  * set and the mm->context.use_cmma flag is set.
1610  */
1611 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1612 				  const struct kvm_s390_cmma_log *args)
1613 {
1614 	unsigned long hva, mask, pgstev, i;
1615 	uint8_t *bits;
1616 	int srcu_idx, r = 0;
1617 
1618 	mask = args->mask;
1619 
1620 	if (!kvm->arch.use_cmma)
1621 		return -ENXIO;
1622 	/* invalid/unsupported flags */
1623 	if (args->flags != 0)
1624 		return -EINVAL;
1625 	/* Enforce sane limit on memory allocation */
1626 	if (args->count > KVM_S390_CMMA_SIZE_MAX)
1627 		return -EINVAL;
1628 	/* Nothing to do */
1629 	if (args->count == 0)
1630 		return 0;
1631 
1632 	bits = vmalloc(sizeof(*bits) * args->count);
1633 	if (!bits)
1634 		return -ENOMEM;
1635 
1636 	r = copy_from_user(bits, (void __user *)args->values, args->count);
1637 	if (r) {
1638 		r = -EFAULT;
1639 		goto out;
1640 	}
1641 
1642 	down_read(&kvm->mm->mmap_sem);
1643 	srcu_idx = srcu_read_lock(&kvm->srcu);
1644 	for (i = 0; i < args->count; i++) {
1645 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1646 		if (kvm_is_error_hva(hva)) {
1647 			r = -EFAULT;
1648 			break;
1649 		}
1650 
1651 		pgstev = bits[i];
1652 		pgstev = pgstev << 24;
1653 		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
1654 		set_pgste_bits(kvm->mm, hva, mask, pgstev);
1655 	}
1656 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1657 	up_read(&kvm->mm->mmap_sem);
1658 
1659 	if (!kvm->mm->context.use_cmma) {
1660 		down_write(&kvm->mm->mmap_sem);
1661 		kvm->mm->context.use_cmma = 1;
1662 		up_write(&kvm->mm->mmap_sem);
1663 	}
1664 out:
1665 	vfree(bits);
1666 	return r;
1667 }
1668 
1669 long kvm_arch_vm_ioctl(struct file *filp,
1670 		       unsigned int ioctl, unsigned long arg)
1671 {
1672 	struct kvm *kvm = filp->private_data;
1673 	void __user *argp = (void __user *)arg;
1674 	struct kvm_device_attr attr;
1675 	int r;
1676 
1677 	switch (ioctl) {
1678 	case KVM_S390_INTERRUPT: {
1679 		struct kvm_s390_interrupt s390int;
1680 
1681 		r = -EFAULT;
1682 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
1683 			break;
1684 		r = kvm_s390_inject_vm(kvm, &s390int);
1685 		break;
1686 	}
1687 	case KVM_ENABLE_CAP: {
1688 		struct kvm_enable_cap cap;
1689 		r = -EFAULT;
1690 		if (copy_from_user(&cap, argp, sizeof(cap)))
1691 			break;
1692 		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1693 		break;
1694 	}
1695 	case KVM_CREATE_IRQCHIP: {
1696 		struct kvm_irq_routing_entry routing;
1697 
1698 		r = -EINVAL;
1699 		if (kvm->arch.use_irqchip) {
1700 			/* Set up dummy routing. */
1701 			memset(&routing, 0, sizeof(routing));
1702 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1703 		}
1704 		break;
1705 	}
1706 	case KVM_SET_DEVICE_ATTR: {
1707 		r = -EFAULT;
1708 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1709 			break;
1710 		r = kvm_s390_vm_set_attr(kvm, &attr);
1711 		break;
1712 	}
1713 	case KVM_GET_DEVICE_ATTR: {
1714 		r = -EFAULT;
1715 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1716 			break;
1717 		r = kvm_s390_vm_get_attr(kvm, &attr);
1718 		break;
1719 	}
1720 	case KVM_HAS_DEVICE_ATTR: {
1721 		r = -EFAULT;
1722 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1723 			break;
1724 		r = kvm_s390_vm_has_attr(kvm, &attr);
1725 		break;
1726 	}
1727 	case KVM_S390_GET_SKEYS: {
1728 		struct kvm_s390_skeys args;
1729 
1730 		r = -EFAULT;
1731 		if (copy_from_user(&args, argp,
1732 				   sizeof(struct kvm_s390_skeys)))
1733 			break;
1734 		r = kvm_s390_get_skeys(kvm, &args);
1735 		break;
1736 	}
1737 	case KVM_S390_SET_SKEYS: {
1738 		struct kvm_s390_skeys args;
1739 
1740 		r = -EFAULT;
1741 		if (copy_from_user(&args, argp,
1742 				   sizeof(struct kvm_s390_skeys)))
1743 			break;
1744 		r = kvm_s390_set_skeys(kvm, &args);
1745 		break;
1746 	}
1747 	case KVM_S390_GET_CMMA_BITS: {
1748 		struct kvm_s390_cmma_log args;
1749 
1750 		r = -EFAULT;
1751 		if (copy_from_user(&args, argp, sizeof(args)))
1752 			break;
1753 		r = kvm_s390_get_cmma_bits(kvm, &args);
1754 		if (!r) {
1755 			r = copy_to_user(argp, &args, sizeof(args));
1756 			if (r)
1757 				r = -EFAULT;
1758 		}
1759 		break;
1760 	}
1761 	case KVM_S390_SET_CMMA_BITS: {
1762 		struct kvm_s390_cmma_log args;
1763 
1764 		r = -EFAULT;
1765 		if (copy_from_user(&args, argp, sizeof(args)))
1766 			break;
1767 		r = kvm_s390_set_cmma_bits(kvm, &args);
1768 		break;
1769 	}
1770 	default:
1771 		r = -ENOTTY;
1772 	}
1773 
1774 	return r;
1775 }
1776 
1777 static int kvm_s390_query_ap_config(u8 *config)
1778 {
1779 	u32 fcn_code = 0x04000000UL;
1780 	u32 cc = 0;
1781 
1782 	memset(config, 0, 128);
1783 	asm volatile(
1784 		"lgr 0,%1\n"
1785 		"lgr 2,%2\n"
1786 		".long 0xb2af0000\n"		/* PQAP(QCI) */
1787 		"0: ipm %0\n"
1788 		"srl %0,28\n"
1789 		"1:\n"
1790 		EX_TABLE(0b, 1b)
1791 		: "+r" (cc)
1792 		: "r" (fcn_code), "r" (config)
1793 		: "cc", "0", "2", "memory"
1794 	);
1795 
1796 	return cc;
1797 }
1798 
1799 static int kvm_s390_apxa_installed(void)
1800 {
1801 	u8 config[128];
1802 	int cc;
1803 
1804 	if (test_facility(12)) {
1805 		cc = kvm_s390_query_ap_config(config);
1806 
1807 		if (cc)
1808 			pr_err("PQAP(QCI) failed with cc=%d", cc);
1809 		else
1810 			return config[0] & 0x40;
1811 	}
1812 
1813 	return 0;
1814 }
1815 
1816 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1817 {
1818 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1819 
1820 	if (kvm_s390_apxa_installed())
1821 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1822 	else
1823 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1824 }
1825 
1826 static u64 kvm_s390_get_initial_cpuid(void)
1827 {
1828 	struct cpuid cpuid;
1829 
1830 	get_cpu_id(&cpuid);
1831 	cpuid.version = 0xff;
1832 	return *((u64 *) &cpuid);
1833 }
1834 
1835 static void kvm_s390_crypto_init(struct kvm *kvm)
1836 {
1837 	if (!test_kvm_facility(kvm, 76))
1838 		return;
1839 
1840 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1841 	kvm_s390_set_crycb_format(kvm);
1842 
1843 	/* Enable AES/DEA protected key functions by default */
1844 	kvm->arch.crypto.aes_kw = 1;
1845 	kvm->arch.crypto.dea_kw = 1;
1846 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1847 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1848 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1849 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1850 }
1851 
1852 static void sca_dispose(struct kvm *kvm)
1853 {
1854 	if (kvm->arch.use_esca)
1855 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1856 	else
1857 		free_page((unsigned long)(kvm->arch.sca));
1858 	kvm->arch.sca = NULL;
1859 }
1860 
1861 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1862 {
1863 	gfp_t alloc_flags = GFP_KERNEL;
1864 	int i, rc;
1865 	char debug_name[16];
1866 	static unsigned long sca_offset;
1867 
1868 	rc = -EINVAL;
1869 #ifdef CONFIG_KVM_S390_UCONTROL
1870 	if (type & ~KVM_VM_S390_UCONTROL)
1871 		goto out_err;
1872 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1873 		goto out_err;
1874 #else
1875 	if (type)
1876 		goto out_err;
1877 #endif
1878 
1879 	rc = s390_enable_sie();
1880 	if (rc)
1881 		goto out_err;
1882 
1883 	rc = -ENOMEM;
1884 
1885 	kvm->arch.use_esca = 0; /* start with basic SCA */
1886 	if (!sclp.has_64bscao)
1887 		alloc_flags |= GFP_DMA;
1888 	rwlock_init(&kvm->arch.sca_lock);
1889 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1890 	if (!kvm->arch.sca)
1891 		goto out_err;
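	/*
	 * Stagger the basic SCA within its page: each new VM gets the next
	 * 16-byte offset, and the offset wraps to 0 once another bsca_block
	 * would no longer fit in front of the page boundary.
	 */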
1892 	spin_lock(&kvm_lock);
1893 	sca_offset += 16;
1894 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1895 		sca_offset = 0;
1896 	kvm->arch.sca = (struct bsca_block *)
1897 			((char *) kvm->arch.sca + sca_offset);
1898 	spin_unlock(&kvm_lock);
1899 
1900 	sprintf(debug_name, "kvm-%u", current->pid);
1901 
1902 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1903 	if (!kvm->arch.dbf)
1904 		goto out_err;
1905 
1906 	kvm->arch.sie_page2 =
1907 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1908 	if (!kvm->arch.sie_page2)
1909 		goto out_err;
1910 
1911 	/* Populate the facility mask initially. */
1912 	memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1913 	       sizeof(S390_lowcore.stfle_fac_list));
1914 	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1915 		if (i < kvm_s390_fac_list_mask_size())
1916 			kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1917 		else
1918 			kvm->arch.model.fac_mask[i] = 0UL;
1919 	}
1920 
1921 	/* Populate the facility list initially. */
1922 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1923 	memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1924 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1925 
1926 	/* we are always in czam mode - even on pre z14 machines */
1927 	set_kvm_facility(kvm->arch.model.fac_mask, 138);
1928 	set_kvm_facility(kvm->arch.model.fac_list, 138);
1929 	/* we emulate STHYI in kvm */
1930 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
1931 	set_kvm_facility(kvm->arch.model.fac_list, 74);
1932 	if (MACHINE_HAS_TLB_GUEST) {
1933 		set_kvm_facility(kvm->arch.model.fac_mask, 147);
1934 		set_kvm_facility(kvm->arch.model.fac_list, 147);
1935 	}
1936 
1937 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1938 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1939 
1940 	kvm_s390_crypto_init(kvm);
1941 
1942 	mutex_init(&kvm->arch.float_int.ais_lock);
1943 	kvm->arch.float_int.simm = 0;
1944 	kvm->arch.float_int.nimm = 0;
1945 	spin_lock_init(&kvm->arch.float_int.lock);
1946 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
1947 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1948 	init_waitqueue_head(&kvm->arch.ipte_wq);
1949 	mutex_init(&kvm->arch.ipte_mutex);
1950 
1951 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1952 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
1953 
1954 	if (type & KVM_VM_S390_UCONTROL) {
1955 		kvm->arch.gmap = NULL;
1956 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1957 	} else {
1958 		if (sclp.hamax == U64_MAX)
1959 			kvm->arch.mem_limit = TASK_SIZE_MAX;
1960 		else
1961 			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
1962 						    sclp.hamax + 1);
1963 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1964 		if (!kvm->arch.gmap)
1965 			goto out_err;
1966 		kvm->arch.gmap->private = kvm;
1967 		kvm->arch.gmap->pfault_enabled = 0;
1968 	}
1969 
1970 	kvm->arch.css_support = 0;
1971 	kvm->arch.use_irqchip = 0;
1972 	kvm->arch.epoch = 0;
1973 
1974 	spin_lock_init(&kvm->arch.start_stop_lock);
1975 	kvm_s390_vsie_init(kvm);
1976 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1977 
1978 	return 0;
1979 out_err:
1980 	free_page((unsigned long)kvm->arch.sie_page2);
1981 	debug_unregister(kvm->arch.dbf);
1982 	sca_dispose(kvm);
1983 	KVM_EVENT(3, "creation of vm failed: %d", rc);
1984 	return rc;
1985 }
1986 
1987 bool kvm_arch_has_vcpu_debugfs(void)
1988 {
1989 	return false;
1990 }
1991 
1992 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
1993 {
1994 	return 0;
1995 }
1996 
1997 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1998 {
1999 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2000 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2001 	kvm_s390_clear_local_irqs(vcpu);
2002 	kvm_clear_async_pf_completion_queue(vcpu);
2003 	if (!kvm_is_ucontrol(vcpu->kvm))
2004 		sca_del_vcpu(vcpu);
2005 
2006 	if (kvm_is_ucontrol(vcpu->kvm))
2007 		gmap_remove(vcpu->arch.gmap);
2008 
2009 	if (vcpu->kvm->arch.use_cmma)
2010 		kvm_s390_vcpu_unsetup_cmma(vcpu);
2011 	free_page((unsigned long)(vcpu->arch.sie_block));
2012 
2013 	kvm_vcpu_uninit(vcpu);
2014 	kmem_cache_free(kvm_vcpu_cache, vcpu);
2015 }
2016 
2017 static void kvm_free_vcpus(struct kvm *kvm)
2018 {
2019 	unsigned int i;
2020 	struct kvm_vcpu *vcpu;
2021 
2022 	kvm_for_each_vcpu(i, vcpu, kvm)
2023 		kvm_arch_vcpu_destroy(vcpu);
2024 
2025 	mutex_lock(&kvm->lock);
2026 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2027 		kvm->vcpus[i] = NULL;
2028 
2029 	atomic_set(&kvm->online_vcpus, 0);
2030 	mutex_unlock(&kvm->lock);
2031 }
2032 
2033 void kvm_arch_destroy_vm(struct kvm *kvm)
2034 {
2035 	kvm_free_vcpus(kvm);
2036 	sca_dispose(kvm);
2037 	debug_unregister(kvm->arch.dbf);
2038 	free_page((unsigned long)kvm->arch.sie_page2);
2039 	if (!kvm_is_ucontrol(kvm))
2040 		gmap_remove(kvm->arch.gmap);
2041 	kvm_s390_destroy_adapters(kvm);
2042 	kvm_s390_clear_float_irqs(kvm);
2043 	kvm_s390_vsie_destroy(kvm);
2044 	if (kvm->arch.migration_state) {
2045 		vfree(kvm->arch.migration_state->pgste_bitmap);
2046 		kfree(kvm->arch.migration_state);
2047 	}
2048 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2049 }
2050 
2051 /* Section: vcpu related */
2052 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2053 {
2054 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2055 	if (!vcpu->arch.gmap)
2056 		return -ENOMEM;
2057 	vcpu->arch.gmap->private = vcpu->kvm;
2058 
2059 	return 0;
2060 }
2061 
2062 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2063 {
2064 	if (!kvm_s390_use_sca_entries())
2065 		return;
2066 	read_lock(&vcpu->kvm->arch.sca_lock);
2067 	if (vcpu->kvm->arch.use_esca) {
2068 		struct esca_block *sca = vcpu->kvm->arch.sca;
2069 
2070 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2071 		sca->cpu[vcpu->vcpu_id].sda = 0;
2072 	} else {
2073 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2074 
2075 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2076 		sca->cpu[vcpu->vcpu_id].sda = 0;
2077 	}
2078 	read_unlock(&vcpu->kvm->arch.sca_lock);
2079 }
2080 
2081 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2082 {
2083 	if (!kvm_s390_use_sca_entries()) {
2084 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2085 
2086 		/* we still need the basic sca for the ipte control */
2087 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2088 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2089 	}
2090 	read_lock(&vcpu->kvm->arch.sca_lock);
2091 	if (vcpu->kvm->arch.use_esca) {
2092 		struct esca_block *sca = vcpu->kvm->arch.sca;
2093 
2094 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2095 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2096 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2097 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2098 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2099 	} else {
2100 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2101 
2102 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2103 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2104 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2105 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2106 	}
2107 	read_unlock(&vcpu->kvm->arch.sca_lock);
2108 }
2109 
2110 /* Basic SCA to Extended SCA data copy routines */
2111 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2112 {
2113 	d->sda = s->sda;
2114 	d->sigp_ctrl.c = s->sigp_ctrl.c;
2115 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2116 }
2117 
2118 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2119 {
2120 	int i;
2121 
2122 	d->ipte_control = s->ipte_control;
2123 	d->mcn[0] = s->mcn;
2124 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2125 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2126 }
2127 
2128 static int sca_switch_to_extended(struct kvm *kvm)
2129 {
2130 	struct bsca_block *old_sca = kvm->arch.sca;
2131 	struct esca_block *new_sca;
2132 	struct kvm_vcpu *vcpu;
2133 	unsigned int vcpu_idx;
2134 	u32 scaol, scaoh;
2135 
2136 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2137 	if (!new_sca)
2138 		return -ENOMEM;
2139 
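	/* split the new ESCA origin into the scaoh/scaol words of the SIE blocks */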
2140 	scaoh = (u32)((u64)(new_sca) >> 32);
2141 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
2142 
2143 	kvm_s390_vcpu_block_all(kvm);
2144 	write_lock(&kvm->arch.sca_lock);
2145 
2146 	sca_copy_b_to_e(new_sca, old_sca);
2147 
2148 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2149 		vcpu->arch.sie_block->scaoh = scaoh;
2150 		vcpu->arch.sie_block->scaol = scaol;
2151 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2152 	}
2153 	kvm->arch.sca = new_sca;
2154 	kvm->arch.use_esca = 1;
2155 
2156 	write_unlock(&kvm->arch.sca_lock);
2157 	kvm_s390_vcpu_unblock_all(kvm);
2158 
2159 	free_page((unsigned long)old_sca);
2160 
2161 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2162 		 old_sca, kvm->arch.sca);
2163 	return 0;
2164 }
2165 
2166 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2167 {
2168 	int rc;
2169 
2170 	if (!kvm_s390_use_sca_entries()) {
2171 		if (id < KVM_MAX_VCPUS)
2172 			return true;
2173 		return false;
2174 	}
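	/* ids beyond the basic SCA slots require a switch to the extended SCA */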
2175 	if (id < KVM_S390_BSCA_CPU_SLOTS)
2176 		return true;
2177 	if (!sclp.has_esca || !sclp.has_64bscao)
2178 		return false;
2179 
2180 	mutex_lock(&kvm->lock);
2181 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2182 	mutex_unlock(&kvm->lock);
2183 
2184 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2185 }
2186 
2187 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2188 {
2189 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2190 	kvm_clear_async_pf_completion_queue(vcpu);
2191 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2192 				    KVM_SYNC_GPRS |
2193 				    KVM_SYNC_ACRS |
2194 				    KVM_SYNC_CRS |
2195 				    KVM_SYNC_ARCH0 |
2196 				    KVM_SYNC_PFAULT;
2197 	kvm_s390_set_prefix(vcpu, 0);
2198 	if (test_kvm_facility(vcpu->kvm, 64))
2199 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2200 	if (test_kvm_facility(vcpu->kvm, 133))
2201 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2202 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
2203 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2204 	 */
2205 	if (MACHINE_HAS_VX)
2206 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2207 	else
2208 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2209 
2210 	if (kvm_is_ucontrol(vcpu->kvm))
2211 		return __kvm_ucontrol_vcpu_init(vcpu);
2212 
2213 	return 0;
2214 }
2215 
2216 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2217 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2218 {
2219 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2220 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2221 	vcpu->arch.cputm_start = get_tod_clock_fast();
2222 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2223 }
2224 
2225 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2226 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2227 {
2228 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2229 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2230 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2231 	vcpu->arch.cputm_start = 0;
2232 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2233 }
2234 
2235 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2236 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2237 {
2238 	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2239 	vcpu->arch.cputm_enabled = true;
2240 	__start_cpu_timer_accounting(vcpu);
2241 }
2242 
2243 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2244 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2245 {
2246 	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2247 	__stop_cpu_timer_accounting(vcpu);
2248 	vcpu->arch.cputm_enabled = false;
2249 }
2250 
2251 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2252 {
2253 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2254 	__enable_cpu_timer_accounting(vcpu);
2255 	preempt_enable();
2256 }
2257 
2258 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2259 {
2260 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2261 	__disable_cpu_timer_accounting(vcpu);
2262 	preempt_enable();
2263 }
2264 
2265 /* set the cpu timer - may only be called from the VCPU thread itself */
2266 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2267 {
2268 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2269 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2270 	if (vcpu->arch.cputm_enabled)
2271 		vcpu->arch.cputm_start = get_tod_clock_fast();
2272 	vcpu->arch.sie_block->cputm = cputm;
2273 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2274 	preempt_enable();
2275 }
2276 
2277 /* update and get the cpu timer - can also be called from other VCPU threads */
2278 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2279 {
2280 	unsigned int seq;
2281 	__u64 value;
2282 
2283 	if (unlikely(!vcpu->arch.cputm_enabled))
2284 		return vcpu->arch.sie_block->cputm;
2285 
2286 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2287 	do {
2288 		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2289 		/*
2290 		 * If the writer would ever execute a read in the critical
2291 		 * section, e.g. in irq context, we have a deadlock.
2292 		 */
2293 		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2294 		value = vcpu->arch.sie_block->cputm;
2295 		/* if cputm_start is 0, accounting is being started/stopped */
2296 		if (likely(vcpu->arch.cputm_start))
2297 			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2298 	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2299 	preempt_enable();
2300 	return value;
2301 }
2302 
2303 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2304 {
2305 
2306 	gmap_enable(vcpu->arch.enabled_gmap);
2307 	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2308 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2309 		__start_cpu_timer_accounting(vcpu);
2310 	vcpu->cpu = cpu;
2311 }
2312 
2313 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2314 {
2315 	vcpu->cpu = -1;
2316 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2317 		__stop_cpu_timer_accounting(vcpu);
2318 	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2319 	vcpu->arch.enabled_gmap = gmap_get_enabled();
2320 	gmap_disable(vcpu->arch.enabled_gmap);
2321 
2322 }
2323 
2324 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2325 {
2326 	/* this equals initial cpu reset in pop, but we don't switch to ESA */
2327 	vcpu->arch.sie_block->gpsw.mask = 0UL;
2328 	vcpu->arch.sie_block->gpsw.addr = 0UL;
2329 	kvm_s390_set_prefix(vcpu, 0);
2330 	kvm_s390_set_cpu_timer(vcpu, 0);
2331 	vcpu->arch.sie_block->ckc       = 0UL;
2332 	vcpu->arch.sie_block->todpr     = 0;
2333 	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2334 	vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
2335 	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2336 	/* make sure the new fpc will be lazily loaded */
2337 	save_fpu_regs();
2338 	current->thread.fpu.fpc = 0;
2339 	vcpu->arch.sie_block->gbea = 1;
2340 	vcpu->arch.sie_block->pp = 0;
2341 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2342 	kvm_clear_async_pf_completion_queue(vcpu);
2343 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2344 		kvm_s390_vcpu_stop(vcpu);
2345 	kvm_s390_clear_local_irqs(vcpu);
2346 }
2347 
2348 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2349 {
2350 	mutex_lock(&vcpu->kvm->lock);
2351 	preempt_disable();
2352 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2353 	preempt_enable();
2354 	mutex_unlock(&vcpu->kvm->lock);
2355 	if (!kvm_is_ucontrol(vcpu->kvm)) {
2356 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2357 		sca_add_vcpu(vcpu);
2358 	}
2359 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2360 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2361 	/* make vcpu_load load the right gmap on the first trigger */
2362 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2363 }
2364 
2365 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2366 {
2367 	if (!test_kvm_facility(vcpu->kvm, 76))
2368 		return;
2369 
2370 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2371 
2372 	if (vcpu->kvm->arch.crypto.aes_kw)
2373 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2374 	if (vcpu->kvm->arch.crypto.dea_kw)
2375 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2376 
2377 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2378 }
2379 
2380 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2381 {
2382 	free_page(vcpu->arch.sie_block->cbrlo);
2383 	vcpu->arch.sie_block->cbrlo = 0;
2384 }
2385 
2386 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2387 {
2388 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2389 	if (!vcpu->arch.sie_block->cbrlo)
2390 		return -ENOMEM;
2391 
2392 	vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
2393 	return 0;
2394 }
2395 
2396 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2397 {
2398 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2399 
2400 	vcpu->arch.sie_block->ibc = model->ibc;
2401 	if (test_kvm_facility(vcpu->kvm, 7))
2402 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2403 }
2404 
2405 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2406 {
2407 	int rc = 0;
2408 
2409 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2410 						    CPUSTAT_SM |
2411 						    CPUSTAT_STOPPED);
2412 
2413 	if (test_kvm_facility(vcpu->kvm, 78))
2414 		atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
2415 	else if (test_kvm_facility(vcpu->kvm, 8))
2416 		atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
2417 
2418 	kvm_s390_vcpu_setup_model(vcpu);
2419 
2420 	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2421 	if (MACHINE_HAS_ESOP)
2422 		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2423 	if (test_kvm_facility(vcpu->kvm, 9))
2424 		vcpu->arch.sie_block->ecb |= ECB_SRSI;
2425 	if (test_kvm_facility(vcpu->kvm, 73))
2426 		vcpu->arch.sie_block->ecb |= ECB_TE;
2427 
2428 	if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
2429 		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2430 	if (test_kvm_facility(vcpu->kvm, 130))
2431 		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2432 	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2433 	if (sclp.has_cei)
2434 		vcpu->arch.sie_block->eca |= ECA_CEI;
2435 	if (sclp.has_ib)
2436 		vcpu->arch.sie_block->eca |= ECA_IB;
2437 	if (sclp.has_siif)
2438 		vcpu->arch.sie_block->eca |= ECA_SII;
2439 	if (sclp.has_sigpif)
2440 		vcpu->arch.sie_block->eca |= ECA_SIGPI;
2441 	if (test_kvm_facility(vcpu->kvm, 129)) {
2442 		vcpu->arch.sie_block->eca |= ECA_VX;
2443 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2444 	}
2445 	if (test_kvm_facility(vcpu->kvm, 139))
2446 		vcpu->arch.sie_block->ecd |= ECD_MEF;
2447 
2448 	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2449 					| SDNXC;
2450 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2451 
2452 	if (sclp.has_kss)
2453 		atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
2454 	else
2455 		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2456 
2457 	if (vcpu->kvm->arch.use_cmma) {
2458 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
2459 		if (rc)
2460 			return rc;
2461 	}
2462 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2463 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2464 
2465 	kvm_s390_vcpu_crypto_setup(vcpu);
2466 
2467 	return rc;
2468 }
2469 
2470 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2471 				      unsigned int id)
2472 {
2473 	struct kvm_vcpu *vcpu;
2474 	struct sie_page *sie_page;
2475 	int rc = -EINVAL;
2476 
2477 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2478 		goto out;
2479 
2480 	rc = -ENOMEM;
2481 
2482 	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2483 	if (!vcpu)
2484 		goto out;
2485 
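	/* the SIE control block and the ITDB share a single page (struct sie_page) */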
2486 	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2487 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2488 	if (!sie_page)
2489 		goto out_free_cpu;
2490 
2491 	vcpu->arch.sie_block = &sie_page->sie_block;
2492 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2493 
2494 	/* the real guest size will always be smaller than msl */
2495 	vcpu->arch.sie_block->mso = 0;
2496 	vcpu->arch.sie_block->msl = sclp.hamax;
2497 
2498 	vcpu->arch.sie_block->icpua = id;
2499 	spin_lock_init(&vcpu->arch.local_int.lock);
2500 	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2501 	vcpu->arch.local_int.wq = &vcpu->wq;
2502 	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2503 	seqcount_init(&vcpu->arch.cputm_seqcount);
2504 
2505 	rc = kvm_vcpu_init(vcpu, kvm, id);
2506 	if (rc)
2507 		goto out_free_sie_block;
2508 	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2509 		 vcpu->arch.sie_block);
2510 	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2511 
2512 	return vcpu;
2513 out_free_sie_block:
2514 	free_page((unsigned long)(vcpu->arch.sie_block));
2515 out_free_cpu:
2516 	kmem_cache_free(kvm_vcpu_cache, vcpu);
2517 out:
2518 	return ERR_PTR(rc);
2519 }
2520 
2521 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2522 {
2523 	return kvm_s390_vcpu_has_irq(vcpu, 0);
2524 }
2525 
2526 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2527 {
2528 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2529 }
2530 
2531 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2532 {
2533 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2534 	exit_sie(vcpu);
2535 }
2536 
2537 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2538 {
2539 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2540 }
2541 
2542 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2543 {
2544 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2545 	exit_sie(vcpu);
2546 }
2547 
2548 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2549 {
2550 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2551 }
2552 
2553 /*
2554  * Kick a guest cpu out of SIE and wait until SIE is not running.
2555  * If the CPU is not running (e.g. waiting as idle) the function will
2556  * return immediately.
 */
2557 void exit_sie(struct kvm_vcpu *vcpu)
2558 {
2559 	atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2560 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2561 		cpu_relax();
2562 }
2563 
2564 /* Kick a guest cpu out of SIE to process a request synchronously */
2565 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2566 {
2567 	kvm_make_request(req, vcpu);
2568 	kvm_s390_vcpu_request(vcpu);
2569 }
2570 
2571 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2572 			      unsigned long end)
2573 {
2574 	struct kvm *kvm = gmap->private;
2575 	struct kvm_vcpu *vcpu;
2576 	unsigned long prefix;
2577 	int i;
2578 
2579 	if (gmap_is_shadow(gmap))
2580 		return;
2581 	if (start >= 1UL << 31)
2582 		/* We are only interested in prefix pages */
2583 		return;
2584 	kvm_for_each_vcpu(i, vcpu, kvm) {
2585 		/* match against both prefix pages */
2586 		prefix = kvm_s390_get_prefix(vcpu);
2587 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2588 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2589 				   start, end);
2590 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2591 		}
2592 	}
2593 }
2594 
2595 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2596 {
2597 	/* kvm common code refers to this, but never calls it */
2598 	BUG();
2599 	return 0;
2600 }
2601 
2602 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2603 					   struct kvm_one_reg *reg)
2604 {
2605 	int r = -EINVAL;
2606 
2607 	switch (reg->id) {
2608 	case KVM_REG_S390_TODPR:
2609 		r = put_user(vcpu->arch.sie_block->todpr,
2610 			     (u32 __user *)reg->addr);
2611 		break;
2612 	case KVM_REG_S390_EPOCHDIFF:
2613 		r = put_user(vcpu->arch.sie_block->epoch,
2614 			     (u64 __user *)reg->addr);
2615 		break;
2616 	case KVM_REG_S390_CPU_TIMER:
2617 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
2618 			     (u64 __user *)reg->addr);
2619 		break;
2620 	case KVM_REG_S390_CLOCK_COMP:
2621 		r = put_user(vcpu->arch.sie_block->ckc,
2622 			     (u64 __user *)reg->addr);
2623 		break;
2624 	case KVM_REG_S390_PFTOKEN:
2625 		r = put_user(vcpu->arch.pfault_token,
2626 			     (u64 __user *)reg->addr);
2627 		break;
2628 	case KVM_REG_S390_PFCOMPARE:
2629 		r = put_user(vcpu->arch.pfault_compare,
2630 			     (u64 __user *)reg->addr);
2631 		break;
2632 	case KVM_REG_S390_PFSELECT:
2633 		r = put_user(vcpu->arch.pfault_select,
2634 			     (u64 __user *)reg->addr);
2635 		break;
2636 	case KVM_REG_S390_PP:
2637 		r = put_user(vcpu->arch.sie_block->pp,
2638 			     (u64 __user *)reg->addr);
2639 		break;
2640 	case KVM_REG_S390_GBEA:
2641 		r = put_user(vcpu->arch.sie_block->gbea,
2642 			     (u64 __user *)reg->addr);
2643 		break;
2644 	default:
2645 		break;
2646 	}
2647 
2648 	return r;
2649 }
2650 
2651 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2652 					   struct kvm_one_reg *reg)
2653 {
2654 	int r = -EINVAL;
2655 	__u64 val;
2656 
2657 	switch (reg->id) {
2658 	case KVM_REG_S390_TODPR:
2659 		r = get_user(vcpu->arch.sie_block->todpr,
2660 			     (u32 __user *)reg->addr);
2661 		break;
2662 	case KVM_REG_S390_EPOCHDIFF:
2663 		r = get_user(vcpu->arch.sie_block->epoch,
2664 			     (u64 __user *)reg->addr);
2665 		break;
2666 	case KVM_REG_S390_CPU_TIMER:
2667 		r = get_user(val, (u64 __user *)reg->addr);
2668 		if (!r)
2669 			kvm_s390_set_cpu_timer(vcpu, val);
2670 		break;
2671 	case KVM_REG_S390_CLOCK_COMP:
2672 		r = get_user(vcpu->arch.sie_block->ckc,
2673 			     (u64 __user *)reg->addr);
2674 		break;
2675 	case KVM_REG_S390_PFTOKEN:
2676 		r = get_user(vcpu->arch.pfault_token,
2677 			     (u64 __user *)reg->addr);
2678 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2679 			kvm_clear_async_pf_completion_queue(vcpu);
2680 		break;
2681 	case KVM_REG_S390_PFCOMPARE:
2682 		r = get_user(vcpu->arch.pfault_compare,
2683 			     (u64 __user *)reg->addr);
2684 		break;
2685 	case KVM_REG_S390_PFSELECT:
2686 		r = get_user(vcpu->arch.pfault_select,
2687 			     (u64 __user *)reg->addr);
2688 		break;
2689 	case KVM_REG_S390_PP:
2690 		r = get_user(vcpu->arch.sie_block->pp,
2691 			     (u64 __user *)reg->addr);
2692 		break;
2693 	case KVM_REG_S390_GBEA:
2694 		r = get_user(vcpu->arch.sie_block->gbea,
2695 			     (u64 __user *)reg->addr);
2696 		break;
2697 	default:
2698 		break;
2699 	}
2700 
2701 	return r;
2702 }
2703 
2704 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2705 {
2706 	kvm_s390_vcpu_initial_reset(vcpu);
2707 	return 0;
2708 }
2709 
2710 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2711 {
2712 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2713 	return 0;
2714 }
2715 
2716 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2717 {
2718 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2719 	return 0;
2720 }
2721 
2722 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2723 				  struct kvm_sregs *sregs)
2724 {
2725 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2726 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2727 	return 0;
2728 }
2729 
2730 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2731 				  struct kvm_sregs *sregs)
2732 {
2733 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2734 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2735 	return 0;
2736 }
2737 
2738 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2739 {
2740 	if (test_fp_ctl(fpu->fpc))
2741 		return -EINVAL;
2742 	vcpu->run->s.regs.fpc = fpu->fpc;
2743 	if (MACHINE_HAS_VX)
2744 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2745 				 (freg_t *) fpu->fprs);
2746 	else
2747 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2748 	return 0;
2749 }
2750 
2751 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2752 {
2753 	/* make sure we have the latest values */
2754 	save_fpu_regs();
2755 	if (MACHINE_HAS_VX)
2756 		convert_vx_to_fp((freg_t *) fpu->fprs,
2757 				 (__vector128 *) vcpu->run->s.regs.vrs);
2758 	else
2759 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2760 	fpu->fpc = vcpu->run->s.regs.fpc;
2761 	return 0;
2762 }
2763 
2764 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2765 {
2766 	int rc = 0;
2767 
2768 	if (!is_vcpu_stopped(vcpu))
2769 		rc = -EBUSY;
2770 	else {
2771 		vcpu->run->psw_mask = psw.mask;
2772 		vcpu->run->psw_addr = psw.addr;
2773 	}
2774 	return rc;
2775 }
2776 
2777 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2778 				  struct kvm_translation *tr)
2779 {
2780 	return -EINVAL; /* not implemented yet */
2781 }
2782 
2783 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2784 			      KVM_GUESTDBG_USE_HW_BP | \
2785 			      KVM_GUESTDBG_ENABLE)
2786 
2787 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2788 					struct kvm_guest_debug *dbg)
2789 {
2790 	int rc = 0;
2791 
2792 	vcpu->guest_debug = 0;
2793 	kvm_s390_clear_bp_data(vcpu);
2794 
2795 	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2796 		return -EINVAL;
2797 	if (!sclp.has_gpere)
2798 		return -EINVAL;
2799 
2800 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
2801 		vcpu->guest_debug = dbg->control;
2802 		/* enforce guest PER */
2803 		atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2804 
2805 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2806 			rc = kvm_s390_import_bp_data(vcpu, dbg);
2807 	} else {
2808 		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2809 		vcpu->arch.guestdbg.last_bp = 0;
2810 	}
2811 
2812 	if (rc) {
2813 		vcpu->guest_debug = 0;
2814 		kvm_s390_clear_bp_data(vcpu);
2815 		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2816 	}
2817 
2818 	return rc;
2819 }
2820 
2821 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2822 				    struct kvm_mp_state *mp_state)
2823 {
2824 	/* CHECK_STOP and LOAD are not supported yet */
2825 	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2826 				       KVM_MP_STATE_OPERATING;
2827 }
2828 
2829 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2830 				    struct kvm_mp_state *mp_state)
2831 {
2832 	int rc = 0;
2833 
2834 	/* user space knows about this interface - let it control the state */
2835 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2836 
2837 	switch (mp_state->mp_state) {
2838 	case KVM_MP_STATE_STOPPED:
2839 		kvm_s390_vcpu_stop(vcpu);
2840 		break;
2841 	case KVM_MP_STATE_OPERATING:
2842 		kvm_s390_vcpu_start(vcpu);
2843 		break;
2844 	case KVM_MP_STATE_LOAD:
2845 	case KVM_MP_STATE_CHECK_STOP:
2846 		/* fall through - CHECK_STOP and LOAD are not supported yet */
2847 	default:
2848 		rc = -ENXIO;
2849 	}
2850 
2851 	return rc;
2852 }
2853 
2854 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2855 {
2856 	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2857 }
2858 
2859 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2860 {
2861 retry:
2862 	kvm_s390_vcpu_request_handled(vcpu);
2863 	if (!kvm_request_pending(vcpu))
2864 		return 0;
2865 	/*
2866 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2867 	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2868 	 * This ensures that the ipte instruction for this request has
2869 	 * already finished. We might race against a second unmapper that
2870 	 * wants to set the blocking bit. Let's just retry the request loop.
2871 	 */
2872 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2873 		int rc;
2874 		rc = gmap_mprotect_notify(vcpu->arch.gmap,
2875 					  kvm_s390_get_prefix(vcpu),
2876 					  PAGE_SIZE * 2, PROT_WRITE);
2877 		if (rc) {
2878 			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2879 			return rc;
2880 		}
2881 		goto retry;
2882 	}
2883 
2884 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2885 		vcpu->arch.sie_block->ihcpu = 0xffff;
2886 		goto retry;
2887 	}
2888 
2889 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2890 		if (!ibs_enabled(vcpu)) {
2891 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2892 			atomic_or(CPUSTAT_IBS,
2893 					&vcpu->arch.sie_block->cpuflags);
2894 		}
2895 		goto retry;
2896 	}
2897 
2898 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2899 		if (ibs_enabled(vcpu)) {
2900 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2901 			atomic_andnot(CPUSTAT_IBS,
2902 					  &vcpu->arch.sie_block->cpuflags);
2903 		}
2904 		goto retry;
2905 	}
2906 
2907 	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2908 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2909 		goto retry;
2910 	}
2911 
2912 	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
2913 		/*
2914 		 * Disable CMMA virtualization; we will emulate the ESSA
2915 		 * instruction manually in order to provide the additional
2916 		 * functionality needed for live migration.
2917 		 */
2918 		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
2919 		goto retry;
2920 	}
2921 
2922 	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
2923 		/*
2924 		 * Re-enable CMMA virtualization if CMMA is available and
2925 		 * was used.
2926 		 */
2927 		if ((vcpu->kvm->arch.use_cmma) &&
2928 		    (vcpu->kvm->mm->context.use_cmma))
2929 			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
2930 		goto retry;
2931 	}
2932 
2933 	/* nothing to do, just clear the request */
2934 	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
2935 
2936 	return 0;
2937 }
2938 
2939 void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
2940 				 const struct kvm_s390_vm_tod_clock *gtod)
2941 {
2942 	struct kvm_vcpu *vcpu;
2943 	struct kvm_s390_tod_clock_ext htod;
2944 	int i;
2945 
2946 	mutex_lock(&kvm->lock);
2947 	preempt_disable();
2948 
2949 	get_tod_clock_ext((char *)&htod);
2950 
2951 	kvm->arch.epoch = gtod->tod - htod.tod;
2952 	kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
2953 
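	/* borrow from the epoch index if the epoch subtraction wrapped */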
2954 	if (kvm->arch.epoch > gtod->tod)
2955 		kvm->arch.epdx -= 1;
2956 
2957 	kvm_s390_vcpu_block_all(kvm);
2958 	kvm_for_each_vcpu(i, vcpu, kvm) {
2959 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2960 		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
2961 	}
2962 
2963 	kvm_s390_vcpu_unblock_all(kvm);
2964 	preempt_enable();
2965 	mutex_unlock(&kvm->lock);
2966 }
2967 
2968 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2969 {
2970 	struct kvm_vcpu *vcpu;
2971 	int i;
2972 
2973 	mutex_lock(&kvm->lock);
2974 	preempt_disable();
2975 	kvm->arch.epoch = tod - get_tod_clock();
2976 	kvm_s390_vcpu_block_all(kvm);
2977 	kvm_for_each_vcpu(i, vcpu, kvm)
2978 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2979 	kvm_s390_vcpu_unblock_all(kvm);
2980 	preempt_enable();
2981 	mutex_unlock(&kvm->lock);
2982 }
2983 
2984 /**
2985  * kvm_arch_fault_in_page - fault-in guest page if necessary
2986  * @vcpu: The corresponding virtual cpu
2987  * @gpa: Guest physical address
2988  * @writable: Whether the page should be writable or not
2989  *
2990  * Make sure that a guest page has been faulted-in on the host.
2991  *
2992  * Return: Zero on success, negative error code otherwise.
2993  */
2994 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2995 {
2996 	return gmap_fault(vcpu->arch.gmap, gpa,
2997 			  writable ? FAULT_FLAG_WRITE : 0);
2998 }
2999 
3000 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3001 				      unsigned long token)
3002 {
3003 	struct kvm_s390_interrupt inti;
3004 	struct kvm_s390_irq irq;
3005 
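	/*
	 * A pfault "init" token is injected into the faulting vcpu itself,
	 * while the "done" notification is delivered to the VM as a floating
	 * interrupt.
	 */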
3006 	if (start_token) {
3007 		irq.u.ext.ext_params2 = token;
3008 		irq.type = KVM_S390_INT_PFAULT_INIT;
3009 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3010 	} else {
3011 		inti.type = KVM_S390_INT_PFAULT_DONE;
3012 		inti.parm64 = token;
3013 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3014 	}
3015 }
3016 
3017 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3018 				     struct kvm_async_pf *work)
3019 {
3020 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3021 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3022 }
3023 
3024 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3025 				 struct kvm_async_pf *work)
3026 {
3027 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3028 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3029 }
3030 
3031 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3032 			       struct kvm_async_pf *work)
3033 {
3034 	/* s390 will always inject the page directly */
3035 }
3036 
3037 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3038 {
3039 	/*
3040 	 * s390 will always inject the page directly,
3041 	 * but we still want check_async_completion to clean up
3042 	 */
3043 	return true;
3044 }
3045 
3046 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3047 {
3048 	hva_t hva;
3049 	struct kvm_arch_async_pf arch;
3050 	int rc;
3051 
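	/*
	 * Only set up an async pfault if the guest has established the pfault
	 * handshake (valid token, matching PSW mask bits) and is currently
	 * able to take the corresponding external interrupt.
	 */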
3052 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3053 		return 0;
3054 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3055 	    vcpu->arch.pfault_compare)
3056 		return 0;
3057 	if (psw_extint_disabled(vcpu))
3058 		return 0;
3059 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
3060 		return 0;
3061 	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
3062 		return 0;
3063 	if (!vcpu->arch.gmap->pfault_enabled)
3064 		return 0;
3065 
3066 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3067 	hva += current->thread.gmap_addr & ~PAGE_MASK;
3068 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3069 		return 0;
3070 
3071 	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3072 	return rc;
3073 }
3074 
3075 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3076 {
3077 	int rc, cpuflags;
3078 
3079 	/*
3080 	 * On s390 notifications for arriving pages will be delivered directly
3081 	 * to the guest, but the housekeeping for completed pfaults is
3082 	 * handled outside the worker.
3083 	 */
3084 	kvm_check_async_pf_completion(vcpu);
3085 
3086 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3087 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3088 
3089 	if (need_resched())
3090 		schedule();
3091 
3092 	if (test_cpu_flag(CIF_MCCK_PENDING))
3093 		s390_handle_mcck();
3094 
3095 	if (!kvm_is_ucontrol(vcpu->kvm)) {
3096 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
3097 		if (rc)
3098 			return rc;
3099 	}
3100 
3101 	rc = kvm_s390_handle_requests(vcpu);
3102 	if (rc)
3103 		return rc;
3104 
3105 	if (guestdbg_enabled(vcpu)) {
3106 		kvm_s390_backup_guest_per_regs(vcpu);
3107 		kvm_s390_patch_guest_per_regs(vcpu);
3108 	}
3109 
3110 	vcpu->arch.sie_block->icptcode = 0;
3111 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3112 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3113 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
3114 
3115 	return 0;
3116 }
3117 
3118 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3119 {
3120 	struct kvm_s390_pgm_info pgm_info = {
3121 		.code = PGM_ADDRESSING,
3122 	};
3123 	u8 opcode, ilen;
3124 	int rc;
3125 
3126 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3127 	trace_kvm_s390_sie_fault(vcpu);
3128 
3129 	/*
3130 	 * We want to inject an addressing exception, which is defined as a
3131 	 * suppressing or terminating exception. However, since we came here
3132 	 * by a DAT access exception, the PSW still points to the faulting
3133 	 * instruction since DAT exceptions are nullifying. So we've got
3134 	 * to look up the current opcode to get the length of the instruction
3135 	 * to be able to forward the PSW.
3136 	 */
3137 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3138 	ilen = insn_length(opcode);
3139 	if (rc < 0) {
3140 		return rc;
3141 	} else if (rc) {
3142 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
3143 		 * Forward by arbitrary ilc, injection will take care of
3144 		 * nullification if necessary.
3145 		 */
3146 		pgm_info = vcpu->arch.pgm;
3147 		ilen = 4;
3148 	}
3149 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3150 	kvm_s390_forward_psw(vcpu, ilen);
3151 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3152 }
3153 
3154 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3155 {
3156 	struct mcck_volatile_info *mcck_info;
3157 	struct sie_page *sie_page;
3158 
3159 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3160 		   vcpu->arch.sie_block->icptcode);
3161 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3162 
3163 	if (guestdbg_enabled(vcpu))
3164 		kvm_s390_restore_guest_per_regs(vcpu);
3165 
3166 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3167 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3168 
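	/*
	 * -EINTR from sie64a indicates that a host machine check hit while
	 * the vcpu was in SIE; the saved volatile info is reinjected into
	 * the guest.
	 */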
3169 	if (exit_reason == -EINTR) {
3170 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
3171 		sie_page = container_of(vcpu->arch.sie_block,
3172 					struct sie_page, sie_block);
3173 		mcck_info = &sie_page->mcck_info;
3174 		kvm_s390_reinject_machine_check(vcpu, mcck_info);
3175 		return 0;
3176 	}
3177 
3178 	if (vcpu->arch.sie_block->icptcode > 0) {
3179 		int rc = kvm_handle_sie_intercept(vcpu);
3180 
3181 		if (rc != -EOPNOTSUPP)
3182 			return rc;
3183 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3184 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3185 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3186 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3187 		return -EREMOTE;
3188 	} else if (exit_reason != -EFAULT) {
3189 		vcpu->stat.exit_null++;
3190 		return 0;
3191 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
3192 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3193 		vcpu->run->s390_ucontrol.trans_exc_code =
3194 						current->thread.gmap_addr;
3195 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
3196 		return -EREMOTE;
3197 	} else if (current->thread.gmap_pfault) {
3198 		trace_kvm_s390_major_guest_pfault(vcpu);
3199 		current->thread.gmap_pfault = 0;
3200 		if (kvm_arch_setup_async_pf(vcpu))
3201 			return 0;
3202 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3203 	}
3204 	return vcpu_post_run_fault_in_sie(vcpu);
3205 }
3206 
3207 static int __vcpu_run(struct kvm_vcpu *vcpu)
3208 {
3209 	int rc, exit_reason;
3210 
3211 	/*
3212 	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
3213 	 * ning the guest), so that memslots (and other stuff) are protected
3214 	 */
3215 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3216 
3217 	do {
3218 		rc = vcpu_pre_run(vcpu);
3219 		if (rc)
3220 			break;
3221 
3222 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3223 		/*
3224 		 * As PF_VCPU will be used in the fault handler, there should be
3225 		 * no uaccess between guest_enter and guest_exit.
3226 		 */
3227 		local_irq_disable();
3228 		guest_enter_irqoff();
3229 		__disable_cpu_timer_accounting(vcpu);
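		/*
		 * While the vcpu is in SIE the guest CPU timer is stepped by
		 * SIE itself, so the software accounting is paused around
		 * sie64a() and resumed on exit.
		 */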
3230 		local_irq_enable();
3231 		exit_reason = sie64a(vcpu->arch.sie_block,
3232 				     vcpu->run->s.regs.gprs);
3233 		local_irq_disable();
3234 		__enable_cpu_timer_accounting(vcpu);
3235 		guest_exit_irqoff();
3236 		local_irq_enable();
3237 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3238 
3239 		rc = vcpu_post_run(vcpu, exit_reason);
3240 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3241 
3242 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3243 	return rc;
3244 }
3245 
3246 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3247 {
3248 	struct runtime_instr_cb *riccb;
3249 	struct gs_cb *gscb;
3250 
3251 	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3252 	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3253 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3254 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3255 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3256 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3257 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3258 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3259 		/* some control register changes require a tlb flush */
3260 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3261 	}
3262 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3263 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3264 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3265 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3266 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3267 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3268 	}
3269 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3270 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3271 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3272 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3273 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3274 			kvm_clear_async_pf_completion_queue(vcpu);
3275 	}
3276 	/*
3277 	 * If userspace sets the riccb (e.g. after migration) to a valid state,
3278 	 * we should enable RI here instead of doing the lazy enablement.
3279 	 */
3280 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3281 	    test_kvm_facility(vcpu->kvm, 64) &&
3282 	    riccb->v &&
3283 	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3284 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3285 		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3286 	}
3287 	/*
3288 	 * If userspace sets the gscb (e.g. after migration) to non-zero,
3289 	 * we should enable GS here instead of doing the lazy enablement.
3290 	 */
3291 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3292 	    test_kvm_facility(vcpu->kvm, 133) &&
3293 	    gscb->gssm &&
3294 	    !vcpu->arch.gs_enabled) {
3295 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3296 		vcpu->arch.sie_block->ecb |= ECB_GS;
3297 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3298 		vcpu->arch.gs_enabled = 1;
3299 	}
3300 	save_access_regs(vcpu->arch.host_acrs);
3301 	restore_access_regs(vcpu->run->s.regs.acrs);
3302 	/* save host (userspace) fprs/vrs */
3303 	save_fpu_regs();
3304 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3305 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3306 	if (MACHINE_HAS_VX)
3307 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3308 	else
3309 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3310 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3311 	if (test_fp_ctl(current->thread.fpu.fpc))
3312 		/* User space provided an invalid FPC, let's clear it */
3313 		current->thread.fpu.fpc = 0;
3314 	if (MACHINE_HAS_GS) {
3315 		preempt_disable();
3316 		__ctl_set_bit(2, 4);
3317 		if (current->thread.gs_cb) {
3318 			vcpu->arch.host_gscb = current->thread.gs_cb;
3319 			save_gs_cb(vcpu->arch.host_gscb);
3320 		}
3321 		if (vcpu->arch.gs_enabled) {
3322 			current->thread.gs_cb = (struct gs_cb *)
3323 						&vcpu->run->s.regs.gscb;
3324 			restore_gs_cb(current->thread.gs_cb);
3325 		}
3326 		preempt_enable();
3327 	}
3328 
3329 	kvm_run->kvm_dirty_regs = 0;
3330 }
3331 
3332 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3333 {
3334 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3335 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3336 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3337 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3338 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3339 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3340 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3341 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3342 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3343 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3344 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3345 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3346 	save_access_regs(vcpu->run->s.regs.acrs);
3347 	restore_access_regs(vcpu->arch.host_acrs);
3348 	/* Save guest register state */
3349 	save_fpu_regs();
3350 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3351 	/* Restore will be done lazily at return */
3352 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3353 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3354 	if (MACHINE_HAS_GS) {
3355 		__ctl_set_bit(2, 4);
3356 		if (vcpu->arch.gs_enabled)
3357 			save_gs_cb(current->thread.gs_cb);
3358 		preempt_disable();
3359 		current->thread.gs_cb = vcpu->arch.host_gscb;
3360 		restore_gs_cb(vcpu->arch.host_gscb);
3361 		preempt_enable();
3362 		if (!vcpu->arch.host_gscb)
3363 			__ctl_clear_bit(2, 4);
3364 		vcpu->arch.host_gscb = NULL;
3365 	}
3366 
3367 }
3368 
3369 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3370 {
3371 	int rc;
3372 
3373 	if (kvm_run->immediate_exit)
3374 		return -EINTR;
3375 
3376 	if (guestdbg_exit_pending(vcpu)) {
3377 		kvm_s390_prepare_debug_exit(vcpu);
3378 		return 0;
3379 	}
3380 
3381 	kvm_sigset_activate(vcpu);
3382 
3383 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3384 		kvm_s390_vcpu_start(vcpu);
3385 	} else if (is_vcpu_stopped(vcpu)) {
3386 		pr_err_ratelimited("can't run stopped vcpu %d\n",
3387 				   vcpu->vcpu_id);
3388 		return -EINVAL;
3389 	}
3390 
3391 	sync_regs(vcpu, kvm_run);
3392 	enable_cpu_timer_accounting(vcpu);
3393 
3394 	might_fault();
3395 	rc = __vcpu_run(vcpu);
3396 
3397 	if (signal_pending(current) && !rc) {
3398 		kvm_run->exit_reason = KVM_EXIT_INTR;
3399 		rc = -EINTR;
3400 	}
3401 
3402 	if (guestdbg_exit_pending(vcpu) && !rc)  {
3403 		kvm_s390_prepare_debug_exit(vcpu);
3404 		rc = 0;
3405 	}
3406 
3407 	if (rc == -EREMOTE) {
3408 		/* userspace support is needed, kvm_run has been prepared */
3409 		rc = 0;
3410 	}
3411 
3412 	disable_cpu_timer_accounting(vcpu);
3413 	store_regs(vcpu, kvm_run);
3414 
3415 	kvm_sigset_deactivate(vcpu);
3416 
3417 	vcpu->stat.exit_userspace++;
3418 	return rc;
3419 }
3420 
3421 /*
3422  * store status at address
3423  * we have two special cases:
3424  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3425  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3426  */
3427 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3428 {
3429 	unsigned char archmode = 1;
3430 	freg_t fprs[NUM_FPRS];
3431 	unsigned int px;
3432 	u64 clkcomp, cputm;
3433 	int rc;
3434 
3435 	px = kvm_s390_get_prefix(vcpu);
3436 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3437 		if (write_guest_abs(vcpu, 163, &archmode, 1))
3438 			return -EFAULT;
3439 		gpa = 0;
3440 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3441 		if (write_guest_real(vcpu, 163, &archmode, 1))
3442 			return -EFAULT;
3443 		gpa = px;
3444 	} else
3445 		gpa -= __LC_FPREGS_SAVE_AREA;
3446 
3447 	/* manually convert vector registers if necessary */
3448 	if (MACHINE_HAS_VX) {
3449 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3450 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3451 				     fprs, 128);
3452 	} else {
3453 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3454 				     vcpu->run->s.regs.fprs, 128);
3455 	}
3456 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3457 			      vcpu->run->s.regs.gprs, 128);
3458 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3459 			      &vcpu->arch.sie_block->gpsw, 16);
3460 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3461 			      &px, 4);
3462 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3463 			      &vcpu->run->s.regs.fpc, 4);
3464 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3465 			      &vcpu->arch.sie_block->todpr, 4);
3466 	cputm = kvm_s390_get_cpu_timer(vcpu);
3467 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3468 			      &cputm, 8);
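	/* only bits 0-55 of the clock comparator are stored, hence the shift */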
3469 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
3470 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3471 			      &clkcomp, 8);
3472 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3473 			      &vcpu->run->s.regs.acrs, 64);
3474 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3475 			      &vcpu->arch.sie_block->gcr, 128);
3476 	return rc ? -EFAULT : 0;
3477 }
3478 
3479 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3480 {
3481 	/*
3482 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3483 	 * switch in the run ioctl. Let's update our copies before we save
3484 	 * it into the save area
3485 	 */
3486 	save_fpu_regs();
3487 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3488 	save_access_regs(vcpu->run->s.regs.acrs);
3489 
3490 	return kvm_s390_store_status_unloaded(vcpu, addr);
3491 }
3492 
3493 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3494 {
3495 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3496 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3497 }
3498 
3499 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3500 {
3501 	unsigned int i;
3502 	struct kvm_vcpu *vcpu;
3503 
3504 	kvm_for_each_vcpu(i, vcpu, kvm) {
3505 		__disable_ibs_on_vcpu(vcpu);
3506 	}
3507 }
3508 
3509 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3510 {
3511 	if (!sclp.has_ibs)
3512 		return;
3513 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3514 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3515 }
3516 
3517 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3518 {
3519 	int i, online_vcpus, started_vcpus = 0;
3520 
3521 	if (!is_vcpu_stopped(vcpu))
3522 		return;
3523 
3524 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3525 	/* Only one cpu at a time may enter/leave the STOPPED state. */
3526 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
3527 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3528 
3529 	for (i = 0; i < online_vcpus; i++) {
3530 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3531 			started_vcpus++;
3532 	}
3533 
3534 	if (started_vcpus == 0) {
3535 		/* we're the only active VCPU -> speed it up */
3536 		__enable_ibs_on_vcpu(vcpu);
3537 	} else if (started_vcpus == 1) {
3538 		/*
3539 		 * As we are starting a second VCPU, we have to disable
3540 		 * the IBS facility on all VCPUs to remove potentially
3541 		 * outstanding ENABLE requests.
3542 		 */
3543 		__disable_ibs_on_all_vcpus(vcpu->kvm);
3544 	}
3545 
3546 	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3547 	/*
3548 	 * Another VCPU might have used IBS while we were offline.
3549 	 * Let's play safe and flush the VCPU at startup.
3550 	 */
3551 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3552 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3553 	return;
3554 }
3555 
3556 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3557 {
3558 	int i, online_vcpus, started_vcpus = 0;
3559 	struct kvm_vcpu *started_vcpu = NULL;
3560 
3561 	if (is_vcpu_stopped(vcpu))
3562 		return;
3563 
3564 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3565 	/* Only one cpu at a time may enter/leave the STOPPED state. */
3566 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
3567 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3568 
3569 	/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
3570 	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
3571 
3572 	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3573 	__disable_ibs_on_vcpu(vcpu);
3574 
3575 	for (i = 0; i < online_vcpus; i++) {
3576 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3577 			started_vcpus++;
3578 			started_vcpu = vcpu->kvm->vcpus[i];
3579 		}
3580 	}
3581 
3582 	if (started_vcpus == 1) {
3583 		/*
3584 		 * As we only have one VCPU left, we want to enable the
3585 		 * IBS facility for that VCPU to speed it up.
3586 		 */
3587 		__enable_ibs_on_vcpu(started_vcpu);
3588 	}
3589 
3590 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3591 	return;
3592 }
3593 
3594 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3595 				     struct kvm_enable_cap *cap)
3596 {
3597 	int r;
3598 
3599 	if (cap->flags)
3600 		return -EINVAL;
3601 
3602 	switch (cap->cap) {
3603 	case KVM_CAP_S390_CSS_SUPPORT:
3604 		if (!vcpu->kvm->arch.css_support) {
3605 			vcpu->kvm->arch.css_support = 1;
3606 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3607 			trace_kvm_s390_enable_css(vcpu->kvm);
3608 		}
3609 		r = 0;
3610 		break;
3611 	default:
3612 		r = -EINVAL;
3613 		break;
3614 	}
3615 	return r;
3616 }
3617 
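/*
 * Handle the KVM_S390_MEM_OP ioctl: read from or write to guest logical
 * addresses through a temporary kernel buffer.  With the CHECK_ONLY flag
 * only the accessibility of the range is verified.  With INJECT_EXCEPTION,
 * an access error (positive return code) is converted into a program
 * interruption for the guest.
 */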
3618 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3619 				  struct kvm_s390_mem_op *mop)
3620 {
3621 	void __user *uaddr = (void __user *)mop->buf;
3622 	void *tmpbuf = NULL;
3623 	int r, srcu_idx;
3624 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3625 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
3626 
3627 	if (mop->flags & ~supported_flags)
3628 		return -EINVAL;
3629 
3630 	if (mop->size > MEM_OP_MAX_SIZE)
3631 		return -E2BIG;
3632 
3633 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3634 		tmpbuf = vmalloc(mop->size);
3635 		if (!tmpbuf)
3636 			return -ENOMEM;
3637 	}
3638 
3639 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3640 
3641 	switch (mop->op) {
3642 	case KVM_S390_MEMOP_LOGICAL_READ:
3643 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3644 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3645 					    mop->size, GACC_FETCH);
3646 			break;
3647 		}
3648 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3649 		if (r == 0) {
3650 			if (copy_to_user(uaddr, tmpbuf, mop->size))
3651 				r = -EFAULT;
3652 		}
3653 		break;
3654 	case KVM_S390_MEMOP_LOGICAL_WRITE:
3655 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3656 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3657 					    mop->size, GACC_STORE);
3658 			break;
3659 		}
3660 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3661 			r = -EFAULT;
3662 			break;
3663 		}
3664 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3665 		break;
3666 	default:
3667 		r = -EINVAL;
3668 	}
3669 
3670 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3671 
3672 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3673 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3674 
3675 	vfree(tmpbuf);
3676 	return r;
3677 }
3678 
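/* Dispatch the s390 specific ioctls issued on a VCPU file descriptor. */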
3679 long kvm_arch_vcpu_ioctl(struct file *filp,
3680 			 unsigned int ioctl, unsigned long arg)
3681 {
3682 	struct kvm_vcpu *vcpu = filp->private_data;
3683 	void __user *argp = (void __user *)arg;
3684 	int idx;
3685 	long r;
3686 
3687 	switch (ioctl) {
3688 	case KVM_S390_IRQ: {
3689 		struct kvm_s390_irq s390irq;
3690 
3691 		r = -EFAULT;
3692 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3693 			break;
3694 		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3695 		break;
3696 	}
3697 	case KVM_S390_INTERRUPT: {
3698 		struct kvm_s390_interrupt s390int;
3699 		struct kvm_s390_irq s390irq;
3700 
3701 		r = -EFAULT;
3702 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
3703 			break;
3704 		if (s390int_to_s390irq(&s390int, &s390irq))
3705 			return -EINVAL;
3706 		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3707 		break;
3708 	}
3709 	case KVM_S390_STORE_STATUS:
3710 		idx = srcu_read_lock(&vcpu->kvm->srcu);
3711 		r = kvm_s390_vcpu_store_status(vcpu, arg);
3712 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
3713 		break;
3714 	case KVM_S390_SET_INITIAL_PSW: {
3715 		psw_t psw;
3716 
3717 		r = -EFAULT;
3718 		if (copy_from_user(&psw, argp, sizeof(psw)))
3719 			break;
3720 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3721 		break;
3722 	}
3723 	case KVM_S390_INITIAL_RESET:
3724 		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3725 		break;
3726 	case KVM_SET_ONE_REG:
3727 	case KVM_GET_ONE_REG: {
3728 		struct kvm_one_reg reg;
3729 		r = -EFAULT;
3730 		if (copy_from_user(&reg, argp, sizeof(reg)))
3731 			break;
3732 		if (ioctl == KVM_SET_ONE_REG)
3733 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3734 		else
3735 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3736 		break;
3737 	}
3738 #ifdef CONFIG_KVM_S390_UCONTROL
3739 	case KVM_S390_UCAS_MAP: {
3740 		struct kvm_s390_ucas_mapping ucasmap;
3741 
3742 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3743 			r = -EFAULT;
3744 			break;
3745 		}
3746 
3747 		if (!kvm_is_ucontrol(vcpu->kvm)) {
3748 			r = -EINVAL;
3749 			break;
3750 		}
3751 
3752 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3753 				     ucasmap.vcpu_addr, ucasmap.length);
3754 		break;
3755 	}
3756 	case KVM_S390_UCAS_UNMAP: {
3757 		struct kvm_s390_ucas_mapping ucasmap;
3758 
3759 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3760 			r = -EFAULT;
3761 			break;
3762 		}
3763 
3764 		if (!kvm_is_ucontrol(vcpu->kvm)) {
3765 			r = -EINVAL;
3766 			break;
3767 		}
3768 
3769 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3770 			ucasmap.length);
3771 		break;
3772 	}
3773 #endif
3774 	case KVM_S390_VCPU_FAULT: {
3775 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
3776 		break;
3777 	}
3778 	case KVM_ENABLE_CAP:
3779 	{
3780 		struct kvm_enable_cap cap;
3781 		r = -EFAULT;
3782 		if (copy_from_user(&cap, argp, sizeof(cap)))
3783 			break;
3784 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3785 		break;
3786 	}
3787 	case KVM_S390_MEM_OP: {
3788 		struct kvm_s390_mem_op mem_op;
3789 
3790 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3791 			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3792 		else
3793 			r = -EFAULT;
3794 		break;
3795 	}
3796 	case KVM_S390_SET_IRQ_STATE: {
3797 		struct kvm_s390_irq_state irq_state;
3798 
3799 		r = -EFAULT;
3800 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3801 			break;
3802 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3803 		    irq_state.len == 0 ||
3804 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3805 			r = -EINVAL;
3806 			break;
3807 		}
3808 		/* do not use irq_state.flags, it will break old QEMUs */
3809 		r = kvm_s390_set_irq_state(vcpu,
3810 					   (void __user *) irq_state.buf,
3811 					   irq_state.len);
3812 		break;
3813 	}
3814 	case KVM_S390_GET_IRQ_STATE: {
3815 		struct kvm_s390_irq_state irq_state;
3816 
3817 		r = -EFAULT;
3818 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3819 			break;
3820 		if (irq_state.len == 0) {
3821 			r = -EINVAL;
3822 			break;
3823 		}
3824 		/* do not use irq_state.flags, it will break old QEMUs */
3825 		r = kvm_s390_get_irq_state(vcpu,
3826 					   (__u8 __user *)  irq_state.buf,
3827 					   irq_state.len);
3828 		break;
3829 	}
3830 	default:
3831 		r = -ENOTTY;
3832 	}
3833 	return r;
3834 }
3835 
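/*
 * mmap() on the VCPU fd: for user-controlled VMs the SIE control block is
 * exposed to user space at KVM_S390_SIE_PAGE_OFFSET; all other accesses
 * fault with SIGBUS.
 */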
3836 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3837 {
3838 #ifdef CONFIG_KVM_S390_UCONTROL
3839 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3840 		 && (kvm_is_ucontrol(vcpu->kvm))) {
3841 		vmf->page = virt_to_page(vcpu->arch.sie_block);
3842 		get_page(vmf->page);
3843 		return 0;
3844 	}
3845 #endif
3846 	return VM_FAULT_SIGBUS;
3847 }
3848 
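/* No arch specific per-memslot state is needed on s390. */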
3849 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3850 			    unsigned long npages)
3851 {
3852 	return 0;
3853 }
3854 
3855 /* Section: memory related */
3856 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3857 				   struct kvm_memory_slot *memslot,
3858 				   const struct kvm_userspace_memory_region *mem,
3859 				   enum kvm_mr_change change)
3860 {
3861 	/* A few sanity checks: memory slots have to start and end on a segment
3862 	 * boundary (1 MB).  The backing memory in userland may be fragmented
3863 	 * across several different vmas, and it is fine to mmap() and munmap()
3864 	 * parts of this slot at any time after this call. */
3865 
3866 	if (mem->userspace_addr & 0xffffful)
3867 		return -EINVAL;
3868 
3869 	if (mem->memory_size & 0xffffful)
3870 		return -EINVAL;
3871 
3872 	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3873 		return -EINVAL;
3874 
3875 	return 0;
3876 }
3877 
3878 void kvm_arch_commit_memory_region(struct kvm *kvm,
3879 				const struct kvm_userspace_memory_region *mem,
3880 				const struct kvm_memory_slot *old,
3881 				const struct kvm_memory_slot *new,
3882 				enum kvm_mr_change change)
3883 {
3884 	int rc;
3885 
3886 	/* If the basics of the memslot do not change, we do not want
3887 	 * to update the gmap. Every update causes several unnecessary
3888 	 * segment translation exceptions. This is usually handled just
3889 	 * fine by the normal fault handler + gmap, but it will also
3890 	 * cause faults on the prefix page of running guest CPUs.
3891 	 */
3892 	if (old->userspace_addr == mem->userspace_addr &&
3893 	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3894 	    old->npages * PAGE_SIZE == mem->memory_size)
3895 		return;
3896 
3897 	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3898 		mem->guest_phys_addr, mem->memory_size);
3899 	if (rc)
3900 		pr_warn("failed to commit memory region\n");
3901 	return;
3902 }
3903 
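/*
 * Build a mask for facility doubleword i, based on the hmfai value provided
 * by the SCLP, selecting the facility bits that are not hypervisor managed.
 */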
3904 static inline unsigned long nonhyp_mask(int i)
3905 {
3906 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3907 
3908 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3909 }
3910 
3911 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3912 {
3913 	vcpu->valid_wakeup = false;
3914 }
3915 
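/*
 * Module init: refuse to load if the SIE is not available, extend the
 * facility mask with host facilities that are not hypervisor managed, and
 * register with the common KVM code.
 */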
3916 static int __init kvm_s390_init(void)
3917 {
3918 	int i;
3919 
3920 	if (!sclp.has_sief2) {
3921 		pr_info("SIE not available\n");
3922 		return -ENODEV;
3923 	}
3924 
3925 	for (i = 0; i < 16; i++)
3926 		kvm_s390_fac_list_mask[i] |=
3927 			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3928 
3929 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3930 }
3931 
3932 static void __exit kvm_s390_exit(void)
3933 {
3934 	kvm_exit();
3935 }
3936 
3937 module_init(kvm_s390_init);
3938 module_exit(kvm_s390_exit);
3939 
3940 /*
3941  * Enable autoloading of the kvm module.
3942  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3943  * since x86 takes a different approach.
3944  */
3945 #include <linux/miscdevice.h>
3946 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3947 MODULE_ALIAS("devname:kvm");
3948