1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * hosting IBM Z kernel virtual machines (s390x) 4 * 5 * Copyright IBM Corp. 2008, 2018 6 * 7 * Author(s): Carsten Otte <cotte@de.ibm.com> 8 * Christian Borntraeger <borntraeger@de.ibm.com> 9 * Heiko Carstens <heiko.carstens@de.ibm.com> 10 * Christian Ehrhardt <ehrhardt@de.ibm.com> 11 * Jason J. Herne <jjherne@us.ibm.com> 12 */ 13 14 #include <linux/compiler.h> 15 #include <linux/err.h> 16 #include <linux/fs.h> 17 #include <linux/hrtimer.h> 18 #include <linux/init.h> 19 #include <linux/kvm.h> 20 #include <linux/kvm_host.h> 21 #include <linux/mman.h> 22 #include <linux/module.h> 23 #include <linux/moduleparam.h> 24 #include <linux/random.h> 25 #include <linux/slab.h> 26 #include <linux/timer.h> 27 #include <linux/vmalloc.h> 28 #include <linux/bitmap.h> 29 #include <linux/sched/signal.h> 30 #include <linux/string.h> 31 32 #include <asm/asm-offsets.h> 33 #include <asm/lowcore.h> 34 #include <asm/stp.h> 35 #include <asm/pgtable.h> 36 #include <asm/gmap.h> 37 #include <asm/nmi.h> 38 #include <asm/switch_to.h> 39 #include <asm/isc.h> 40 #include <asm/sclp.h> 41 #include <asm/cpacf.h> 42 #include <asm/timex.h> 43 #include "kvm-s390.h" 44 #include "gaccess.h" 45 46 #define KMSG_COMPONENT "kvm-s390" 47 #undef pr_fmt 48 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 49 50 #define CREATE_TRACE_POINTS 51 #include "trace.h" 52 #include "trace-s390.h" 53 54 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */ 55 #define LOCAL_IRQS 32 56 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \ 57 (KVM_MAX_VCPUS + LOCAL_IRQS)) 58 59 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU 60 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM 61 62 struct kvm_stats_debugfs_item debugfs_entries[] = { 63 { "userspace_handled", VCPU_STAT(exit_userspace) }, 64 { "exit_null", VCPU_STAT(exit_null) }, 65 { "exit_validity", VCPU_STAT(exit_validity) }, 66 { "exit_stop_request", VCPU_STAT(exit_stop_request) }, 67 { "exit_external_request", VCPU_STAT(exit_external_request) }, 68 { "exit_io_request", VCPU_STAT(exit_io_request) }, 69 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) }, 70 { "exit_instruction", VCPU_STAT(exit_instruction) }, 71 { "exit_pei", VCPU_STAT(exit_pei) }, 72 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) }, 73 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) }, 74 { "exit_operation_exception", VCPU_STAT(exit_operation_exception) }, 75 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) }, 76 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) }, 77 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) }, 78 { "halt_wakeup", VCPU_STAT(halt_wakeup) }, 79 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) }, 80 { "instruction_lctl", VCPU_STAT(instruction_lctl) }, 81 { "instruction_stctl", VCPU_STAT(instruction_stctl) }, 82 { "instruction_stctg", VCPU_STAT(instruction_stctg) }, 83 { "deliver_ckc", VCPU_STAT(deliver_ckc) }, 84 { "deliver_cputm", VCPU_STAT(deliver_cputm) }, 85 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) }, 86 { "deliver_external_call", VCPU_STAT(deliver_external_call) }, 87 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) }, 88 { "deliver_virtio", VCPU_STAT(deliver_virtio) }, 89 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) }, 90 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) }, 91 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) }, 92 { "deliver_program", 
VCPU_STAT(deliver_program) }, 93 { "deliver_io", VCPU_STAT(deliver_io) }, 94 { "deliver_machine_check", VCPU_STAT(deliver_machine_check) }, 95 { "exit_wait_state", VCPU_STAT(exit_wait_state) }, 96 { "inject_ckc", VCPU_STAT(inject_ckc) }, 97 { "inject_cputm", VCPU_STAT(inject_cputm) }, 98 { "inject_external_call", VCPU_STAT(inject_external_call) }, 99 { "inject_float_mchk", VM_STAT(inject_float_mchk) }, 100 { "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) }, 101 { "inject_io", VM_STAT(inject_io) }, 102 { "inject_mchk", VCPU_STAT(inject_mchk) }, 103 { "inject_pfault_done", VM_STAT(inject_pfault_done) }, 104 { "inject_program", VCPU_STAT(inject_program) }, 105 { "inject_restart", VCPU_STAT(inject_restart) }, 106 { "inject_service_signal", VM_STAT(inject_service_signal) }, 107 { "inject_set_prefix", VCPU_STAT(inject_set_prefix) }, 108 { "inject_stop_signal", VCPU_STAT(inject_stop_signal) }, 109 { "inject_pfault_init", VCPU_STAT(inject_pfault_init) }, 110 { "inject_virtio", VM_STAT(inject_virtio) }, 111 { "instruction_epsw", VCPU_STAT(instruction_epsw) }, 112 { "instruction_gs", VCPU_STAT(instruction_gs) }, 113 { "instruction_io_other", VCPU_STAT(instruction_io_other) }, 114 { "instruction_lpsw", VCPU_STAT(instruction_lpsw) }, 115 { "instruction_lpswe", VCPU_STAT(instruction_lpswe) }, 116 { "instruction_pfmf", VCPU_STAT(instruction_pfmf) }, 117 { "instruction_ptff", VCPU_STAT(instruction_ptff) }, 118 { "instruction_stidp", VCPU_STAT(instruction_stidp) }, 119 { "instruction_sck", VCPU_STAT(instruction_sck) }, 120 { "instruction_sckpf", VCPU_STAT(instruction_sckpf) }, 121 { "instruction_spx", VCPU_STAT(instruction_spx) }, 122 { "instruction_stpx", VCPU_STAT(instruction_stpx) }, 123 { "instruction_stap", VCPU_STAT(instruction_stap) }, 124 { "instruction_iske", VCPU_STAT(instruction_iske) }, 125 { "instruction_ri", VCPU_STAT(instruction_ri) }, 126 { "instruction_rrbe", VCPU_STAT(instruction_rrbe) }, 127 { "instruction_sske", VCPU_STAT(instruction_sske) }, 128 { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) }, 129 { "instruction_essa", VCPU_STAT(instruction_essa) }, 130 { "instruction_stsi", VCPU_STAT(instruction_stsi) }, 131 { "instruction_stfl", VCPU_STAT(instruction_stfl) }, 132 { "instruction_tb", VCPU_STAT(instruction_tb) }, 133 { "instruction_tpi", VCPU_STAT(instruction_tpi) }, 134 { "instruction_tprot", VCPU_STAT(instruction_tprot) }, 135 { "instruction_tsch", VCPU_STAT(instruction_tsch) }, 136 { "instruction_sthyi", VCPU_STAT(instruction_sthyi) }, 137 { "instruction_sie", VCPU_STAT(instruction_sie) }, 138 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) }, 139 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) }, 140 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) }, 141 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) }, 142 { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) }, 143 { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) }, 144 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) }, 145 { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) }, 146 { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) }, 147 { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) }, 148 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) }, 149 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) }, 150 { 
"instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) }, 151 { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) }, 152 { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) }, 153 { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) }, 154 { "instruction_diag_10", VCPU_STAT(diagnose_10) }, 155 { "instruction_diag_44", VCPU_STAT(diagnose_44) }, 156 { "instruction_diag_9c", VCPU_STAT(diagnose_9c) }, 157 { "instruction_diag_258", VCPU_STAT(diagnose_258) }, 158 { "instruction_diag_308", VCPU_STAT(diagnose_308) }, 159 { "instruction_diag_500", VCPU_STAT(diagnose_500) }, 160 { "instruction_diag_other", VCPU_STAT(diagnose_other) }, 161 { NULL } 162 }; 163 164 struct kvm_s390_tod_clock_ext { 165 __u8 epoch_idx; 166 __u64 tod; 167 __u8 reserved[7]; 168 } __packed; 169 170 /* allow nested virtualization in KVM (if enabled by user space) */ 171 static int nested; 172 module_param(nested, int, S_IRUGO); 173 MODULE_PARM_DESC(nested, "Nested virtualization support"); 174 175 /* allow 1m huge page guest backing, if !nested */ 176 static int hpage; 177 module_param(hpage, int, 0444); 178 MODULE_PARM_DESC(hpage, "1m huge page backing support"); 179 180 /* 181 * For now we handle at most 16 double words as this is what the s390 base 182 * kernel handles and stores in the prefix page. If we ever need to go beyond 183 * this, this requires changes to code, but the external uapi can stay. 184 */ 185 #define SIZE_INTERNAL 16 186 187 /* 188 * Base feature mask that defines default mask for facilities. Consists of the 189 * defines in FACILITIES_KVM and the non-hypervisor managed bits. 190 */ 191 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM }; 192 /* 193 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL 194 * and defines the facilities that can be enabled via a cpu model. 195 */ 196 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL }; 197 198 static unsigned long kvm_s390_fac_size(void) 199 { 200 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64); 201 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64); 202 BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) > 203 sizeof(S390_lowcore.stfle_fac_list)); 204 205 return SIZE_INTERNAL; 206 } 207 208 /* available cpu features supported by kvm */ 209 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS); 210 /* available subfunctions indicated via query / "test bit" */ 211 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc; 212 213 static struct gmap_notifier gmap_notifier; 214 static struct gmap_notifier vsie_gmap_notifier; 215 debug_info_t *kvm_s390_dbf; 216 217 /* Section: not file related */ 218 int kvm_arch_hardware_enable(void) 219 { 220 /* every s390 is virtualization enabled ;-) */ 221 return 0; 222 } 223 224 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, 225 unsigned long end); 226 227 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta) 228 { 229 u8 delta_idx = 0; 230 231 /* 232 * The TOD jumps by delta, we have to compensate this by adding 233 * -delta to the epoch. 234 */ 235 delta = -delta; 236 237 /* sign-extension - we're adding to signed values below */ 238 if ((s64)delta < 0) 239 delta_idx = -1; 240 241 scb->epoch += delta; 242 if (scb->ecd & ECD_MEF) { 243 scb->epdx += delta_idx; 244 if (scb->epoch < delta) 245 scb->epdx += 1; 246 } 247 } 248 249 /* 250 * This callback is executed during stop_machine(). 

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm_for_each_vcpu(i, vcpu, kvm) {
			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
			if (i == 0) {
				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
			}
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				kvm_clock_sync_scb(vcpu->arch.vsie_block,
						   *delta);
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

static inline int plo_test_bit(unsigned char nr)
{
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc;

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc == 0;
}
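
/*
 * kvm_s390_cpu_feat_init() below probes everything the host can offer to a
 * guest CPU model: the PERFORM LOCKED OPERATION function codes, the PTFF and
 * CPACF ("MSA") query masks, and the SCLP/STFLE derived features. The query
 * results are stored MSB first, matching what the query instructions
 * themselves report; e.g. a successful plo_test_bit(0) sets bit 0x80 of
 * plo[0], plo_test_bit(9) sets bit 0x40 of plo[1]. User space reads the
 * whole block back via the KVM_S390_VM_CPU_MACHINE_SUBFUNC attribute.
 */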

static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}

int kvm_arch_init(void *opaque)
{
	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		debug_unregister(kvm_s390_dbf);
		return -ENOMEM;
	}

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface.
*/ 431 return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC); 432 } 433 434 void kvm_arch_exit(void) 435 { 436 debug_unregister(kvm_s390_dbf); 437 } 438 439 /* Section: device related */ 440 long kvm_arch_dev_ioctl(struct file *filp, 441 unsigned int ioctl, unsigned long arg) 442 { 443 if (ioctl == KVM_S390_ENABLE_SIE) 444 return s390_enable_sie(); 445 return -EINVAL; 446 } 447 448 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) 449 { 450 int r; 451 452 switch (ext) { 453 case KVM_CAP_S390_PSW: 454 case KVM_CAP_S390_GMAP: 455 case KVM_CAP_SYNC_MMU: 456 #ifdef CONFIG_KVM_S390_UCONTROL 457 case KVM_CAP_S390_UCONTROL: 458 #endif 459 case KVM_CAP_ASYNC_PF: 460 case KVM_CAP_SYNC_REGS: 461 case KVM_CAP_ONE_REG: 462 case KVM_CAP_ENABLE_CAP: 463 case KVM_CAP_S390_CSS_SUPPORT: 464 case KVM_CAP_IOEVENTFD: 465 case KVM_CAP_DEVICE_CTRL: 466 case KVM_CAP_ENABLE_CAP_VM: 467 case KVM_CAP_S390_IRQCHIP: 468 case KVM_CAP_VM_ATTRIBUTES: 469 case KVM_CAP_MP_STATE: 470 case KVM_CAP_IMMEDIATE_EXIT: 471 case KVM_CAP_S390_INJECT_IRQ: 472 case KVM_CAP_S390_USER_SIGP: 473 case KVM_CAP_S390_USER_STSI: 474 case KVM_CAP_S390_SKEYS: 475 case KVM_CAP_S390_IRQ_STATE: 476 case KVM_CAP_S390_USER_INSTR0: 477 case KVM_CAP_S390_CMMA_MIGRATION: 478 case KVM_CAP_S390_AIS: 479 case KVM_CAP_S390_AIS_MIGRATION: 480 r = 1; 481 break; 482 case KVM_CAP_S390_HPAGE_1M: 483 r = 0; 484 if (hpage) 485 r = 1; 486 break; 487 case KVM_CAP_S390_MEM_OP: 488 r = MEM_OP_MAX_SIZE; 489 break; 490 case KVM_CAP_NR_VCPUS: 491 case KVM_CAP_MAX_VCPUS: 492 r = KVM_S390_BSCA_CPU_SLOTS; 493 if (!kvm_s390_use_sca_entries()) 494 r = KVM_MAX_VCPUS; 495 else if (sclp.has_esca && sclp.has_64bscao) 496 r = KVM_S390_ESCA_CPU_SLOTS; 497 break; 498 case KVM_CAP_NR_MEMSLOTS: 499 r = KVM_USER_MEM_SLOTS; 500 break; 501 case KVM_CAP_S390_COW: 502 r = MACHINE_HAS_ESOP; 503 break; 504 case KVM_CAP_S390_VECTOR_REGISTERS: 505 r = MACHINE_HAS_VX; 506 break; 507 case KVM_CAP_S390_RI: 508 r = test_facility(64); 509 break; 510 case KVM_CAP_S390_GS: 511 r = test_facility(133); 512 break; 513 case KVM_CAP_S390_BPB: 514 r = test_facility(82); 515 break; 516 default: 517 r = 0; 518 } 519 return r; 520 } 521 522 static void kvm_s390_sync_dirty_log(struct kvm *kvm, 523 struct kvm_memory_slot *memslot) 524 { 525 int i; 526 gfn_t cur_gfn, last_gfn; 527 unsigned long gaddr, vmaddr; 528 struct gmap *gmap = kvm->arch.gmap; 529 DECLARE_BITMAP(bitmap, _PAGE_ENTRIES); 530 531 /* Loop over all guest segments */ 532 cur_gfn = memslot->base_gfn; 533 last_gfn = memslot->base_gfn + memslot->npages; 534 for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) { 535 gaddr = gfn_to_gpa(cur_gfn); 536 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn); 537 if (kvm_is_error_hva(vmaddr)) 538 continue; 539 540 bitmap_zero(bitmap, _PAGE_ENTRIES); 541 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr); 542 for (i = 0; i < _PAGE_ENTRIES; i++) { 543 if (test_bit(i, bitmap)) 544 mark_page_dirty(kvm, cur_gfn + i); 545 } 546 547 if (fatal_signal_pending(current)) 548 return; 549 cond_resched(); 550 } 551 } 552 553 /* Section: vm related */ 554 static void sca_del_vcpu(struct kvm_vcpu *vcpu); 555 556 /* 557 * Get (and clear) the dirty memory log for a memory slot. 
558 */ 559 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, 560 struct kvm_dirty_log *log) 561 { 562 int r; 563 unsigned long n; 564 struct kvm_memslots *slots; 565 struct kvm_memory_slot *memslot; 566 int is_dirty = 0; 567 568 if (kvm_is_ucontrol(kvm)) 569 return -EINVAL; 570 571 mutex_lock(&kvm->slots_lock); 572 573 r = -EINVAL; 574 if (log->slot >= KVM_USER_MEM_SLOTS) 575 goto out; 576 577 slots = kvm_memslots(kvm); 578 memslot = id_to_memslot(slots, log->slot); 579 r = -ENOENT; 580 if (!memslot->dirty_bitmap) 581 goto out; 582 583 kvm_s390_sync_dirty_log(kvm, memslot); 584 r = kvm_get_dirty_log(kvm, log, &is_dirty); 585 if (r) 586 goto out; 587 588 /* Clear the dirty log */ 589 if (is_dirty) { 590 n = kvm_dirty_bitmap_bytes(memslot); 591 memset(memslot->dirty_bitmap, 0, n); 592 } 593 r = 0; 594 out: 595 mutex_unlock(&kvm->slots_lock); 596 return r; 597 } 598 599 static void icpt_operexc_on_all_vcpus(struct kvm *kvm) 600 { 601 unsigned int i; 602 struct kvm_vcpu *vcpu; 603 604 kvm_for_each_vcpu(i, vcpu, kvm) { 605 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu); 606 } 607 } 608 609 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) 610 { 611 int r; 612 613 if (cap->flags) 614 return -EINVAL; 615 616 switch (cap->cap) { 617 case KVM_CAP_S390_IRQCHIP: 618 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP"); 619 kvm->arch.use_irqchip = 1; 620 r = 0; 621 break; 622 case KVM_CAP_S390_USER_SIGP: 623 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP"); 624 kvm->arch.user_sigp = 1; 625 r = 0; 626 break; 627 case KVM_CAP_S390_VECTOR_REGISTERS: 628 mutex_lock(&kvm->lock); 629 if (kvm->created_vcpus) { 630 r = -EBUSY; 631 } else if (MACHINE_HAS_VX) { 632 set_kvm_facility(kvm->arch.model.fac_mask, 129); 633 set_kvm_facility(kvm->arch.model.fac_list, 129); 634 if (test_facility(134)) { 635 set_kvm_facility(kvm->arch.model.fac_mask, 134); 636 set_kvm_facility(kvm->arch.model.fac_list, 134); 637 } 638 if (test_facility(135)) { 639 set_kvm_facility(kvm->arch.model.fac_mask, 135); 640 set_kvm_facility(kvm->arch.model.fac_list, 135); 641 } 642 r = 0; 643 } else 644 r = -EINVAL; 645 mutex_unlock(&kvm->lock); 646 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s", 647 r ? "(not available)" : "(success)"); 648 break; 649 case KVM_CAP_S390_RI: 650 r = -EINVAL; 651 mutex_lock(&kvm->lock); 652 if (kvm->created_vcpus) { 653 r = -EBUSY; 654 } else if (test_facility(64)) { 655 set_kvm_facility(kvm->arch.model.fac_mask, 64); 656 set_kvm_facility(kvm->arch.model.fac_list, 64); 657 r = 0; 658 } 659 mutex_unlock(&kvm->lock); 660 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s", 661 r ? "(not available)" : "(success)"); 662 break; 663 case KVM_CAP_S390_AIS: 664 mutex_lock(&kvm->lock); 665 if (kvm->created_vcpus) { 666 r = -EBUSY; 667 } else { 668 set_kvm_facility(kvm->arch.model.fac_mask, 72); 669 set_kvm_facility(kvm->arch.model.fac_list, 72); 670 r = 0; 671 } 672 mutex_unlock(&kvm->lock); 673 VM_EVENT(kvm, 3, "ENABLE: AIS %s", 674 r ? "(not available)" : "(success)"); 675 break; 676 case KVM_CAP_S390_GS: 677 r = -EINVAL; 678 mutex_lock(&kvm->lock); 679 if (kvm->created_vcpus) { 680 r = -EBUSY; 681 } else if (test_facility(133)) { 682 set_kvm_facility(kvm->arch.model.fac_mask, 133); 683 set_kvm_facility(kvm->arch.model.fac_list, 133); 684 r = 0; 685 } 686 mutex_unlock(&kvm->lock); 687 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s", 688 r ? 
"(not available)" : "(success)"); 689 break; 690 case KVM_CAP_S390_HPAGE_1M: 691 mutex_lock(&kvm->lock); 692 if (kvm->created_vcpus) 693 r = -EBUSY; 694 else if (!hpage || kvm->arch.use_cmma) 695 r = -EINVAL; 696 else { 697 r = 0; 698 kvm->mm->context.allow_gmap_hpage_1m = 1; 699 /* 700 * We might have to create fake 4k page 701 * tables. To avoid that the hardware works on 702 * stale PGSTEs, we emulate these instructions. 703 */ 704 kvm->arch.use_skf = 0; 705 kvm->arch.use_pfmfi = 0; 706 } 707 mutex_unlock(&kvm->lock); 708 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s", 709 r ? "(not available)" : "(success)"); 710 break; 711 case KVM_CAP_S390_USER_STSI: 712 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI"); 713 kvm->arch.user_stsi = 1; 714 r = 0; 715 break; 716 case KVM_CAP_S390_USER_INSTR0: 717 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0"); 718 kvm->arch.user_instr0 = 1; 719 icpt_operexc_on_all_vcpus(kvm); 720 r = 0; 721 break; 722 default: 723 r = -EINVAL; 724 break; 725 } 726 return r; 727 } 728 729 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) 730 { 731 int ret; 732 733 switch (attr->attr) { 734 case KVM_S390_VM_MEM_LIMIT_SIZE: 735 ret = 0; 736 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes", 737 kvm->arch.mem_limit); 738 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr)) 739 ret = -EFAULT; 740 break; 741 default: 742 ret = -ENXIO; 743 break; 744 } 745 return ret; 746 } 747 748 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) 749 { 750 int ret; 751 unsigned int idx; 752 switch (attr->attr) { 753 case KVM_S390_VM_MEM_ENABLE_CMMA: 754 ret = -ENXIO; 755 if (!sclp.has_cmma) 756 break; 757 758 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support"); 759 mutex_lock(&kvm->lock); 760 if (kvm->created_vcpus) 761 ret = -EBUSY; 762 else if (kvm->mm->context.allow_gmap_hpage_1m) 763 ret = -EINVAL; 764 else { 765 kvm->arch.use_cmma = 1; 766 /* Not compatible with cmma. 
*/ 767 kvm->arch.use_pfmfi = 0; 768 ret = 0; 769 } 770 mutex_unlock(&kvm->lock); 771 break; 772 case KVM_S390_VM_MEM_CLR_CMMA: 773 ret = -ENXIO; 774 if (!sclp.has_cmma) 775 break; 776 ret = -EINVAL; 777 if (!kvm->arch.use_cmma) 778 break; 779 780 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states"); 781 mutex_lock(&kvm->lock); 782 idx = srcu_read_lock(&kvm->srcu); 783 s390_reset_cmma(kvm->arch.gmap->mm); 784 srcu_read_unlock(&kvm->srcu, idx); 785 mutex_unlock(&kvm->lock); 786 ret = 0; 787 break; 788 case KVM_S390_VM_MEM_LIMIT_SIZE: { 789 unsigned long new_limit; 790 791 if (kvm_is_ucontrol(kvm)) 792 return -EINVAL; 793 794 if (get_user(new_limit, (u64 __user *)attr->addr)) 795 return -EFAULT; 796 797 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT && 798 new_limit > kvm->arch.mem_limit) 799 return -E2BIG; 800 801 if (!new_limit) 802 return -EINVAL; 803 804 /* gmap_create takes last usable address */ 805 if (new_limit != KVM_S390_NO_MEM_LIMIT) 806 new_limit -= 1; 807 808 ret = -EBUSY; 809 mutex_lock(&kvm->lock); 810 if (!kvm->created_vcpus) { 811 /* gmap_create will round the limit up */ 812 struct gmap *new = gmap_create(current->mm, new_limit); 813 814 if (!new) { 815 ret = -ENOMEM; 816 } else { 817 gmap_remove(kvm->arch.gmap); 818 new->private = kvm; 819 kvm->arch.gmap = new; 820 ret = 0; 821 } 822 } 823 mutex_unlock(&kvm->lock); 824 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit); 825 VM_EVENT(kvm, 3, "New guest asce: 0x%pK", 826 (void *) kvm->arch.gmap->asce); 827 break; 828 } 829 default: 830 ret = -ENXIO; 831 break; 832 } 833 return ret; 834 } 835 836 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu); 837 838 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm) 839 { 840 struct kvm_vcpu *vcpu; 841 int i; 842 843 kvm_s390_vcpu_block_all(kvm); 844 845 kvm_for_each_vcpu(i, vcpu, kvm) 846 kvm_s390_vcpu_crypto_setup(vcpu); 847 848 kvm_s390_vcpu_unblock_all(kvm); 849 } 850 851 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr) 852 { 853 if (!test_kvm_facility(kvm, 76)) 854 return -EINVAL; 855 856 mutex_lock(&kvm->lock); 857 switch (attr->attr) { 858 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW: 859 get_random_bytes( 860 kvm->arch.crypto.crycb->aes_wrapping_key_mask, 861 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); 862 kvm->arch.crypto.aes_kw = 1; 863 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support"); 864 break; 865 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW: 866 get_random_bytes( 867 kvm->arch.crypto.crycb->dea_wrapping_key_mask, 868 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); 869 kvm->arch.crypto.dea_kw = 1; 870 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support"); 871 break; 872 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW: 873 kvm->arch.crypto.aes_kw = 0; 874 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0, 875 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); 876 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support"); 877 break; 878 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW: 879 kvm->arch.crypto.dea_kw = 0; 880 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0, 881 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); 882 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support"); 883 break; 884 default: 885 mutex_unlock(&kvm->lock); 886 return -ENXIO; 887 } 888 889 kvm_s390_vcpu_crypto_reset_all(kvm); 890 mutex_unlock(&kvm->lock); 891 return 0; 892 } 893 894 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req) 895 { 896 int cx; 897 struct kvm_vcpu *vcpu; 898 899 
	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}

/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_memory_slot *ms;
	struct kvm_memslots *slots;
	unsigned long ram_pages = 0;
	int slotnr;

	/* migration mode already enabled */
	if (kvm->arch.migration_mode)
		return 0;
	slots = kvm_memslots(kvm);
	if (!slots || !slots->used_slots)
		return -EINVAL;

	if (!kvm->arch.use_cmma) {
		kvm->arch.migration_mode = 1;
		return 0;
	}
	/* mark all the pages in active slots as dirty */
	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
		ms = slots->memslots + slotnr;
		/*
		 * The second half of the bitmap is only used on x86,
		 * and would be wasted otherwise, so we put it to good
		 * use here to keep track of the state of the storage
		 * attributes.
		 */
		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
		ram_pages += ms->npages;
	}
	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
	kvm->arch.migration_mode = 1;
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	return 0;
}

/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	/* migration mode already disabled */
	if (!kvm->arch.migration_mode)
		return 0;
	kvm->arch.migration_mode = 0;
	if (kvm->arch.use_cmma)
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
	return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int res = -ENXIO;

	mutex_lock(&kvm->slots_lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		res = kvm_s390_vm_start_migration(kvm);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->slots_lock);

	return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = kvm->arch.migration_mode;

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
		return -EINVAL;
	kvm_s390_set_tod_clock(kvm, &gtod);

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);

	return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}
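
/*
 * The TOD setters above and below are reached through the generic
 * KVM_SET_DEVICE_ATTR ioctl on the VM file descriptor (see
 * kvm_arch_vm_ioctl() and kvm_s390_vm_set_attr()). A user space caller
 * would look roughly like this (illustrative sketch only, error handling
 * omitted):
 *
 *	__u64 tod = ...;			// new guest TOD base
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_TOD,
 *		.attr  = KVM_S390_VM_TOD_LOW,
 *		.addr  = (__u64)&tod,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 *
 * KVM_S390_VM_TOD_EXT works the same way but passes a complete
 * struct kvm_s390_vm_tod_clock, including the epoch index.
 */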

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod = { 0 };

	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
			   sizeof(gtod.tod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, &gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_get_tod_clock(struct kvm *kvm,
				   struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_s390_tod_clock_ext htod;

	preempt_disable();

	get_tod_clock_ext((char *)&htod);

	gtod->tod = htod.tod + kvm->arch.epoch;
	gtod->epoch_idx = 0;
	if (test_kvm_facility(kvm, 139)) {
		gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
		if (gtod->tod < htod.tod)
			gtod->epoch_idx += 1;
	}

	preempt_enable();
}

static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));
	kvm_s390_get_tod_clock(kvm, &gtod);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);
	return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc
> unblocked_ibc) 1166 kvm->arch.model.ibc = unblocked_ibc; 1167 else if (proc->ibc < lowest_ibc) 1168 kvm->arch.model.ibc = lowest_ibc; 1169 else 1170 kvm->arch.model.ibc = proc->ibc; 1171 } 1172 memcpy(kvm->arch.model.fac_list, proc->fac_list, 1173 S390_ARCH_FAC_LIST_SIZE_BYTE); 1174 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx", 1175 kvm->arch.model.ibc, 1176 kvm->arch.model.cpuid); 1177 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx", 1178 kvm->arch.model.fac_list[0], 1179 kvm->arch.model.fac_list[1], 1180 kvm->arch.model.fac_list[2]); 1181 } else 1182 ret = -EFAULT; 1183 kfree(proc); 1184 out: 1185 mutex_unlock(&kvm->lock); 1186 return ret; 1187 } 1188 1189 static int kvm_s390_set_processor_feat(struct kvm *kvm, 1190 struct kvm_device_attr *attr) 1191 { 1192 struct kvm_s390_vm_cpu_feat data; 1193 1194 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data))) 1195 return -EFAULT; 1196 if (!bitmap_subset((unsigned long *) data.feat, 1197 kvm_s390_available_cpu_feat, 1198 KVM_S390_VM_CPU_FEAT_NR_BITS)) 1199 return -EINVAL; 1200 1201 mutex_lock(&kvm->lock); 1202 if (kvm->created_vcpus) { 1203 mutex_unlock(&kvm->lock); 1204 return -EBUSY; 1205 } 1206 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat, 1207 KVM_S390_VM_CPU_FEAT_NR_BITS); 1208 mutex_unlock(&kvm->lock); 1209 VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx", 1210 data.feat[0], 1211 data.feat[1], 1212 data.feat[2]); 1213 return 0; 1214 } 1215 1216 static int kvm_s390_set_processor_subfunc(struct kvm *kvm, 1217 struct kvm_device_attr *attr) 1218 { 1219 /* 1220 * Once supported by kernel + hw, we have to store the subfunctions 1221 * in kvm->arch and remember that user space configured them. 1222 */ 1223 return -ENXIO; 1224 } 1225 1226 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) 1227 { 1228 int ret = -ENXIO; 1229 1230 switch (attr->attr) { 1231 case KVM_S390_VM_CPU_PROCESSOR: 1232 ret = kvm_s390_set_processor(kvm, attr); 1233 break; 1234 case KVM_S390_VM_CPU_PROCESSOR_FEAT: 1235 ret = kvm_s390_set_processor_feat(kvm, attr); 1236 break; 1237 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: 1238 ret = kvm_s390_set_processor_subfunc(kvm, attr); 1239 break; 1240 } 1241 return ret; 1242 } 1243 1244 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr) 1245 { 1246 struct kvm_s390_vm_cpu_processor *proc; 1247 int ret = 0; 1248 1249 proc = kzalloc(sizeof(*proc), GFP_KERNEL); 1250 if (!proc) { 1251 ret = -ENOMEM; 1252 goto out; 1253 } 1254 proc->cpuid = kvm->arch.model.cpuid; 1255 proc->ibc = kvm->arch.model.ibc; 1256 memcpy(&proc->fac_list, kvm->arch.model.fac_list, 1257 S390_ARCH_FAC_LIST_SIZE_BYTE); 1258 VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx", 1259 kvm->arch.model.ibc, 1260 kvm->arch.model.cpuid); 1261 VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx", 1262 kvm->arch.model.fac_list[0], 1263 kvm->arch.model.fac_list[1], 1264 kvm->arch.model.fac_list[2]); 1265 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc))) 1266 ret = -EFAULT; 1267 kfree(proc); 1268 out: 1269 return ret; 1270 } 1271 1272 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr) 1273 { 1274 struct kvm_s390_vm_cpu_machine *mach; 1275 int ret = 0; 1276 1277 mach = kzalloc(sizeof(*mach), GFP_KERNEL); 1278 if (!mach) { 1279 ret = -ENOMEM; 1280 goto out; 1281 } 1282 get_cpu_id((struct cpuid *) &mach->cpuid); 1283 mach->ibc = sclp.ibc; 1284 
memcpy(&mach->fac_mask, kvm->arch.model.fac_mask, 1285 S390_ARCH_FAC_LIST_SIZE_BYTE); 1286 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list, 1287 sizeof(S390_lowcore.stfle_fac_list)); 1288 VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx", 1289 kvm->arch.model.ibc, 1290 kvm->arch.model.cpuid); 1291 VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx", 1292 mach->fac_mask[0], 1293 mach->fac_mask[1], 1294 mach->fac_mask[2]); 1295 VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx", 1296 mach->fac_list[0], 1297 mach->fac_list[1], 1298 mach->fac_list[2]); 1299 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach))) 1300 ret = -EFAULT; 1301 kfree(mach); 1302 out: 1303 return ret; 1304 } 1305 1306 static int kvm_s390_get_processor_feat(struct kvm *kvm, 1307 struct kvm_device_attr *attr) 1308 { 1309 struct kvm_s390_vm_cpu_feat data; 1310 1311 bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat, 1312 KVM_S390_VM_CPU_FEAT_NR_BITS); 1313 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data))) 1314 return -EFAULT; 1315 VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx", 1316 data.feat[0], 1317 data.feat[1], 1318 data.feat[2]); 1319 return 0; 1320 } 1321 1322 static int kvm_s390_get_machine_feat(struct kvm *kvm, 1323 struct kvm_device_attr *attr) 1324 { 1325 struct kvm_s390_vm_cpu_feat data; 1326 1327 bitmap_copy((unsigned long *) data.feat, 1328 kvm_s390_available_cpu_feat, 1329 KVM_S390_VM_CPU_FEAT_NR_BITS); 1330 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data))) 1331 return -EFAULT; 1332 VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx", 1333 data.feat[0], 1334 data.feat[1], 1335 data.feat[2]); 1336 return 0; 1337 } 1338 1339 static int kvm_s390_get_processor_subfunc(struct kvm *kvm, 1340 struct kvm_device_attr *attr) 1341 { 1342 /* 1343 * Once we can actually configure subfunctions (kernel + hw support), 1344 * we have to check if they were already set by user space, if so copy 1345 * them from kvm->arch. 
1346 */ 1347 return -ENXIO; 1348 } 1349 1350 static int kvm_s390_get_machine_subfunc(struct kvm *kvm, 1351 struct kvm_device_attr *attr) 1352 { 1353 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc, 1354 sizeof(struct kvm_s390_vm_cpu_subfunc))) 1355 return -EFAULT; 1356 return 0; 1357 } 1358 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) 1359 { 1360 int ret = -ENXIO; 1361 1362 switch (attr->attr) { 1363 case KVM_S390_VM_CPU_PROCESSOR: 1364 ret = kvm_s390_get_processor(kvm, attr); 1365 break; 1366 case KVM_S390_VM_CPU_MACHINE: 1367 ret = kvm_s390_get_machine(kvm, attr); 1368 break; 1369 case KVM_S390_VM_CPU_PROCESSOR_FEAT: 1370 ret = kvm_s390_get_processor_feat(kvm, attr); 1371 break; 1372 case KVM_S390_VM_CPU_MACHINE_FEAT: 1373 ret = kvm_s390_get_machine_feat(kvm, attr); 1374 break; 1375 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: 1376 ret = kvm_s390_get_processor_subfunc(kvm, attr); 1377 break; 1378 case KVM_S390_VM_CPU_MACHINE_SUBFUNC: 1379 ret = kvm_s390_get_machine_subfunc(kvm, attr); 1380 break; 1381 } 1382 return ret; 1383 } 1384 1385 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr) 1386 { 1387 int ret; 1388 1389 switch (attr->group) { 1390 case KVM_S390_VM_MEM_CTRL: 1391 ret = kvm_s390_set_mem_control(kvm, attr); 1392 break; 1393 case KVM_S390_VM_TOD: 1394 ret = kvm_s390_set_tod(kvm, attr); 1395 break; 1396 case KVM_S390_VM_CPU_MODEL: 1397 ret = kvm_s390_set_cpu_model(kvm, attr); 1398 break; 1399 case KVM_S390_VM_CRYPTO: 1400 ret = kvm_s390_vm_set_crypto(kvm, attr); 1401 break; 1402 case KVM_S390_VM_MIGRATION: 1403 ret = kvm_s390_vm_set_migration(kvm, attr); 1404 break; 1405 default: 1406 ret = -ENXIO; 1407 break; 1408 } 1409 1410 return ret; 1411 } 1412 1413 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr) 1414 { 1415 int ret; 1416 1417 switch (attr->group) { 1418 case KVM_S390_VM_MEM_CTRL: 1419 ret = kvm_s390_get_mem_control(kvm, attr); 1420 break; 1421 case KVM_S390_VM_TOD: 1422 ret = kvm_s390_get_tod(kvm, attr); 1423 break; 1424 case KVM_S390_VM_CPU_MODEL: 1425 ret = kvm_s390_get_cpu_model(kvm, attr); 1426 break; 1427 case KVM_S390_VM_MIGRATION: 1428 ret = kvm_s390_vm_get_migration(kvm, attr); 1429 break; 1430 default: 1431 ret = -ENXIO; 1432 break; 1433 } 1434 1435 return ret; 1436 } 1437 1438 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) 1439 { 1440 int ret; 1441 1442 switch (attr->group) { 1443 case KVM_S390_VM_MEM_CTRL: 1444 switch (attr->attr) { 1445 case KVM_S390_VM_MEM_ENABLE_CMMA: 1446 case KVM_S390_VM_MEM_CLR_CMMA: 1447 ret = sclp.has_cmma ? 
			      0 : -ENXIO;
			break;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
			ret = 0;
			break;
		/* configuring subfunctions is not supported yet */
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_MIGRATION:
		ret = 0;
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_uses_skeys(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		r = get_guest_storage_key(current->mm, hva, &keys[i]);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);

	if (!r) {
		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
				 sizeof(uint8_t) * args->count);
		if (r)
			r = -EFAULT;
	}

	kvfree(keys);
	return r;
}
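
/*
 * KVM_S390_SET_SKEYS, handled below, is the counterpart typically used when
 * a guest is restored: it forces storage key handling on via
 * s390_enable_skey() and replays the saved key for every guest page. Each
 * key byte carries the access-control bits, fetch-protection, reference and
 * change bits; the lowest order bit is reserved and must be zero.
 * set_guest_storage_key() can fail on a not-yet-faulted-in page, in which
 * case the page is faulted in via fixup_user_fault() and the same index is
 * retried.
 */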

static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;
	bool unlocked;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	i = 0;
	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	while (i < args->count) {
		unlocked = false;
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			break;
		}

		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
		if (r) {
			r = fixup_user_fault(current, current->mm, hva,
					     FAULT_FLAG_WRITE, &unlocked);
			if (r)
				break;
		}
		if (!r)
			i++;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);
out:
	kvfree(keys);
	return r;
}

/*
 * Base address and length must be sent at the start of each block, therefore
 * it's cheaper to send some clean data, as long as it's less than the size of
 * two longs.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/* for consistency */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)

/*
 * Similar to gfn_to_memslot, but returns the index of a memslot also when the
 * address falls in a hole. In that case the index of one of the memslots
 * bordering the hole is returned.
 */
static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
{
	int start = 0, end = slots->used_slots;
	int slot = atomic_read(&slots->lru_slot);
	struct kvm_memory_slot *memslots = slots->memslots;

	if (gfn >= memslots[slot].base_gfn &&
	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
		return slot;

	while (start < end) {
		slot = start + (end - start) / 2;

		if (gfn >= memslots[slot].base_gfn)
			end = slot;
		else
			start = slot + 1;
	}

	if (gfn >= memslots[start].base_gfn &&
	    gfn < memslots[start].base_gfn + memslots[start].npages) {
		atomic_set(&slots->lru_slot, start);
	}

	return start;
}
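
/*
 * The binary search above relies on the memslot array being kept sorted by
 * base_gfn in descending order by the generic memslot code, so memslots[0]
 * always describes the highest guest address range. This is also why
 * kvm_s390_get_cmma() below can derive the end of guest memory from
 * memslots[0] alone. If gfn lies in a hole, the returned index refers to a
 * slot bordering that hole, which is good enough for the "find the next
 * dirty bit" walk in kvm_s390_next_dirty_cmma().
 */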

static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
			      u8 *res, unsigned long bufsize)
{
	unsigned long pgstev, hva, cur_gfn = args->start_gfn;

	args->count = 0;
	while (args->count < bufsize) {
		hva = gfn_to_hva(kvm, cur_gfn);
		/*
		 * We return an error if the first value was invalid, but we
		 * return successfully if at least one value was copied.
		 */
		if (kvm_is_error_hva(hva))
			return args->count ? 0 : -EFAULT;
		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
			pgstev = 0;
		res[args->count++] = (pgstev >> 24) & 0x43;
		cur_gfn++;
	}

	return 0;
}

static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
					      unsigned long cur_gfn)
{
	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
	struct kvm_memory_slot *ms = slots->memslots + slotidx;
	unsigned long ofs = cur_gfn - ms->base_gfn;

	if (ms->base_gfn + ms->npages <= cur_gfn) {
		slotidx--;
		/* If we are above the highest slot, wrap around */
		if (slotidx < 0)
			slotidx = slots->used_slots - 1;

		ms = slots->memslots + slotidx;
		ofs = 0;
	}
	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
	while ((slotidx > 0) && (ofs >= ms->npages)) {
		slotidx--;
		ms = slots->memslots + slotidx;
		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
	}
	return ms->base_gfn + ofs;
}

static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
			     u8 *res, unsigned long bufsize)
{
	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
	struct kvm_memslots *slots = kvm_memslots(kvm);
	struct kvm_memory_slot *ms;

	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
	ms = gfn_to_memslot(kvm, cur_gfn);
	args->count = 0;
	args->start_gfn = cur_gfn;
	if (!ms)
		return 0;
	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
	mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;

	while (args->count < bufsize) {
		hva = gfn_to_hva(kvm, cur_gfn);
		if (kvm_is_error_hva(hva))
			return 0;
		/* Decrement only if we actually flipped the bit to 0 */
		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
			atomic64_dec(&kvm->arch.cmma_dirty_pages);
		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
			pgstev = 0;
		/* Save the value */
		res[args->count++] = (pgstev >> 24) & 0x43;
		/* If the next bit is too far away, stop. */
		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
			return 0;
		/* If we reached the previous "next", find the next one */
		if (cur_gfn == next_gfn)
			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
		/* Reached the end of memory or of the buffer, stop */
		if ((next_gfn >= mem_end) ||
		    (next_gfn - args->start_gfn >= bufsize))
			return 0;
		cur_gfn++;
		/* Reached the end of the current memslot, take the next one. */
		if (cur_gfn - ms->base_gfn >= ms->npages) {
			ms = gfn_to_memslot(kvm, cur_gfn);
			if (!ms)
				return 0;
		}
	}
	return 0;
}
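
/*
 * In both helpers above, the value reported for a page is (pgstev >> 24) &
 * 0x43: the two low bits encode the CMMA usage state (stable, unused,
 * potentially volatile, volatile, as defined by the _PGSTE_GPS_USAGE_* bits),
 * and 0x40 carries the NODAT indication. This is the same encoding user
 * space later feeds back through KVM_S390_SET_CMMA_BITS on the destination
 * side of a migration.
 */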

/*
 * This function searches for the next page with dirty CMMA attributes, and
 * saves the attributes in the buffer up to either the end of the buffer or
 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
 * no trailing clean bytes are saved.
 * In case no dirty bits were found, or if CMMA was not enabled or used, the
 * output buffer will indicate 0 as length.
 */
static int kvm_s390_get_cmma_bits(struct kvm *kvm,
				  struct kvm_s390_cmma_log *args)
{
	unsigned long bufsize;
	int srcu_idx, peek, ret;
	u8 *values;

	if (!kvm->arch.use_cmma)
		return -ENXIO;
	/* Invalid/unsupported flags were specified */
	if (args->flags & ~KVM_S390_CMMA_PEEK)
		return -EINVAL;
	/* Migration mode query, and we are not doing a migration */
	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
	if (!peek && !kvm->arch.migration_mode)
		return -EINVAL;
	/* CMMA is disabled or was not used, or the buffer has length zero */
	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
	if (!bufsize || !kvm->mm->context.uses_cmm) {
		memset(args, 0, sizeof(*args));
		return 0;
	}
	/* We are not peeking, and there are no dirty pages */
	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
		memset(args, 0, sizeof(*args));
		return 0;
	}

	values = vmalloc(bufsize);
	if (!values)
		return -ENOMEM;

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	if (peek)
		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
	else
		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);

	if (kvm->arch.migration_mode)
		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
	else
		args->remaining = 0;

	if (copy_to_user((void __user *)args->values, values, args->count))
		ret = -EFAULT;

	vfree(values);
	return ret;
}
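
/*
 * A migration tool drives kvm_s390_get_cmma_bits() in a loop until
 * args->remaining drops to zero. A rough user space sketch (illustrative
 * only; send(), buf, buflen and vm_fd are placeholders and error handling
 * is trimmed):
 *
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count     = buflen,
 *		.flags     = 0,		// or KVM_S390_CMMA_PEEK
 *		.values    = (__u64)buf,
 *	};
 *	do {
 *		ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log);
 *		send(log.start_gfn, log.count, buf);
 *		log.start_gfn += log.count;
 *		log.count = buflen;
 *	} while (log.remaining);
 *
 * Without KVM_S390_CMMA_PEEK this requires migration mode to have been
 * enabled first via the KVM_S390_VM_MIGRATION attribute group.
 */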

/*
 * This function sets the CMMA attributes for the given pages. If the input
 * buffer has zero length, no action is taken, otherwise the attributes are
 * set and the mm->context.uses_cmm flag is set.
 */
static int kvm_s390_set_cmma_bits(struct kvm *kvm,
				  const struct kvm_s390_cmma_log *args)
{
	unsigned long hva, mask, pgstev, i;
	uint8_t *bits;
	int srcu_idx, r = 0;

	mask = args->mask;

	if (!kvm->arch.use_cmma)
		return -ENXIO;
	/* invalid/unsupported flags */
	if (args->flags != 0)
		return -EINVAL;
	/* Enforce sane limit on memory allocation */
	if (args->count > KVM_S390_CMMA_SIZE_MAX)
		return -EINVAL;
	/* Nothing to do */
	if (args->count == 0)
		return 0;

	bits = vmalloc(array_size(sizeof(*bits), args->count));
	if (!bits)
		return -ENOMEM;

	r = copy_from_user(bits, (void __user *)args->values, args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		pgstev = bits[i];
		pgstev = pgstev << 24;
		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
		set_pgste_bits(kvm->mm, hva, mask, pgstev);
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);

	if (!kvm->mm->context.uses_cmm) {
		down_write(&kvm->mm->mmap_sem);
		kvm->mm->context.uses_cmm = 1;
		up_write(&kvm->mm->mmap_sem);
	}
out:
	vfree(bits);
	return r;
}

long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing.
*/ 1913 memset(&routing, 0, sizeof(routing)); 1914 r = kvm_set_irq_routing(kvm, &routing, 0, 0); 1915 } 1916 break; 1917 } 1918 case KVM_SET_DEVICE_ATTR: { 1919 r = -EFAULT; 1920 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 1921 break; 1922 r = kvm_s390_vm_set_attr(kvm, &attr); 1923 break; 1924 } 1925 case KVM_GET_DEVICE_ATTR: { 1926 r = -EFAULT; 1927 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 1928 break; 1929 r = kvm_s390_vm_get_attr(kvm, &attr); 1930 break; 1931 } 1932 case KVM_HAS_DEVICE_ATTR: { 1933 r = -EFAULT; 1934 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 1935 break; 1936 r = kvm_s390_vm_has_attr(kvm, &attr); 1937 break; 1938 } 1939 case KVM_S390_GET_SKEYS: { 1940 struct kvm_s390_skeys args; 1941 1942 r = -EFAULT; 1943 if (copy_from_user(&args, argp, 1944 sizeof(struct kvm_s390_skeys))) 1945 break; 1946 r = kvm_s390_get_skeys(kvm, &args); 1947 break; 1948 } 1949 case KVM_S390_SET_SKEYS: { 1950 struct kvm_s390_skeys args; 1951 1952 r = -EFAULT; 1953 if (copy_from_user(&args, argp, 1954 sizeof(struct kvm_s390_skeys))) 1955 break; 1956 r = kvm_s390_set_skeys(kvm, &args); 1957 break; 1958 } 1959 case KVM_S390_GET_CMMA_BITS: { 1960 struct kvm_s390_cmma_log args; 1961 1962 r = -EFAULT; 1963 if (copy_from_user(&args, argp, sizeof(args))) 1964 break; 1965 mutex_lock(&kvm->slots_lock); 1966 r = kvm_s390_get_cmma_bits(kvm, &args); 1967 mutex_unlock(&kvm->slots_lock); 1968 if (!r) { 1969 r = copy_to_user(argp, &args, sizeof(args)); 1970 if (r) 1971 r = -EFAULT; 1972 } 1973 break; 1974 } 1975 case KVM_S390_SET_CMMA_BITS: { 1976 struct kvm_s390_cmma_log args; 1977 1978 r = -EFAULT; 1979 if (copy_from_user(&args, argp, sizeof(args))) 1980 break; 1981 mutex_lock(&kvm->slots_lock); 1982 r = kvm_s390_set_cmma_bits(kvm, &args); 1983 mutex_unlock(&kvm->slots_lock); 1984 break; 1985 } 1986 default: 1987 r = -ENOTTY; 1988 } 1989 1990 return r; 1991 } 1992 1993 static int kvm_s390_query_ap_config(u8 *config) 1994 { 1995 u32 fcn_code = 0x04000000UL; 1996 u32 cc = 0; 1997 1998 memset(config, 0, 128); 1999 asm volatile( 2000 "lgr 0,%1\n" 2001 "lgr 2,%2\n" 2002 ".long 0xb2af0000\n" /* PQAP(QCI) */ 2003 "0: ipm %0\n" 2004 "srl %0,28\n" 2005 "1:\n" 2006 EX_TABLE(0b, 1b) 2007 : "+r" (cc) 2008 : "r" (fcn_code), "r" (config) 2009 : "cc", "0", "2", "memory" 2010 ); 2011 2012 return cc; 2013 } 2014 2015 static int kvm_s390_apxa_installed(void) 2016 { 2017 u8 config[128]; 2018 int cc; 2019 2020 if (test_facility(12)) { 2021 cc = kvm_s390_query_ap_config(config); 2022 2023 if (cc) 2024 pr_err("PQAP(QCI) failed with cc=%d", cc); 2025 else 2026 return config[0] & 0x40; 2027 } 2028 2029 return 0; 2030 } 2031 2032 static void kvm_s390_set_crycb_format(struct kvm *kvm) 2033 { 2034 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb; 2035 2036 if (kvm_s390_apxa_installed()) 2037 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2; 2038 else 2039 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1; 2040 } 2041 2042 static u64 kvm_s390_get_initial_cpuid(void) 2043 { 2044 struct cpuid cpuid; 2045 2046 get_cpu_id(&cpuid); 2047 cpuid.version = 0xff; 2048 return *((u64 *) &cpuid); 2049 } 2050 2051 static void kvm_s390_crypto_init(struct kvm *kvm) 2052 { 2053 if (!test_kvm_facility(kvm, 76)) 2054 return; 2055 2056 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb; 2057 kvm_s390_set_crycb_format(kvm); 2058 2059 /* Enable AES/DEA protected key functions by default */ 2060 kvm->arch.crypto.aes_kw = 1; 2061 kvm->arch.crypto.dea_kw = 1; 2062 
get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 2063 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); 2064 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 2065 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); 2066 } 2067 2068 static void sca_dispose(struct kvm *kvm) 2069 { 2070 if (kvm->arch.use_esca) 2071 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block)); 2072 else 2073 free_page((unsigned long)(kvm->arch.sca)); 2074 kvm->arch.sca = NULL; 2075 } 2076 2077 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) 2078 { 2079 gfp_t alloc_flags = GFP_KERNEL; 2080 int i, rc; 2081 char debug_name[16]; 2082 static unsigned long sca_offset; 2083 2084 rc = -EINVAL; 2085 #ifdef CONFIG_KVM_S390_UCONTROL 2086 if (type & ~KVM_VM_S390_UCONTROL) 2087 goto out_err; 2088 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN))) 2089 goto out_err; 2090 #else 2091 if (type) 2092 goto out_err; 2093 #endif 2094 2095 rc = s390_enable_sie(); 2096 if (rc) 2097 goto out_err; 2098 2099 rc = -ENOMEM; 2100 2101 if (!sclp.has_64bscao) 2102 alloc_flags |= GFP_DMA; 2103 rwlock_init(&kvm->arch.sca_lock); 2104 /* start with basic SCA */ 2105 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags); 2106 if (!kvm->arch.sca) 2107 goto out_err; 2108 spin_lock(&kvm_lock); 2109 sca_offset += 16; 2110 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE) 2111 sca_offset = 0; 2112 kvm->arch.sca = (struct bsca_block *) 2113 ((char *) kvm->arch.sca + sca_offset); 2114 spin_unlock(&kvm_lock); 2115 2116 sprintf(debug_name, "kvm-%u", current->pid); 2117 2118 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long)); 2119 if (!kvm->arch.dbf) 2120 goto out_err; 2121 2122 BUILD_BUG_ON(sizeof(struct sie_page2) != 4096); 2123 kvm->arch.sie_page2 = 2124 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA); 2125 if (!kvm->arch.sie_page2) 2126 goto out_err; 2127 2128 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list; 2129 2130 for (i = 0; i < kvm_s390_fac_size(); i++) { 2131 kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] & 2132 (kvm_s390_fac_base[i] | 2133 kvm_s390_fac_ext[i]); 2134 kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] & 2135 kvm_s390_fac_base[i]; 2136 } 2137 2138 /* we are always in czam mode - even on pre z14 machines */ 2139 set_kvm_facility(kvm->arch.model.fac_mask, 138); 2140 set_kvm_facility(kvm->arch.model.fac_list, 138); 2141 /* we emulate STHYI in kvm */ 2142 set_kvm_facility(kvm->arch.model.fac_mask, 74); 2143 set_kvm_facility(kvm->arch.model.fac_list, 74); 2144 if (MACHINE_HAS_TLB_GUEST) { 2145 set_kvm_facility(kvm->arch.model.fac_mask, 147); 2146 set_kvm_facility(kvm->arch.model.fac_list, 147); 2147 } 2148 2149 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid(); 2150 kvm->arch.model.ibc = sclp.ibc & 0x0fff; 2151 2152 kvm_s390_crypto_init(kvm); 2153 2154 mutex_init(&kvm->arch.float_int.ais_lock); 2155 spin_lock_init(&kvm->arch.float_int.lock); 2156 for (i = 0; i < FIRQ_LIST_COUNT; i++) 2157 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]); 2158 init_waitqueue_head(&kvm->arch.ipte_wq); 2159 mutex_init(&kvm->arch.ipte_mutex); 2160 2161 debug_register_view(kvm->arch.dbf, &debug_sprintf_view); 2162 VM_EVENT(kvm, 3, "vm created with type %lu", type); 2163 2164 if (type & KVM_VM_S390_UCONTROL) { 2165 kvm->arch.gmap = NULL; 2166 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT; 2167 } else { 2168 if (sclp.hamax == U64_MAX) 2169 kvm->arch.mem_limit = TASK_SIZE_MAX; 2170 else 2171 kvm->arch.mem_limit = 
min_t(unsigned long, TASK_SIZE_MAX, 2172 sclp.hamax + 1); 2173 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1); 2174 if (!kvm->arch.gmap) 2175 goto out_err; 2176 kvm->arch.gmap->private = kvm; 2177 kvm->arch.gmap->pfault_enabled = 0; 2178 } 2179 2180 kvm->arch.use_pfmfi = sclp.has_pfmfi; 2181 kvm->arch.use_skf = sclp.has_skey; 2182 spin_lock_init(&kvm->arch.start_stop_lock); 2183 kvm_s390_vsie_init(kvm); 2184 kvm_s390_gisa_init(kvm); 2185 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid); 2186 2187 return 0; 2188 out_err: 2189 free_page((unsigned long)kvm->arch.sie_page2); 2190 debug_unregister(kvm->arch.dbf); 2191 sca_dispose(kvm); 2192 KVM_EVENT(3, "creation of vm failed: %d", rc); 2193 return rc; 2194 } 2195 2196 bool kvm_arch_has_vcpu_debugfs(void) 2197 { 2198 return false; 2199 } 2200 2201 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu) 2202 { 2203 return 0; 2204 } 2205 2206 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) 2207 { 2208 VCPU_EVENT(vcpu, 3, "%s", "free cpu"); 2209 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id); 2210 kvm_s390_clear_local_irqs(vcpu); 2211 kvm_clear_async_pf_completion_queue(vcpu); 2212 if (!kvm_is_ucontrol(vcpu->kvm)) 2213 sca_del_vcpu(vcpu); 2214 2215 if (kvm_is_ucontrol(vcpu->kvm)) 2216 gmap_remove(vcpu->arch.gmap); 2217 2218 if (vcpu->kvm->arch.use_cmma) 2219 kvm_s390_vcpu_unsetup_cmma(vcpu); 2220 free_page((unsigned long)(vcpu->arch.sie_block)); 2221 2222 kvm_vcpu_uninit(vcpu); 2223 kmem_cache_free(kvm_vcpu_cache, vcpu); 2224 } 2225 2226 static void kvm_free_vcpus(struct kvm *kvm) 2227 { 2228 unsigned int i; 2229 struct kvm_vcpu *vcpu; 2230 2231 kvm_for_each_vcpu(i, vcpu, kvm) 2232 kvm_arch_vcpu_destroy(vcpu); 2233 2234 mutex_lock(&kvm->lock); 2235 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) 2236 kvm->vcpus[i] = NULL; 2237 2238 atomic_set(&kvm->online_vcpus, 0); 2239 mutex_unlock(&kvm->lock); 2240 } 2241 2242 void kvm_arch_destroy_vm(struct kvm *kvm) 2243 { 2244 kvm_free_vcpus(kvm); 2245 sca_dispose(kvm); 2246 debug_unregister(kvm->arch.dbf); 2247 kvm_s390_gisa_destroy(kvm); 2248 free_page((unsigned long)kvm->arch.sie_page2); 2249 if (!kvm_is_ucontrol(kvm)) 2250 gmap_remove(kvm->arch.gmap); 2251 kvm_s390_destroy_adapters(kvm); 2252 kvm_s390_clear_float_irqs(kvm); 2253 kvm_s390_vsie_destroy(kvm); 2254 KVM_EVENT(3, "vm 0x%pK destroyed", kvm); 2255 } 2256 2257 /* Section: vcpu related */ 2258 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu) 2259 { 2260 vcpu->arch.gmap = gmap_create(current->mm, -1UL); 2261 if (!vcpu->arch.gmap) 2262 return -ENOMEM; 2263 vcpu->arch.gmap->private = vcpu->kvm; 2264 2265 return 0; 2266 } 2267 2268 static void sca_del_vcpu(struct kvm_vcpu *vcpu) 2269 { 2270 if (!kvm_s390_use_sca_entries()) 2271 return; 2272 read_lock(&vcpu->kvm->arch.sca_lock); 2273 if (vcpu->kvm->arch.use_esca) { 2274 struct esca_block *sca = vcpu->kvm->arch.sca; 2275 2276 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); 2277 sca->cpu[vcpu->vcpu_id].sda = 0; 2278 } else { 2279 struct bsca_block *sca = vcpu->kvm->arch.sca; 2280 2281 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); 2282 sca->cpu[vcpu->vcpu_id].sda = 0; 2283 } 2284 read_unlock(&vcpu->kvm->arch.sca_lock); 2285 } 2286 2287 static void sca_add_vcpu(struct kvm_vcpu *vcpu) 2288 { 2289 if (!kvm_s390_use_sca_entries()) { 2290 struct bsca_block *sca = vcpu->kvm->arch.sca; 2291 2292 /* we still need the basic sca for the ipte control */ 2293 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2294 
vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; 2295 return; 2296 } 2297 read_lock(&vcpu->kvm->arch.sca_lock); 2298 if (vcpu->kvm->arch.use_esca) { 2299 struct esca_block *sca = vcpu->kvm->arch.sca; 2300 2301 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; 2302 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2303 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU; 2304 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; 2305 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); 2306 } else { 2307 struct bsca_block *sca = vcpu->kvm->arch.sca; 2308 2309 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; 2310 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2311 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; 2312 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); 2313 } 2314 read_unlock(&vcpu->kvm->arch.sca_lock); 2315 } 2316 2317 /* Basic SCA to Extended SCA data copy routines */ 2318 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s) 2319 { 2320 d->sda = s->sda; 2321 d->sigp_ctrl.c = s->sigp_ctrl.c; 2322 d->sigp_ctrl.scn = s->sigp_ctrl.scn; 2323 } 2324 2325 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s) 2326 { 2327 int i; 2328 2329 d->ipte_control = s->ipte_control; 2330 d->mcn[0] = s->mcn; 2331 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++) 2332 sca_copy_entry(&d->cpu[i], &s->cpu[i]); 2333 } 2334 2335 static int sca_switch_to_extended(struct kvm *kvm) 2336 { 2337 struct bsca_block *old_sca = kvm->arch.sca; 2338 struct esca_block *new_sca; 2339 struct kvm_vcpu *vcpu; 2340 unsigned int vcpu_idx; 2341 u32 scaol, scaoh; 2342 2343 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO); 2344 if (!new_sca) 2345 return -ENOMEM; 2346 2347 scaoh = (u32)((u64)(new_sca) >> 32); 2348 scaol = (u32)(u64)(new_sca) & ~0x3fU; 2349 2350 kvm_s390_vcpu_block_all(kvm); 2351 write_lock(&kvm->arch.sca_lock); 2352 2353 sca_copy_b_to_e(new_sca, old_sca); 2354 2355 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) { 2356 vcpu->arch.sie_block->scaoh = scaoh; 2357 vcpu->arch.sie_block->scaol = scaol; 2358 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; 2359 } 2360 kvm->arch.sca = new_sca; 2361 kvm->arch.use_esca = 1; 2362 2363 write_unlock(&kvm->arch.sca_lock); 2364 kvm_s390_vcpu_unblock_all(kvm); 2365 2366 free_page((unsigned long)old_sca); 2367 2368 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)", 2369 old_sca, kvm->arch.sca); 2370 return 0; 2371 } 2372 2373 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id) 2374 { 2375 int rc; 2376 2377 if (!kvm_s390_use_sca_entries()) { 2378 if (id < KVM_MAX_VCPUS) 2379 return true; 2380 return false; 2381 } 2382 if (id < KVM_S390_BSCA_CPU_SLOTS) 2383 return true; 2384 if (!sclp.has_esca || !sclp.has_64bscao) 2385 return false; 2386 2387 mutex_lock(&kvm->lock); 2388 rc = kvm->arch.use_esca ? 
0 : sca_switch_to_extended(kvm); 2389 mutex_unlock(&kvm->lock); 2390 2391 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS; 2392 } 2393 2394 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) 2395 { 2396 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 2397 kvm_clear_async_pf_completion_queue(vcpu); 2398 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX | 2399 KVM_SYNC_GPRS | 2400 KVM_SYNC_ACRS | 2401 KVM_SYNC_CRS | 2402 KVM_SYNC_ARCH0 | 2403 KVM_SYNC_PFAULT; 2404 kvm_s390_set_prefix(vcpu, 0); 2405 if (test_kvm_facility(vcpu->kvm, 64)) 2406 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; 2407 if (test_kvm_facility(vcpu->kvm, 82)) 2408 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC; 2409 if (test_kvm_facility(vcpu->kvm, 133)) 2410 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB; 2411 if (test_kvm_facility(vcpu->kvm, 156)) 2412 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN; 2413 /* fprs can be synchronized via vrs, even if the guest has no vx. With 2414 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format. 2415 */ 2416 if (MACHINE_HAS_VX) 2417 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS; 2418 else 2419 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS; 2420 2421 if (kvm_is_ucontrol(vcpu->kvm)) 2422 return __kvm_ucontrol_vcpu_init(vcpu); 2423 2424 return 0; 2425 } 2426 2427 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 2428 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2429 { 2430 WARN_ON_ONCE(vcpu->arch.cputm_start != 0); 2431 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 2432 vcpu->arch.cputm_start = get_tod_clock_fast(); 2433 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 2434 } 2435 2436 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 2437 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2438 { 2439 WARN_ON_ONCE(vcpu->arch.cputm_start == 0); 2440 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 2441 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start; 2442 vcpu->arch.cputm_start = 0; 2443 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 2444 } 2445 2446 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 2447 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2448 { 2449 WARN_ON_ONCE(vcpu->arch.cputm_enabled); 2450 vcpu->arch.cputm_enabled = true; 2451 __start_cpu_timer_accounting(vcpu); 2452 } 2453 2454 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 2455 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2456 { 2457 WARN_ON_ONCE(!vcpu->arch.cputm_enabled); 2458 __stop_cpu_timer_accounting(vcpu); 2459 vcpu->arch.cputm_enabled = false; 2460 } 2461 2462 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2463 { 2464 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 2465 __enable_cpu_timer_accounting(vcpu); 2466 preempt_enable(); 2467 } 2468 2469 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2470 { 2471 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 2472 __disable_cpu_timer_accounting(vcpu); 2473 preempt_enable(); 2474 } 2475 2476 /* set the cpu timer - may only be called from the VCPU thread itself */ 2477 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm) 2478 { 2479 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 2480 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 2481 if (vcpu->arch.cputm_enabled) 2482 vcpu->arch.cputm_start = get_tod_clock_fast(); 2483 vcpu->arch.sie_block->cputm = cputm; 
2484 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 2485 preempt_enable(); 2486 } 2487 2488 /* update and get the cpu timer - can also be called from other VCPU threads */ 2489 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu) 2490 { 2491 unsigned int seq; 2492 __u64 value; 2493 2494 if (unlikely(!vcpu->arch.cputm_enabled)) 2495 return vcpu->arch.sie_block->cputm; 2496 2497 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 2498 do { 2499 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount); 2500 /* 2501 * If the writer would ever execute a read in the critical 2502 * section, e.g. in irq context, we have a deadlock. 2503 */ 2504 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu); 2505 value = vcpu->arch.sie_block->cputm; 2506 /* if cputm_start is 0, accounting is being started/stopped */ 2507 if (likely(vcpu->arch.cputm_start)) 2508 value -= get_tod_clock_fast() - vcpu->arch.cputm_start; 2509 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1)); 2510 preempt_enable(); 2511 return value; 2512 } 2513 2514 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 2515 { 2516 2517 gmap_enable(vcpu->arch.enabled_gmap); 2518 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING); 2519 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) 2520 __start_cpu_timer_accounting(vcpu); 2521 vcpu->cpu = cpu; 2522 } 2523 2524 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 2525 { 2526 vcpu->cpu = -1; 2527 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) 2528 __stop_cpu_timer_accounting(vcpu); 2529 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING); 2530 vcpu->arch.enabled_gmap = gmap_get_enabled(); 2531 gmap_disable(vcpu->arch.enabled_gmap); 2532 2533 } 2534 2535 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) 2536 { 2537 /* this equals initial cpu reset in pop, but we don't switch to ESA */ 2538 vcpu->arch.sie_block->gpsw.mask = 0UL; 2539 vcpu->arch.sie_block->gpsw.addr = 0UL; 2540 kvm_s390_set_prefix(vcpu, 0); 2541 kvm_s390_set_cpu_timer(vcpu, 0); 2542 vcpu->arch.sie_block->ckc = 0UL; 2543 vcpu->arch.sie_block->todpr = 0; 2544 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64)); 2545 vcpu->arch.sie_block->gcr[0] = CR0_UNUSED_56 | 2546 CR0_INTERRUPT_KEY_SUBMASK | 2547 CR0_MEASUREMENT_ALERT_SUBMASK; 2548 vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 | 2549 CR14_UNUSED_33 | 2550 CR14_EXTERNAL_DAMAGE_SUBMASK; 2551 /* make sure the new fpc will be lazily loaded */ 2552 save_fpu_regs(); 2553 current->thread.fpu.fpc = 0; 2554 vcpu->arch.sie_block->gbea = 1; 2555 vcpu->arch.sie_block->pp = 0; 2556 vcpu->arch.sie_block->fpf &= ~FPF_BPBC; 2557 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 2558 kvm_clear_async_pf_completion_queue(vcpu); 2559 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) 2560 kvm_s390_vcpu_stop(vcpu); 2561 kvm_s390_clear_local_irqs(vcpu); 2562 } 2563 2564 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) 2565 { 2566 mutex_lock(&vcpu->kvm->lock); 2567 preempt_disable(); 2568 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch; 2569 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx; 2570 preempt_enable(); 2571 mutex_unlock(&vcpu->kvm->lock); 2572 if (!kvm_is_ucontrol(vcpu->kvm)) { 2573 vcpu->arch.gmap = vcpu->kvm->arch.gmap; 2574 sca_add_vcpu(vcpu); 2575 } 2576 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0) 2577 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC; 2578 /* make vcpu_load load the right gmap on the first trigger */ 2579 vcpu->arch.enabled_gmap = vcpu->arch.gmap; 2580 } 2581 2582 static void 
kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu) 2583 { 2584 if (!test_kvm_facility(vcpu->kvm, 76)) 2585 return; 2586 2587 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA); 2588 2589 if (vcpu->kvm->arch.crypto.aes_kw) 2590 vcpu->arch.sie_block->ecb3 |= ECB3_AES; 2591 if (vcpu->kvm->arch.crypto.dea_kw) 2592 vcpu->arch.sie_block->ecb3 |= ECB3_DEA; 2593 2594 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd; 2595 } 2596 2597 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu) 2598 { 2599 free_page(vcpu->arch.sie_block->cbrlo); 2600 vcpu->arch.sie_block->cbrlo = 0; 2601 } 2602 2603 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu) 2604 { 2605 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL); 2606 if (!vcpu->arch.sie_block->cbrlo) 2607 return -ENOMEM; 2608 return 0; 2609 } 2610 2611 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu) 2612 { 2613 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model; 2614 2615 vcpu->arch.sie_block->ibc = model->ibc; 2616 if (test_kvm_facility(vcpu->kvm, 7)) 2617 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list; 2618 } 2619 2620 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) 2621 { 2622 int rc = 0; 2623 2624 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | 2625 CPUSTAT_SM | 2626 CPUSTAT_STOPPED); 2627 2628 if (test_kvm_facility(vcpu->kvm, 78)) 2629 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2); 2630 else if (test_kvm_facility(vcpu->kvm, 8)) 2631 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED); 2632 2633 kvm_s390_vcpu_setup_model(vcpu); 2634 2635 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */ 2636 if (MACHINE_HAS_ESOP) 2637 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT; 2638 if (test_kvm_facility(vcpu->kvm, 9)) 2639 vcpu->arch.sie_block->ecb |= ECB_SRSI; 2640 if (test_kvm_facility(vcpu->kvm, 73)) 2641 vcpu->arch.sie_block->ecb |= ECB_TE; 2642 2643 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi) 2644 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI; 2645 if (test_kvm_facility(vcpu->kvm, 130)) 2646 vcpu->arch.sie_block->ecb2 |= ECB2_IEP; 2647 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI; 2648 if (sclp.has_cei) 2649 vcpu->arch.sie_block->eca |= ECA_CEI; 2650 if (sclp.has_ib) 2651 vcpu->arch.sie_block->eca |= ECA_IB; 2652 if (sclp.has_siif) 2653 vcpu->arch.sie_block->eca |= ECA_SII; 2654 if (sclp.has_sigpif) 2655 vcpu->arch.sie_block->eca |= ECA_SIGPI; 2656 if (test_kvm_facility(vcpu->kvm, 129)) { 2657 vcpu->arch.sie_block->eca |= ECA_VX; 2658 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; 2659 } 2660 if (test_kvm_facility(vcpu->kvm, 139)) 2661 vcpu->arch.sie_block->ecd |= ECD_MEF; 2662 if (test_kvm_facility(vcpu->kvm, 156)) 2663 vcpu->arch.sie_block->ecd |= ECD_ETOKENF; 2664 if (vcpu->arch.sie_block->gd) { 2665 vcpu->arch.sie_block->eca |= ECA_AIV; 2666 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u", 2667 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id); 2668 } 2669 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx) 2670 | SDNXC; 2671 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb; 2672 2673 if (sclp.has_kss) 2674 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS); 2675 else 2676 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; 2677 2678 if (vcpu->kvm->arch.use_cmma) { 2679 rc = kvm_s390_vcpu_setup_cmma(vcpu); 2680 if (rc) 2681 return rc; 2682 } 2683 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 2684 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; 2685 2686 kvm_s390_vcpu_crypto_setup(vcpu); 
2687 2688 return rc; 2689 } 2690 2691 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, 2692 unsigned int id) 2693 { 2694 struct kvm_vcpu *vcpu; 2695 struct sie_page *sie_page; 2696 int rc = -EINVAL; 2697 2698 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id)) 2699 goto out; 2700 2701 rc = -ENOMEM; 2702 2703 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); 2704 if (!vcpu) 2705 goto out; 2706 2707 BUILD_BUG_ON(sizeof(struct sie_page) != 4096); 2708 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL); 2709 if (!sie_page) 2710 goto out_free_cpu; 2711 2712 vcpu->arch.sie_block = &sie_page->sie_block; 2713 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb; 2714 2715 /* the real guest size will always be smaller than msl */ 2716 vcpu->arch.sie_block->mso = 0; 2717 vcpu->arch.sie_block->msl = sclp.hamax; 2718 2719 vcpu->arch.sie_block->icpua = id; 2720 spin_lock_init(&vcpu->arch.local_int.lock); 2721 vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa; 2722 if (vcpu->arch.sie_block->gd && sclp.has_gisaf) 2723 vcpu->arch.sie_block->gd |= GISA_FORMAT1; 2724 seqcount_init(&vcpu->arch.cputm_seqcount); 2725 2726 rc = kvm_vcpu_init(vcpu, kvm, id); 2727 if (rc) 2728 goto out_free_sie_block; 2729 VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu, 2730 vcpu->arch.sie_block); 2731 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block); 2732 2733 return vcpu; 2734 out_free_sie_block: 2735 free_page((unsigned long)(vcpu->arch.sie_block)); 2736 out_free_cpu: 2737 kmem_cache_free(kvm_vcpu_cache, vcpu); 2738 out: 2739 return ERR_PTR(rc); 2740 } 2741 2742 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 2743 { 2744 return kvm_s390_vcpu_has_irq(vcpu, 0); 2745 } 2746 2747 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) 2748 { 2749 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE); 2750 } 2751 2752 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu) 2753 { 2754 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); 2755 exit_sie(vcpu); 2756 } 2757 2758 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu) 2759 { 2760 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); 2761 } 2762 2763 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu) 2764 { 2765 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20); 2766 exit_sie(vcpu); 2767 } 2768 2769 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu) 2770 { 2771 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20); 2772 } 2773 2774 /* 2775 * Kick a guest cpu out of SIE and wait until SIE is not running. 2776 * If the CPU is not running (e.g. waiting as idle) the function will 2777 * return immediately. 
*/ 2778 void exit_sie(struct kvm_vcpu *vcpu) 2779 { 2780 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT); 2781 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE) 2782 cpu_relax(); 2783 } 2784 2785 /* Kick a guest cpu out of SIE to process a request synchronously */ 2786 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu) 2787 { 2788 kvm_make_request(req, vcpu); 2789 kvm_s390_vcpu_request(vcpu); 2790 } 2791 2792 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, 2793 unsigned long end) 2794 { 2795 struct kvm *kvm = gmap->private; 2796 struct kvm_vcpu *vcpu; 2797 unsigned long prefix; 2798 int i; 2799 2800 if (gmap_is_shadow(gmap)) 2801 return; 2802 if (start >= 1UL << 31) 2803 /* We are only interested in prefix pages */ 2804 return; 2805 kvm_for_each_vcpu(i, vcpu, kvm) { 2806 /* match against both prefix pages */ 2807 prefix = kvm_s390_get_prefix(vcpu); 2808 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) { 2809 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx", 2810 start, end); 2811 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu); 2812 } 2813 } 2814 } 2815 2816 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) 2817 { 2818 /* kvm common code refers to this, but never calls it */ 2819 BUG(); 2820 return 0; 2821 } 2822 2823 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, 2824 struct kvm_one_reg *reg) 2825 { 2826 int r = -EINVAL; 2827 2828 switch (reg->id) { 2829 case KVM_REG_S390_TODPR: 2830 r = put_user(vcpu->arch.sie_block->todpr, 2831 (u32 __user *)reg->addr); 2832 break; 2833 case KVM_REG_S390_EPOCHDIFF: 2834 r = put_user(vcpu->arch.sie_block->epoch, 2835 (u64 __user *)reg->addr); 2836 break; 2837 case KVM_REG_S390_CPU_TIMER: 2838 r = put_user(kvm_s390_get_cpu_timer(vcpu), 2839 (u64 __user *)reg->addr); 2840 break; 2841 case KVM_REG_S390_CLOCK_COMP: 2842 r = put_user(vcpu->arch.sie_block->ckc, 2843 (u64 __user *)reg->addr); 2844 break; 2845 case KVM_REG_S390_PFTOKEN: 2846 r = put_user(vcpu->arch.pfault_token, 2847 (u64 __user *)reg->addr); 2848 break; 2849 case KVM_REG_S390_PFCOMPARE: 2850 r = put_user(vcpu->arch.pfault_compare, 2851 (u64 __user *)reg->addr); 2852 break; 2853 case KVM_REG_S390_PFSELECT: 2854 r = put_user(vcpu->arch.pfault_select, 2855 (u64 __user *)reg->addr); 2856 break; 2857 case KVM_REG_S390_PP: 2858 r = put_user(vcpu->arch.sie_block->pp, 2859 (u64 __user *)reg->addr); 2860 break; 2861 case KVM_REG_S390_GBEA: 2862 r = put_user(vcpu->arch.sie_block->gbea, 2863 (u64 __user *)reg->addr); 2864 break; 2865 default: 2866 break; 2867 } 2868 2869 return r; 2870 } 2871 2872 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, 2873 struct kvm_one_reg *reg) 2874 { 2875 int r = -EINVAL; 2876 __u64 val; 2877 2878 switch (reg->id) { 2879 case KVM_REG_S390_TODPR: 2880 r = get_user(vcpu->arch.sie_block->todpr, 2881 (u32 __user *)reg->addr); 2882 break; 2883 case KVM_REG_S390_EPOCHDIFF: 2884 r = get_user(vcpu->arch.sie_block->epoch, 2885 (u64 __user *)reg->addr); 2886 break; 2887 case KVM_REG_S390_CPU_TIMER: 2888 r = get_user(val, (u64 __user *)reg->addr); 2889 if (!r) 2890 kvm_s390_set_cpu_timer(vcpu, val); 2891 break; 2892 case KVM_REG_S390_CLOCK_COMP: 2893 r = get_user(vcpu->arch.sie_block->ckc, 2894 (u64 __user *)reg->addr); 2895 break; 2896 case KVM_REG_S390_PFTOKEN: 2897 r = get_user(vcpu->arch.pfault_token, 2898 (u64 __user *)reg->addr); 2899 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 2900 kvm_clear_async_pf_completion_queue(vcpu); 2901 break; 2902 case KVM_REG_S390_PFCOMPARE: 2903 r = 
get_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = get_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = get_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = get_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}

static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	vcpu_load(vcpu);
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	vcpu_load(vcpu);
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	vcpu_load(vcpu);

	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));

	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	vcpu_load(vcpu);

	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));

	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	int ret = 0;

	vcpu_load(vcpu);

	if (test_fp_ctl(fpu->fpc)) {
		ret = -EINVAL;
		goto out;
	}
	vcpu->run->s.regs.fpc = fpu->fpc;
	if (MACHINE_HAS_VX)
		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
				 (freg_t *) fpu->fprs);
	else
		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));

out:
	vcpu_put(vcpu);
	return ret;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	vcpu_load(vcpu);

	/* make sure we have the latest values */
	save_fpu_regs();
	if (MACHINE_HAS_VX)
		convert_vx_to_fp((freg_t *) fpu->fprs,
				 (__vector128 *) vcpu->run->s.regs.vrs);
	else
		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->run->s.regs.fpc;

	vcpu_put(vcpu);
	return 0;
}
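/*
 * Usage sketch (illustrative only, not part of this file): userspace can read
 * or write a single register through the generic ONE_REG interface handled
 * above. vcpu_fd and the modification shown are hypothetical; the register id
 * and the ioctl names come from the UAPI headers.
 *
 *	__u64 gbea;
 *	struct kvm_one_reg reg = {
 *		.id = KVM_REG_S390_GBEA,
 *		.addr = (__u64)(unsigned long)&gbea,
 *	};
 *
 *	if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg) == 0) {
 *		gbea &= ~1UL;		// example modification only
 *		ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
 *	}
 */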
static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!is_vcpu_stopped(vcpu))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}

#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	int rc = 0;

	vcpu_load(vcpu);

	vcpu->guest_debug = 0;
	kvm_s390_clear_bp_data(vcpu);

	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
		rc = -EINVAL;
		goto out;
	}
	if (!sclp.has_gpere) {
		rc = -EINVAL;
		goto out;
	}

	if (dbg->control & KVM_GUESTDBG_ENABLE) {
		vcpu->guest_debug = dbg->control;
		/* enforce guest PER */
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);

		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
			rc = kvm_s390_import_bp_data(vcpu, dbg);
	} else {
		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
		vcpu->arch.guestdbg.last_bp = 0;
	}

	if (rc) {
		vcpu->guest_debug = 0;
		kvm_s390_clear_bp_data(vcpu);
		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
	}

out:
	vcpu_put(vcpu);
	return rc;
}

int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int ret;

	vcpu_load(vcpu);

	/* CHECK_STOP and LOAD are not supported yet */
	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
				      KVM_MP_STATE_OPERATING;

	vcpu_put(vcpu);
	return ret;
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int rc = 0;

	vcpu_load(vcpu);

	/* user space knows about this interface - let it control the state */
	vcpu->kvm->arch.user_cpu_state_ctrl = 1;

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_STOPPED:
		kvm_s390_vcpu_stop(vcpu);
		break;
	case KVM_MP_STATE_OPERATING:
		kvm_s390_vcpu_start(vcpu);
		break;
	case KVM_MP_STATE_LOAD:
	case KVM_MP_STATE_CHECK_STOP:
		/* fall through - CHECK_STOP and LOAD are not supported yet */
	default:
		rc = -ENXIO;
	}

	vcpu_put(vcpu);
	return rc;
}

static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
}

static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	kvm_s390_vcpu_request_handled(vcpu);
	if (!kvm_request_pending(vcpu))
		return 0;
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Let's just retry the request loop.
3135 */ 3136 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) { 3137 int rc; 3138 rc = gmap_mprotect_notify(vcpu->arch.gmap, 3139 kvm_s390_get_prefix(vcpu), 3140 PAGE_SIZE * 2, PROT_WRITE); 3141 if (rc) { 3142 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); 3143 return rc; 3144 } 3145 goto retry; 3146 } 3147 3148 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) { 3149 vcpu->arch.sie_block->ihcpu = 0xffff; 3150 goto retry; 3151 } 3152 3153 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) { 3154 if (!ibs_enabled(vcpu)) { 3155 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1); 3156 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS); 3157 } 3158 goto retry; 3159 } 3160 3161 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) { 3162 if (ibs_enabled(vcpu)) { 3163 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0); 3164 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS); 3165 } 3166 goto retry; 3167 } 3168 3169 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) { 3170 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC; 3171 goto retry; 3172 } 3173 3174 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) { 3175 /* 3176 * Disable CMM virtualization; we will emulate the ESSA 3177 * instruction manually, in order to provide additional 3178 * functionalities needed for live migration. 3179 */ 3180 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA; 3181 goto retry; 3182 } 3183 3184 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) { 3185 /* 3186 * Re-enable CMM virtualization if CMMA is available and 3187 * CMM has been used. 3188 */ 3189 if ((vcpu->kvm->arch.use_cmma) && 3190 (vcpu->kvm->mm->context.uses_cmm)) 3191 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA; 3192 goto retry; 3193 } 3194 3195 /* nothing to do, just clear the request */ 3196 kvm_clear_request(KVM_REQ_UNHALT, vcpu); 3197 3198 return 0; 3199 } 3200 3201 void kvm_s390_set_tod_clock(struct kvm *kvm, 3202 const struct kvm_s390_vm_tod_clock *gtod) 3203 { 3204 struct kvm_vcpu *vcpu; 3205 struct kvm_s390_tod_clock_ext htod; 3206 int i; 3207 3208 mutex_lock(&kvm->lock); 3209 preempt_disable(); 3210 3211 get_tod_clock_ext((char *)&htod); 3212 3213 kvm->arch.epoch = gtod->tod - htod.tod; 3214 kvm->arch.epdx = 0; 3215 if (test_kvm_facility(kvm, 139)) { 3216 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx; 3217 if (kvm->arch.epoch > gtod->tod) 3218 kvm->arch.epdx -= 1; 3219 } 3220 3221 kvm_s390_vcpu_block_all(kvm); 3222 kvm_for_each_vcpu(i, vcpu, kvm) { 3223 vcpu->arch.sie_block->epoch = kvm->arch.epoch; 3224 vcpu->arch.sie_block->epdx = kvm->arch.epdx; 3225 } 3226 3227 kvm_s390_vcpu_unblock_all(kvm); 3228 preempt_enable(); 3229 mutex_unlock(&kvm->lock); 3230 } 3231 3232 /** 3233 * kvm_arch_fault_in_page - fault-in guest page if necessary 3234 * @vcpu: The corresponding virtual cpu 3235 * @gpa: Guest physical address 3236 * @writable: Whether the page should be writable or not 3237 * 3238 * Make sure that a guest page has been faulted-in on the host. 3239 * 3240 * Return: Zero on success, negative error code otherwise. 3241 */ 3242 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable) 3243 { 3244 return gmap_fault(vcpu->arch.gmap, gpa, 3245 writable ? 
FAULT_FLAG_WRITE : 0); 3246 } 3247 3248 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token, 3249 unsigned long token) 3250 { 3251 struct kvm_s390_interrupt inti; 3252 struct kvm_s390_irq irq; 3253 3254 if (start_token) { 3255 irq.u.ext.ext_params2 = token; 3256 irq.type = KVM_S390_INT_PFAULT_INIT; 3257 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq)); 3258 } else { 3259 inti.type = KVM_S390_INT_PFAULT_DONE; 3260 inti.parm64 = token; 3261 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti)); 3262 } 3263 } 3264 3265 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, 3266 struct kvm_async_pf *work) 3267 { 3268 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token); 3269 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token); 3270 } 3271 3272 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, 3273 struct kvm_async_pf *work) 3274 { 3275 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token); 3276 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token); 3277 } 3278 3279 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, 3280 struct kvm_async_pf *work) 3281 { 3282 /* s390 will always inject the page directly */ 3283 } 3284 3285 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu) 3286 { 3287 /* 3288 * s390 will always inject the page directly, 3289 * but we still want check_async_completion to cleanup 3290 */ 3291 return true; 3292 } 3293 3294 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu) 3295 { 3296 hva_t hva; 3297 struct kvm_arch_async_pf arch; 3298 int rc; 3299 3300 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 3301 return 0; 3302 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) != 3303 vcpu->arch.pfault_compare) 3304 return 0; 3305 if (psw_extint_disabled(vcpu)) 3306 return 0; 3307 if (kvm_s390_vcpu_has_irq(vcpu, 0)) 3308 return 0; 3309 if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK)) 3310 return 0; 3311 if (!vcpu->arch.gmap->pfault_enabled) 3312 return 0; 3313 3314 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr)); 3315 hva += current->thread.gmap_addr & ~PAGE_MASK; 3316 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8)) 3317 return 0; 3318 3319 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch); 3320 return rc; 3321 } 3322 3323 static int vcpu_pre_run(struct kvm_vcpu *vcpu) 3324 { 3325 int rc, cpuflags; 3326 3327 /* 3328 * On s390 notifications for arriving pages will be delivered directly 3329 * to the guest but the house keeping for completed pfaults is 3330 * handled outside the worker. 
3331 */ 3332 kvm_check_async_pf_completion(vcpu); 3333 3334 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14]; 3335 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15]; 3336 3337 if (need_resched()) 3338 schedule(); 3339 3340 if (test_cpu_flag(CIF_MCCK_PENDING)) 3341 s390_handle_mcck(); 3342 3343 if (!kvm_is_ucontrol(vcpu->kvm)) { 3344 rc = kvm_s390_deliver_pending_interrupts(vcpu); 3345 if (rc) 3346 return rc; 3347 } 3348 3349 rc = kvm_s390_handle_requests(vcpu); 3350 if (rc) 3351 return rc; 3352 3353 if (guestdbg_enabled(vcpu)) { 3354 kvm_s390_backup_guest_per_regs(vcpu); 3355 kvm_s390_patch_guest_per_regs(vcpu); 3356 } 3357 3358 vcpu->arch.sie_block->icptcode = 0; 3359 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags); 3360 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags); 3361 trace_kvm_s390_sie_enter(vcpu, cpuflags); 3362 3363 return 0; 3364 } 3365 3366 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) 3367 { 3368 struct kvm_s390_pgm_info pgm_info = { 3369 .code = PGM_ADDRESSING, 3370 }; 3371 u8 opcode, ilen; 3372 int rc; 3373 3374 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); 3375 trace_kvm_s390_sie_fault(vcpu); 3376 3377 /* 3378 * We want to inject an addressing exception, which is defined as a 3379 * suppressing or terminating exception. However, since we came here 3380 * by a DAT access exception, the PSW still points to the faulting 3381 * instruction since DAT exceptions are nullifying. So we've got 3382 * to look up the current opcode to get the length of the instruction 3383 * to be able to forward the PSW. 3384 */ 3385 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1); 3386 ilen = insn_length(opcode); 3387 if (rc < 0) { 3388 return rc; 3389 } else if (rc) { 3390 /* Instruction-Fetching Exceptions - we can't detect the ilen. 3391 * Forward by arbitrary ilc, injection will take care of 3392 * nullification if necessary. 
3393 */ 3394 pgm_info = vcpu->arch.pgm; 3395 ilen = 4; 3396 } 3397 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID; 3398 kvm_s390_forward_psw(vcpu, ilen); 3399 return kvm_s390_inject_prog_irq(vcpu, &pgm_info); 3400 } 3401 3402 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) 3403 { 3404 struct mcck_volatile_info *mcck_info; 3405 struct sie_page *sie_page; 3406 3407 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", 3408 vcpu->arch.sie_block->icptcode); 3409 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); 3410 3411 if (guestdbg_enabled(vcpu)) 3412 kvm_s390_restore_guest_per_regs(vcpu); 3413 3414 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14; 3415 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15; 3416 3417 if (exit_reason == -EINTR) { 3418 VCPU_EVENT(vcpu, 3, "%s", "machine check"); 3419 sie_page = container_of(vcpu->arch.sie_block, 3420 struct sie_page, sie_block); 3421 mcck_info = &sie_page->mcck_info; 3422 kvm_s390_reinject_machine_check(vcpu, mcck_info); 3423 return 0; 3424 } 3425 3426 if (vcpu->arch.sie_block->icptcode > 0) { 3427 int rc = kvm_handle_sie_intercept(vcpu); 3428 3429 if (rc != -EOPNOTSUPP) 3430 return rc; 3431 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC; 3432 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; 3433 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; 3434 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; 3435 return -EREMOTE; 3436 } else if (exit_reason != -EFAULT) { 3437 vcpu->stat.exit_null++; 3438 return 0; 3439 } else if (kvm_is_ucontrol(vcpu->kvm)) { 3440 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; 3441 vcpu->run->s390_ucontrol.trans_exc_code = 3442 current->thread.gmap_addr; 3443 vcpu->run->s390_ucontrol.pgm_code = 0x10; 3444 return -EREMOTE; 3445 } else if (current->thread.gmap_pfault) { 3446 trace_kvm_s390_major_guest_pfault(vcpu); 3447 current->thread.gmap_pfault = 0; 3448 if (kvm_arch_setup_async_pf(vcpu)) 3449 return 0; 3450 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1); 3451 } 3452 return vcpu_post_run_fault_in_sie(vcpu); 3453 } 3454 3455 static int __vcpu_run(struct kvm_vcpu *vcpu) 3456 { 3457 int rc, exit_reason; 3458 3459 /* 3460 * We try to hold kvm->srcu during most of vcpu_run (except when run- 3461 * ning the guest), so that memslots (and other stuff) are protected 3462 */ 3463 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 3464 3465 do { 3466 rc = vcpu_pre_run(vcpu); 3467 if (rc) 3468 break; 3469 3470 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 3471 /* 3472 * As PF_VCPU will be used in fault handler, between 3473 * guest_enter and guest_exit should be no uaccess. 
3474 */ 3475 local_irq_disable(); 3476 guest_enter_irqoff(); 3477 __disable_cpu_timer_accounting(vcpu); 3478 local_irq_enable(); 3479 exit_reason = sie64a(vcpu->arch.sie_block, 3480 vcpu->run->s.regs.gprs); 3481 local_irq_disable(); 3482 __enable_cpu_timer_accounting(vcpu); 3483 guest_exit_irqoff(); 3484 local_irq_enable(); 3485 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 3486 3487 rc = vcpu_post_run(vcpu, exit_reason); 3488 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc); 3489 3490 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 3491 return rc; 3492 } 3493 3494 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3495 { 3496 struct runtime_instr_cb *riccb; 3497 struct gs_cb *gscb; 3498 3499 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb; 3500 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb; 3501 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; 3502 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; 3503 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) 3504 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); 3505 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) { 3506 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128); 3507 /* some control register changes require a tlb flush */ 3508 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 3509 } 3510 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) { 3511 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm); 3512 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc; 3513 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr; 3514 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp; 3515 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea; 3516 } 3517 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) { 3518 vcpu->arch.pfault_token = kvm_run->s.regs.pft; 3519 vcpu->arch.pfault_select = kvm_run->s.regs.pfs; 3520 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc; 3521 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 3522 kvm_clear_async_pf_completion_queue(vcpu); 3523 } 3524 /* 3525 * If userspace sets the riccb (e.g. after migration) to a valid state, 3526 * we should enable RI here instead of doing the lazy enablement. 3527 */ 3528 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) && 3529 test_kvm_facility(vcpu->kvm, 64) && 3530 riccb->v && 3531 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) { 3532 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)"); 3533 vcpu->arch.sie_block->ecb3 |= ECB3_RI; 3534 } 3535 /* 3536 * If userspace sets the gscb (e.g. after migration) to non-zero, 3537 * we should enable GS here instead of doing the lazy enablement. 3538 */ 3539 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) && 3540 test_kvm_facility(vcpu->kvm, 133) && 3541 gscb->gssm && 3542 !vcpu->arch.gs_enabled) { 3543 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)"); 3544 vcpu->arch.sie_block->ecb |= ECB_GS; 3545 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; 3546 vcpu->arch.gs_enabled = 1; 3547 } 3548 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) && 3549 test_kvm_facility(vcpu->kvm, 82)) { 3550 vcpu->arch.sie_block->fpf &= ~FPF_BPBC; 3551 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? 
FPF_BPBC : 0; 3552 } 3553 save_access_regs(vcpu->arch.host_acrs); 3554 restore_access_regs(vcpu->run->s.regs.acrs); 3555 /* save host (userspace) fprs/vrs */ 3556 save_fpu_regs(); 3557 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc; 3558 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs; 3559 if (MACHINE_HAS_VX) 3560 current->thread.fpu.regs = vcpu->run->s.regs.vrs; 3561 else 3562 current->thread.fpu.regs = vcpu->run->s.regs.fprs; 3563 current->thread.fpu.fpc = vcpu->run->s.regs.fpc; 3564 if (test_fp_ctl(current->thread.fpu.fpc)) 3565 /* User space provided an invalid FPC, let's clear it */ 3566 current->thread.fpu.fpc = 0; 3567 if (MACHINE_HAS_GS) { 3568 preempt_disable(); 3569 __ctl_set_bit(2, 4); 3570 if (current->thread.gs_cb) { 3571 vcpu->arch.host_gscb = current->thread.gs_cb; 3572 save_gs_cb(vcpu->arch.host_gscb); 3573 } 3574 if (vcpu->arch.gs_enabled) { 3575 current->thread.gs_cb = (struct gs_cb *) 3576 &vcpu->run->s.regs.gscb; 3577 restore_gs_cb(current->thread.gs_cb); 3578 } 3579 preempt_enable(); 3580 } 3581 /* SIE will load etoken directly from SDNX and therefore kvm_run */ 3582 3583 kvm_run->kvm_dirty_regs = 0; 3584 } 3585 3586 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3587 { 3588 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask; 3589 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; 3590 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu); 3591 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128); 3592 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu); 3593 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc; 3594 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr; 3595 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp; 3596 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea; 3597 kvm_run->s.regs.pft = vcpu->arch.pfault_token; 3598 kvm_run->s.regs.pfs = vcpu->arch.pfault_select; 3599 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare; 3600 kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC; 3601 save_access_regs(vcpu->run->s.regs.acrs); 3602 restore_access_regs(vcpu->arch.host_acrs); 3603 /* Save guest register state */ 3604 save_fpu_regs(); 3605 vcpu->run->s.regs.fpc = current->thread.fpu.fpc; 3606 /* Restore will be done lazily at return */ 3607 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc; 3608 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs; 3609 if (MACHINE_HAS_GS) { 3610 __ctl_set_bit(2, 4); 3611 if (vcpu->arch.gs_enabled) 3612 save_gs_cb(current->thread.gs_cb); 3613 preempt_disable(); 3614 current->thread.gs_cb = vcpu->arch.host_gscb; 3615 restore_gs_cb(vcpu->arch.host_gscb); 3616 preempt_enable(); 3617 if (!vcpu->arch.host_gscb) 3618 __ctl_clear_bit(2, 4); 3619 vcpu->arch.host_gscb = NULL; 3620 } 3621 /* SIE will save etoken directly into SDNX and therefore kvm_run */ 3622 } 3623 3624 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3625 { 3626 int rc; 3627 3628 if (kvm_run->immediate_exit) 3629 return -EINTR; 3630 3631 vcpu_load(vcpu); 3632 3633 if (guestdbg_exit_pending(vcpu)) { 3634 kvm_s390_prepare_debug_exit(vcpu); 3635 rc = 0; 3636 goto out; 3637 } 3638 3639 kvm_sigset_activate(vcpu); 3640 3641 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) { 3642 kvm_s390_vcpu_start(vcpu); 3643 } else if (is_vcpu_stopped(vcpu)) { 3644 pr_err_ratelimited("can't run stopped vcpu %d\n", 3645 vcpu->vcpu_id); 3646 rc = -EINVAL; 3647 goto out; 3648 } 3649 3650 sync_regs(vcpu, kvm_run); 3651 enable_cpu_timer_accounting(vcpu); 3652 3653 might_fault(); 3654 rc = 
__vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}

	disable_cpu_timer_accounting(vcpu);
	store_regs(vcpu, kvm_run);

	kvm_sigset_deactivate(vcpu);

	vcpu->stat.exit_userspace++;
out:
	vcpu_put(vcpu);
	return rc;
}

/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp, cputm;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else
		gpa -= __LC_FPREGS_SAVE_AREA;

	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	cputm = kvm_s390_get_cpu_timer(vcpu);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}

int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * switch in the run ioctl. Let's update our copies before we save
	 * it into the save area
	 */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}
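/*
 * Usage sketch (illustrative only, not part of this file): userspace triggers
 * the store-status path above through the KVM_S390_STORE_STATUS vcpu ioctl,
 * typically with the vcpu stopped. vcpu_fd is a hypothetical vcpu file
 * descriptor; the NOADDR special value shown is the kernel-side constant
 * recognized by kvm_s390_store_status_unloaded() above, userspace passes the
 * corresponding address value.
 *
 *	struct kvm_mp_state stopped = { .mp_state = KVM_MP_STATE_STOPPED };
 *
 *	ioctl(vcpu_fd, KVM_SET_MP_STATE, &stopped);
 *	ioctl(vcpu_fd, KVM_S390_STORE_STATUS, KVM_S390_STORE_STATUS_NOADDR);
 */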
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	if (!sclp.has_ibs)
		return;
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}

void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}

void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}

static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r, srcu_idx;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags)
		return -EINVAL;

	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;

	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_FETCH);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_STORE);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}

long kvm_arch_vcpu_async_ioctl(struct file *filp,
			       unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			return -EFAULT;
		return kvm_s390_inject_vcpu(vcpu, &s390irq);
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq;

		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			return -EFAULT;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		return kvm_s390_inject_vcpu(vcpu, &s390irq);
	}
	}
	return -ENOIOCTLCMD;
}
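
/*
 * Illustrative userspace sketch (not part of this file): a minimal read of
 * guest memory through the KVM_S390_MEM_OP vcpu ioctl, which is dispatched to
 * kvm_s390_guest_mem_op() above. "vcpu_fd" is a placeholder; flags are left
 * zero, so there is no check-only pass and no exception injection.
 *
 *	__u8 buf[256];
 *	struct kvm_s390_mem_op op = {
 *		.gaddr	= 0x10000,			// guest logical address
 *		.size	= sizeof(buf),
 *		.op	= KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf	= (__u64)(unsigned long)buf,
 *		.ar	= 0,				// access register number
 *	};
 *
 *	if (ioctl(vcpu_fd, KVM_S390_MEM_OP, &op) != 0)
 *		;	// negative rc on error, positive program irq code otherwise
 */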

long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	vcpu_load(vcpu);

	switch (ioctl) {
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;
		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}

	vcpu_put(vcpu);
	return r;
}

vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}
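
/*
 * Illustrative userspace sketch (not part of this file): the ONE_REG cases
 * above use the generic one-reg interface; register ids such as
 * KVM_REG_S390_CPU_TIMER are assumed to come from the s390 uapi headers.
 * "vcpu_fd" is a placeholder for an open vcpu file descriptor.
 *
 *	__u64 cputm = 0;
 *	struct kvm_one_reg reg = {
 *		.id	= KVM_REG_S390_CPU_TIMER,
 *		.addr	= (__u64)(unsigned long)&cputm,
 *	};
 *
 *	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);	// cputm now holds the CPU timer
 */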

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. We can have memory slots which have to be
	   located/ended at a segment boundary (1MB). The memory in userland is
	   ok to be fragmented into various different vmas. It is okay to mmap()
	   and munmap() stuff in this slot after doing this call at any time */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
		return -EINVAL;

	return 0;
}

void kvm_arch_commit_memory_region(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				const struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new,
				enum kvm_mr_change change)
{
	int rc;

	/* If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
			      mem->guest_phys_addr, mem->memory_size);
	if (rc)
		pr_warn("failed to commit memory region\n");
	return;
}

static inline unsigned long nonhyp_mask(int i)
{
	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}

void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
	vcpu->valid_wakeup = false;
}

static int __init kvm_s390_init(void)
{
	int i;

	if (!sclp.has_sief2) {
		pr_info("SIE not available\n");
		return -ENODEV;
	}

	if (nested && hpage) {
		pr_info("nested (vSIE) and hpage (huge page backing) can currently not be activated concurrently");
		return -EINVAL;
	}

	for (i = 0; i < 16; i++)
		kvm_s390_fac_base[i] |=
			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");
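
/*
 * Illustrative userspace sketch (not part of this file): per
 * kvm_arch_prepare_memory_region() above, userspace_addr and memory_size of a
 * memslot must be 1MB aligned on s390. "vm_fd" and "mem" are placeholders;
 * "mem" is assumed to be a 1MB-aligned mapping of at least the given size.
 *
 *	struct kvm_userspace_memory_region region = {
 *		.slot		  = 0,
 *		.guest_phys_addr  = 0,
 *		.memory_size	  = 256UL << 20,	// multiple of 1MB
 *		.userspace_addr	  = (__u64)(unsigned long)mem,
 *	};
 *
 *	ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
 */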