1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * hosting IBM Z kernel virtual machines (s390x) 4 * 5 * Copyright IBM Corp. 2008, 2018 6 * 7 * Author(s): Carsten Otte <cotte@de.ibm.com> 8 * Christian Borntraeger <borntraeger@de.ibm.com> 9 * Heiko Carstens <heiko.carstens@de.ibm.com> 10 * Christian Ehrhardt <ehrhardt@de.ibm.com> 11 * Jason J. Herne <jjherne@us.ibm.com> 12 */ 13 14 #define KMSG_COMPONENT "kvm-s390" 15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 16 17 #include <linux/compiler.h> 18 #include <linux/err.h> 19 #include <linux/fs.h> 20 #include <linux/hrtimer.h> 21 #include <linux/init.h> 22 #include <linux/kvm.h> 23 #include <linux/kvm_host.h> 24 #include <linux/mman.h> 25 #include <linux/module.h> 26 #include <linux/moduleparam.h> 27 #include <linux/random.h> 28 #include <linux/slab.h> 29 #include <linux/timer.h> 30 #include <linux/vmalloc.h> 31 #include <linux/bitmap.h> 32 #include <linux/sched/signal.h> 33 #include <linux/string.h> 34 35 #include <asm/asm-offsets.h> 36 #include <asm/lowcore.h> 37 #include <asm/stp.h> 38 #include <asm/pgtable.h> 39 #include <asm/gmap.h> 40 #include <asm/nmi.h> 41 #include <asm/switch_to.h> 42 #include <asm/isc.h> 43 #include <asm/sclp.h> 44 #include <asm/cpacf.h> 45 #include <asm/timex.h> 46 #include <asm/ap.h> 47 #include "kvm-s390.h" 48 #include "gaccess.h" 49 50 #define CREATE_TRACE_POINTS 51 #include "trace.h" 52 #include "trace-s390.h" 53 54 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */ 55 #define LOCAL_IRQS 32 56 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \ 57 (KVM_MAX_VCPUS + LOCAL_IRQS)) 58 59 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU 60 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM 61 62 struct kvm_stats_debugfs_item debugfs_entries[] = { 63 { "userspace_handled", VCPU_STAT(exit_userspace) }, 64 { "exit_null", VCPU_STAT(exit_null) }, 65 { "exit_validity", VCPU_STAT(exit_validity) }, 66 { "exit_stop_request", VCPU_STAT(exit_stop_request) }, 67 { "exit_external_request", VCPU_STAT(exit_external_request) }, 68 { "exit_io_request", VCPU_STAT(exit_io_request) }, 69 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) }, 70 { "exit_instruction", VCPU_STAT(exit_instruction) }, 71 { "exit_pei", VCPU_STAT(exit_pei) }, 72 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) }, 73 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) }, 74 { "exit_operation_exception", VCPU_STAT(exit_operation_exception) }, 75 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) }, 76 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) }, 77 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) }, 78 { "halt_wakeup", VCPU_STAT(halt_wakeup) }, 79 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) }, 80 { "instruction_lctl", VCPU_STAT(instruction_lctl) }, 81 { "instruction_stctl", VCPU_STAT(instruction_stctl) }, 82 { "instruction_stctg", VCPU_STAT(instruction_stctg) }, 83 { "deliver_ckc", VCPU_STAT(deliver_ckc) }, 84 { "deliver_cputm", VCPU_STAT(deliver_cputm) }, 85 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) }, 86 { "deliver_external_call", VCPU_STAT(deliver_external_call) }, 87 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) }, 88 { "deliver_virtio", VCPU_STAT(deliver_virtio) }, 89 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) }, 90 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) }, 91 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) }, 92 { "deliver_program", 
VCPU_STAT(deliver_program) }, 93 { "deliver_io", VCPU_STAT(deliver_io) }, 94 { "deliver_machine_check", VCPU_STAT(deliver_machine_check) }, 95 { "exit_wait_state", VCPU_STAT(exit_wait_state) }, 96 { "inject_ckc", VCPU_STAT(inject_ckc) }, 97 { "inject_cputm", VCPU_STAT(inject_cputm) }, 98 { "inject_external_call", VCPU_STAT(inject_external_call) }, 99 { "inject_float_mchk", VM_STAT(inject_float_mchk) }, 100 { "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) }, 101 { "inject_io", VM_STAT(inject_io) }, 102 { "inject_mchk", VCPU_STAT(inject_mchk) }, 103 { "inject_pfault_done", VM_STAT(inject_pfault_done) }, 104 { "inject_program", VCPU_STAT(inject_program) }, 105 { "inject_restart", VCPU_STAT(inject_restart) }, 106 { "inject_service_signal", VM_STAT(inject_service_signal) }, 107 { "inject_set_prefix", VCPU_STAT(inject_set_prefix) }, 108 { "inject_stop_signal", VCPU_STAT(inject_stop_signal) }, 109 { "inject_pfault_init", VCPU_STAT(inject_pfault_init) }, 110 { "inject_virtio", VM_STAT(inject_virtio) }, 111 { "instruction_epsw", VCPU_STAT(instruction_epsw) }, 112 { "instruction_gs", VCPU_STAT(instruction_gs) }, 113 { "instruction_io_other", VCPU_STAT(instruction_io_other) }, 114 { "instruction_lpsw", VCPU_STAT(instruction_lpsw) }, 115 { "instruction_lpswe", VCPU_STAT(instruction_lpswe) }, 116 { "instruction_pfmf", VCPU_STAT(instruction_pfmf) }, 117 { "instruction_ptff", VCPU_STAT(instruction_ptff) }, 118 { "instruction_stidp", VCPU_STAT(instruction_stidp) }, 119 { "instruction_sck", VCPU_STAT(instruction_sck) }, 120 { "instruction_sckpf", VCPU_STAT(instruction_sckpf) }, 121 { "instruction_spx", VCPU_STAT(instruction_spx) }, 122 { "instruction_stpx", VCPU_STAT(instruction_stpx) }, 123 { "instruction_stap", VCPU_STAT(instruction_stap) }, 124 { "instruction_iske", VCPU_STAT(instruction_iske) }, 125 { "instruction_ri", VCPU_STAT(instruction_ri) }, 126 { "instruction_rrbe", VCPU_STAT(instruction_rrbe) }, 127 { "instruction_sske", VCPU_STAT(instruction_sske) }, 128 { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) }, 129 { "instruction_essa", VCPU_STAT(instruction_essa) }, 130 { "instruction_stsi", VCPU_STAT(instruction_stsi) }, 131 { "instruction_stfl", VCPU_STAT(instruction_stfl) }, 132 { "instruction_tb", VCPU_STAT(instruction_tb) }, 133 { "instruction_tpi", VCPU_STAT(instruction_tpi) }, 134 { "instruction_tprot", VCPU_STAT(instruction_tprot) }, 135 { "instruction_tsch", VCPU_STAT(instruction_tsch) }, 136 { "instruction_sthyi", VCPU_STAT(instruction_sthyi) }, 137 { "instruction_sie", VCPU_STAT(instruction_sie) }, 138 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) }, 139 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) }, 140 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) }, 141 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) }, 142 { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) }, 143 { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) }, 144 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) }, 145 { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) }, 146 { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) }, 147 { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) }, 148 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) }, 149 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) }, 150 { 
"instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) }, 151 { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) }, 152 { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) }, 153 { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) }, 154 { "instruction_diag_10", VCPU_STAT(diagnose_10) }, 155 { "instruction_diag_44", VCPU_STAT(diagnose_44) }, 156 { "instruction_diag_9c", VCPU_STAT(diagnose_9c) }, 157 { "instruction_diag_258", VCPU_STAT(diagnose_258) }, 158 { "instruction_diag_308", VCPU_STAT(diagnose_308) }, 159 { "instruction_diag_500", VCPU_STAT(diagnose_500) }, 160 { "instruction_diag_other", VCPU_STAT(diagnose_other) }, 161 { NULL } 162 }; 163 164 struct kvm_s390_tod_clock_ext { 165 __u8 epoch_idx; 166 __u64 tod; 167 __u8 reserved[7]; 168 } __packed; 169 170 /* allow nested virtualization in KVM (if enabled by user space) */ 171 static int nested; 172 module_param(nested, int, S_IRUGO); 173 MODULE_PARM_DESC(nested, "Nested virtualization support"); 174 175 /* allow 1m huge page guest backing, if !nested */ 176 static int hpage; 177 module_param(hpage, int, 0444); 178 MODULE_PARM_DESC(hpage, "1m huge page backing support"); 179 180 /* 181 * For now we handle at most 16 double words as this is what the s390 base 182 * kernel handles and stores in the prefix page. If we ever need to go beyond 183 * this, this requires changes to code, but the external uapi can stay. 184 */ 185 #define SIZE_INTERNAL 16 186 187 /* 188 * Base feature mask that defines default mask for facilities. Consists of the 189 * defines in FACILITIES_KVM and the non-hypervisor managed bits. 190 */ 191 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM }; 192 /* 193 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL 194 * and defines the facilities that can be enabled via a cpu model. 195 */ 196 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL }; 197 198 static unsigned long kvm_s390_fac_size(void) 199 { 200 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64); 201 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64); 202 BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) > 203 sizeof(S390_lowcore.stfle_fac_list)); 204 205 return SIZE_INTERNAL; 206 } 207 208 /* available cpu features supported by kvm */ 209 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS); 210 /* available subfunctions indicated via query / "test bit" */ 211 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc; 212 213 static struct gmap_notifier gmap_notifier; 214 static struct gmap_notifier vsie_gmap_notifier; 215 debug_info_t *kvm_s390_dbf; 216 217 /* Section: not file related */ 218 int kvm_arch_hardware_enable(void) 219 { 220 /* every s390 is virtualization enabled ;-) */ 221 return 0; 222 } 223 224 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, 225 unsigned long end); 226 227 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta) 228 { 229 u8 delta_idx = 0; 230 231 /* 232 * The TOD jumps by delta, we have to compensate this by adding 233 * -delta to the epoch. 234 */ 235 delta = -delta; 236 237 /* sign-extension - we're adding to signed values below */ 238 if ((s64)delta < 0) 239 delta_idx = -1; 240 241 scb->epoch += delta; 242 if (scb->ecd & ECD_MEF) { 243 scb->epdx += delta_idx; 244 if (scb->epoch < delta) 245 scb->epdx += 1; 246 } 247 } 248 249 /* 250 * This callback is executed during stop_machine(). 
All CPUs are therefore 251 * temporarily stopped. In order not to change guest behavior, we have to 252 * disable preemption whenever we touch the epoch of kvm and the VCPUs, 253 * so a CPU won't be stopped while calculating with the epoch. 254 */ 255 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val, 256 void *v) 257 { 258 struct kvm *kvm; 259 struct kvm_vcpu *vcpu; 260 int i; 261 unsigned long long *delta = v; 262 263 list_for_each_entry(kvm, &vm_list, vm_list) { 264 kvm_for_each_vcpu(i, vcpu, kvm) { 265 kvm_clock_sync_scb(vcpu->arch.sie_block, *delta); 266 if (i == 0) { 267 kvm->arch.epoch = vcpu->arch.sie_block->epoch; 268 kvm->arch.epdx = vcpu->arch.sie_block->epdx; 269 } 270 if (vcpu->arch.cputm_enabled) 271 vcpu->arch.cputm_start += *delta; 272 if (vcpu->arch.vsie_block) 273 kvm_clock_sync_scb(vcpu->arch.vsie_block, 274 *delta); 275 } 276 } 277 return NOTIFY_OK; 278 } 279 280 static struct notifier_block kvm_clock_notifier = { 281 .notifier_call = kvm_clock_sync, 282 }; 283 284 int kvm_arch_hardware_setup(void) 285 { 286 gmap_notifier.notifier_call = kvm_gmap_notifier; 287 gmap_register_pte_notifier(&gmap_notifier); 288 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier; 289 gmap_register_pte_notifier(&vsie_gmap_notifier); 290 atomic_notifier_chain_register(&s390_epoch_delta_notifier, 291 &kvm_clock_notifier); 292 return 0; 293 } 294 295 void kvm_arch_hardware_unsetup(void) 296 { 297 gmap_unregister_pte_notifier(&gmap_notifier); 298 gmap_unregister_pte_notifier(&vsie_gmap_notifier); 299 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier, 300 &kvm_clock_notifier); 301 } 302 303 static void allow_cpu_feat(unsigned long nr) 304 { 305 set_bit_inv(nr, kvm_s390_available_cpu_feat); 306 } 307 308 static inline int plo_test_bit(unsigned char nr) 309 { 310 register unsigned long r0 asm("0") = (unsigned long) nr | 0x100; 311 int cc; 312 313 asm volatile( 314 /* Parameter registers are ignored for "test bit" */ 315 " plo 0,0,0,0(0)\n" 316 " ipm %0\n" 317 " srl %0,28\n" 318 : "=d" (cc) 319 : "d" (r0) 320 : "cc"); 321 return cc == 0; 322 } 323 324 static void kvm_s390_cpu_feat_init(void) 325 { 326 int i; 327 328 for (i = 0; i < 256; ++i) { 329 if (plo_test_bit(i)) 330 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7); 331 } 332 333 if (test_facility(28)) /* TOD-clock steering */ 334 ptff(kvm_s390_available_subfunc.ptff, 335 sizeof(kvm_s390_available_subfunc.ptff), 336 PTFF_QAF); 337 338 if (test_facility(17)) { /* MSA */ 339 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *) 340 kvm_s390_available_subfunc.kmac); 341 __cpacf_query(CPACF_KMC, (cpacf_mask_t *) 342 kvm_s390_available_subfunc.kmc); 343 __cpacf_query(CPACF_KM, (cpacf_mask_t *) 344 kvm_s390_available_subfunc.km); 345 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *) 346 kvm_s390_available_subfunc.kimd); 347 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *) 348 kvm_s390_available_subfunc.klmd); 349 } 350 if (test_facility(76)) /* MSA3 */ 351 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *) 352 kvm_s390_available_subfunc.pckmo); 353 if (test_facility(77)) { /* MSA4 */ 354 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *) 355 kvm_s390_available_subfunc.kmctr); 356 __cpacf_query(CPACF_KMF, (cpacf_mask_t *) 357 kvm_s390_available_subfunc.kmf); 358 __cpacf_query(CPACF_KMO, (cpacf_mask_t *) 359 kvm_s390_available_subfunc.kmo); 360 __cpacf_query(CPACF_PCC, (cpacf_mask_t *) 361 kvm_s390_available_subfunc.pcc); 362 } 363 if (test_facility(57)) /* MSA5 */ 364 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *) 365 
kvm_s390_available_subfunc.ppno); 366 367 if (test_facility(146)) /* MSA8 */ 368 __cpacf_query(CPACF_KMA, (cpacf_mask_t *) 369 kvm_s390_available_subfunc.kma); 370 371 if (MACHINE_HAS_ESOP) 372 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP); 373 /* 374 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow), 375 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing). 376 */ 377 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao || 378 !test_facility(3) || !nested) 379 return; 380 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2); 381 if (sclp.has_64bscao) 382 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO); 383 if (sclp.has_siif) 384 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF); 385 if (sclp.has_gpere) 386 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE); 387 if (sclp.has_gsls) 388 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS); 389 if (sclp.has_ib) 390 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB); 391 if (sclp.has_cei) 392 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI); 393 if (sclp.has_ibs) 394 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS); 395 if (sclp.has_kss) 396 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS); 397 /* 398 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make 399 * all skey handling functions read/set the skey from the PGSTE 400 * instead of the real storage key. 401 * 402 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make 403 * pages being detected as preserved although they are resident. 404 * 405 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will 406 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY. 407 * 408 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and 409 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be 410 * correctly shadowed. We can do that for the PGSTE but not for PTE.I. 411 * 412 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We 413 * cannot easily shadow the SCA because of the ipte lock. 414 */ 415 } 416 417 int kvm_arch_init(void *opaque) 418 { 419 int rc; 420 421 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long)); 422 if (!kvm_s390_dbf) 423 return -ENOMEM; 424 425 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) { 426 rc = -ENOMEM; 427 goto out_debug_unreg; 428 } 429 430 kvm_s390_cpu_feat_init(); 431 432 /* Register floating interrupt controller interface. 
*/ 433 rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC); 434 if (rc) { 435 pr_err("Failed to register FLIC rc=%d\n", rc); 436 goto out_debug_unreg; 437 } 438 return 0; 439 440 out_debug_unreg: 441 debug_unregister(kvm_s390_dbf); 442 return rc; 443 } 444 445 void kvm_arch_exit(void) 446 { 447 debug_unregister(kvm_s390_dbf); 448 } 449 450 /* Section: device related */ 451 long kvm_arch_dev_ioctl(struct file *filp, 452 unsigned int ioctl, unsigned long arg) 453 { 454 if (ioctl == KVM_S390_ENABLE_SIE) 455 return s390_enable_sie(); 456 return -EINVAL; 457 } 458 459 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) 460 { 461 int r; 462 463 switch (ext) { 464 case KVM_CAP_S390_PSW: 465 case KVM_CAP_S390_GMAP: 466 case KVM_CAP_SYNC_MMU: 467 #ifdef CONFIG_KVM_S390_UCONTROL 468 case KVM_CAP_S390_UCONTROL: 469 #endif 470 case KVM_CAP_ASYNC_PF: 471 case KVM_CAP_SYNC_REGS: 472 case KVM_CAP_ONE_REG: 473 case KVM_CAP_ENABLE_CAP: 474 case KVM_CAP_S390_CSS_SUPPORT: 475 case KVM_CAP_IOEVENTFD: 476 case KVM_CAP_DEVICE_CTRL: 477 case KVM_CAP_S390_IRQCHIP: 478 case KVM_CAP_VM_ATTRIBUTES: 479 case KVM_CAP_MP_STATE: 480 case KVM_CAP_IMMEDIATE_EXIT: 481 case KVM_CAP_S390_INJECT_IRQ: 482 case KVM_CAP_S390_USER_SIGP: 483 case KVM_CAP_S390_USER_STSI: 484 case KVM_CAP_S390_SKEYS: 485 case KVM_CAP_S390_IRQ_STATE: 486 case KVM_CAP_S390_USER_INSTR0: 487 case KVM_CAP_S390_CMMA_MIGRATION: 488 case KVM_CAP_S390_AIS: 489 case KVM_CAP_S390_AIS_MIGRATION: 490 r = 1; 491 break; 492 case KVM_CAP_S390_HPAGE_1M: 493 r = 0; 494 if (hpage && !kvm_is_ucontrol(kvm)) 495 r = 1; 496 break; 497 case KVM_CAP_S390_MEM_OP: 498 r = MEM_OP_MAX_SIZE; 499 break; 500 case KVM_CAP_NR_VCPUS: 501 case KVM_CAP_MAX_VCPUS: 502 r = KVM_S390_BSCA_CPU_SLOTS; 503 if (!kvm_s390_use_sca_entries()) 504 r = KVM_MAX_VCPUS; 505 else if (sclp.has_esca && sclp.has_64bscao) 506 r = KVM_S390_ESCA_CPU_SLOTS; 507 break; 508 case KVM_CAP_NR_MEMSLOTS: 509 r = KVM_USER_MEM_SLOTS; 510 break; 511 case KVM_CAP_S390_COW: 512 r = MACHINE_HAS_ESOP; 513 break; 514 case KVM_CAP_S390_VECTOR_REGISTERS: 515 r = MACHINE_HAS_VX; 516 break; 517 case KVM_CAP_S390_RI: 518 r = test_facility(64); 519 break; 520 case KVM_CAP_S390_GS: 521 r = test_facility(133); 522 break; 523 case KVM_CAP_S390_BPB: 524 r = test_facility(82); 525 break; 526 default: 527 r = 0; 528 } 529 return r; 530 } 531 532 static void kvm_s390_sync_dirty_log(struct kvm *kvm, 533 struct kvm_memory_slot *memslot) 534 { 535 int i; 536 gfn_t cur_gfn, last_gfn; 537 unsigned long gaddr, vmaddr; 538 struct gmap *gmap = kvm->arch.gmap; 539 DECLARE_BITMAP(bitmap, _PAGE_ENTRIES); 540 541 /* Loop over all guest segments */ 542 cur_gfn = memslot->base_gfn; 543 last_gfn = memslot->base_gfn + memslot->npages; 544 for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) { 545 gaddr = gfn_to_gpa(cur_gfn); 546 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn); 547 if (kvm_is_error_hva(vmaddr)) 548 continue; 549 550 bitmap_zero(bitmap, _PAGE_ENTRIES); 551 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr); 552 for (i = 0; i < _PAGE_ENTRIES; i++) { 553 if (test_bit(i, bitmap)) 554 mark_page_dirty(kvm, cur_gfn + i); 555 } 556 557 if (fatal_signal_pending(current)) 558 return; 559 cond_resched(); 560 } 561 } 562 563 /* Section: vm related */ 564 static void sca_del_vcpu(struct kvm_vcpu *vcpu); 565 566 /* 567 * Get (and clear) the dirty memory log for a memory slot. 
568 */ 569 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, 570 struct kvm_dirty_log *log) 571 { 572 int r; 573 unsigned long n; 574 struct kvm_memslots *slots; 575 struct kvm_memory_slot *memslot; 576 int is_dirty = 0; 577 578 if (kvm_is_ucontrol(kvm)) 579 return -EINVAL; 580 581 mutex_lock(&kvm->slots_lock); 582 583 r = -EINVAL; 584 if (log->slot >= KVM_USER_MEM_SLOTS) 585 goto out; 586 587 slots = kvm_memslots(kvm); 588 memslot = id_to_memslot(slots, log->slot); 589 r = -ENOENT; 590 if (!memslot->dirty_bitmap) 591 goto out; 592 593 kvm_s390_sync_dirty_log(kvm, memslot); 594 r = kvm_get_dirty_log(kvm, log, &is_dirty); 595 if (r) 596 goto out; 597 598 /* Clear the dirty log */ 599 if (is_dirty) { 600 n = kvm_dirty_bitmap_bytes(memslot); 601 memset(memslot->dirty_bitmap, 0, n); 602 } 603 r = 0; 604 out: 605 mutex_unlock(&kvm->slots_lock); 606 return r; 607 } 608 609 static void icpt_operexc_on_all_vcpus(struct kvm *kvm) 610 { 611 unsigned int i; 612 struct kvm_vcpu *vcpu; 613 614 kvm_for_each_vcpu(i, vcpu, kvm) { 615 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu); 616 } 617 } 618 619 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) 620 { 621 int r; 622 623 if (cap->flags) 624 return -EINVAL; 625 626 switch (cap->cap) { 627 case KVM_CAP_S390_IRQCHIP: 628 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP"); 629 kvm->arch.use_irqchip = 1; 630 r = 0; 631 break; 632 case KVM_CAP_S390_USER_SIGP: 633 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP"); 634 kvm->arch.user_sigp = 1; 635 r = 0; 636 break; 637 case KVM_CAP_S390_VECTOR_REGISTERS: 638 mutex_lock(&kvm->lock); 639 if (kvm->created_vcpus) { 640 r = -EBUSY; 641 } else if (MACHINE_HAS_VX) { 642 set_kvm_facility(kvm->arch.model.fac_mask, 129); 643 set_kvm_facility(kvm->arch.model.fac_list, 129); 644 if (test_facility(134)) { 645 set_kvm_facility(kvm->arch.model.fac_mask, 134); 646 set_kvm_facility(kvm->arch.model.fac_list, 134); 647 } 648 if (test_facility(135)) { 649 set_kvm_facility(kvm->arch.model.fac_mask, 135); 650 set_kvm_facility(kvm->arch.model.fac_list, 135); 651 } 652 r = 0; 653 } else 654 r = -EINVAL; 655 mutex_unlock(&kvm->lock); 656 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s", 657 r ? "(not available)" : "(success)"); 658 break; 659 case KVM_CAP_S390_RI: 660 r = -EINVAL; 661 mutex_lock(&kvm->lock); 662 if (kvm->created_vcpus) { 663 r = -EBUSY; 664 } else if (test_facility(64)) { 665 set_kvm_facility(kvm->arch.model.fac_mask, 64); 666 set_kvm_facility(kvm->arch.model.fac_list, 64); 667 r = 0; 668 } 669 mutex_unlock(&kvm->lock); 670 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s", 671 r ? "(not available)" : "(success)"); 672 break; 673 case KVM_CAP_S390_AIS: 674 mutex_lock(&kvm->lock); 675 if (kvm->created_vcpus) { 676 r = -EBUSY; 677 } else { 678 set_kvm_facility(kvm->arch.model.fac_mask, 72); 679 set_kvm_facility(kvm->arch.model.fac_list, 72); 680 r = 0; 681 } 682 mutex_unlock(&kvm->lock); 683 VM_EVENT(kvm, 3, "ENABLE: AIS %s", 684 r ? "(not available)" : "(success)"); 685 break; 686 case KVM_CAP_S390_GS: 687 r = -EINVAL; 688 mutex_lock(&kvm->lock); 689 if (kvm->created_vcpus) { 690 r = -EBUSY; 691 } else if (test_facility(133)) { 692 set_kvm_facility(kvm->arch.model.fac_mask, 133); 693 set_kvm_facility(kvm->arch.model.fac_list, 133); 694 r = 0; 695 } 696 mutex_unlock(&kvm->lock); 697 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s", 698 r ? 
"(not available)" : "(success)"); 699 break; 700 case KVM_CAP_S390_HPAGE_1M: 701 mutex_lock(&kvm->lock); 702 if (kvm->created_vcpus) 703 r = -EBUSY; 704 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm)) 705 r = -EINVAL; 706 else { 707 r = 0; 708 down_write(&kvm->mm->mmap_sem); 709 kvm->mm->context.allow_gmap_hpage_1m = 1; 710 up_write(&kvm->mm->mmap_sem); 711 /* 712 * We might have to create fake 4k page 713 * tables. To avoid that the hardware works on 714 * stale PGSTEs, we emulate these instructions. 715 */ 716 kvm->arch.use_skf = 0; 717 kvm->arch.use_pfmfi = 0; 718 } 719 mutex_unlock(&kvm->lock); 720 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s", 721 r ? "(not available)" : "(success)"); 722 break; 723 case KVM_CAP_S390_USER_STSI: 724 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI"); 725 kvm->arch.user_stsi = 1; 726 r = 0; 727 break; 728 case KVM_CAP_S390_USER_INSTR0: 729 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0"); 730 kvm->arch.user_instr0 = 1; 731 icpt_operexc_on_all_vcpus(kvm); 732 r = 0; 733 break; 734 default: 735 r = -EINVAL; 736 break; 737 } 738 return r; 739 } 740 741 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) 742 { 743 int ret; 744 745 switch (attr->attr) { 746 case KVM_S390_VM_MEM_LIMIT_SIZE: 747 ret = 0; 748 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes", 749 kvm->arch.mem_limit); 750 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr)) 751 ret = -EFAULT; 752 break; 753 default: 754 ret = -ENXIO; 755 break; 756 } 757 return ret; 758 } 759 760 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) 761 { 762 int ret; 763 unsigned int idx; 764 switch (attr->attr) { 765 case KVM_S390_VM_MEM_ENABLE_CMMA: 766 ret = -ENXIO; 767 if (!sclp.has_cmma) 768 break; 769 770 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support"); 771 mutex_lock(&kvm->lock); 772 if (kvm->created_vcpus) 773 ret = -EBUSY; 774 else if (kvm->mm->context.allow_gmap_hpage_1m) 775 ret = -EINVAL; 776 else { 777 kvm->arch.use_cmma = 1; 778 /* Not compatible with cmma. 
*/ 779 kvm->arch.use_pfmfi = 0; 780 ret = 0; 781 } 782 mutex_unlock(&kvm->lock); 783 break; 784 case KVM_S390_VM_MEM_CLR_CMMA: 785 ret = -ENXIO; 786 if (!sclp.has_cmma) 787 break; 788 ret = -EINVAL; 789 if (!kvm->arch.use_cmma) 790 break; 791 792 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states"); 793 mutex_lock(&kvm->lock); 794 idx = srcu_read_lock(&kvm->srcu); 795 s390_reset_cmma(kvm->arch.gmap->mm); 796 srcu_read_unlock(&kvm->srcu, idx); 797 mutex_unlock(&kvm->lock); 798 ret = 0; 799 break; 800 case KVM_S390_VM_MEM_LIMIT_SIZE: { 801 unsigned long new_limit; 802 803 if (kvm_is_ucontrol(kvm)) 804 return -EINVAL; 805 806 if (get_user(new_limit, (u64 __user *)attr->addr)) 807 return -EFAULT; 808 809 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT && 810 new_limit > kvm->arch.mem_limit) 811 return -E2BIG; 812 813 if (!new_limit) 814 return -EINVAL; 815 816 /* gmap_create takes last usable address */ 817 if (new_limit != KVM_S390_NO_MEM_LIMIT) 818 new_limit -= 1; 819 820 ret = -EBUSY; 821 mutex_lock(&kvm->lock); 822 if (!kvm->created_vcpus) { 823 /* gmap_create will round the limit up */ 824 struct gmap *new = gmap_create(current->mm, new_limit); 825 826 if (!new) { 827 ret = -ENOMEM; 828 } else { 829 gmap_remove(kvm->arch.gmap); 830 new->private = kvm; 831 kvm->arch.gmap = new; 832 ret = 0; 833 } 834 } 835 mutex_unlock(&kvm->lock); 836 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit); 837 VM_EVENT(kvm, 3, "New guest asce: 0x%pK", 838 (void *) kvm->arch.gmap->asce); 839 break; 840 } 841 default: 842 ret = -ENXIO; 843 break; 844 } 845 return ret; 846 } 847 848 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu); 849 850 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm) 851 { 852 struct kvm_vcpu *vcpu; 853 int i; 854 855 kvm_s390_vcpu_block_all(kvm); 856 857 kvm_for_each_vcpu(i, vcpu, kvm) { 858 kvm_s390_vcpu_crypto_setup(vcpu); 859 /* recreate the shadow crycb by leaving the VSIE handler */ 860 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu); 861 } 862 863 kvm_s390_vcpu_unblock_all(kvm); 864 } 865 866 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr) 867 { 868 mutex_lock(&kvm->lock); 869 switch (attr->attr) { 870 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW: 871 if (!test_kvm_facility(kvm, 76)) { 872 mutex_unlock(&kvm->lock); 873 return -EINVAL; 874 } 875 get_random_bytes( 876 kvm->arch.crypto.crycb->aes_wrapping_key_mask, 877 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); 878 kvm->arch.crypto.aes_kw = 1; 879 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support"); 880 break; 881 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW: 882 if (!test_kvm_facility(kvm, 76)) { 883 mutex_unlock(&kvm->lock); 884 return -EINVAL; 885 } 886 get_random_bytes( 887 kvm->arch.crypto.crycb->dea_wrapping_key_mask, 888 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); 889 kvm->arch.crypto.dea_kw = 1; 890 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support"); 891 break; 892 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW: 893 if (!test_kvm_facility(kvm, 76)) { 894 mutex_unlock(&kvm->lock); 895 return -EINVAL; 896 } 897 kvm->arch.crypto.aes_kw = 0; 898 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0, 899 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); 900 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support"); 901 break; 902 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW: 903 if (!test_kvm_facility(kvm, 76)) { 904 mutex_unlock(&kvm->lock); 905 return -EINVAL; 906 } 907 kvm->arch.crypto.dea_kw = 0; 908 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0, 909 
sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); 910 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support"); 911 break; 912 case KVM_S390_VM_CRYPTO_ENABLE_APIE: 913 if (!ap_instructions_available()) { 914 mutex_unlock(&kvm->lock); 915 return -EOPNOTSUPP; 916 } 917 kvm->arch.crypto.apie = 1; 918 break; 919 case KVM_S390_VM_CRYPTO_DISABLE_APIE: 920 if (!ap_instructions_available()) { 921 mutex_unlock(&kvm->lock); 922 return -EOPNOTSUPP; 923 } 924 kvm->arch.crypto.apie = 0; 925 break; 926 default: 927 mutex_unlock(&kvm->lock); 928 return -ENXIO; 929 } 930 931 kvm_s390_vcpu_crypto_reset_all(kvm); 932 mutex_unlock(&kvm->lock); 933 return 0; 934 } 935 936 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req) 937 { 938 int cx; 939 struct kvm_vcpu *vcpu; 940 941 kvm_for_each_vcpu(cx, vcpu, kvm) 942 kvm_s390_sync_request(req, vcpu); 943 } 944 945 /* 946 * Must be called with kvm->srcu held to avoid races on memslots, and with 947 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration. 948 */ 949 static int kvm_s390_vm_start_migration(struct kvm *kvm) 950 { 951 struct kvm_memory_slot *ms; 952 struct kvm_memslots *slots; 953 unsigned long ram_pages = 0; 954 int slotnr; 955 956 /* migration mode already enabled */ 957 if (kvm->arch.migration_mode) 958 return 0; 959 slots = kvm_memslots(kvm); 960 if (!slots || !slots->used_slots) 961 return -EINVAL; 962 963 if (!kvm->arch.use_cmma) { 964 kvm->arch.migration_mode = 1; 965 return 0; 966 } 967 /* mark all the pages in active slots as dirty */ 968 for (slotnr = 0; slotnr < slots->used_slots; slotnr++) { 969 ms = slots->memslots + slotnr; 970 /* 971 * The second half of the bitmap is only used on x86, 972 * and would be wasted otherwise, so we put it to good 973 * use here to keep track of the state of the storage 974 * attributes. 975 */ 976 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms)); 977 ram_pages += ms->npages; 978 } 979 atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages); 980 kvm->arch.migration_mode = 1; 981 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION); 982 return 0; 983 } 984 985 /* 986 * Must be called with kvm->slots_lock to avoid races with ourselves and 987 * kvm_s390_vm_start_migration. 
988 */ 989 static int kvm_s390_vm_stop_migration(struct kvm *kvm) 990 { 991 /* migration mode already disabled */ 992 if (!kvm->arch.migration_mode) 993 return 0; 994 kvm->arch.migration_mode = 0; 995 if (kvm->arch.use_cmma) 996 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION); 997 return 0; 998 } 999 1000 static int kvm_s390_vm_set_migration(struct kvm *kvm, 1001 struct kvm_device_attr *attr) 1002 { 1003 int res = -ENXIO; 1004 1005 mutex_lock(&kvm->slots_lock); 1006 switch (attr->attr) { 1007 case KVM_S390_VM_MIGRATION_START: 1008 res = kvm_s390_vm_start_migration(kvm); 1009 break; 1010 case KVM_S390_VM_MIGRATION_STOP: 1011 res = kvm_s390_vm_stop_migration(kvm); 1012 break; 1013 default: 1014 break; 1015 } 1016 mutex_unlock(&kvm->slots_lock); 1017 1018 return res; 1019 } 1020 1021 static int kvm_s390_vm_get_migration(struct kvm *kvm, 1022 struct kvm_device_attr *attr) 1023 { 1024 u64 mig = kvm->arch.migration_mode; 1025 1026 if (attr->attr != KVM_S390_VM_MIGRATION_STATUS) 1027 return -ENXIO; 1028 1029 if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig))) 1030 return -EFAULT; 1031 return 0; 1032 } 1033 1034 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr) 1035 { 1036 struct kvm_s390_vm_tod_clock gtod; 1037 1038 if (copy_from_user(>od, (void __user *)attr->addr, sizeof(gtod))) 1039 return -EFAULT; 1040 1041 if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx) 1042 return -EINVAL; 1043 kvm_s390_set_tod_clock(kvm, >od); 1044 1045 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx", 1046 gtod.epoch_idx, gtod.tod); 1047 1048 return 0; 1049 } 1050 1051 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr) 1052 { 1053 u8 gtod_high; 1054 1055 if (copy_from_user(>od_high, (void __user *)attr->addr, 1056 sizeof(gtod_high))) 1057 return -EFAULT; 1058 1059 if (gtod_high != 0) 1060 return -EINVAL; 1061 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high); 1062 1063 return 0; 1064 } 1065 1066 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr) 1067 { 1068 struct kvm_s390_vm_tod_clock gtod = { 0 }; 1069 1070 if (copy_from_user(>od.tod, (void __user *)attr->addr, 1071 sizeof(gtod.tod))) 1072 return -EFAULT; 1073 1074 kvm_s390_set_tod_clock(kvm, >od); 1075 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod); 1076 return 0; 1077 } 1078 1079 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr) 1080 { 1081 int ret; 1082 1083 if (attr->flags) 1084 return -EINVAL; 1085 1086 switch (attr->attr) { 1087 case KVM_S390_VM_TOD_EXT: 1088 ret = kvm_s390_set_tod_ext(kvm, attr); 1089 break; 1090 case KVM_S390_VM_TOD_HIGH: 1091 ret = kvm_s390_set_tod_high(kvm, attr); 1092 break; 1093 case KVM_S390_VM_TOD_LOW: 1094 ret = kvm_s390_set_tod_low(kvm, attr); 1095 break; 1096 default: 1097 ret = -ENXIO; 1098 break; 1099 } 1100 return ret; 1101 } 1102 1103 static void kvm_s390_get_tod_clock(struct kvm *kvm, 1104 struct kvm_s390_vm_tod_clock *gtod) 1105 { 1106 struct kvm_s390_tod_clock_ext htod; 1107 1108 preempt_disable(); 1109 1110 get_tod_clock_ext((char *)&htod); 1111 1112 gtod->tod = htod.tod + kvm->arch.epoch; 1113 gtod->epoch_idx = 0; 1114 if (test_kvm_facility(kvm, 139)) { 1115 gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx; 1116 if (gtod->tod < htod.tod) 1117 gtod->epoch_idx += 1; 1118 } 1119 1120 preempt_enable(); 1121 } 1122 1123 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr) 1124 { 1125 struct kvm_s390_vm_tod_clock gtod; 1126 1127 memset(>od, 0, 
sizeof(gtod)); 1128 kvm_s390_get_tod_clock(kvm, >od); 1129 if (copy_to_user((void __user *)attr->addr, >od, sizeof(gtod))) 1130 return -EFAULT; 1131 1132 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx", 1133 gtod.epoch_idx, gtod.tod); 1134 return 0; 1135 } 1136 1137 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr) 1138 { 1139 u8 gtod_high = 0; 1140 1141 if (copy_to_user((void __user *)attr->addr, >od_high, 1142 sizeof(gtod_high))) 1143 return -EFAULT; 1144 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high); 1145 1146 return 0; 1147 } 1148 1149 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr) 1150 { 1151 u64 gtod; 1152 1153 gtod = kvm_s390_get_tod_clock_fast(kvm); 1154 if (copy_to_user((void __user *)attr->addr, >od, sizeof(gtod))) 1155 return -EFAULT; 1156 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod); 1157 1158 return 0; 1159 } 1160 1161 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr) 1162 { 1163 int ret; 1164 1165 if (attr->flags) 1166 return -EINVAL; 1167 1168 switch (attr->attr) { 1169 case KVM_S390_VM_TOD_EXT: 1170 ret = kvm_s390_get_tod_ext(kvm, attr); 1171 break; 1172 case KVM_S390_VM_TOD_HIGH: 1173 ret = kvm_s390_get_tod_high(kvm, attr); 1174 break; 1175 case KVM_S390_VM_TOD_LOW: 1176 ret = kvm_s390_get_tod_low(kvm, attr); 1177 break; 1178 default: 1179 ret = -ENXIO; 1180 break; 1181 } 1182 return ret; 1183 } 1184 1185 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr) 1186 { 1187 struct kvm_s390_vm_cpu_processor *proc; 1188 u16 lowest_ibc, unblocked_ibc; 1189 int ret = 0; 1190 1191 mutex_lock(&kvm->lock); 1192 if (kvm->created_vcpus) { 1193 ret = -EBUSY; 1194 goto out; 1195 } 1196 proc = kzalloc(sizeof(*proc), GFP_KERNEL); 1197 if (!proc) { 1198 ret = -ENOMEM; 1199 goto out; 1200 } 1201 if (!copy_from_user(proc, (void __user *)attr->addr, 1202 sizeof(*proc))) { 1203 kvm->arch.model.cpuid = proc->cpuid; 1204 lowest_ibc = sclp.ibc >> 16 & 0xfff; 1205 unblocked_ibc = sclp.ibc & 0xfff; 1206 if (lowest_ibc && proc->ibc) { 1207 if (proc->ibc > unblocked_ibc) 1208 kvm->arch.model.ibc = unblocked_ibc; 1209 else if (proc->ibc < lowest_ibc) 1210 kvm->arch.model.ibc = lowest_ibc; 1211 else 1212 kvm->arch.model.ibc = proc->ibc; 1213 } 1214 memcpy(kvm->arch.model.fac_list, proc->fac_list, 1215 S390_ARCH_FAC_LIST_SIZE_BYTE); 1216 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx", 1217 kvm->arch.model.ibc, 1218 kvm->arch.model.cpuid); 1219 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx", 1220 kvm->arch.model.fac_list[0], 1221 kvm->arch.model.fac_list[1], 1222 kvm->arch.model.fac_list[2]); 1223 } else 1224 ret = -EFAULT; 1225 kfree(proc); 1226 out: 1227 mutex_unlock(&kvm->lock); 1228 return ret; 1229 } 1230 1231 static int kvm_s390_set_processor_feat(struct kvm *kvm, 1232 struct kvm_device_attr *attr) 1233 { 1234 struct kvm_s390_vm_cpu_feat data; 1235 1236 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data))) 1237 return -EFAULT; 1238 if (!bitmap_subset((unsigned long *) data.feat, 1239 kvm_s390_available_cpu_feat, 1240 KVM_S390_VM_CPU_FEAT_NR_BITS)) 1241 return -EINVAL; 1242 1243 mutex_lock(&kvm->lock); 1244 if (kvm->created_vcpus) { 1245 mutex_unlock(&kvm->lock); 1246 return -EBUSY; 1247 } 1248 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat, 1249 KVM_S390_VM_CPU_FEAT_NR_BITS); 1250 mutex_unlock(&kvm->lock); 1251 VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx", 
1252 data.feat[0], 1253 data.feat[1], 1254 data.feat[2]); 1255 return 0; 1256 } 1257 1258 static int kvm_s390_set_processor_subfunc(struct kvm *kvm, 1259 struct kvm_device_attr *attr) 1260 { 1261 /* 1262 * Once supported by kernel + hw, we have to store the subfunctions 1263 * in kvm->arch and remember that user space configured them. 1264 */ 1265 return -ENXIO; 1266 } 1267 1268 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) 1269 { 1270 int ret = -ENXIO; 1271 1272 switch (attr->attr) { 1273 case KVM_S390_VM_CPU_PROCESSOR: 1274 ret = kvm_s390_set_processor(kvm, attr); 1275 break; 1276 case KVM_S390_VM_CPU_PROCESSOR_FEAT: 1277 ret = kvm_s390_set_processor_feat(kvm, attr); 1278 break; 1279 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: 1280 ret = kvm_s390_set_processor_subfunc(kvm, attr); 1281 break; 1282 } 1283 return ret; 1284 } 1285 1286 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr) 1287 { 1288 struct kvm_s390_vm_cpu_processor *proc; 1289 int ret = 0; 1290 1291 proc = kzalloc(sizeof(*proc), GFP_KERNEL); 1292 if (!proc) { 1293 ret = -ENOMEM; 1294 goto out; 1295 } 1296 proc->cpuid = kvm->arch.model.cpuid; 1297 proc->ibc = kvm->arch.model.ibc; 1298 memcpy(&proc->fac_list, kvm->arch.model.fac_list, 1299 S390_ARCH_FAC_LIST_SIZE_BYTE); 1300 VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx", 1301 kvm->arch.model.ibc, 1302 kvm->arch.model.cpuid); 1303 VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx", 1304 kvm->arch.model.fac_list[0], 1305 kvm->arch.model.fac_list[1], 1306 kvm->arch.model.fac_list[2]); 1307 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc))) 1308 ret = -EFAULT; 1309 kfree(proc); 1310 out: 1311 return ret; 1312 } 1313 1314 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr) 1315 { 1316 struct kvm_s390_vm_cpu_machine *mach; 1317 int ret = 0; 1318 1319 mach = kzalloc(sizeof(*mach), GFP_KERNEL); 1320 if (!mach) { 1321 ret = -ENOMEM; 1322 goto out; 1323 } 1324 get_cpu_id((struct cpuid *) &mach->cpuid); 1325 mach->ibc = sclp.ibc; 1326 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask, 1327 S390_ARCH_FAC_LIST_SIZE_BYTE); 1328 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list, 1329 sizeof(S390_lowcore.stfle_fac_list)); 1330 VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx", 1331 kvm->arch.model.ibc, 1332 kvm->arch.model.cpuid); 1333 VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx", 1334 mach->fac_mask[0], 1335 mach->fac_mask[1], 1336 mach->fac_mask[2]); 1337 VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx", 1338 mach->fac_list[0], 1339 mach->fac_list[1], 1340 mach->fac_list[2]); 1341 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach))) 1342 ret = -EFAULT; 1343 kfree(mach); 1344 out: 1345 return ret; 1346 } 1347 1348 static int kvm_s390_get_processor_feat(struct kvm *kvm, 1349 struct kvm_device_attr *attr) 1350 { 1351 struct kvm_s390_vm_cpu_feat data; 1352 1353 bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat, 1354 KVM_S390_VM_CPU_FEAT_NR_BITS); 1355 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data))) 1356 return -EFAULT; 1357 VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx", 1358 data.feat[0], 1359 data.feat[1], 1360 data.feat[2]); 1361 return 0; 1362 } 1363 1364 static int kvm_s390_get_machine_feat(struct kvm *kvm, 1365 struct kvm_device_attr *attr) 1366 { 1367 struct kvm_s390_vm_cpu_feat data; 1368 1369 
bitmap_copy((unsigned long *) data.feat, 1370 kvm_s390_available_cpu_feat, 1371 KVM_S390_VM_CPU_FEAT_NR_BITS); 1372 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data))) 1373 return -EFAULT; 1374 VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx", 1375 data.feat[0], 1376 data.feat[1], 1377 data.feat[2]); 1378 return 0; 1379 } 1380 1381 static int kvm_s390_get_processor_subfunc(struct kvm *kvm, 1382 struct kvm_device_attr *attr) 1383 { 1384 /* 1385 * Once we can actually configure subfunctions (kernel + hw support), 1386 * we have to check if they were already set by user space, if so copy 1387 * them from kvm->arch. 1388 */ 1389 return -ENXIO; 1390 } 1391 1392 static int kvm_s390_get_machine_subfunc(struct kvm *kvm, 1393 struct kvm_device_attr *attr) 1394 { 1395 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc, 1396 sizeof(struct kvm_s390_vm_cpu_subfunc))) 1397 return -EFAULT; 1398 return 0; 1399 } 1400 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) 1401 { 1402 int ret = -ENXIO; 1403 1404 switch (attr->attr) { 1405 case KVM_S390_VM_CPU_PROCESSOR: 1406 ret = kvm_s390_get_processor(kvm, attr); 1407 break; 1408 case KVM_S390_VM_CPU_MACHINE: 1409 ret = kvm_s390_get_machine(kvm, attr); 1410 break; 1411 case KVM_S390_VM_CPU_PROCESSOR_FEAT: 1412 ret = kvm_s390_get_processor_feat(kvm, attr); 1413 break; 1414 case KVM_S390_VM_CPU_MACHINE_FEAT: 1415 ret = kvm_s390_get_machine_feat(kvm, attr); 1416 break; 1417 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: 1418 ret = kvm_s390_get_processor_subfunc(kvm, attr); 1419 break; 1420 case KVM_S390_VM_CPU_MACHINE_SUBFUNC: 1421 ret = kvm_s390_get_machine_subfunc(kvm, attr); 1422 break; 1423 } 1424 return ret; 1425 } 1426 1427 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr) 1428 { 1429 int ret; 1430 1431 switch (attr->group) { 1432 case KVM_S390_VM_MEM_CTRL: 1433 ret = kvm_s390_set_mem_control(kvm, attr); 1434 break; 1435 case KVM_S390_VM_TOD: 1436 ret = kvm_s390_set_tod(kvm, attr); 1437 break; 1438 case KVM_S390_VM_CPU_MODEL: 1439 ret = kvm_s390_set_cpu_model(kvm, attr); 1440 break; 1441 case KVM_S390_VM_CRYPTO: 1442 ret = kvm_s390_vm_set_crypto(kvm, attr); 1443 break; 1444 case KVM_S390_VM_MIGRATION: 1445 ret = kvm_s390_vm_set_migration(kvm, attr); 1446 break; 1447 default: 1448 ret = -ENXIO; 1449 break; 1450 } 1451 1452 return ret; 1453 } 1454 1455 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr) 1456 { 1457 int ret; 1458 1459 switch (attr->group) { 1460 case KVM_S390_VM_MEM_CTRL: 1461 ret = kvm_s390_get_mem_control(kvm, attr); 1462 break; 1463 case KVM_S390_VM_TOD: 1464 ret = kvm_s390_get_tod(kvm, attr); 1465 break; 1466 case KVM_S390_VM_CPU_MODEL: 1467 ret = kvm_s390_get_cpu_model(kvm, attr); 1468 break; 1469 case KVM_S390_VM_MIGRATION: 1470 ret = kvm_s390_vm_get_migration(kvm, attr); 1471 break; 1472 default: 1473 ret = -ENXIO; 1474 break; 1475 } 1476 1477 return ret; 1478 } 1479 1480 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) 1481 { 1482 int ret; 1483 1484 switch (attr->group) { 1485 case KVM_S390_VM_MEM_CTRL: 1486 switch (attr->attr) { 1487 case KVM_S390_VM_MEM_ENABLE_CMMA: 1488 case KVM_S390_VM_MEM_CLR_CMMA: 1489 ret = sclp.has_cmma ? 
0 : -ENXIO; 1490 break; 1491 case KVM_S390_VM_MEM_LIMIT_SIZE: 1492 ret = 0; 1493 break; 1494 default: 1495 ret = -ENXIO; 1496 break; 1497 } 1498 break; 1499 case KVM_S390_VM_TOD: 1500 switch (attr->attr) { 1501 case KVM_S390_VM_TOD_LOW: 1502 case KVM_S390_VM_TOD_HIGH: 1503 ret = 0; 1504 break; 1505 default: 1506 ret = -ENXIO; 1507 break; 1508 } 1509 break; 1510 case KVM_S390_VM_CPU_MODEL: 1511 switch (attr->attr) { 1512 case KVM_S390_VM_CPU_PROCESSOR: 1513 case KVM_S390_VM_CPU_MACHINE: 1514 case KVM_S390_VM_CPU_PROCESSOR_FEAT: 1515 case KVM_S390_VM_CPU_MACHINE_FEAT: 1516 case KVM_S390_VM_CPU_MACHINE_SUBFUNC: 1517 ret = 0; 1518 break; 1519 /* configuring subfunctions is not supported yet */ 1520 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: 1521 default: 1522 ret = -ENXIO; 1523 break; 1524 } 1525 break; 1526 case KVM_S390_VM_CRYPTO: 1527 switch (attr->attr) { 1528 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW: 1529 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW: 1530 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW: 1531 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW: 1532 ret = 0; 1533 break; 1534 case KVM_S390_VM_CRYPTO_ENABLE_APIE: 1535 case KVM_S390_VM_CRYPTO_DISABLE_APIE: 1536 ret = ap_instructions_available() ? 0 : -ENXIO; 1537 break; 1538 default: 1539 ret = -ENXIO; 1540 break; 1541 } 1542 break; 1543 case KVM_S390_VM_MIGRATION: 1544 ret = 0; 1545 break; 1546 default: 1547 ret = -ENXIO; 1548 break; 1549 } 1550 1551 return ret; 1552 } 1553 1554 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) 1555 { 1556 uint8_t *keys; 1557 uint64_t hva; 1558 int srcu_idx, i, r = 0; 1559 1560 if (args->flags != 0) 1561 return -EINVAL; 1562 1563 /* Is this guest using storage keys? */ 1564 if (!mm_uses_skeys(current->mm)) 1565 return KVM_S390_GET_SKEYS_NONE; 1566 1567 /* Enforce sane limit on memory allocation */ 1568 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX) 1569 return -EINVAL; 1570 1571 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL); 1572 if (!keys) 1573 return -ENOMEM; 1574 1575 down_read(¤t->mm->mmap_sem); 1576 srcu_idx = srcu_read_lock(&kvm->srcu); 1577 for (i = 0; i < args->count; i++) { 1578 hva = gfn_to_hva(kvm, args->start_gfn + i); 1579 if (kvm_is_error_hva(hva)) { 1580 r = -EFAULT; 1581 break; 1582 } 1583 1584 r = get_guest_storage_key(current->mm, hva, &keys[i]); 1585 if (r) 1586 break; 1587 } 1588 srcu_read_unlock(&kvm->srcu, srcu_idx); 1589 up_read(¤t->mm->mmap_sem); 1590 1591 if (!r) { 1592 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys, 1593 sizeof(uint8_t) * args->count); 1594 if (r) 1595 r = -EFAULT; 1596 } 1597 1598 kvfree(keys); 1599 return r; 1600 } 1601 1602 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) 1603 { 1604 uint8_t *keys; 1605 uint64_t hva; 1606 int srcu_idx, i, r = 0; 1607 bool unlocked; 1608 1609 if (args->flags != 0) 1610 return -EINVAL; 1611 1612 /* Enforce sane limit on memory allocation */ 1613 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX) 1614 return -EINVAL; 1615 1616 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL); 1617 if (!keys) 1618 return -ENOMEM; 1619 1620 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr, 1621 sizeof(uint8_t) * args->count); 1622 if (r) { 1623 r = -EFAULT; 1624 goto out; 1625 } 1626 1627 /* Enable storage key handling for the guest */ 1628 r = s390_enable_skey(); 1629 if (r) 1630 goto out; 1631 1632 i = 0; 1633 down_read(¤t->mm->mmap_sem); 1634 srcu_idx = srcu_read_lock(&kvm->srcu); 1635 while (i < args->count) { 1636 unlocked = false; 
1637 hva = gfn_to_hva(kvm, args->start_gfn + i); 1638 if (kvm_is_error_hva(hva)) { 1639 r = -EFAULT; 1640 break; 1641 } 1642 1643 /* Lowest order bit is reserved */ 1644 if (keys[i] & 0x01) { 1645 r = -EINVAL; 1646 break; 1647 } 1648 1649 r = set_guest_storage_key(current->mm, hva, keys[i], 0); 1650 if (r) { 1651 r = fixup_user_fault(current, current->mm, hva, 1652 FAULT_FLAG_WRITE, &unlocked); 1653 if (r) 1654 break; 1655 } 1656 if (!r) 1657 i++; 1658 } 1659 srcu_read_unlock(&kvm->srcu, srcu_idx); 1660 up_read(¤t->mm->mmap_sem); 1661 out: 1662 kvfree(keys); 1663 return r; 1664 } 1665 1666 /* 1667 * Base address and length must be sent at the start of each block, therefore 1668 * it's cheaper to send some clean data, as long as it's less than the size of 1669 * two longs. 1670 */ 1671 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *)) 1672 /* for consistency */ 1673 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX) 1674 1675 /* 1676 * Similar to gfn_to_memslot, but returns the index of a memslot also when the 1677 * address falls in a hole. In that case the index of one of the memslots 1678 * bordering the hole is returned. 1679 */ 1680 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn) 1681 { 1682 int start = 0, end = slots->used_slots; 1683 int slot = atomic_read(&slots->lru_slot); 1684 struct kvm_memory_slot *memslots = slots->memslots; 1685 1686 if (gfn >= memslots[slot].base_gfn && 1687 gfn < memslots[slot].base_gfn + memslots[slot].npages) 1688 return slot; 1689 1690 while (start < end) { 1691 slot = start + (end - start) / 2; 1692 1693 if (gfn >= memslots[slot].base_gfn) 1694 end = slot; 1695 else 1696 start = slot + 1; 1697 } 1698 1699 if (gfn >= memslots[start].base_gfn && 1700 gfn < memslots[start].base_gfn + memslots[start].npages) { 1701 atomic_set(&slots->lru_slot, start); 1702 } 1703 1704 return start; 1705 } 1706 1707 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args, 1708 u8 *res, unsigned long bufsize) 1709 { 1710 unsigned long pgstev, hva, cur_gfn = args->start_gfn; 1711 1712 args->count = 0; 1713 while (args->count < bufsize) { 1714 hva = gfn_to_hva(kvm, cur_gfn); 1715 /* 1716 * We return an error if the first value was invalid, but we 1717 * return successfully if at least one value was copied. 1718 */ 1719 if (kvm_is_error_hva(hva)) 1720 return args->count ? 
0 : -EFAULT; 1721 if (get_pgste(kvm->mm, hva, &pgstev) < 0) 1722 pgstev = 0; 1723 res[args->count++] = (pgstev >> 24) & 0x43; 1724 cur_gfn++; 1725 } 1726 1727 return 0; 1728 } 1729 1730 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots, 1731 unsigned long cur_gfn) 1732 { 1733 int slotidx = gfn_to_memslot_approx(slots, cur_gfn); 1734 struct kvm_memory_slot *ms = slots->memslots + slotidx; 1735 unsigned long ofs = cur_gfn - ms->base_gfn; 1736 1737 if (ms->base_gfn + ms->npages <= cur_gfn) { 1738 slotidx--; 1739 /* If we are above the highest slot, wrap around */ 1740 if (slotidx < 0) 1741 slotidx = slots->used_slots - 1; 1742 1743 ms = slots->memslots + slotidx; 1744 ofs = 0; 1745 } 1746 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs); 1747 while ((slotidx > 0) && (ofs >= ms->npages)) { 1748 slotidx--; 1749 ms = slots->memslots + slotidx; 1750 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0); 1751 } 1752 return ms->base_gfn + ofs; 1753 } 1754 1755 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args, 1756 u8 *res, unsigned long bufsize) 1757 { 1758 unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev; 1759 struct kvm_memslots *slots = kvm_memslots(kvm); 1760 struct kvm_memory_slot *ms; 1761 1762 cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn); 1763 ms = gfn_to_memslot(kvm, cur_gfn); 1764 args->count = 0; 1765 args->start_gfn = cur_gfn; 1766 if (!ms) 1767 return 0; 1768 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1); 1769 mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages; 1770 1771 while (args->count < bufsize) { 1772 hva = gfn_to_hva(kvm, cur_gfn); 1773 if (kvm_is_error_hva(hva)) 1774 return 0; 1775 /* Decrement only if we actually flipped the bit to 0 */ 1776 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms))) 1777 atomic64_dec(&kvm->arch.cmma_dirty_pages); 1778 if (get_pgste(kvm->mm, hva, &pgstev) < 0) 1779 pgstev = 0; 1780 /* Save the value */ 1781 res[args->count++] = (pgstev >> 24) & 0x43; 1782 /* If the next bit is too far away, stop. */ 1783 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE) 1784 return 0; 1785 /* If we reached the previous "next", find the next one */ 1786 if (cur_gfn == next_gfn) 1787 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1); 1788 /* Reached the end of memory or of the buffer, stop */ 1789 if ((next_gfn >= mem_end) || 1790 (next_gfn - args->start_gfn >= bufsize)) 1791 return 0; 1792 cur_gfn++; 1793 /* Reached the end of the current memslot, take the next one. */ 1794 if (cur_gfn - ms->base_gfn >= ms->npages) { 1795 ms = gfn_to_memslot(kvm, cur_gfn); 1796 if (!ms) 1797 return 0; 1798 } 1799 } 1800 return 0; 1801 } 1802 1803 /* 1804 * This function searches for the next page with dirty CMMA attributes, and 1805 * saves the attributes in the buffer up to either the end of the buffer or 1806 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found; 1807 * no trailing clean bytes are saved. 1808 * In case no dirty bits were found, or if CMMA was not enabled or used, the 1809 * output buffer will indicate 0 as length. 
1810 */ 1811 static int kvm_s390_get_cmma_bits(struct kvm *kvm, 1812 struct kvm_s390_cmma_log *args) 1813 { 1814 unsigned long bufsize; 1815 int srcu_idx, peek, ret; 1816 u8 *values; 1817 1818 if (!kvm->arch.use_cmma) 1819 return -ENXIO; 1820 /* Invalid/unsupported flags were specified */ 1821 if (args->flags & ~KVM_S390_CMMA_PEEK) 1822 return -EINVAL; 1823 /* Migration mode query, and we are not doing a migration */ 1824 peek = !!(args->flags & KVM_S390_CMMA_PEEK); 1825 if (!peek && !kvm->arch.migration_mode) 1826 return -EINVAL; 1827 /* CMMA is disabled or was not used, or the buffer has length zero */ 1828 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX); 1829 if (!bufsize || !kvm->mm->context.uses_cmm) { 1830 memset(args, 0, sizeof(*args)); 1831 return 0; 1832 } 1833 /* We are not peeking, and there are no dirty pages */ 1834 if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) { 1835 memset(args, 0, sizeof(*args)); 1836 return 0; 1837 } 1838 1839 values = vmalloc(bufsize); 1840 if (!values) 1841 return -ENOMEM; 1842 1843 down_read(&kvm->mm->mmap_sem); 1844 srcu_idx = srcu_read_lock(&kvm->srcu); 1845 if (peek) 1846 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize); 1847 else 1848 ret = kvm_s390_get_cmma(kvm, args, values, bufsize); 1849 srcu_read_unlock(&kvm->srcu, srcu_idx); 1850 up_read(&kvm->mm->mmap_sem); 1851 1852 if (kvm->arch.migration_mode) 1853 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages); 1854 else 1855 args->remaining = 0; 1856 1857 if (copy_to_user((void __user *)args->values, values, args->count)) 1858 ret = -EFAULT; 1859 1860 vfree(values); 1861 return ret; 1862 } 1863 1864 /* 1865 * This function sets the CMMA attributes for the given pages. If the input 1866 * buffer has zero length, no action is taken, otherwise the attributes are 1867 * set and the mm->context.uses_cmm flag is set. 
1868 */ 1869 static int kvm_s390_set_cmma_bits(struct kvm *kvm, 1870 const struct kvm_s390_cmma_log *args) 1871 { 1872 unsigned long hva, mask, pgstev, i; 1873 uint8_t *bits; 1874 int srcu_idx, r = 0; 1875 1876 mask = args->mask; 1877 1878 if (!kvm->arch.use_cmma) 1879 return -ENXIO; 1880 /* invalid/unsupported flags */ 1881 if (args->flags != 0) 1882 return -EINVAL; 1883 /* Enforce sane limit on memory allocation */ 1884 if (args->count > KVM_S390_CMMA_SIZE_MAX) 1885 return -EINVAL; 1886 /* Nothing to do */ 1887 if (args->count == 0) 1888 return 0; 1889 1890 bits = vmalloc(array_size(sizeof(*bits), args->count)); 1891 if (!bits) 1892 return -ENOMEM; 1893 1894 r = copy_from_user(bits, (void __user *)args->values, args->count); 1895 if (r) { 1896 r = -EFAULT; 1897 goto out; 1898 } 1899 1900 down_read(&kvm->mm->mmap_sem); 1901 srcu_idx = srcu_read_lock(&kvm->srcu); 1902 for (i = 0; i < args->count; i++) { 1903 hva = gfn_to_hva(kvm, args->start_gfn + i); 1904 if (kvm_is_error_hva(hva)) { 1905 r = -EFAULT; 1906 break; 1907 } 1908 1909 pgstev = bits[i]; 1910 pgstev = pgstev << 24; 1911 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT; 1912 set_pgste_bits(kvm->mm, hva, mask, pgstev); 1913 } 1914 srcu_read_unlock(&kvm->srcu, srcu_idx); 1915 up_read(&kvm->mm->mmap_sem); 1916 1917 if (!kvm->mm->context.uses_cmm) { 1918 down_write(&kvm->mm->mmap_sem); 1919 kvm->mm->context.uses_cmm = 1; 1920 up_write(&kvm->mm->mmap_sem); 1921 } 1922 out: 1923 vfree(bits); 1924 return r; 1925 } 1926 1927 long kvm_arch_vm_ioctl(struct file *filp, 1928 unsigned int ioctl, unsigned long arg) 1929 { 1930 struct kvm *kvm = filp->private_data; 1931 void __user *argp = (void __user *)arg; 1932 struct kvm_device_attr attr; 1933 int r; 1934 1935 switch (ioctl) { 1936 case KVM_S390_INTERRUPT: { 1937 struct kvm_s390_interrupt s390int; 1938 1939 r = -EFAULT; 1940 if (copy_from_user(&s390int, argp, sizeof(s390int))) 1941 break; 1942 r = kvm_s390_inject_vm(kvm, &s390int); 1943 break; 1944 } 1945 case KVM_CREATE_IRQCHIP: { 1946 struct kvm_irq_routing_entry routing; 1947 1948 r = -EINVAL; 1949 if (kvm->arch.use_irqchip) { 1950 /* Set up dummy routing. 
*/ 1951 memset(&routing, 0, sizeof(routing)); 1952 r = kvm_set_irq_routing(kvm, &routing, 0, 0); 1953 } 1954 break; 1955 } 1956 case KVM_SET_DEVICE_ATTR: { 1957 r = -EFAULT; 1958 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 1959 break; 1960 r = kvm_s390_vm_set_attr(kvm, &attr); 1961 break; 1962 } 1963 case KVM_GET_DEVICE_ATTR: { 1964 r = -EFAULT; 1965 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 1966 break; 1967 r = kvm_s390_vm_get_attr(kvm, &attr); 1968 break; 1969 } 1970 case KVM_HAS_DEVICE_ATTR: { 1971 r = -EFAULT; 1972 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 1973 break; 1974 r = kvm_s390_vm_has_attr(kvm, &attr); 1975 break; 1976 } 1977 case KVM_S390_GET_SKEYS: { 1978 struct kvm_s390_skeys args; 1979 1980 r = -EFAULT; 1981 if (copy_from_user(&args, argp, 1982 sizeof(struct kvm_s390_skeys))) 1983 break; 1984 r = kvm_s390_get_skeys(kvm, &args); 1985 break; 1986 } 1987 case KVM_S390_SET_SKEYS: { 1988 struct kvm_s390_skeys args; 1989 1990 r = -EFAULT; 1991 if (copy_from_user(&args, argp, 1992 sizeof(struct kvm_s390_skeys))) 1993 break; 1994 r = kvm_s390_set_skeys(kvm, &args); 1995 break; 1996 } 1997 case KVM_S390_GET_CMMA_BITS: { 1998 struct kvm_s390_cmma_log args; 1999 2000 r = -EFAULT; 2001 if (copy_from_user(&args, argp, sizeof(args))) 2002 break; 2003 mutex_lock(&kvm->slots_lock); 2004 r = kvm_s390_get_cmma_bits(kvm, &args); 2005 mutex_unlock(&kvm->slots_lock); 2006 if (!r) { 2007 r = copy_to_user(argp, &args, sizeof(args)); 2008 if (r) 2009 r = -EFAULT; 2010 } 2011 break; 2012 } 2013 case KVM_S390_SET_CMMA_BITS: { 2014 struct kvm_s390_cmma_log args; 2015 2016 r = -EFAULT; 2017 if (copy_from_user(&args, argp, sizeof(args))) 2018 break; 2019 mutex_lock(&kvm->slots_lock); 2020 r = kvm_s390_set_cmma_bits(kvm, &args); 2021 mutex_unlock(&kvm->slots_lock); 2022 break; 2023 } 2024 default: 2025 r = -ENOTTY; 2026 } 2027 2028 return r; 2029 } 2030 2031 static int kvm_s390_apxa_installed(void) 2032 { 2033 struct ap_config_info info; 2034 2035 if (ap_instructions_available()) { 2036 if (ap_qci(&info) == 0) 2037 return info.apxa; 2038 } 2039 2040 return 0; 2041 } 2042 2043 /* 2044 * The format of the crypto control block (CRYCB) is specified in the 3 low 2045 * order bits of the CRYCB designation (CRYCBD) field as follows: 2046 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the 2047 * AP extended addressing (APXA) facility are installed. 2048 * Format 1: The APXA facility is not installed but the MSAX3 facility is. 
2049 * Format 2: Both the APXA and MSAX3 facilities are installed 2050 */ 2051 static void kvm_s390_set_crycb_format(struct kvm *kvm) 2052 { 2053 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb; 2054 2055 /* Clear the CRYCB format bits - i.e., set format 0 by default */ 2056 kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK); 2057 2058 /* Check whether MSAX3 is installed */ 2059 if (!test_kvm_facility(kvm, 76)) 2060 return; 2061 2062 if (kvm_s390_apxa_installed()) 2063 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2; 2064 else 2065 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1; 2066 } 2067 2068 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm, 2069 unsigned long *aqm, unsigned long *adm) 2070 { 2071 struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb; 2072 2073 mutex_lock(&kvm->lock); 2074 kvm_s390_vcpu_block_all(kvm); 2075 2076 switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) { 2077 case CRYCB_FORMAT2: /* APCB1 use 256 bits */ 2078 memcpy(crycb->apcb1.apm, apm, 32); 2079 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx", 2080 apm[0], apm[1], apm[2], apm[3]); 2081 memcpy(crycb->apcb1.aqm, aqm, 32); 2082 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx", 2083 aqm[0], aqm[1], aqm[2], aqm[3]); 2084 memcpy(crycb->apcb1.adm, adm, 32); 2085 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx", 2086 adm[0], adm[1], adm[2], adm[3]); 2087 break; 2088 case CRYCB_FORMAT1: 2089 case CRYCB_FORMAT0: /* Fall through both use APCB0 */ 2090 memcpy(crycb->apcb0.apm, apm, 8); 2091 memcpy(crycb->apcb0.aqm, aqm, 2); 2092 memcpy(crycb->apcb0.adm, adm, 2); 2093 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x", 2094 apm[0], *((unsigned short *)aqm), 2095 *((unsigned short *)adm)); 2096 break; 2097 default: /* Can not happen */ 2098 break; 2099 } 2100 2101 /* recreate the shadow crycb for each vcpu */ 2102 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART); 2103 kvm_s390_vcpu_unblock_all(kvm); 2104 mutex_unlock(&kvm->lock); 2105 } 2106 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks); 2107 2108 void kvm_arch_crypto_clear_masks(struct kvm *kvm) 2109 { 2110 mutex_lock(&kvm->lock); 2111 kvm_s390_vcpu_block_all(kvm); 2112 2113 memset(&kvm->arch.crypto.crycb->apcb0, 0, 2114 sizeof(kvm->arch.crypto.crycb->apcb0)); 2115 memset(&kvm->arch.crypto.crycb->apcb1, 0, 2116 sizeof(kvm->arch.crypto.crycb->apcb1)); 2117 2118 VM_EVENT(kvm, 3, "%s", "CLR CRYCB:"); 2119 /* recreate the shadow crycb for each vcpu */ 2120 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART); 2121 kvm_s390_vcpu_unblock_all(kvm); 2122 mutex_unlock(&kvm->lock); 2123 } 2124 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks); 2125 2126 static u64 kvm_s390_get_initial_cpuid(void) 2127 { 2128 struct cpuid cpuid; 2129 2130 get_cpu_id(&cpuid); 2131 cpuid.version = 0xff; 2132 return *((u64 *) &cpuid); 2133 } 2134 2135 static void kvm_s390_crypto_init(struct kvm *kvm) 2136 { 2137 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb; 2138 kvm_s390_set_crycb_format(kvm); 2139 2140 if (!test_kvm_facility(kvm, 76)) 2141 return; 2142 2143 /* Enable AES/DEA protected key functions by default */ 2144 kvm->arch.crypto.aes_kw = 1; 2145 kvm->arch.crypto.dea_kw = 1; 2146 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 2147 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); 2148 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 2149 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); 2150 } 2151 2152 static void sca_dispose(struct kvm *kvm) 2153 { 2154 
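	/*
	 * The basic SCA is carved out of a single page in
	 * kvm_arch_init_vm() below and is therefore released with
	 * free_page(), while the extended SCA comes from
	 * alloc_pages_exact() in sca_switch_to_extended() and must be
	 * released with free_pages_exact(); hence the two paths here.
	 */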
if (kvm->arch.use_esca) 2155 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block)); 2156 else 2157 free_page((unsigned long)(kvm->arch.sca)); 2158 kvm->arch.sca = NULL; 2159 } 2160 2161 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) 2162 { 2163 gfp_t alloc_flags = GFP_KERNEL; 2164 int i, rc; 2165 char debug_name[16]; 2166 static unsigned long sca_offset; 2167 2168 rc = -EINVAL; 2169 #ifdef CONFIG_KVM_S390_UCONTROL 2170 if (type & ~KVM_VM_S390_UCONTROL) 2171 goto out_err; 2172 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN))) 2173 goto out_err; 2174 #else 2175 if (type) 2176 goto out_err; 2177 #endif 2178 2179 rc = s390_enable_sie(); 2180 if (rc) 2181 goto out_err; 2182 2183 rc = -ENOMEM; 2184 2185 if (!sclp.has_64bscao) 2186 alloc_flags |= GFP_DMA; 2187 rwlock_init(&kvm->arch.sca_lock); 2188 /* start with basic SCA */ 2189 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags); 2190 if (!kvm->arch.sca) 2191 goto out_err; 2192 spin_lock(&kvm_lock); 2193 sca_offset += 16; 2194 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE) 2195 sca_offset = 0; 2196 kvm->arch.sca = (struct bsca_block *) 2197 ((char *) kvm->arch.sca + sca_offset); 2198 spin_unlock(&kvm_lock); 2199 2200 sprintf(debug_name, "kvm-%u", current->pid); 2201 2202 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long)); 2203 if (!kvm->arch.dbf) 2204 goto out_err; 2205 2206 BUILD_BUG_ON(sizeof(struct sie_page2) != 4096); 2207 kvm->arch.sie_page2 = 2208 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA); 2209 if (!kvm->arch.sie_page2) 2210 goto out_err; 2211 2212 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list; 2213 2214 for (i = 0; i < kvm_s390_fac_size(); i++) { 2215 kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] & 2216 (kvm_s390_fac_base[i] | 2217 kvm_s390_fac_ext[i]); 2218 kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] & 2219 kvm_s390_fac_base[i]; 2220 } 2221 2222 /* we are always in czam mode - even on pre z14 machines */ 2223 set_kvm_facility(kvm->arch.model.fac_mask, 138); 2224 set_kvm_facility(kvm->arch.model.fac_list, 138); 2225 /* we emulate STHYI in kvm */ 2226 set_kvm_facility(kvm->arch.model.fac_mask, 74); 2227 set_kvm_facility(kvm->arch.model.fac_list, 74); 2228 if (MACHINE_HAS_TLB_GUEST) { 2229 set_kvm_facility(kvm->arch.model.fac_mask, 147); 2230 set_kvm_facility(kvm->arch.model.fac_list, 147); 2231 } 2232 2233 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid(); 2234 kvm->arch.model.ibc = sclp.ibc & 0x0fff; 2235 2236 kvm_s390_crypto_init(kvm); 2237 2238 mutex_init(&kvm->arch.float_int.ais_lock); 2239 spin_lock_init(&kvm->arch.float_int.lock); 2240 for (i = 0; i < FIRQ_LIST_COUNT; i++) 2241 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]); 2242 init_waitqueue_head(&kvm->arch.ipte_wq); 2243 mutex_init(&kvm->arch.ipte_mutex); 2244 2245 debug_register_view(kvm->arch.dbf, &debug_sprintf_view); 2246 VM_EVENT(kvm, 3, "vm created with type %lu", type); 2247 2248 if (type & KVM_VM_S390_UCONTROL) { 2249 kvm->arch.gmap = NULL; 2250 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT; 2251 } else { 2252 if (sclp.hamax == U64_MAX) 2253 kvm->arch.mem_limit = TASK_SIZE_MAX; 2254 else 2255 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX, 2256 sclp.hamax + 1); 2257 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1); 2258 if (!kvm->arch.gmap) 2259 goto out_err; 2260 kvm->arch.gmap->private = kvm; 2261 kvm->arch.gmap->pfault_enabled = 0; 2262 } 2263 2264 kvm->arch.use_pfmfi = sclp.has_pfmfi; 2265 kvm->arch.use_skf = 
sclp.has_skey; 2266 spin_lock_init(&kvm->arch.start_stop_lock); 2267 kvm_s390_vsie_init(kvm); 2268 kvm_s390_gisa_init(kvm); 2269 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid); 2270 2271 return 0; 2272 out_err: 2273 free_page((unsigned long)kvm->arch.sie_page2); 2274 debug_unregister(kvm->arch.dbf); 2275 sca_dispose(kvm); 2276 KVM_EVENT(3, "creation of vm failed: %d", rc); 2277 return rc; 2278 } 2279 2280 bool kvm_arch_has_vcpu_debugfs(void) 2281 { 2282 return false; 2283 } 2284 2285 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu) 2286 { 2287 return 0; 2288 } 2289 2290 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) 2291 { 2292 VCPU_EVENT(vcpu, 3, "%s", "free cpu"); 2293 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id); 2294 kvm_s390_clear_local_irqs(vcpu); 2295 kvm_clear_async_pf_completion_queue(vcpu); 2296 if (!kvm_is_ucontrol(vcpu->kvm)) 2297 sca_del_vcpu(vcpu); 2298 2299 if (kvm_is_ucontrol(vcpu->kvm)) 2300 gmap_remove(vcpu->arch.gmap); 2301 2302 if (vcpu->kvm->arch.use_cmma) 2303 kvm_s390_vcpu_unsetup_cmma(vcpu); 2304 free_page((unsigned long)(vcpu->arch.sie_block)); 2305 2306 kvm_vcpu_uninit(vcpu); 2307 kmem_cache_free(kvm_vcpu_cache, vcpu); 2308 } 2309 2310 static void kvm_free_vcpus(struct kvm *kvm) 2311 { 2312 unsigned int i; 2313 struct kvm_vcpu *vcpu; 2314 2315 kvm_for_each_vcpu(i, vcpu, kvm) 2316 kvm_arch_vcpu_destroy(vcpu); 2317 2318 mutex_lock(&kvm->lock); 2319 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) 2320 kvm->vcpus[i] = NULL; 2321 2322 atomic_set(&kvm->online_vcpus, 0); 2323 mutex_unlock(&kvm->lock); 2324 } 2325 2326 void kvm_arch_destroy_vm(struct kvm *kvm) 2327 { 2328 kvm_free_vcpus(kvm); 2329 sca_dispose(kvm); 2330 debug_unregister(kvm->arch.dbf); 2331 kvm_s390_gisa_destroy(kvm); 2332 free_page((unsigned long)kvm->arch.sie_page2); 2333 if (!kvm_is_ucontrol(kvm)) 2334 gmap_remove(kvm->arch.gmap); 2335 kvm_s390_destroy_adapters(kvm); 2336 kvm_s390_clear_float_irqs(kvm); 2337 kvm_s390_vsie_destroy(kvm); 2338 KVM_EVENT(3, "vm 0x%pK destroyed", kvm); 2339 } 2340 2341 /* Section: vcpu related */ 2342 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu) 2343 { 2344 vcpu->arch.gmap = gmap_create(current->mm, -1UL); 2345 if (!vcpu->arch.gmap) 2346 return -ENOMEM; 2347 vcpu->arch.gmap->private = vcpu->kvm; 2348 2349 return 0; 2350 } 2351 2352 static void sca_del_vcpu(struct kvm_vcpu *vcpu) 2353 { 2354 if (!kvm_s390_use_sca_entries()) 2355 return; 2356 read_lock(&vcpu->kvm->arch.sca_lock); 2357 if (vcpu->kvm->arch.use_esca) { 2358 struct esca_block *sca = vcpu->kvm->arch.sca; 2359 2360 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); 2361 sca->cpu[vcpu->vcpu_id].sda = 0; 2362 } else { 2363 struct bsca_block *sca = vcpu->kvm->arch.sca; 2364 2365 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); 2366 sca->cpu[vcpu->vcpu_id].sda = 0; 2367 } 2368 read_unlock(&vcpu->kvm->arch.sca_lock); 2369 } 2370 2371 static void sca_add_vcpu(struct kvm_vcpu *vcpu) 2372 { 2373 if (!kvm_s390_use_sca_entries()) { 2374 struct bsca_block *sca = vcpu->kvm->arch.sca; 2375 2376 /* we still need the basic sca for the ipte control */ 2377 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2378 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; 2379 return; 2380 } 2381 read_lock(&vcpu->kvm->arch.sca_lock); 2382 if (vcpu->kvm->arch.use_esca) { 2383 struct esca_block *sca = vcpu->kvm->arch.sca; 2384 2385 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; 2386 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2387 
vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU; 2388 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; 2389 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); 2390 } else { 2391 struct bsca_block *sca = vcpu->kvm->arch.sca; 2392 2393 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; 2394 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2395 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; 2396 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); 2397 } 2398 read_unlock(&vcpu->kvm->arch.sca_lock); 2399 } 2400 2401 /* Basic SCA to Extended SCA data copy routines */ 2402 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s) 2403 { 2404 d->sda = s->sda; 2405 d->sigp_ctrl.c = s->sigp_ctrl.c; 2406 d->sigp_ctrl.scn = s->sigp_ctrl.scn; 2407 } 2408 2409 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s) 2410 { 2411 int i; 2412 2413 d->ipte_control = s->ipte_control; 2414 d->mcn[0] = s->mcn; 2415 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++) 2416 sca_copy_entry(&d->cpu[i], &s->cpu[i]); 2417 } 2418 2419 static int sca_switch_to_extended(struct kvm *kvm) 2420 { 2421 struct bsca_block *old_sca = kvm->arch.sca; 2422 struct esca_block *new_sca; 2423 struct kvm_vcpu *vcpu; 2424 unsigned int vcpu_idx; 2425 u32 scaol, scaoh; 2426 2427 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO); 2428 if (!new_sca) 2429 return -ENOMEM; 2430 2431 scaoh = (u32)((u64)(new_sca) >> 32); 2432 scaol = (u32)(u64)(new_sca) & ~0x3fU; 2433 2434 kvm_s390_vcpu_block_all(kvm); 2435 write_lock(&kvm->arch.sca_lock); 2436 2437 sca_copy_b_to_e(new_sca, old_sca); 2438 2439 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) { 2440 vcpu->arch.sie_block->scaoh = scaoh; 2441 vcpu->arch.sie_block->scaol = scaol; 2442 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; 2443 } 2444 kvm->arch.sca = new_sca; 2445 kvm->arch.use_esca = 1; 2446 2447 write_unlock(&kvm->arch.sca_lock); 2448 kvm_s390_vcpu_unblock_all(kvm); 2449 2450 free_page((unsigned long)old_sca); 2451 2452 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)", 2453 old_sca, kvm->arch.sca); 2454 return 0; 2455 } 2456 2457 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id) 2458 { 2459 int rc; 2460 2461 if (!kvm_s390_use_sca_entries()) { 2462 if (id < KVM_MAX_VCPUS) 2463 return true; 2464 return false; 2465 } 2466 if (id < KVM_S390_BSCA_CPU_SLOTS) 2467 return true; 2468 if (!sclp.has_esca || !sclp.has_64bscao) 2469 return false; 2470 2471 mutex_lock(&kvm->lock); 2472 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm); 2473 mutex_unlock(&kvm->lock); 2474 2475 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS; 2476 } 2477 2478 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) 2479 { 2480 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 2481 kvm_clear_async_pf_completion_queue(vcpu); 2482 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX | 2483 KVM_SYNC_GPRS | 2484 KVM_SYNC_ACRS | 2485 KVM_SYNC_CRS | 2486 KVM_SYNC_ARCH0 | 2487 KVM_SYNC_PFAULT; 2488 kvm_s390_set_prefix(vcpu, 0); 2489 if (test_kvm_facility(vcpu->kvm, 64)) 2490 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; 2491 if (test_kvm_facility(vcpu->kvm, 82)) 2492 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC; 2493 if (test_kvm_facility(vcpu->kvm, 133)) 2494 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB; 2495 if (test_kvm_facility(vcpu->kvm, 156)) 2496 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN; 2497 /* fprs can be synchronized via vrs, even if the guest has no vx. With 2498 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format. 
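 * This is lossless because each floating point register is
 * architecturally the leftmost 64 bits of the corresponding vector
 * register (e.g. FPR 2 occupies bits 0-63 of VR 2), so syncing via the
 * vrs array covers the fprs as well.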
2499 */ 2500 if (MACHINE_HAS_VX) 2501 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS; 2502 else 2503 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS; 2504 2505 if (kvm_is_ucontrol(vcpu->kvm)) 2506 return __kvm_ucontrol_vcpu_init(vcpu); 2507 2508 return 0; 2509 } 2510 2511 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 2512 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2513 { 2514 WARN_ON_ONCE(vcpu->arch.cputm_start != 0); 2515 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 2516 vcpu->arch.cputm_start = get_tod_clock_fast(); 2517 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 2518 } 2519 2520 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 2521 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2522 { 2523 WARN_ON_ONCE(vcpu->arch.cputm_start == 0); 2524 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 2525 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start; 2526 vcpu->arch.cputm_start = 0; 2527 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 2528 } 2529 2530 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 2531 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2532 { 2533 WARN_ON_ONCE(vcpu->arch.cputm_enabled); 2534 vcpu->arch.cputm_enabled = true; 2535 __start_cpu_timer_accounting(vcpu); 2536 } 2537 2538 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 2539 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2540 { 2541 WARN_ON_ONCE(!vcpu->arch.cputm_enabled); 2542 __stop_cpu_timer_accounting(vcpu); 2543 vcpu->arch.cputm_enabled = false; 2544 } 2545 2546 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2547 { 2548 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 2549 __enable_cpu_timer_accounting(vcpu); 2550 preempt_enable(); 2551 } 2552 2553 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2554 { 2555 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 2556 __disable_cpu_timer_accounting(vcpu); 2557 preempt_enable(); 2558 } 2559 2560 /* set the cpu timer - may only be called from the VCPU thread itself */ 2561 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm) 2562 { 2563 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 2564 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 2565 if (vcpu->arch.cputm_enabled) 2566 vcpu->arch.cputm_start = get_tod_clock_fast(); 2567 vcpu->arch.sie_block->cputm = cputm; 2568 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 2569 preempt_enable(); 2570 } 2571 2572 /* update and get the cpu timer - can also be called from other VCPU threads */ 2573 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu) 2574 { 2575 unsigned int seq; 2576 __u64 value; 2577 2578 if (unlikely(!vcpu->arch.cputm_enabled)) 2579 return vcpu->arch.sie_block->cputm; 2580 2581 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 2582 do { 2583 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount); 2584 /* 2585 * If the writer would ever execute a read in the critical 2586 * section, e.g. in irq context, we have a deadlock. 
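 * The WARN_ON_ONCE below catches exactly that case: seeing an odd
 * sequence count while running on the VCPU's own physical CPU means we
 * are reading from inside our own write-side critical section.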
2587 */ 2588 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu); 2589 value = vcpu->arch.sie_block->cputm; 2590 /* if cputm_start is 0, accounting is being started/stopped */ 2591 if (likely(vcpu->arch.cputm_start)) 2592 value -= get_tod_clock_fast() - vcpu->arch.cputm_start; 2593 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1)); 2594 preempt_enable(); 2595 return value; 2596 } 2597 2598 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 2599 { 2600 2601 gmap_enable(vcpu->arch.enabled_gmap); 2602 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING); 2603 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) 2604 __start_cpu_timer_accounting(vcpu); 2605 vcpu->cpu = cpu; 2606 } 2607 2608 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 2609 { 2610 vcpu->cpu = -1; 2611 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) 2612 __stop_cpu_timer_accounting(vcpu); 2613 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING); 2614 vcpu->arch.enabled_gmap = gmap_get_enabled(); 2615 gmap_disable(vcpu->arch.enabled_gmap); 2616 2617 } 2618 2619 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) 2620 { 2621 /* this equals initial cpu reset in pop, but we don't switch to ESA */ 2622 vcpu->arch.sie_block->gpsw.mask = 0UL; 2623 vcpu->arch.sie_block->gpsw.addr = 0UL; 2624 kvm_s390_set_prefix(vcpu, 0); 2625 kvm_s390_set_cpu_timer(vcpu, 0); 2626 vcpu->arch.sie_block->ckc = 0UL; 2627 vcpu->arch.sie_block->todpr = 0; 2628 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64)); 2629 vcpu->arch.sie_block->gcr[0] = CR0_UNUSED_56 | 2630 CR0_INTERRUPT_KEY_SUBMASK | 2631 CR0_MEASUREMENT_ALERT_SUBMASK; 2632 vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 | 2633 CR14_UNUSED_33 | 2634 CR14_EXTERNAL_DAMAGE_SUBMASK; 2635 /* make sure the new fpc will be lazily loaded */ 2636 save_fpu_regs(); 2637 current->thread.fpu.fpc = 0; 2638 vcpu->arch.sie_block->gbea = 1; 2639 vcpu->arch.sie_block->pp = 0; 2640 vcpu->arch.sie_block->fpf &= ~FPF_BPBC; 2641 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 2642 kvm_clear_async_pf_completion_queue(vcpu); 2643 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) 2644 kvm_s390_vcpu_stop(vcpu); 2645 kvm_s390_clear_local_irqs(vcpu); 2646 } 2647 2648 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) 2649 { 2650 mutex_lock(&vcpu->kvm->lock); 2651 preempt_disable(); 2652 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch; 2653 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx; 2654 preempt_enable(); 2655 mutex_unlock(&vcpu->kvm->lock); 2656 if (!kvm_is_ucontrol(vcpu->kvm)) { 2657 vcpu->arch.gmap = vcpu->kvm->arch.gmap; 2658 sca_add_vcpu(vcpu); 2659 } 2660 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0) 2661 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC; 2662 /* make vcpu_load load the right gmap on the first trigger */ 2663 vcpu->arch.enabled_gmap = vcpu->arch.gmap; 2664 } 2665 2666 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu) 2667 { 2668 /* 2669 * If the AP instructions are not being interpreted and the MSAX3 2670 * facility is not configured for the guest, there is nothing to set up. 
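 *
 * For illustration only (not part of this file): apie is toggled by
 * userspace through the KVM_SET_DEVICE_ATTR vm ioctl, roughly
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CRYPTO,
 *		.attr  = KVM_S390_VM_CRYPTO_ENABLE_APIE,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 * with vm_fd being an assumption, while MSAX3 corresponds to facility
 * bit 76 in the guest CPU model.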
2671 */ 2672 if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76)) 2673 return; 2674 2675 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd; 2676 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA); 2677 vcpu->arch.sie_block->eca &= ~ECA_APIE; 2678 2679 if (vcpu->kvm->arch.crypto.apie) 2680 vcpu->arch.sie_block->eca |= ECA_APIE; 2681 2682 /* Set up protected key support */ 2683 if (vcpu->kvm->arch.crypto.aes_kw) 2684 vcpu->arch.sie_block->ecb3 |= ECB3_AES; 2685 if (vcpu->kvm->arch.crypto.dea_kw) 2686 vcpu->arch.sie_block->ecb3 |= ECB3_DEA; 2687 } 2688 2689 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu) 2690 { 2691 free_page(vcpu->arch.sie_block->cbrlo); 2692 vcpu->arch.sie_block->cbrlo = 0; 2693 } 2694 2695 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu) 2696 { 2697 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL); 2698 if (!vcpu->arch.sie_block->cbrlo) 2699 return -ENOMEM; 2700 return 0; 2701 } 2702 2703 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu) 2704 { 2705 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model; 2706 2707 vcpu->arch.sie_block->ibc = model->ibc; 2708 if (test_kvm_facility(vcpu->kvm, 7)) 2709 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list; 2710 } 2711 2712 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) 2713 { 2714 int rc = 0; 2715 2716 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | 2717 CPUSTAT_SM | 2718 CPUSTAT_STOPPED); 2719 2720 if (test_kvm_facility(vcpu->kvm, 78)) 2721 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2); 2722 else if (test_kvm_facility(vcpu->kvm, 8)) 2723 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED); 2724 2725 kvm_s390_vcpu_setup_model(vcpu); 2726 2727 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */ 2728 if (MACHINE_HAS_ESOP) 2729 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT; 2730 if (test_kvm_facility(vcpu->kvm, 9)) 2731 vcpu->arch.sie_block->ecb |= ECB_SRSI; 2732 if (test_kvm_facility(vcpu->kvm, 73)) 2733 vcpu->arch.sie_block->ecb |= ECB_TE; 2734 2735 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi) 2736 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI; 2737 if (test_kvm_facility(vcpu->kvm, 130)) 2738 vcpu->arch.sie_block->ecb2 |= ECB2_IEP; 2739 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI; 2740 if (sclp.has_cei) 2741 vcpu->arch.sie_block->eca |= ECA_CEI; 2742 if (sclp.has_ib) 2743 vcpu->arch.sie_block->eca |= ECA_IB; 2744 if (sclp.has_siif) 2745 vcpu->arch.sie_block->eca |= ECA_SII; 2746 if (sclp.has_sigpif) 2747 vcpu->arch.sie_block->eca |= ECA_SIGPI; 2748 if (test_kvm_facility(vcpu->kvm, 129)) { 2749 vcpu->arch.sie_block->eca |= ECA_VX; 2750 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; 2751 } 2752 if (test_kvm_facility(vcpu->kvm, 139)) 2753 vcpu->arch.sie_block->ecd |= ECD_MEF; 2754 if (test_kvm_facility(vcpu->kvm, 156)) 2755 vcpu->arch.sie_block->ecd |= ECD_ETOKENF; 2756 if (vcpu->arch.sie_block->gd) { 2757 vcpu->arch.sie_block->eca |= ECA_AIV; 2758 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u", 2759 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id); 2760 } 2761 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx) 2762 | SDNXC; 2763 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb; 2764 2765 if (sclp.has_kss) 2766 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS); 2767 else 2768 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; 2769 2770 if (vcpu->kvm->arch.use_cmma) { 2771 rc = kvm_s390_vcpu_setup_cmma(vcpu); 2772 if (rc) 2773 return rc; 2774 } 2775 
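	/*
	 * The clock comparator timer initialized below lets
	 * kvm_s390_idle_wakeup() bring a halted VCPU out of its wait state
	 * once the guest's clock comparator becomes pending.
	 */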
hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 2776 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; 2777 2778 vcpu->arch.sie_block->hpid = HPID_KVM; 2779 2780 kvm_s390_vcpu_crypto_setup(vcpu); 2781 2782 return rc; 2783 } 2784 2785 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, 2786 unsigned int id) 2787 { 2788 struct kvm_vcpu *vcpu; 2789 struct sie_page *sie_page; 2790 int rc = -EINVAL; 2791 2792 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id)) 2793 goto out; 2794 2795 rc = -ENOMEM; 2796 2797 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); 2798 if (!vcpu) 2799 goto out; 2800 2801 BUILD_BUG_ON(sizeof(struct sie_page) != 4096); 2802 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL); 2803 if (!sie_page) 2804 goto out_free_cpu; 2805 2806 vcpu->arch.sie_block = &sie_page->sie_block; 2807 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb; 2808 2809 /* the real guest size will always be smaller than msl */ 2810 vcpu->arch.sie_block->mso = 0; 2811 vcpu->arch.sie_block->msl = sclp.hamax; 2812 2813 vcpu->arch.sie_block->icpua = id; 2814 spin_lock_init(&vcpu->arch.local_int.lock); 2815 vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa; 2816 if (vcpu->arch.sie_block->gd && sclp.has_gisaf) 2817 vcpu->arch.sie_block->gd |= GISA_FORMAT1; 2818 seqcount_init(&vcpu->arch.cputm_seqcount); 2819 2820 rc = kvm_vcpu_init(vcpu, kvm, id); 2821 if (rc) 2822 goto out_free_sie_block; 2823 VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu, 2824 vcpu->arch.sie_block); 2825 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block); 2826 2827 return vcpu; 2828 out_free_sie_block: 2829 free_page((unsigned long)(vcpu->arch.sie_block)); 2830 out_free_cpu: 2831 kmem_cache_free(kvm_vcpu_cache, vcpu); 2832 out: 2833 return ERR_PTR(rc); 2834 } 2835 2836 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 2837 { 2838 return kvm_s390_vcpu_has_irq(vcpu, 0); 2839 } 2840 2841 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) 2842 { 2843 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE); 2844 } 2845 2846 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu) 2847 { 2848 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); 2849 exit_sie(vcpu); 2850 } 2851 2852 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu) 2853 { 2854 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); 2855 } 2856 2857 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu) 2858 { 2859 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20); 2860 exit_sie(vcpu); 2861 } 2862 2863 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu) 2864 { 2865 return atomic_read(&vcpu->arch.sie_block->prog20) & 2866 (PROG_BLOCK_SIE | PROG_REQUEST); 2867 } 2868 2869 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu) 2870 { 2871 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20); 2872 } 2873 2874 /* 2875 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running. 2876 * If the CPU is not running (e.g. waiting as idle) the function will 2877 * return immediately. 
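 * Note that this alone only kicks the CPU; callers that must keep it
 * out of SIE for a while (kvm_s390_vcpu_block(), kvm_s390_vcpu_request())
 * additionally set PROG_BLOCK_SIE or PROG_REQUEST in prog20 beforehand.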
*/ 2878 void exit_sie(struct kvm_vcpu *vcpu) 2879 { 2880 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT); 2881 kvm_s390_vsie_kick(vcpu); 2882 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE) 2883 cpu_relax(); 2884 } 2885 2886 /* Kick a guest cpu out of SIE to process a request synchronously */ 2887 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu) 2888 { 2889 kvm_make_request(req, vcpu); 2890 kvm_s390_vcpu_request(vcpu); 2891 } 2892 2893 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, 2894 unsigned long end) 2895 { 2896 struct kvm *kvm = gmap->private; 2897 struct kvm_vcpu *vcpu; 2898 unsigned long prefix; 2899 int i; 2900 2901 if (gmap_is_shadow(gmap)) 2902 return; 2903 if (start >= 1UL << 31) 2904 /* We are only interested in prefix pages */ 2905 return; 2906 kvm_for_each_vcpu(i, vcpu, kvm) { 2907 /* match against both prefix pages */ 2908 prefix = kvm_s390_get_prefix(vcpu); 2909 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) { 2910 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx", 2911 start, end); 2912 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu); 2913 } 2914 } 2915 } 2916 2917 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) 2918 { 2919 /* kvm common code refers to this, but never calls it */ 2920 BUG(); 2921 return 0; 2922 } 2923 2924 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, 2925 struct kvm_one_reg *reg) 2926 { 2927 int r = -EINVAL; 2928 2929 switch (reg->id) { 2930 case KVM_REG_S390_TODPR: 2931 r = put_user(vcpu->arch.sie_block->todpr, 2932 (u32 __user *)reg->addr); 2933 break; 2934 case KVM_REG_S390_EPOCHDIFF: 2935 r = put_user(vcpu->arch.sie_block->epoch, 2936 (u64 __user *)reg->addr); 2937 break; 2938 case KVM_REG_S390_CPU_TIMER: 2939 r = put_user(kvm_s390_get_cpu_timer(vcpu), 2940 (u64 __user *)reg->addr); 2941 break; 2942 case KVM_REG_S390_CLOCK_COMP: 2943 r = put_user(vcpu->arch.sie_block->ckc, 2944 (u64 __user *)reg->addr); 2945 break; 2946 case KVM_REG_S390_PFTOKEN: 2947 r = put_user(vcpu->arch.pfault_token, 2948 (u64 __user *)reg->addr); 2949 break; 2950 case KVM_REG_S390_PFCOMPARE: 2951 r = put_user(vcpu->arch.pfault_compare, 2952 (u64 __user *)reg->addr); 2953 break; 2954 case KVM_REG_S390_PFSELECT: 2955 r = put_user(vcpu->arch.pfault_select, 2956 (u64 __user *)reg->addr); 2957 break; 2958 case KVM_REG_S390_PP: 2959 r = put_user(vcpu->arch.sie_block->pp, 2960 (u64 __user *)reg->addr); 2961 break; 2962 case KVM_REG_S390_GBEA: 2963 r = put_user(vcpu->arch.sie_block->gbea, 2964 (u64 __user *)reg->addr); 2965 break; 2966 default: 2967 break; 2968 } 2969 2970 return r; 2971 } 2972 2973 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, 2974 struct kvm_one_reg *reg) 2975 { 2976 int r = -EINVAL; 2977 __u64 val; 2978 2979 switch (reg->id) { 2980 case KVM_REG_S390_TODPR: 2981 r = get_user(vcpu->arch.sie_block->todpr, 2982 (u32 __user *)reg->addr); 2983 break; 2984 case KVM_REG_S390_EPOCHDIFF: 2985 r = get_user(vcpu->arch.sie_block->epoch, 2986 (u64 __user *)reg->addr); 2987 break; 2988 case KVM_REG_S390_CPU_TIMER: 2989 r = get_user(val, (u64 __user *)reg->addr); 2990 if (!r) 2991 kvm_s390_set_cpu_timer(vcpu, val); 2992 break; 2993 case KVM_REG_S390_CLOCK_COMP: 2994 r = get_user(vcpu->arch.sie_block->ckc, 2995 (u64 __user *)reg->addr); 2996 break; 2997 case KVM_REG_S390_PFTOKEN: 2998 r = get_user(vcpu->arch.pfault_token, 2999 (u64 __user *)reg->addr); 3000 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 3001 kvm_clear_async_pf_completion_queue(vcpu); 3002 break; 3003 case 
KVM_REG_S390_PFCOMPARE: 3004 r = get_user(vcpu->arch.pfault_compare, 3005 (u64 __user *)reg->addr); 3006 break; 3007 case KVM_REG_S390_PFSELECT: 3008 r = get_user(vcpu->arch.pfault_select, 3009 (u64 __user *)reg->addr); 3010 break; 3011 case KVM_REG_S390_PP: 3012 r = get_user(vcpu->arch.sie_block->pp, 3013 (u64 __user *)reg->addr); 3014 break; 3015 case KVM_REG_S390_GBEA: 3016 r = get_user(vcpu->arch.sie_block->gbea, 3017 (u64 __user *)reg->addr); 3018 break; 3019 default: 3020 break; 3021 } 3022 3023 return r; 3024 } 3025 3026 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) 3027 { 3028 kvm_s390_vcpu_initial_reset(vcpu); 3029 return 0; 3030 } 3031 3032 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 3033 { 3034 vcpu_load(vcpu); 3035 memcpy(&vcpu->run->s.regs.gprs, ®s->gprs, sizeof(regs->gprs)); 3036 vcpu_put(vcpu); 3037 return 0; 3038 } 3039 3040 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 3041 { 3042 vcpu_load(vcpu); 3043 memcpy(®s->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs)); 3044 vcpu_put(vcpu); 3045 return 0; 3046 } 3047 3048 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 3049 struct kvm_sregs *sregs) 3050 { 3051 vcpu_load(vcpu); 3052 3053 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs)); 3054 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs)); 3055 3056 vcpu_put(vcpu); 3057 return 0; 3058 } 3059 3060 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, 3061 struct kvm_sregs *sregs) 3062 { 3063 vcpu_load(vcpu); 3064 3065 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs)); 3066 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs)); 3067 3068 vcpu_put(vcpu); 3069 return 0; 3070 } 3071 3072 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 3073 { 3074 int ret = 0; 3075 3076 vcpu_load(vcpu); 3077 3078 if (test_fp_ctl(fpu->fpc)) { 3079 ret = -EINVAL; 3080 goto out; 3081 } 3082 vcpu->run->s.regs.fpc = fpu->fpc; 3083 if (MACHINE_HAS_VX) 3084 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs, 3085 (freg_t *) fpu->fprs); 3086 else 3087 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs)); 3088 3089 out: 3090 vcpu_put(vcpu); 3091 return ret; 3092 } 3093 3094 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 3095 { 3096 vcpu_load(vcpu); 3097 3098 /* make sure we have the latest values */ 3099 save_fpu_regs(); 3100 if (MACHINE_HAS_VX) 3101 convert_vx_to_fp((freg_t *) fpu->fprs, 3102 (__vector128 *) vcpu->run->s.regs.vrs); 3103 else 3104 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs)); 3105 fpu->fpc = vcpu->run->s.regs.fpc; 3106 3107 vcpu_put(vcpu); 3108 return 0; 3109 } 3110 3111 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw) 3112 { 3113 int rc = 0; 3114 3115 if (!is_vcpu_stopped(vcpu)) 3116 rc = -EBUSY; 3117 else { 3118 vcpu->run->psw_mask = psw.mask; 3119 vcpu->run->psw_addr = psw.addr; 3120 } 3121 return rc; 3122 } 3123 3124 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, 3125 struct kvm_translation *tr) 3126 { 3127 return -EINVAL; /* not implemented yet */ 3128 } 3129 3130 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \ 3131 KVM_GUESTDBG_USE_HW_BP | \ 3132 KVM_GUESTDBG_ENABLE) 3133 3134 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, 3135 struct kvm_guest_debug *dbg) 3136 { 3137 int rc = 0; 3138 3139 vcpu_load(vcpu); 3140 3141 vcpu->guest_debug = 0; 3142 
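	/*
	 * Illustrative sketch (not from this file) of the matching
	 * userspace call; vcpu_fd is an assumption:
	 *
	 *	struct kvm_guest_debug dbg = {
	 *		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
	 *	};
	 *	ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
	 *
	 * Hardware breakpoints additionally need KVM_GUESTDBG_USE_HW_BP and
	 * a populated dbg.arch, which kvm_s390_import_bp_data() consumes.
	 */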
kvm_s390_clear_bp_data(vcpu); 3143 3144 if (dbg->control & ~VALID_GUESTDBG_FLAGS) { 3145 rc = -EINVAL; 3146 goto out; 3147 } 3148 if (!sclp.has_gpere) { 3149 rc = -EINVAL; 3150 goto out; 3151 } 3152 3153 if (dbg->control & KVM_GUESTDBG_ENABLE) { 3154 vcpu->guest_debug = dbg->control; 3155 /* enforce guest PER */ 3156 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P); 3157 3158 if (dbg->control & KVM_GUESTDBG_USE_HW_BP) 3159 rc = kvm_s390_import_bp_data(vcpu, dbg); 3160 } else { 3161 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P); 3162 vcpu->arch.guestdbg.last_bp = 0; 3163 } 3164 3165 if (rc) { 3166 vcpu->guest_debug = 0; 3167 kvm_s390_clear_bp_data(vcpu); 3168 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P); 3169 } 3170 3171 out: 3172 vcpu_put(vcpu); 3173 return rc; 3174 } 3175 3176 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, 3177 struct kvm_mp_state *mp_state) 3178 { 3179 int ret; 3180 3181 vcpu_load(vcpu); 3182 3183 /* CHECK_STOP and LOAD are not supported yet */ 3184 ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED : 3185 KVM_MP_STATE_OPERATING; 3186 3187 vcpu_put(vcpu); 3188 return ret; 3189 } 3190 3191 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, 3192 struct kvm_mp_state *mp_state) 3193 { 3194 int rc = 0; 3195 3196 vcpu_load(vcpu); 3197 3198 /* user space knows about this interface - let it control the state */ 3199 vcpu->kvm->arch.user_cpu_state_ctrl = 1; 3200 3201 switch (mp_state->mp_state) { 3202 case KVM_MP_STATE_STOPPED: 3203 kvm_s390_vcpu_stop(vcpu); 3204 break; 3205 case KVM_MP_STATE_OPERATING: 3206 kvm_s390_vcpu_start(vcpu); 3207 break; 3208 case KVM_MP_STATE_LOAD: 3209 case KVM_MP_STATE_CHECK_STOP: 3210 /* fall through - CHECK_STOP and LOAD are not supported yet */ 3211 default: 3212 rc = -ENXIO; 3213 } 3214 3215 vcpu_put(vcpu); 3216 return rc; 3217 } 3218 3219 static bool ibs_enabled(struct kvm_vcpu *vcpu) 3220 { 3221 return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS); 3222 } 3223 3224 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) 3225 { 3226 retry: 3227 kvm_s390_vcpu_request_handled(vcpu); 3228 if (!kvm_request_pending(vcpu)) 3229 return 0; 3230 /* 3231 * We use MMU_RELOAD just to re-arm the ipte notifier for the 3232 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock. 3233 * This ensures that the ipte instruction for this request has 3234 * already finished. We might race against a second unmapper that 3235 * wants to set the blocking bit. Lets just retry the request loop. 
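 * The prefix area spans two pages, which is why the re-protection
 * below covers PAGE_SIZE * 2 starting at the current prefix.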
3236 */ 3237 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) { 3238 int rc; 3239 rc = gmap_mprotect_notify(vcpu->arch.gmap, 3240 kvm_s390_get_prefix(vcpu), 3241 PAGE_SIZE * 2, PROT_WRITE); 3242 if (rc) { 3243 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); 3244 return rc; 3245 } 3246 goto retry; 3247 } 3248 3249 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) { 3250 vcpu->arch.sie_block->ihcpu = 0xffff; 3251 goto retry; 3252 } 3253 3254 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) { 3255 if (!ibs_enabled(vcpu)) { 3256 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1); 3257 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS); 3258 } 3259 goto retry; 3260 } 3261 3262 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) { 3263 if (ibs_enabled(vcpu)) { 3264 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0); 3265 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS); 3266 } 3267 goto retry; 3268 } 3269 3270 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) { 3271 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC; 3272 goto retry; 3273 } 3274 3275 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) { 3276 /* 3277 * Disable CMM virtualization; we will emulate the ESSA 3278 * instruction manually, in order to provide additional 3279 * functionalities needed for live migration. 3280 */ 3281 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA; 3282 goto retry; 3283 } 3284 3285 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) { 3286 /* 3287 * Re-enable CMM virtualization if CMMA is available and 3288 * CMM has been used. 3289 */ 3290 if ((vcpu->kvm->arch.use_cmma) && 3291 (vcpu->kvm->mm->context.uses_cmm)) 3292 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA; 3293 goto retry; 3294 } 3295 3296 /* nothing to do, just clear the request */ 3297 kvm_clear_request(KVM_REQ_UNHALT, vcpu); 3298 /* we left the vsie handler, nothing to do, just clear the request */ 3299 kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu); 3300 3301 return 0; 3302 } 3303 3304 void kvm_s390_set_tod_clock(struct kvm *kvm, 3305 const struct kvm_s390_vm_tod_clock *gtod) 3306 { 3307 struct kvm_vcpu *vcpu; 3308 struct kvm_s390_tod_clock_ext htod; 3309 int i; 3310 3311 mutex_lock(&kvm->lock); 3312 preempt_disable(); 3313 3314 get_tod_clock_ext((char *)&htod); 3315 3316 kvm->arch.epoch = gtod->tod - htod.tod; 3317 kvm->arch.epdx = 0; 3318 if (test_kvm_facility(kvm, 139)) { 3319 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx; 3320 if (kvm->arch.epoch > gtod->tod) 3321 kvm->arch.epdx -= 1; 3322 } 3323 3324 kvm_s390_vcpu_block_all(kvm); 3325 kvm_for_each_vcpu(i, vcpu, kvm) { 3326 vcpu->arch.sie_block->epoch = kvm->arch.epoch; 3327 vcpu->arch.sie_block->epdx = kvm->arch.epdx; 3328 } 3329 3330 kvm_s390_vcpu_unblock_all(kvm); 3331 preempt_enable(); 3332 mutex_unlock(&kvm->lock); 3333 } 3334 3335 /** 3336 * kvm_arch_fault_in_page - fault-in guest page if necessary 3337 * @vcpu: The corresponding virtual cpu 3338 * @gpa: Guest physical address 3339 * @writable: Whether the page should be writable or not 3340 * 3341 * Make sure that a guest page has been faulted-in on the host. 3342 * 3343 * Return: Zero on success, negative error code otherwise. 3344 */ 3345 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable) 3346 { 3347 return gmap_fault(vcpu->arch.gmap, gpa, 3348 writable ? 
FAULT_FLAG_WRITE : 0); 3349 } 3350 3351 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token, 3352 unsigned long token) 3353 { 3354 struct kvm_s390_interrupt inti; 3355 struct kvm_s390_irq irq; 3356 3357 if (start_token) { 3358 irq.u.ext.ext_params2 = token; 3359 irq.type = KVM_S390_INT_PFAULT_INIT; 3360 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq)); 3361 } else { 3362 inti.type = KVM_S390_INT_PFAULT_DONE; 3363 inti.parm64 = token; 3364 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti)); 3365 } 3366 } 3367 3368 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, 3369 struct kvm_async_pf *work) 3370 { 3371 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token); 3372 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token); 3373 } 3374 3375 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, 3376 struct kvm_async_pf *work) 3377 { 3378 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token); 3379 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token); 3380 } 3381 3382 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, 3383 struct kvm_async_pf *work) 3384 { 3385 /* s390 will always inject the page directly */ 3386 } 3387 3388 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu) 3389 { 3390 /* 3391 * s390 will always inject the page directly, 3392 * but we still want check_async_completion to cleanup 3393 */ 3394 return true; 3395 } 3396 3397 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu) 3398 { 3399 hva_t hva; 3400 struct kvm_arch_async_pf arch; 3401 int rc; 3402 3403 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 3404 return 0; 3405 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) != 3406 vcpu->arch.pfault_compare) 3407 return 0; 3408 if (psw_extint_disabled(vcpu)) 3409 return 0; 3410 if (kvm_s390_vcpu_has_irq(vcpu, 0)) 3411 return 0; 3412 if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK)) 3413 return 0; 3414 if (!vcpu->arch.gmap->pfault_enabled) 3415 return 0; 3416 3417 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr)); 3418 hva += current->thread.gmap_addr & ~PAGE_MASK; 3419 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8)) 3420 return 0; 3421 3422 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch); 3423 return rc; 3424 } 3425 3426 static int vcpu_pre_run(struct kvm_vcpu *vcpu) 3427 { 3428 int rc, cpuflags; 3429 3430 /* 3431 * On s390 notifications for arriving pages will be delivered directly 3432 * to the guest but the house keeping for completed pfaults is 3433 * handled outside the worker. 
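 * kvm_check_async_pf_completion() below does that housekeeping: it
 * drains the completion queue and injects the corresponding
 * PFAULT DONE interrupts via kvm_arch_async_page_present().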
3434 */ 3435 kvm_check_async_pf_completion(vcpu); 3436 3437 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14]; 3438 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15]; 3439 3440 if (need_resched()) 3441 schedule(); 3442 3443 if (test_cpu_flag(CIF_MCCK_PENDING)) 3444 s390_handle_mcck(); 3445 3446 if (!kvm_is_ucontrol(vcpu->kvm)) { 3447 rc = kvm_s390_deliver_pending_interrupts(vcpu); 3448 if (rc) 3449 return rc; 3450 } 3451 3452 rc = kvm_s390_handle_requests(vcpu); 3453 if (rc) 3454 return rc; 3455 3456 if (guestdbg_enabled(vcpu)) { 3457 kvm_s390_backup_guest_per_regs(vcpu); 3458 kvm_s390_patch_guest_per_regs(vcpu); 3459 } 3460 3461 vcpu->arch.sie_block->icptcode = 0; 3462 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags); 3463 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags); 3464 trace_kvm_s390_sie_enter(vcpu, cpuflags); 3465 3466 return 0; 3467 } 3468 3469 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) 3470 { 3471 struct kvm_s390_pgm_info pgm_info = { 3472 .code = PGM_ADDRESSING, 3473 }; 3474 u8 opcode, ilen; 3475 int rc; 3476 3477 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); 3478 trace_kvm_s390_sie_fault(vcpu); 3479 3480 /* 3481 * We want to inject an addressing exception, which is defined as a 3482 * suppressing or terminating exception. However, since we came here 3483 * by a DAT access exception, the PSW still points to the faulting 3484 * instruction since DAT exceptions are nullifying. So we've got 3485 * to look up the current opcode to get the length of the instruction 3486 * to be able to forward the PSW. 3487 */ 3488 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1); 3489 ilen = insn_length(opcode); 3490 if (rc < 0) { 3491 return rc; 3492 } else if (rc) { 3493 /* Instruction-Fetching Exceptions - we can't detect the ilen. 3494 * Forward by arbitrary ilc, injection will take care of 3495 * nullification if necessary. 
3496 */ 3497 pgm_info = vcpu->arch.pgm; 3498 ilen = 4; 3499 } 3500 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID; 3501 kvm_s390_forward_psw(vcpu, ilen); 3502 return kvm_s390_inject_prog_irq(vcpu, &pgm_info); 3503 } 3504 3505 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) 3506 { 3507 struct mcck_volatile_info *mcck_info; 3508 struct sie_page *sie_page; 3509 3510 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", 3511 vcpu->arch.sie_block->icptcode); 3512 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); 3513 3514 if (guestdbg_enabled(vcpu)) 3515 kvm_s390_restore_guest_per_regs(vcpu); 3516 3517 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14; 3518 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15; 3519 3520 if (exit_reason == -EINTR) { 3521 VCPU_EVENT(vcpu, 3, "%s", "machine check"); 3522 sie_page = container_of(vcpu->arch.sie_block, 3523 struct sie_page, sie_block); 3524 mcck_info = &sie_page->mcck_info; 3525 kvm_s390_reinject_machine_check(vcpu, mcck_info); 3526 return 0; 3527 } 3528 3529 if (vcpu->arch.sie_block->icptcode > 0) { 3530 int rc = kvm_handle_sie_intercept(vcpu); 3531 3532 if (rc != -EOPNOTSUPP) 3533 return rc; 3534 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC; 3535 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; 3536 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; 3537 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; 3538 return -EREMOTE; 3539 } else if (exit_reason != -EFAULT) { 3540 vcpu->stat.exit_null++; 3541 return 0; 3542 } else if (kvm_is_ucontrol(vcpu->kvm)) { 3543 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; 3544 vcpu->run->s390_ucontrol.trans_exc_code = 3545 current->thread.gmap_addr; 3546 vcpu->run->s390_ucontrol.pgm_code = 0x10; 3547 return -EREMOTE; 3548 } else if (current->thread.gmap_pfault) { 3549 trace_kvm_s390_major_guest_pfault(vcpu); 3550 current->thread.gmap_pfault = 0; 3551 if (kvm_arch_setup_async_pf(vcpu)) 3552 return 0; 3553 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1); 3554 } 3555 return vcpu_post_run_fault_in_sie(vcpu); 3556 } 3557 3558 static int __vcpu_run(struct kvm_vcpu *vcpu) 3559 { 3560 int rc, exit_reason; 3561 3562 /* 3563 * We try to hold kvm->srcu during most of vcpu_run (except when run- 3564 * ning the guest), so that memslots (and other stuff) are protected 3565 */ 3566 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 3567 3568 do { 3569 rc = vcpu_pre_run(vcpu); 3570 if (rc) 3571 break; 3572 3573 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 3574 /* 3575 * As PF_VCPU will be used in fault handler, between 3576 * guest_enter and guest_exit should be no uaccess. 
3577 */ 3578 local_irq_disable(); 3579 guest_enter_irqoff(); 3580 __disable_cpu_timer_accounting(vcpu); 3581 local_irq_enable(); 3582 exit_reason = sie64a(vcpu->arch.sie_block, 3583 vcpu->run->s.regs.gprs); 3584 local_irq_disable(); 3585 __enable_cpu_timer_accounting(vcpu); 3586 guest_exit_irqoff(); 3587 local_irq_enable(); 3588 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 3589 3590 rc = vcpu_post_run(vcpu, exit_reason); 3591 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc); 3592 3593 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 3594 return rc; 3595 } 3596 3597 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3598 { 3599 struct runtime_instr_cb *riccb; 3600 struct gs_cb *gscb; 3601 3602 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb; 3603 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb; 3604 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; 3605 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; 3606 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) 3607 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); 3608 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) { 3609 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128); 3610 /* some control register changes require a tlb flush */ 3611 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 3612 } 3613 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) { 3614 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm); 3615 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc; 3616 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr; 3617 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp; 3618 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea; 3619 } 3620 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) { 3621 vcpu->arch.pfault_token = kvm_run->s.regs.pft; 3622 vcpu->arch.pfault_select = kvm_run->s.regs.pfs; 3623 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc; 3624 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 3625 kvm_clear_async_pf_completion_queue(vcpu); 3626 } 3627 /* 3628 * If userspace sets the riccb (e.g. after migration) to a valid state, 3629 * we should enable RI here instead of doing the lazy enablement. 3630 */ 3631 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) && 3632 test_kvm_facility(vcpu->kvm, 64) && 3633 riccb->v && 3634 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) { 3635 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)"); 3636 vcpu->arch.sie_block->ecb3 |= ECB3_RI; 3637 } 3638 /* 3639 * If userspace sets the gscb (e.g. after migration) to non-zero, 3640 * we should enable GS here instead of doing the lazy enablement. 3641 */ 3642 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) && 3643 test_kvm_facility(vcpu->kvm, 133) && 3644 gscb->gssm && 3645 !vcpu->arch.gs_enabled) { 3646 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)"); 3647 vcpu->arch.sie_block->ecb |= ECB_GS; 3648 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; 3649 vcpu->arch.gs_enabled = 1; 3650 } 3651 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) && 3652 test_kvm_facility(vcpu->kvm, 82)) { 3653 vcpu->arch.sie_block->fpf &= ~FPF_BPBC; 3654 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? 
FPF_BPBC : 0; 3655 } 3656 save_access_regs(vcpu->arch.host_acrs); 3657 restore_access_regs(vcpu->run->s.regs.acrs); 3658 /* save host (userspace) fprs/vrs */ 3659 save_fpu_regs(); 3660 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc; 3661 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs; 3662 if (MACHINE_HAS_VX) 3663 current->thread.fpu.regs = vcpu->run->s.regs.vrs; 3664 else 3665 current->thread.fpu.regs = vcpu->run->s.regs.fprs; 3666 current->thread.fpu.fpc = vcpu->run->s.regs.fpc; 3667 if (test_fp_ctl(current->thread.fpu.fpc)) 3668 /* User space provided an invalid FPC, let's clear it */ 3669 current->thread.fpu.fpc = 0; 3670 if (MACHINE_HAS_GS) { 3671 preempt_disable(); 3672 __ctl_set_bit(2, 4); 3673 if (current->thread.gs_cb) { 3674 vcpu->arch.host_gscb = current->thread.gs_cb; 3675 save_gs_cb(vcpu->arch.host_gscb); 3676 } 3677 if (vcpu->arch.gs_enabled) { 3678 current->thread.gs_cb = (struct gs_cb *) 3679 &vcpu->run->s.regs.gscb; 3680 restore_gs_cb(current->thread.gs_cb); 3681 } 3682 preempt_enable(); 3683 } 3684 /* SIE will load etoken directly from SDNX and therefore kvm_run */ 3685 3686 kvm_run->kvm_dirty_regs = 0; 3687 } 3688 3689 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3690 { 3691 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask; 3692 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; 3693 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu); 3694 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128); 3695 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu); 3696 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc; 3697 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr; 3698 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp; 3699 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea; 3700 kvm_run->s.regs.pft = vcpu->arch.pfault_token; 3701 kvm_run->s.regs.pfs = vcpu->arch.pfault_select; 3702 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare; 3703 kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC; 3704 save_access_regs(vcpu->run->s.regs.acrs); 3705 restore_access_regs(vcpu->arch.host_acrs); 3706 /* Save guest register state */ 3707 save_fpu_regs(); 3708 vcpu->run->s.regs.fpc = current->thread.fpu.fpc; 3709 /* Restore will be done lazily at return */ 3710 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc; 3711 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs; 3712 if (MACHINE_HAS_GS) { 3713 __ctl_set_bit(2, 4); 3714 if (vcpu->arch.gs_enabled) 3715 save_gs_cb(current->thread.gs_cb); 3716 preempt_disable(); 3717 current->thread.gs_cb = vcpu->arch.host_gscb; 3718 restore_gs_cb(vcpu->arch.host_gscb); 3719 preempt_enable(); 3720 if (!vcpu->arch.host_gscb) 3721 __ctl_clear_bit(2, 4); 3722 vcpu->arch.host_gscb = NULL; 3723 } 3724 /* SIE will save etoken directly into SDNX and therefore kvm_run */ 3725 } 3726 3727 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3728 { 3729 int rc; 3730 3731 if (kvm_run->immediate_exit) 3732 return -EINTR; 3733 3734 vcpu_load(vcpu); 3735 3736 if (guestdbg_exit_pending(vcpu)) { 3737 kvm_s390_prepare_debug_exit(vcpu); 3738 rc = 0; 3739 goto out; 3740 } 3741 3742 kvm_sigset_activate(vcpu); 3743 3744 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) { 3745 kvm_s390_vcpu_start(vcpu); 3746 } else if (is_vcpu_stopped(vcpu)) { 3747 pr_err_ratelimited("can't run stopped vcpu %d\n", 3748 vcpu->vcpu_id); 3749 rc = -EINVAL; 3750 goto out; 3751 } 3752 3753 sync_regs(vcpu, kvm_run); 3754 enable_cpu_timer_accounting(vcpu); 3755 3756 might_fault(); 3757 rc = 
__vcpu_run(vcpu); 3758 3759 if (signal_pending(current) && !rc) { 3760 kvm_run->exit_reason = KVM_EXIT_INTR; 3761 rc = -EINTR; 3762 } 3763 3764 if (guestdbg_exit_pending(vcpu) && !rc) { 3765 kvm_s390_prepare_debug_exit(vcpu); 3766 rc = 0; 3767 } 3768 3769 if (rc == -EREMOTE) { 3770 /* userspace support is needed, kvm_run has been prepared */ 3771 rc = 0; 3772 } 3773 3774 disable_cpu_timer_accounting(vcpu); 3775 store_regs(vcpu, kvm_run); 3776 3777 kvm_sigset_deactivate(vcpu); 3778 3779 vcpu->stat.exit_userspace++; 3780 out: 3781 vcpu_put(vcpu); 3782 return rc; 3783 } 3784 3785 /* 3786 * store status at address 3787 * we use have two special cases: 3788 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit 3789 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix 3790 */ 3791 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa) 3792 { 3793 unsigned char archmode = 1; 3794 freg_t fprs[NUM_FPRS]; 3795 unsigned int px; 3796 u64 clkcomp, cputm; 3797 int rc; 3798 3799 px = kvm_s390_get_prefix(vcpu); 3800 if (gpa == KVM_S390_STORE_STATUS_NOADDR) { 3801 if (write_guest_abs(vcpu, 163, &archmode, 1)) 3802 return -EFAULT; 3803 gpa = 0; 3804 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) { 3805 if (write_guest_real(vcpu, 163, &archmode, 1)) 3806 return -EFAULT; 3807 gpa = px; 3808 } else 3809 gpa -= __LC_FPREGS_SAVE_AREA; 3810 3811 /* manually convert vector registers if necessary */ 3812 if (MACHINE_HAS_VX) { 3813 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs); 3814 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, 3815 fprs, 128); 3816 } else { 3817 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, 3818 vcpu->run->s.regs.fprs, 128); 3819 } 3820 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA, 3821 vcpu->run->s.regs.gprs, 128); 3822 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA, 3823 &vcpu->arch.sie_block->gpsw, 16); 3824 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA, 3825 &px, 4); 3826 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA, 3827 &vcpu->run->s.regs.fpc, 4); 3828 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA, 3829 &vcpu->arch.sie_block->todpr, 4); 3830 cputm = kvm_s390_get_cpu_timer(vcpu); 3831 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA, 3832 &cputm, 8); 3833 clkcomp = vcpu->arch.sie_block->ckc >> 8; 3834 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA, 3835 &clkcomp, 8); 3836 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA, 3837 &vcpu->run->s.regs.acrs, 64); 3838 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA, 3839 &vcpu->arch.sie_block->gcr, 128); 3840 return rc ? -EFAULT : 0; 3841 } 3842 3843 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) 3844 { 3845 /* 3846 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy 3847 * switch in the run ioctl. 
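 * (For illustration: userspace reaches this path through the
 * KVM_S390_STORE_STATUS vcpu ioctl, e.g.
 * ioctl(vcpu_fd, KVM_S390_STORE_STATUS, KVM_S390_STORE_STATUS_NOADDR),
 * where vcpu_fd is an assumption.)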
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * switch in the run ioctl. Let's update our copies before we save
	 * them into the save area.
	 */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}

static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	if (!sclp.has_ibs)
		return;
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}

void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}
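
/*
 * Summary of the IBS policy implemented by kvm_s390_vcpu_start() above and
 * kvm_s390_vcpu_stop() below: IBS is only useful while exactly one VCPU is
 * running. Starting a second VCPU therefore drops any possibly still
 * pending ENABLE requests on all VCPUs, and stopping the second-to-last
 * VCPU re-enables IBS on the single remaining runner. All transitions are
 * serialized by arch.start_stop_lock.
 */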
void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}

static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r, srcu_idx;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags)
		return -EINVAL;

	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;

	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_FETCH);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_STORE);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}
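
/*
 * Illustrative only: a userspace read of guest logical memory through the
 * KVM_S390_MEM_OP vcpu ioctl handled above would look roughly like this.
 * vcpu_fd, guest_addr, len and buffer are placeholders and error handling
 * is omitted; len must not exceed MEM_OP_MAX_SIZE:
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = guest_addr,
 *		.size  = len,
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)(unsigned long)buffer,
 *		.ar    = 0,
 *	};
 *	ret = ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 *
 * With KVM_S390_MEMOP_F_CHECK_ONLY set in .flags the access is only
 * checked; no data is transferred.
 */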
long kvm_arch_vcpu_async_ioctl(struct file *filp,
			       unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			return -EFAULT;
		return kvm_s390_inject_vcpu(vcpu, &s390irq);
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq;

		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			return -EFAULT;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		return kvm_s390_inject_vcpu(vcpu, &s390irq);
	}
	}
	return -ENOIOCTLCMD;
}

long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	vcpu_load(vcpu);

	switch (ioctl) {
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;
		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}

	vcpu_put(vcpu);
	return r;
}

vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}

/* Section: memory related */
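/*
 * Illustrative only: a memslot is registered from userspace with the
 * KVM_SET_USER_MEMORY_REGION vm ioctl; as checked below, the userspace
 * address and the size have to be aligned to the 1 MB segment boundary.
 * vm_fd, ram and ram_size are placeholders (ram being a 1 MB aligned
 * mapping of ram_size bytes) and error handling is omitted:
 *
 *	struct kvm_userspace_memory_region region = {
 *		.slot = 0,
 *		.guest_phys_addr = 0,
 *		.memory_size = ram_size,
 *		.userspace_addr = (__u64)(unsigned long)ram,
 *	};
 *	ret = ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
 */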
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/*
	 * A few sanity checks. Memory slots have to start and end on a
	 * segment boundary (1 MB). The memory in userland may be
	 * fragmented into various different vmas. It is okay to mmap()
	 * and munmap() memory in this slot at any time after this call.
	 */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
		return -EINVAL;

	return 0;
}

void kvm_arch_commit_memory_region(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				const struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new,
				enum kvm_mr_change change)
{
	int rc;

	/* If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
			      mem->guest_phys_addr, mem->memory_size);
	if (rc)
		pr_warn("failed to commit memory region\n");
	return;
}

static inline unsigned long nonhyp_mask(int i)
{
	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}

void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
	vcpu->valid_wakeup = false;
}

static int __init kvm_s390_init(void)
{
	int i;

	if (!sclp.has_sief2) {
		pr_info("SIE not available\n");
		return -ENODEV;
	}

	if (nested && hpage) {
		pr_info("nested (vSIE) and hpage (huge page backing) can currently not be activated concurrently\n");
		return -EINVAL;
	}

	for (i = 0; i < 16; i++)
		kvm_s390_fac_base[i] |=
			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");