// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2018
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include <asm/ap.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define KMSG_COMPONENT "kvm-s390"
#undef pr_fmt
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM

struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_io_request", VCPU_STAT(exit_io_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_pei", VCPU_STAT(exit_pei) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_ckc", VCPU_STAT(deliver_ckc) },
	{ "deliver_cputm", VCPU_STAT(deliver_cputm) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio", VCPU_STAT(deliver_virtio) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
"deliver_program", VCPU_STAT(deliver_program) }, 94 { "deliver_io", VCPU_STAT(deliver_io) }, 95 { "deliver_machine_check", VCPU_STAT(deliver_machine_check) }, 96 { "exit_wait_state", VCPU_STAT(exit_wait_state) }, 97 { "inject_ckc", VCPU_STAT(inject_ckc) }, 98 { "inject_cputm", VCPU_STAT(inject_cputm) }, 99 { "inject_external_call", VCPU_STAT(inject_external_call) }, 100 { "inject_float_mchk", VM_STAT(inject_float_mchk) }, 101 { "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) }, 102 { "inject_io", VM_STAT(inject_io) }, 103 { "inject_mchk", VCPU_STAT(inject_mchk) }, 104 { "inject_pfault_done", VM_STAT(inject_pfault_done) }, 105 { "inject_program", VCPU_STAT(inject_program) }, 106 { "inject_restart", VCPU_STAT(inject_restart) }, 107 { "inject_service_signal", VM_STAT(inject_service_signal) }, 108 { "inject_set_prefix", VCPU_STAT(inject_set_prefix) }, 109 { "inject_stop_signal", VCPU_STAT(inject_stop_signal) }, 110 { "inject_pfault_init", VCPU_STAT(inject_pfault_init) }, 111 { "inject_virtio", VM_STAT(inject_virtio) }, 112 { "instruction_epsw", VCPU_STAT(instruction_epsw) }, 113 { "instruction_gs", VCPU_STAT(instruction_gs) }, 114 { "instruction_io_other", VCPU_STAT(instruction_io_other) }, 115 { "instruction_lpsw", VCPU_STAT(instruction_lpsw) }, 116 { "instruction_lpswe", VCPU_STAT(instruction_lpswe) }, 117 { "instruction_pfmf", VCPU_STAT(instruction_pfmf) }, 118 { "instruction_ptff", VCPU_STAT(instruction_ptff) }, 119 { "instruction_stidp", VCPU_STAT(instruction_stidp) }, 120 { "instruction_sck", VCPU_STAT(instruction_sck) }, 121 { "instruction_sckpf", VCPU_STAT(instruction_sckpf) }, 122 { "instruction_spx", VCPU_STAT(instruction_spx) }, 123 { "instruction_stpx", VCPU_STAT(instruction_stpx) }, 124 { "instruction_stap", VCPU_STAT(instruction_stap) }, 125 { "instruction_iske", VCPU_STAT(instruction_iske) }, 126 { "instruction_ri", VCPU_STAT(instruction_ri) }, 127 { "instruction_rrbe", VCPU_STAT(instruction_rrbe) }, 128 { "instruction_sske", VCPU_STAT(instruction_sske) }, 129 { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) }, 130 { "instruction_essa", VCPU_STAT(instruction_essa) }, 131 { "instruction_stsi", VCPU_STAT(instruction_stsi) }, 132 { "instruction_stfl", VCPU_STAT(instruction_stfl) }, 133 { "instruction_tb", VCPU_STAT(instruction_tb) }, 134 { "instruction_tpi", VCPU_STAT(instruction_tpi) }, 135 { "instruction_tprot", VCPU_STAT(instruction_tprot) }, 136 { "instruction_tsch", VCPU_STAT(instruction_tsch) }, 137 { "instruction_sthyi", VCPU_STAT(instruction_sthyi) }, 138 { "instruction_sie", VCPU_STAT(instruction_sie) }, 139 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) }, 140 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) }, 141 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) }, 142 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) }, 143 { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) }, 144 { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) }, 145 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) }, 146 { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) }, 147 { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) }, 148 { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) }, 149 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) }, 150 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) 
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "instruction_diag_10", VCPU_STAT(diagnose_10) },
	{ "instruction_diag_44", VCPU_STAT(diagnose_44) },
	{ "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
	{ "instruction_diag_258", VCPU_STAT(diagnose_258) },
	{ "instruction_diag_308", VCPU_STAT(diagnose_308) },
	{ "instruction_diag_500", VCPU_STAT(diagnose_500) },
	{ "instruction_diag_other", VCPU_STAT(diagnose_other) },
	{ NULL }
};

struct kvm_s390_tod_clock_ext {
	__u8 epoch_idx;
	__u64 tod;
	__u8 reserved[7];
} __packed;

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* allow 1m huge page guest backing, if !nested */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");

/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, this requires changes to code, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };

static unsigned long kvm_s390_fac_size(void)
{
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
		     sizeof(S390_lowcore.stfle_fac_list));

	return SIZE_INTERNAL;
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
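
/*
 * Note (added for clarity, not part of the original source): the guest view
 * of the TOD clock is host TOD + epoch. When the host TOD is stepped by
 * delta during clock synchronization, adding -delta to the epoch below keeps
 * that sum, and therefore the guest-observed time, unchanged; the epoch
 * index (epdx) only absorbs the carry when the 64-bit epoch addition wraps.
 */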
static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
	u8 delta_idx = 0;

	/*
	 * The TOD jumps by delta, we have to compensate this by adding
	 * -delta to the epoch.
	 */
	delta = -delta;

	/* sign-extension - we're adding to signed values below */
	if ((s64)delta < 0)
		delta_idx = -1;

	scb->epoch += delta;
	if (scb->ecd & ECD_MEF) {
		scb->epdx += delta_idx;
		if (scb->epoch < delta)
			scb->epdx += 1;
	}
}

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm_for_each_vcpu(i, vcpu, kvm) {
			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
			if (i == 0) {
				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
			}
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				kvm_clock_sync_scb(vcpu->arch.vsie_block,
						   *delta);
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

static inline int plo_test_bit(unsigned char nr)
{
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc;

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc == 0;
}

static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}

int kvm_arch_init(void *opaque)
{
	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		debug_unregister(kvm_s390_dbf);
		return -ENOMEM;
	}

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}

void kvm_arch_exit(void)
{
	debug_unregister(kvm_s390_dbf);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
	case KVM_CAP_S390_AIS_MIGRATION:
		r = 1;
		break;
	case KVM_CAP_S390_HPAGE_1M:
		r = 0;
		if (hpage && !kvm_is_ucontrol(kvm))
			r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	case KVM_CAP_S390_BPB:
		r = test_facility(82);
		break;
	default:
		r = 0;
	}
	return r;
}

static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				    struct kvm_memory_slot *memslot)
{
	int i;
	gfn_t cur_gfn, last_gfn;
	unsigned long gaddr, vmaddr;
	struct gmap *gmap = kvm->arch.gmap;
	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);

	/* Loop over all guest segments */
	cur_gfn = memslot->base_gfn;
	last_gfn = memslot->base_gfn + memslot->npages;
	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
		gaddr = gfn_to_gpa(cur_gfn);
		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
		if (kvm_is_error_hva(vmaddr))
			continue;

		bitmap_zero(bitmap, _PAGE_ENTRIES);
		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
		for (i = 0; i < _PAGE_ENTRIES; i++) {
			if (test_bit(i, bitmap))
				mark_page_dirty(kvm, cur_gfn + i);
		}

		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);
/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}

static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}

static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_AIS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else {
			set_kvm_facility(kvm->arch.model.fac_mask, 72);
			set_kvm_facility(kvm->arch.model.fac_list, 72);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_GS:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(133)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 133);
			set_kvm_facility(kvm->arch.model.fac_list, 133);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_HPAGE_1M:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			r = -EBUSY;
		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
			r = -EINVAL;
		else {
			r = 0;
			down_write(&kvm->mm->mmap_sem);
			kvm->mm->context.allow_gmap_hpage_1m = 1;
			up_write(&kvm->mm->mmap_sem);
			/*
			 * We might have to create fake 4k page
			 * tables. To avoid that the hardware works on
			 * stale PGSTEs, we emulate these instructions.
			 */
			kvm->arch.use_skf = 0;
			kvm->arch.use_pfmfi = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
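
/*
 * Illustrative sketch (added for clarity, not part of the original source):
 * user space reaches the handler above through the KVM_ENABLE_CAP vm ioctl,
 * roughly as follows ("vm_fd" is assumed to be an open VM file descriptor):
 *
 *	struct kvm_enable_cap cap = {
 *		.cap = KVM_CAP_S390_USER_SIGP,
 *	};
 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 *
 * A zero return value means the capability is now enabled for this VM.
 */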
"(not available)" : "(success)"); 690 break; 691 case KVM_CAP_S390_HPAGE_1M: 692 mutex_lock(&kvm->lock); 693 if (kvm->created_vcpus) 694 r = -EBUSY; 695 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm)) 696 r = -EINVAL; 697 else { 698 r = 0; 699 down_write(&kvm->mm->mmap_sem); 700 kvm->mm->context.allow_gmap_hpage_1m = 1; 701 up_write(&kvm->mm->mmap_sem); 702 /* 703 * We might have to create fake 4k page 704 * tables. To avoid that the hardware works on 705 * stale PGSTEs, we emulate these instructions. 706 */ 707 kvm->arch.use_skf = 0; 708 kvm->arch.use_pfmfi = 0; 709 } 710 mutex_unlock(&kvm->lock); 711 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s", 712 r ? "(not available)" : "(success)"); 713 break; 714 case KVM_CAP_S390_USER_STSI: 715 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI"); 716 kvm->arch.user_stsi = 1; 717 r = 0; 718 break; 719 case KVM_CAP_S390_USER_INSTR0: 720 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0"); 721 kvm->arch.user_instr0 = 1; 722 icpt_operexc_on_all_vcpus(kvm); 723 r = 0; 724 break; 725 default: 726 r = -EINVAL; 727 break; 728 } 729 return r; 730 } 731 732 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) 733 { 734 int ret; 735 736 switch (attr->attr) { 737 case KVM_S390_VM_MEM_LIMIT_SIZE: 738 ret = 0; 739 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes", 740 kvm->arch.mem_limit); 741 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr)) 742 ret = -EFAULT; 743 break; 744 default: 745 ret = -ENXIO; 746 break; 747 } 748 return ret; 749 } 750 751 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) 752 { 753 int ret; 754 unsigned int idx; 755 switch (attr->attr) { 756 case KVM_S390_VM_MEM_ENABLE_CMMA: 757 ret = -ENXIO; 758 if (!sclp.has_cmma) 759 break; 760 761 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support"); 762 mutex_lock(&kvm->lock); 763 if (kvm->created_vcpus) 764 ret = -EBUSY; 765 else if (kvm->mm->context.allow_gmap_hpage_1m) 766 ret = -EINVAL; 767 else { 768 kvm->arch.use_cmma = 1; 769 /* Not compatible with cmma. 
			kvm->arch.use_pfmfi = 0;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
{
	struct kvm_vcpu *vcpu;
	int i;

	kvm_s390_vcpu_block_all(kvm);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		/* recreate the shadow crycb by leaving the VSIE handler */
		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
	}

	kvm_s390_vcpu_unblock_all(kvm);
}

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
		       sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
		       sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 1;
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 0;
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_s390_vcpu_crypto_reset_all(kvm);
	mutex_unlock(&kvm->lock);
	return 0;
}

static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
	int cx;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}

/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_memory_slot *ms;
	struct kvm_memslots *slots;
	unsigned long ram_pages = 0;
	int slotnr;

	/* migration mode already enabled */
	if (kvm->arch.migration_mode)
		return 0;
	slots = kvm_memslots(kvm);
	if (!slots || !slots->used_slots)
		return -EINVAL;

	if (!kvm->arch.use_cmma) {
		kvm->arch.migration_mode = 1;
		return 0;
	}
	/* mark all the pages in active slots as dirty */
	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
		ms = slots->memslots + slotnr;
		/*
		 * The second half of the bitmap is only used on x86,
		 * and would be wasted otherwise, so we put it to good
		 * use here to keep track of the state of the storage
		 * attributes.
		 */
		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
		ram_pages += ms->npages;
	}
	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
	kvm->arch.migration_mode = 1;
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	return 0;
}
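
/*
 * Illustrative sketch (added for clarity, not part of the original source):
 * migration mode is toggled from user space through the KVM_SET_DEVICE_ATTR
 * vm ioctl, e.g. to start migration ("vm_fd" is assumed):
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MIGRATION,
 *		.attr  = KVM_S390_VM_MIGRATION_START,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */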
/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	/* migration mode already disabled */
	if (!kvm->arch.migration_mode)
		return 0;
	kvm->arch.migration_mode = 0;
	if (kvm->arch.use_cmma)
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
	return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int res = -ENXIO;

	mutex_lock(&kvm->slots_lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		res = kvm_s390_vm_start_migration(kvm);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->slots_lock);

	return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = kvm->arch.migration_mode;

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
		return -EINVAL;
	kvm_s390_set_tod_clock(kvm, &gtod);

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);

	return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod = { 0 };

	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
			   sizeof(gtod.tod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, &gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
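
/*
 * Illustrative sketch (added for clarity, not part of the original source):
 * the TOD setters above are reached via KVM_SET_DEVICE_ATTR with the
 * KVM_S390_VM_TOD group, e.g. for the extended (epoch index + base) format:
 *
 *	struct kvm_s390_vm_tod_clock gtod = { .epoch_idx = 0, .tod = tod };
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_TOD,
 *		.attr  = KVM_S390_VM_TOD_EXT,
 *		.addr  = (__u64)&gtod,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 *
 * A non-zero epoch_idx is rejected with -EINVAL if the multiple-epoch
 * facility (139) is not available to the guest.
 */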
static void kvm_s390_get_tod_clock(struct kvm *kvm,
				   struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_s390_tod_clock_ext htod;

	preempt_disable();

	get_tod_clock_ext((char *)&htod);

	gtod->tod = htod.tod + kvm->arch.epoch;
	gtod->epoch_idx = 0;
	if (test_kvm_facility(kvm, 139)) {
		gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
		if (gtod->tod < htod.tod)
			gtod->epoch_idx += 1;
	}

	preempt_enable();
}

static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));
	kvm_s390_get_tod_clock(kvm, &gtod);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);
	return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}
static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}
	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	mutex_unlock(&kvm->lock);
	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once supported by kernel + hw, we have to store the subfunctions
	 * in kvm->arch and remember that user space configured them.
	 */
	return -ENXIO;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_list[0],
		 kvm->arch.model.fac_list[1],
		 kvm->arch.model.fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_mask[0],
		 mach->fac_mask[1],
		 mach->fac_mask[2]);
	VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_list[0],
		 mach->fac_list[1],
		 mach->fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}

static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}
static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}

static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once we can actually configure subfunctions (kernel + hw support),
	 * we have to check if they were already set by user space, if so copy
	 * them from kvm->arch.
	 */
	return -ENXIO;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
			 sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;
	return 0;
}
static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_set_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_get_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = sclp.has_cmma ? 0 : -ENXIO;
			break;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
			ret = 0;
			break;
		/* configuring subfunctions is not supported yet */
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
			ret = ap_instructions_available() ? 0 : -ENXIO;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_MIGRATION:
		ret = 0;
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_uses_skeys(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		r = get_guest_storage_key(current->mm, hva, &keys[i]);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);

	if (!r) {
		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
				 sizeof(uint8_t) * args->count);
		if (r)
			r = -EFAULT;
	}

	kvfree(keys);
	return r;
}
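
/*
 * Illustrative sketch (added for clarity, not part of the original source):
 * user space reads storage keys with the KVM_S390_GET_SKEYS vm ioctl,
 * roughly as follows ("nr_pages" and the "keys" buffer are assumptions):
 *
 *	uint8_t keys[nr_pages];
 *	struct kvm_s390_skeys args = {
 *		.start_gfn = 0,
 *		.count = nr_pages,
 *		.skeydata_addr = (__u64)keys,
 *	};
 *	ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 *
 * A return value of KVM_S390_GET_SKEYS_NONE indicates the guest is not using
 * storage keys, so there is nothing to save.
 */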
static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;
	bool unlocked;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	i = 0;
	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	while (i < args->count) {
		unlocked = false;
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			break;
		}

		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
		if (r) {
			r = fixup_user_fault(current, current->mm, hva,
					     FAULT_FLAG_WRITE, &unlocked);
			if (r)
				break;
		}
		if (!r)
			i++;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);
out:
	kvfree(keys);
	return r;
}

/*
 * Base address and length must be sent at the start of each block, therefore
 * it's cheaper to send some clean data, as long as it's less than the size of
 * two longs.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/* for consistency */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)

/*
 * Similar to gfn_to_memslot, but returns the index of a memslot also when the
 * address falls in a hole. In that case the index of one of the memslots
 * bordering the hole is returned.
 */
static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
{
	int start = 0, end = slots->used_slots;
	int slot = atomic_read(&slots->lru_slot);
	struct kvm_memory_slot *memslots = slots->memslots;

	if (gfn >= memslots[slot].base_gfn &&
	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
		return slot;

	while (start < end) {
		slot = start + (end - start) / 2;

		if (gfn >= memslots[slot].base_gfn)
			end = slot;
		else
			start = slot + 1;
	}

	if (gfn >= memslots[start].base_gfn &&
	    gfn < memslots[start].base_gfn + memslots[start].npages) {
		atomic_set(&slots->lru_slot, start);
	}

	return start;
}
static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
			      u8 *res, unsigned long bufsize)
{
	unsigned long pgstev, hva, cur_gfn = args->start_gfn;

	args->count = 0;
	while (args->count < bufsize) {
		hva = gfn_to_hva(kvm, cur_gfn);
		/*
		 * We return an error if the first value was invalid, but we
		 * return successfully if at least one value was copied.
		 */
		if (kvm_is_error_hva(hva))
			return args->count ? 0 : -EFAULT;
		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
			pgstev = 0;
		res[args->count++] = (pgstev >> 24) & 0x43;
		cur_gfn++;
	}

	return 0;
}

static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
					      unsigned long cur_gfn)
{
	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
	struct kvm_memory_slot *ms = slots->memslots + slotidx;
	unsigned long ofs = cur_gfn - ms->base_gfn;

	if (ms->base_gfn + ms->npages <= cur_gfn) {
		slotidx--;
		/* If we are above the highest slot, wrap around */
		if (slotidx < 0)
			slotidx = slots->used_slots - 1;

		ms = slots->memslots + slotidx;
		ofs = 0;
	}
	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
	while ((slotidx > 0) && (ofs >= ms->npages)) {
		slotidx--;
		ms = slots->memslots + slotidx;
		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
	}
	return ms->base_gfn + ofs;
}

static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
			     u8 *res, unsigned long bufsize)
{
	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
	struct kvm_memslots *slots = kvm_memslots(kvm);
	struct kvm_memory_slot *ms;

	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
	ms = gfn_to_memslot(kvm, cur_gfn);
	args->count = 0;
	args->start_gfn = cur_gfn;
	if (!ms)
		return 0;
	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
	mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;

	while (args->count < bufsize) {
		hva = gfn_to_hva(kvm, cur_gfn);
		if (kvm_is_error_hva(hva))
			return 0;
		/* Decrement only if we actually flipped the bit to 0 */
		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
			atomic64_dec(&kvm->arch.cmma_dirty_pages);
		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
			pgstev = 0;
		/* Save the value */
		res[args->count++] = (pgstev >> 24) & 0x43;
		/* If the next bit is too far away, stop. */
		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
			return 0;
		/* If we reached the previous "next", find the next one */
		if (cur_gfn == next_gfn)
			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
		/* Reached the end of memory or of the buffer, stop */
		if ((next_gfn >= mem_end) ||
		    (next_gfn - args->start_gfn >= bufsize))
			return 0;
		cur_gfn++;
		/* Reached the end of the current memslot, take the next one. */
		if (cur_gfn - ms->base_gfn >= ms->npages) {
			ms = gfn_to_memslot(kvm, cur_gfn);
			if (!ms)
				return 0;
		}
	}
	return 0;
}

/*
 * This function searches for the next page with dirty CMMA attributes, and
 * saves the attributes in the buffer up to either the end of the buffer or
 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
 * no trailing clean bytes are saved.
 * In case no dirty bits were found, or if CMMA was not enabled or used, the
 * output buffer will indicate 0 as length.
 */
static int kvm_s390_get_cmma_bits(struct kvm *kvm,
				  struct kvm_s390_cmma_log *args)
{
	unsigned long bufsize;
	int srcu_idx, peek, ret;
	u8 *values;

	if (!kvm->arch.use_cmma)
		return -ENXIO;
	/* Invalid/unsupported flags were specified */
	if (args->flags & ~KVM_S390_CMMA_PEEK)
		return -EINVAL;
	/* Migration mode query, and we are not doing a migration */
	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
	if (!peek && !kvm->arch.migration_mode)
		return -EINVAL;
	/* CMMA is disabled or was not used, or the buffer has length zero */
	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
	if (!bufsize || !kvm->mm->context.uses_cmm) {
		memset(args, 0, sizeof(*args));
		return 0;
	}
	/* We are not peeking, and there are no dirty pages */
	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
		memset(args, 0, sizeof(*args));
		return 0;
	}

	values = vmalloc(bufsize);
	if (!values)
		return -ENOMEM;

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	if (peek)
		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
	else
		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);

	if (kvm->arch.migration_mode)
		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
	else
		args->remaining = 0;

	if (copy_to_user((void __user *)args->values, values, args->count))
		ret = -EFAULT;

	vfree(values);
	return ret;
}
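
/*
 * Illustrative sketch (added for clarity, not part of the original source):
 * during migration, user space repeatedly pulls CMMA values through the
 * KVM_S390_GET_CMMA_BITS vm ioctl until "remaining" drops to zero
 * ("buf" and "buf_size" are assumptions):
 *
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count = buf_size,
 *		.flags = 0,			// or KVM_S390_CMMA_PEEK
 *		.values = (__u64)buf,
 *	};
 *	ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log);
 *
 * On return, start_gfn and count describe the range actually delivered and
 * remaining holds the number of dirty CMMA pages still to be fetched.
 */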
/*
 * This function sets the CMMA attributes for the given pages. If the input
 * buffer has zero length, no action is taken, otherwise the attributes are
 * set and the mm->context.uses_cmm flag is set.
 */
static int kvm_s390_set_cmma_bits(struct kvm *kvm,
				  const struct kvm_s390_cmma_log *args)
{
	unsigned long hva, mask, pgstev, i;
	uint8_t *bits;
	int srcu_idx, r = 0;

	mask = args->mask;

	if (!kvm->arch.use_cmma)
		return -ENXIO;
	/* invalid/unsupported flags */
	if (args->flags != 0)
		return -EINVAL;
	/* Enforce sane limit on memory allocation */
	if (args->count > KVM_S390_CMMA_SIZE_MAX)
		return -EINVAL;
	/* Nothing to do */
	if (args->count == 0)
		return 0;

	bits = vmalloc(array_size(sizeof(*bits), args->count));
	if (!bits)
		return -ENOMEM;

	r = copy_from_user(bits, (void __user *)args->values, args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		pgstev = bits[i];
		pgstev = pgstev << 24;
		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
		set_pgste_bits(kvm->mm, hva, mask, pgstev);
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);

	if (!kvm->mm->context.uses_cmm) {
		down_write(&kvm->mm->mmap_sem);
		kvm->mm->context.uses_cmm = 1;
		up_write(&kvm->mm->mmap_sem);
	}
out:
	vfree(bits);
	return r;
}

long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
*/ 1950 memset(&routing, 0, sizeof(routing)); 1951 r = kvm_set_irq_routing(kvm, &routing, 0, 0); 1952 } 1953 break; 1954 } 1955 case KVM_SET_DEVICE_ATTR: { 1956 r = -EFAULT; 1957 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 1958 break; 1959 r = kvm_s390_vm_set_attr(kvm, &attr); 1960 break; 1961 } 1962 case KVM_GET_DEVICE_ATTR: { 1963 r = -EFAULT; 1964 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 1965 break; 1966 r = kvm_s390_vm_get_attr(kvm, &attr); 1967 break; 1968 } 1969 case KVM_HAS_DEVICE_ATTR: { 1970 r = -EFAULT; 1971 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 1972 break; 1973 r = kvm_s390_vm_has_attr(kvm, &attr); 1974 break; 1975 } 1976 case KVM_S390_GET_SKEYS: { 1977 struct kvm_s390_skeys args; 1978 1979 r = -EFAULT; 1980 if (copy_from_user(&args, argp, 1981 sizeof(struct kvm_s390_skeys))) 1982 break; 1983 r = kvm_s390_get_skeys(kvm, &args); 1984 break; 1985 } 1986 case KVM_S390_SET_SKEYS: { 1987 struct kvm_s390_skeys args; 1988 1989 r = -EFAULT; 1990 if (copy_from_user(&args, argp, 1991 sizeof(struct kvm_s390_skeys))) 1992 break; 1993 r = kvm_s390_set_skeys(kvm, &args); 1994 break; 1995 } 1996 case KVM_S390_GET_CMMA_BITS: { 1997 struct kvm_s390_cmma_log args; 1998 1999 r = -EFAULT; 2000 if (copy_from_user(&args, argp, sizeof(args))) 2001 break; 2002 mutex_lock(&kvm->slots_lock); 2003 r = kvm_s390_get_cmma_bits(kvm, &args); 2004 mutex_unlock(&kvm->slots_lock); 2005 if (!r) { 2006 r = copy_to_user(argp, &args, sizeof(args)); 2007 if (r) 2008 r = -EFAULT; 2009 } 2010 break; 2011 } 2012 case KVM_S390_SET_CMMA_BITS: { 2013 struct kvm_s390_cmma_log args; 2014 2015 r = -EFAULT; 2016 if (copy_from_user(&args, argp, sizeof(args))) 2017 break; 2018 mutex_lock(&kvm->slots_lock); 2019 r = kvm_s390_set_cmma_bits(kvm, &args); 2020 mutex_unlock(&kvm->slots_lock); 2021 break; 2022 } 2023 default: 2024 r = -ENOTTY; 2025 } 2026 2027 return r; 2028 } 2029 2030 static int kvm_s390_apxa_installed(void) 2031 { 2032 struct ap_config_info info; 2033 2034 if (ap_instructions_available()) { 2035 if (ap_qci(&info) == 0) 2036 return info.apxa; 2037 } 2038 2039 return 0; 2040 } 2041 2042 /* 2043 * The format of the crypto control block (CRYCB) is specified in the 3 low 2044 * order bits of the CRYCB designation (CRYCBD) field as follows: 2045 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the 2046 * AP extended addressing (APXA) facility are installed. 2047 * Format 1: The APXA facility is not installed but the MSAX3 facility is. 
2048 * Format 2: Both the APXA and MSAX3 facilities are installed 2049 */ 2050 static void kvm_s390_set_crycb_format(struct kvm *kvm) 2051 { 2052 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb; 2053 2054 /* Clear the CRYCB format bits - i.e., set format 0 by default */ 2055 kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK); 2056 2057 /* Check whether MSAX3 is installed */ 2058 if (!test_kvm_facility(kvm, 76)) 2059 return; 2060 2061 if (kvm_s390_apxa_installed()) 2062 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2; 2063 else 2064 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1; 2065 } 2066 2067 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm, 2068 unsigned long *aqm, unsigned long *adm) 2069 { 2070 struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb; 2071 2072 mutex_lock(&kvm->lock); 2073 kvm_s390_vcpu_block_all(kvm); 2074 2075 switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) { 2076 case CRYCB_FORMAT2: /* APCB1 use 256 bits */ 2077 memcpy(crycb->apcb1.apm, apm, 32); 2078 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx", 2079 apm[0], apm[1], apm[2], apm[3]); 2080 memcpy(crycb->apcb1.aqm, aqm, 32); 2081 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx", 2082 aqm[0], aqm[1], aqm[2], aqm[3]); 2083 memcpy(crycb->apcb1.adm, adm, 32); 2084 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx", 2085 adm[0], adm[1], adm[2], adm[3]); 2086 break; 2087 case CRYCB_FORMAT1: 2088 case CRYCB_FORMAT0: /* Fall through both use APCB0 */ 2089 memcpy(crycb->apcb0.apm, apm, 8); 2090 memcpy(crycb->apcb0.aqm, aqm, 2); 2091 memcpy(crycb->apcb0.adm, adm, 2); 2092 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x", 2093 apm[0], *((unsigned short *)aqm), 2094 *((unsigned short *)adm)); 2095 break; 2096 default: /* Can not happen */ 2097 break; 2098 } 2099 2100 /* recreate the shadow crycb for each vcpu */ 2101 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART); 2102 kvm_s390_vcpu_unblock_all(kvm); 2103 mutex_unlock(&kvm->lock); 2104 } 2105 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks); 2106 2107 void kvm_arch_crypto_clear_masks(struct kvm *kvm) 2108 { 2109 mutex_lock(&kvm->lock); 2110 kvm_s390_vcpu_block_all(kvm); 2111 2112 memset(&kvm->arch.crypto.crycb->apcb0, 0, 2113 sizeof(kvm->arch.crypto.crycb->apcb0)); 2114 memset(&kvm->arch.crypto.crycb->apcb1, 0, 2115 sizeof(kvm->arch.crypto.crycb->apcb1)); 2116 2117 VM_EVENT(kvm, 3, "%s", "CLR CRYCB:"); 2118 /* recreate the shadow crycb for each vcpu */ 2119 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART); 2120 kvm_s390_vcpu_unblock_all(kvm); 2121 mutex_unlock(&kvm->lock); 2122 } 2123 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks); 2124 2125 static u64 kvm_s390_get_initial_cpuid(void) 2126 { 2127 struct cpuid cpuid; 2128 2129 get_cpu_id(&cpuid); 2130 cpuid.version = 0xff; 2131 return *((u64 *) &cpuid); 2132 } 2133 2134 static void kvm_s390_crypto_init(struct kvm *kvm) 2135 { 2136 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb; 2137 kvm_s390_set_crycb_format(kvm); 2138 2139 if (!test_kvm_facility(kvm, 76)) 2140 return; 2141 2142 /* Enable AES/DEA protected key functions by default */ 2143 kvm->arch.crypto.aes_kw = 1; 2144 kvm->arch.crypto.dea_kw = 1; 2145 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 2146 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); 2147 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 2148 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); 2149 } 2150 2151 static void sca_dispose(struct kvm *kvm) 2152 { 2153 
if (kvm->arch.use_esca) 2154 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block)); 2155 else 2156 free_page((unsigned long)(kvm->arch.sca)); 2157 kvm->arch.sca = NULL; 2158 } 2159 2160 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) 2161 { 2162 gfp_t alloc_flags = GFP_KERNEL; 2163 int i, rc; 2164 char debug_name[16]; 2165 static unsigned long sca_offset; 2166 2167 rc = -EINVAL; 2168 #ifdef CONFIG_KVM_S390_UCONTROL 2169 if (type & ~KVM_VM_S390_UCONTROL) 2170 goto out_err; 2171 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN))) 2172 goto out_err; 2173 #else 2174 if (type) 2175 goto out_err; 2176 #endif 2177 2178 rc = s390_enable_sie(); 2179 if (rc) 2180 goto out_err; 2181 2182 rc = -ENOMEM; 2183 2184 if (!sclp.has_64bscao) 2185 alloc_flags |= GFP_DMA; 2186 rwlock_init(&kvm->arch.sca_lock); 2187 /* start with basic SCA */ 2188 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags); 2189 if (!kvm->arch.sca) 2190 goto out_err; 2191 spin_lock(&kvm_lock); 2192 sca_offset += 16; 2193 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE) 2194 sca_offset = 0; 2195 kvm->arch.sca = (struct bsca_block *) 2196 ((char *) kvm->arch.sca + sca_offset); 2197 spin_unlock(&kvm_lock); 2198 2199 sprintf(debug_name, "kvm-%u", current->pid); 2200 2201 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long)); 2202 if (!kvm->arch.dbf) 2203 goto out_err; 2204 2205 BUILD_BUG_ON(sizeof(struct sie_page2) != 4096); 2206 kvm->arch.sie_page2 = 2207 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA); 2208 if (!kvm->arch.sie_page2) 2209 goto out_err; 2210 2211 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list; 2212 2213 for (i = 0; i < kvm_s390_fac_size(); i++) { 2214 kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] & 2215 (kvm_s390_fac_base[i] | 2216 kvm_s390_fac_ext[i]); 2217 kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] & 2218 kvm_s390_fac_base[i]; 2219 } 2220 2221 /* we are always in czam mode - even on pre z14 machines */ 2222 set_kvm_facility(kvm->arch.model.fac_mask, 138); 2223 set_kvm_facility(kvm->arch.model.fac_list, 138); 2224 /* we emulate STHYI in kvm */ 2225 set_kvm_facility(kvm->arch.model.fac_mask, 74); 2226 set_kvm_facility(kvm->arch.model.fac_list, 74); 2227 if (MACHINE_HAS_TLB_GUEST) { 2228 set_kvm_facility(kvm->arch.model.fac_mask, 147); 2229 set_kvm_facility(kvm->arch.model.fac_list, 147); 2230 } 2231 2232 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid(); 2233 kvm->arch.model.ibc = sclp.ibc & 0x0fff; 2234 2235 kvm_s390_crypto_init(kvm); 2236 2237 mutex_init(&kvm->arch.float_int.ais_lock); 2238 spin_lock_init(&kvm->arch.float_int.lock); 2239 for (i = 0; i < FIRQ_LIST_COUNT; i++) 2240 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]); 2241 init_waitqueue_head(&kvm->arch.ipte_wq); 2242 mutex_init(&kvm->arch.ipte_mutex); 2243 2244 debug_register_view(kvm->arch.dbf, &debug_sprintf_view); 2245 VM_EVENT(kvm, 3, "vm created with type %lu", type); 2246 2247 if (type & KVM_VM_S390_UCONTROL) { 2248 kvm->arch.gmap = NULL; 2249 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT; 2250 } else { 2251 if (sclp.hamax == U64_MAX) 2252 kvm->arch.mem_limit = TASK_SIZE_MAX; 2253 else 2254 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX, 2255 sclp.hamax + 1); 2256 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1); 2257 if (!kvm->arch.gmap) 2258 goto out_err; 2259 kvm->arch.gmap->private = kvm; 2260 kvm->arch.gmap->pfault_enabled = 0; 2261 } 2262 2263 kvm->arch.use_pfmfi = sclp.has_pfmfi; 2264 kvm->arch.use_skf = 
sclp.has_skey; 2265 spin_lock_init(&kvm->arch.start_stop_lock); 2266 kvm_s390_vsie_init(kvm); 2267 kvm_s390_gisa_init(kvm); 2268 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid); 2269 2270 return 0; 2271 out_err: 2272 free_page((unsigned long)kvm->arch.sie_page2); 2273 debug_unregister(kvm->arch.dbf); 2274 sca_dispose(kvm); 2275 KVM_EVENT(3, "creation of vm failed: %d", rc); 2276 return rc; 2277 } 2278 2279 bool kvm_arch_has_vcpu_debugfs(void) 2280 { 2281 return false; 2282 } 2283 2284 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu) 2285 { 2286 return 0; 2287 } 2288 2289 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) 2290 { 2291 VCPU_EVENT(vcpu, 3, "%s", "free cpu"); 2292 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id); 2293 kvm_s390_clear_local_irqs(vcpu); 2294 kvm_clear_async_pf_completion_queue(vcpu); 2295 if (!kvm_is_ucontrol(vcpu->kvm)) 2296 sca_del_vcpu(vcpu); 2297 2298 if (kvm_is_ucontrol(vcpu->kvm)) 2299 gmap_remove(vcpu->arch.gmap); 2300 2301 if (vcpu->kvm->arch.use_cmma) 2302 kvm_s390_vcpu_unsetup_cmma(vcpu); 2303 free_page((unsigned long)(vcpu->arch.sie_block)); 2304 2305 kvm_vcpu_uninit(vcpu); 2306 kmem_cache_free(kvm_vcpu_cache, vcpu); 2307 } 2308 2309 static void kvm_free_vcpus(struct kvm *kvm) 2310 { 2311 unsigned int i; 2312 struct kvm_vcpu *vcpu; 2313 2314 kvm_for_each_vcpu(i, vcpu, kvm) 2315 kvm_arch_vcpu_destroy(vcpu); 2316 2317 mutex_lock(&kvm->lock); 2318 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) 2319 kvm->vcpus[i] = NULL; 2320 2321 atomic_set(&kvm->online_vcpus, 0); 2322 mutex_unlock(&kvm->lock); 2323 } 2324 2325 void kvm_arch_destroy_vm(struct kvm *kvm) 2326 { 2327 kvm_free_vcpus(kvm); 2328 sca_dispose(kvm); 2329 debug_unregister(kvm->arch.dbf); 2330 kvm_s390_gisa_destroy(kvm); 2331 free_page((unsigned long)kvm->arch.sie_page2); 2332 if (!kvm_is_ucontrol(kvm)) 2333 gmap_remove(kvm->arch.gmap); 2334 kvm_s390_destroy_adapters(kvm); 2335 kvm_s390_clear_float_irqs(kvm); 2336 kvm_s390_vsie_destroy(kvm); 2337 KVM_EVENT(3, "vm 0x%pK destroyed", kvm); 2338 } 2339 2340 /* Section: vcpu related */ 2341 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu) 2342 { 2343 vcpu->arch.gmap = gmap_create(current->mm, -1UL); 2344 if (!vcpu->arch.gmap) 2345 return -ENOMEM; 2346 vcpu->arch.gmap->private = vcpu->kvm; 2347 2348 return 0; 2349 } 2350 2351 static void sca_del_vcpu(struct kvm_vcpu *vcpu) 2352 { 2353 if (!kvm_s390_use_sca_entries()) 2354 return; 2355 read_lock(&vcpu->kvm->arch.sca_lock); 2356 if (vcpu->kvm->arch.use_esca) { 2357 struct esca_block *sca = vcpu->kvm->arch.sca; 2358 2359 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); 2360 sca->cpu[vcpu->vcpu_id].sda = 0; 2361 } else { 2362 struct bsca_block *sca = vcpu->kvm->arch.sca; 2363 2364 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); 2365 sca->cpu[vcpu->vcpu_id].sda = 0; 2366 } 2367 read_unlock(&vcpu->kvm->arch.sca_lock); 2368 } 2369 2370 static void sca_add_vcpu(struct kvm_vcpu *vcpu) 2371 { 2372 if (!kvm_s390_use_sca_entries()) { 2373 struct bsca_block *sca = vcpu->kvm->arch.sca; 2374 2375 /* we still need the basic sca for the ipte control */ 2376 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2377 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; 2378 return; 2379 } 2380 read_lock(&vcpu->kvm->arch.sca_lock); 2381 if (vcpu->kvm->arch.use_esca) { 2382 struct esca_block *sca = vcpu->kvm->arch.sca; 2383 2384 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; 2385 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2386 
vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU; 2387 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; 2388 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); 2389 } else { 2390 struct bsca_block *sca = vcpu->kvm->arch.sca; 2391 2392 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; 2393 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2394 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; 2395 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); 2396 } 2397 read_unlock(&vcpu->kvm->arch.sca_lock); 2398 } 2399 2400 /* Basic SCA to Extended SCA data copy routines */ 2401 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s) 2402 { 2403 d->sda = s->sda; 2404 d->sigp_ctrl.c = s->sigp_ctrl.c; 2405 d->sigp_ctrl.scn = s->sigp_ctrl.scn; 2406 } 2407 2408 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s) 2409 { 2410 int i; 2411 2412 d->ipte_control = s->ipte_control; 2413 d->mcn[0] = s->mcn; 2414 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++) 2415 sca_copy_entry(&d->cpu[i], &s->cpu[i]); 2416 } 2417 2418 static int sca_switch_to_extended(struct kvm *kvm) 2419 { 2420 struct bsca_block *old_sca = kvm->arch.sca; 2421 struct esca_block *new_sca; 2422 struct kvm_vcpu *vcpu; 2423 unsigned int vcpu_idx; 2424 u32 scaol, scaoh; 2425 2426 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO); 2427 if (!new_sca) 2428 return -ENOMEM; 2429 2430 scaoh = (u32)((u64)(new_sca) >> 32); 2431 scaol = (u32)(u64)(new_sca) & ~0x3fU; 2432 2433 kvm_s390_vcpu_block_all(kvm); 2434 write_lock(&kvm->arch.sca_lock); 2435 2436 sca_copy_b_to_e(new_sca, old_sca); 2437 2438 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) { 2439 vcpu->arch.sie_block->scaoh = scaoh; 2440 vcpu->arch.sie_block->scaol = scaol; 2441 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; 2442 } 2443 kvm->arch.sca = new_sca; 2444 kvm->arch.use_esca = 1; 2445 2446 write_unlock(&kvm->arch.sca_lock); 2447 kvm_s390_vcpu_unblock_all(kvm); 2448 2449 free_page((unsigned long)old_sca); 2450 2451 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)", 2452 old_sca, kvm->arch.sca); 2453 return 0; 2454 } 2455 2456 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id) 2457 { 2458 int rc; 2459 2460 if (!kvm_s390_use_sca_entries()) { 2461 if (id < KVM_MAX_VCPUS) 2462 return true; 2463 return false; 2464 } 2465 if (id < KVM_S390_BSCA_CPU_SLOTS) 2466 return true; 2467 if (!sclp.has_esca || !sclp.has_64bscao) 2468 return false; 2469 2470 mutex_lock(&kvm->lock); 2471 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm); 2472 mutex_unlock(&kvm->lock); 2473 2474 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS; 2475 } 2476 2477 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) 2478 { 2479 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 2480 kvm_clear_async_pf_completion_queue(vcpu); 2481 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX | 2482 KVM_SYNC_GPRS | 2483 KVM_SYNC_ACRS | 2484 KVM_SYNC_CRS | 2485 KVM_SYNC_ARCH0 | 2486 KVM_SYNC_PFAULT; 2487 kvm_s390_set_prefix(vcpu, 0); 2488 if (test_kvm_facility(vcpu->kvm, 64)) 2489 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; 2490 if (test_kvm_facility(vcpu->kvm, 82)) 2491 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC; 2492 if (test_kvm_facility(vcpu->kvm, 133)) 2493 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB; 2494 if (test_kvm_facility(vcpu->kvm, 156)) 2495 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN; 2496 /* fprs can be synchronized via vrs, even if the guest has no vx. With 2497 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format. 
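	 * This is why KVM_SYNC_VRS is advertised on machines with the vector
	 * facility and KVM_SYNC_FPRS otherwise (see directly below).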
2498 */ 2499 if (MACHINE_HAS_VX) 2500 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS; 2501 else 2502 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS; 2503 2504 if (kvm_is_ucontrol(vcpu->kvm)) 2505 return __kvm_ucontrol_vcpu_init(vcpu); 2506 2507 return 0; 2508 } 2509 2510 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 2511 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2512 { 2513 WARN_ON_ONCE(vcpu->arch.cputm_start != 0); 2514 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 2515 vcpu->arch.cputm_start = get_tod_clock_fast(); 2516 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 2517 } 2518 2519 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 2520 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2521 { 2522 WARN_ON_ONCE(vcpu->arch.cputm_start == 0); 2523 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 2524 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start; 2525 vcpu->arch.cputm_start = 0; 2526 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 2527 } 2528 2529 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 2530 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2531 { 2532 WARN_ON_ONCE(vcpu->arch.cputm_enabled); 2533 vcpu->arch.cputm_enabled = true; 2534 __start_cpu_timer_accounting(vcpu); 2535 } 2536 2537 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 2538 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2539 { 2540 WARN_ON_ONCE(!vcpu->arch.cputm_enabled); 2541 __stop_cpu_timer_accounting(vcpu); 2542 vcpu->arch.cputm_enabled = false; 2543 } 2544 2545 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2546 { 2547 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 2548 __enable_cpu_timer_accounting(vcpu); 2549 preempt_enable(); 2550 } 2551 2552 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2553 { 2554 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 2555 __disable_cpu_timer_accounting(vcpu); 2556 preempt_enable(); 2557 } 2558 2559 /* set the cpu timer - may only be called from the VCPU thread itself */ 2560 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm) 2561 { 2562 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 2563 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 2564 if (vcpu->arch.cputm_enabled) 2565 vcpu->arch.cputm_start = get_tod_clock_fast(); 2566 vcpu->arch.sie_block->cputm = cputm; 2567 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 2568 preempt_enable(); 2569 } 2570 2571 /* update and get the cpu timer - can also be called from other VCPU threads */ 2572 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu) 2573 { 2574 unsigned int seq; 2575 __u64 value; 2576 2577 if (unlikely(!vcpu->arch.cputm_enabled)) 2578 return vcpu->arch.sie_block->cputm; 2579 2580 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 2581 do { 2582 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount); 2583 /* 2584 * If the writer would ever execute a read in the critical 2585 * section, e.g. in irq context, we have a deadlock. 
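		 * The retry loop below spins until it observes an even,
		 * unchanged sequence count, so such a reader on the same CPU
		 * would never make progress; the WARN_ON_ONCE below catches
		 * exactly that case.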
2586 */ 2587 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu); 2588 value = vcpu->arch.sie_block->cputm; 2589 /* if cputm_start is 0, accounting is being started/stopped */ 2590 if (likely(vcpu->arch.cputm_start)) 2591 value -= get_tod_clock_fast() - vcpu->arch.cputm_start; 2592 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1)); 2593 preempt_enable(); 2594 return value; 2595 } 2596 2597 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 2598 { 2599 2600 gmap_enable(vcpu->arch.enabled_gmap); 2601 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING); 2602 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) 2603 __start_cpu_timer_accounting(vcpu); 2604 vcpu->cpu = cpu; 2605 } 2606 2607 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 2608 { 2609 vcpu->cpu = -1; 2610 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) 2611 __stop_cpu_timer_accounting(vcpu); 2612 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING); 2613 vcpu->arch.enabled_gmap = gmap_get_enabled(); 2614 gmap_disable(vcpu->arch.enabled_gmap); 2615 2616 } 2617 2618 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) 2619 { 2620 /* this equals initial cpu reset in pop, but we don't switch to ESA */ 2621 vcpu->arch.sie_block->gpsw.mask = 0UL; 2622 vcpu->arch.sie_block->gpsw.addr = 0UL; 2623 kvm_s390_set_prefix(vcpu, 0); 2624 kvm_s390_set_cpu_timer(vcpu, 0); 2625 vcpu->arch.sie_block->ckc = 0UL; 2626 vcpu->arch.sie_block->todpr = 0; 2627 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64)); 2628 vcpu->arch.sie_block->gcr[0] = CR0_UNUSED_56 | 2629 CR0_INTERRUPT_KEY_SUBMASK | 2630 CR0_MEASUREMENT_ALERT_SUBMASK; 2631 vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 | 2632 CR14_UNUSED_33 | 2633 CR14_EXTERNAL_DAMAGE_SUBMASK; 2634 /* make sure the new fpc will be lazily loaded */ 2635 save_fpu_regs(); 2636 current->thread.fpu.fpc = 0; 2637 vcpu->arch.sie_block->gbea = 1; 2638 vcpu->arch.sie_block->pp = 0; 2639 vcpu->arch.sie_block->fpf &= ~FPF_BPBC; 2640 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 2641 kvm_clear_async_pf_completion_queue(vcpu); 2642 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) 2643 kvm_s390_vcpu_stop(vcpu); 2644 kvm_s390_clear_local_irqs(vcpu); 2645 } 2646 2647 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) 2648 { 2649 mutex_lock(&vcpu->kvm->lock); 2650 preempt_disable(); 2651 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch; 2652 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx; 2653 preempt_enable(); 2654 mutex_unlock(&vcpu->kvm->lock); 2655 if (!kvm_is_ucontrol(vcpu->kvm)) { 2656 vcpu->arch.gmap = vcpu->kvm->arch.gmap; 2657 sca_add_vcpu(vcpu); 2658 } 2659 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0) 2660 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC; 2661 /* make vcpu_load load the right gmap on the first trigger */ 2662 vcpu->arch.enabled_gmap = vcpu->arch.gmap; 2663 } 2664 2665 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu) 2666 { 2667 /* 2668 * If the AP instructions are not being interpreted and the MSAX3 2669 * facility is not configured for the guest, there is nothing to set up. 
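	 * Otherwise the SIE block's crycbd, the ECA_APIE bit (interpretive
	 * execution of AP instructions) and the ECB3_AES/ECB3_DEA
	 * protected-key bits are (re)computed from the VM-wide crypto
	 * settings below.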
2670 */ 2671 if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76)) 2672 return; 2673 2674 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd; 2675 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA); 2676 vcpu->arch.sie_block->eca &= ~ECA_APIE; 2677 2678 if (vcpu->kvm->arch.crypto.apie) 2679 vcpu->arch.sie_block->eca |= ECA_APIE; 2680 2681 /* Set up protected key support */ 2682 if (vcpu->kvm->arch.crypto.aes_kw) 2683 vcpu->arch.sie_block->ecb3 |= ECB3_AES; 2684 if (vcpu->kvm->arch.crypto.dea_kw) 2685 vcpu->arch.sie_block->ecb3 |= ECB3_DEA; 2686 } 2687 2688 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu) 2689 { 2690 free_page(vcpu->arch.sie_block->cbrlo); 2691 vcpu->arch.sie_block->cbrlo = 0; 2692 } 2693 2694 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu) 2695 { 2696 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL); 2697 if (!vcpu->arch.sie_block->cbrlo) 2698 return -ENOMEM; 2699 return 0; 2700 } 2701 2702 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu) 2703 { 2704 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model; 2705 2706 vcpu->arch.sie_block->ibc = model->ibc; 2707 if (test_kvm_facility(vcpu->kvm, 7)) 2708 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list; 2709 } 2710 2711 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) 2712 { 2713 int rc = 0; 2714 2715 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | 2716 CPUSTAT_SM | 2717 CPUSTAT_STOPPED); 2718 2719 if (test_kvm_facility(vcpu->kvm, 78)) 2720 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2); 2721 else if (test_kvm_facility(vcpu->kvm, 8)) 2722 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED); 2723 2724 kvm_s390_vcpu_setup_model(vcpu); 2725 2726 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */ 2727 if (MACHINE_HAS_ESOP) 2728 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT; 2729 if (test_kvm_facility(vcpu->kvm, 9)) 2730 vcpu->arch.sie_block->ecb |= ECB_SRSI; 2731 if (test_kvm_facility(vcpu->kvm, 73)) 2732 vcpu->arch.sie_block->ecb |= ECB_TE; 2733 2734 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi) 2735 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI; 2736 if (test_kvm_facility(vcpu->kvm, 130)) 2737 vcpu->arch.sie_block->ecb2 |= ECB2_IEP; 2738 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI; 2739 if (sclp.has_cei) 2740 vcpu->arch.sie_block->eca |= ECA_CEI; 2741 if (sclp.has_ib) 2742 vcpu->arch.sie_block->eca |= ECA_IB; 2743 if (sclp.has_siif) 2744 vcpu->arch.sie_block->eca |= ECA_SII; 2745 if (sclp.has_sigpif) 2746 vcpu->arch.sie_block->eca |= ECA_SIGPI; 2747 if (test_kvm_facility(vcpu->kvm, 129)) { 2748 vcpu->arch.sie_block->eca |= ECA_VX; 2749 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; 2750 } 2751 if (test_kvm_facility(vcpu->kvm, 139)) 2752 vcpu->arch.sie_block->ecd |= ECD_MEF; 2753 if (test_kvm_facility(vcpu->kvm, 156)) 2754 vcpu->arch.sie_block->ecd |= ECD_ETOKENF; 2755 if (vcpu->arch.sie_block->gd) { 2756 vcpu->arch.sie_block->eca |= ECA_AIV; 2757 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u", 2758 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id); 2759 } 2760 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx) 2761 | SDNXC; 2762 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb; 2763 2764 if (sclp.has_kss) 2765 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS); 2766 else 2767 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; 2768 2769 if (vcpu->kvm->arch.use_cmma) { 2770 rc = kvm_s390_vcpu_setup_cmma(vcpu); 2771 if (rc) 2772 return rc; 2773 } 2774 
hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 2775 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; 2776 2777 vcpu->arch.sie_block->hpid = HPID_KVM; 2778 2779 kvm_s390_vcpu_crypto_setup(vcpu); 2780 2781 return rc; 2782 } 2783 2784 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, 2785 unsigned int id) 2786 { 2787 struct kvm_vcpu *vcpu; 2788 struct sie_page *sie_page; 2789 int rc = -EINVAL; 2790 2791 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id)) 2792 goto out; 2793 2794 rc = -ENOMEM; 2795 2796 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); 2797 if (!vcpu) 2798 goto out; 2799 2800 BUILD_BUG_ON(sizeof(struct sie_page) != 4096); 2801 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL); 2802 if (!sie_page) 2803 goto out_free_cpu; 2804 2805 vcpu->arch.sie_block = &sie_page->sie_block; 2806 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb; 2807 2808 /* the real guest size will always be smaller than msl */ 2809 vcpu->arch.sie_block->mso = 0; 2810 vcpu->arch.sie_block->msl = sclp.hamax; 2811 2812 vcpu->arch.sie_block->icpua = id; 2813 spin_lock_init(&vcpu->arch.local_int.lock); 2814 vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa; 2815 if (vcpu->arch.sie_block->gd && sclp.has_gisaf) 2816 vcpu->arch.sie_block->gd |= GISA_FORMAT1; 2817 seqcount_init(&vcpu->arch.cputm_seqcount); 2818 2819 rc = kvm_vcpu_init(vcpu, kvm, id); 2820 if (rc) 2821 goto out_free_sie_block; 2822 VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu, 2823 vcpu->arch.sie_block); 2824 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block); 2825 2826 return vcpu; 2827 out_free_sie_block: 2828 free_page((unsigned long)(vcpu->arch.sie_block)); 2829 out_free_cpu: 2830 kmem_cache_free(kvm_vcpu_cache, vcpu); 2831 out: 2832 return ERR_PTR(rc); 2833 } 2834 2835 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 2836 { 2837 return kvm_s390_vcpu_has_irq(vcpu, 0); 2838 } 2839 2840 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) 2841 { 2842 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE); 2843 } 2844 2845 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu) 2846 { 2847 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); 2848 exit_sie(vcpu); 2849 } 2850 2851 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu) 2852 { 2853 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); 2854 } 2855 2856 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu) 2857 { 2858 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20); 2859 exit_sie(vcpu); 2860 } 2861 2862 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu) 2863 { 2864 return atomic_read(&vcpu->arch.sie_block->prog20) & 2865 (PROG_BLOCK_SIE | PROG_REQUEST); 2866 } 2867 2868 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu) 2869 { 2870 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20); 2871 } 2872 2873 /* 2874 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running. 2875 * If the CPU is not running (e.g. waiting as idle) the function will 2876 * return immediately. 
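 * The STOP interrupt request (CPUSTAT_STOP_INT) forces an exit from SIE;
 * afterwards the caller spins on PROG_IN_SIE in prog0c until the CPU has
 * actually left SIE.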
*/ 2877 void exit_sie(struct kvm_vcpu *vcpu) 2878 { 2879 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT); 2880 kvm_s390_vsie_kick(vcpu); 2881 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE) 2882 cpu_relax(); 2883 } 2884 2885 /* Kick a guest cpu out of SIE to process a request synchronously */ 2886 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu) 2887 { 2888 kvm_make_request(req, vcpu); 2889 kvm_s390_vcpu_request(vcpu); 2890 } 2891 2892 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, 2893 unsigned long end) 2894 { 2895 struct kvm *kvm = gmap->private; 2896 struct kvm_vcpu *vcpu; 2897 unsigned long prefix; 2898 int i; 2899 2900 if (gmap_is_shadow(gmap)) 2901 return; 2902 if (start >= 1UL << 31) 2903 /* We are only interested in prefix pages */ 2904 return; 2905 kvm_for_each_vcpu(i, vcpu, kvm) { 2906 /* match against both prefix pages */ 2907 prefix = kvm_s390_get_prefix(vcpu); 2908 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) { 2909 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx", 2910 start, end); 2911 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu); 2912 } 2913 } 2914 } 2915 2916 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) 2917 { 2918 /* kvm common code refers to this, but never calls it */ 2919 BUG(); 2920 return 0; 2921 } 2922 2923 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, 2924 struct kvm_one_reg *reg) 2925 { 2926 int r = -EINVAL; 2927 2928 switch (reg->id) { 2929 case KVM_REG_S390_TODPR: 2930 r = put_user(vcpu->arch.sie_block->todpr, 2931 (u32 __user *)reg->addr); 2932 break; 2933 case KVM_REG_S390_EPOCHDIFF: 2934 r = put_user(vcpu->arch.sie_block->epoch, 2935 (u64 __user *)reg->addr); 2936 break; 2937 case KVM_REG_S390_CPU_TIMER: 2938 r = put_user(kvm_s390_get_cpu_timer(vcpu), 2939 (u64 __user *)reg->addr); 2940 break; 2941 case KVM_REG_S390_CLOCK_COMP: 2942 r = put_user(vcpu->arch.sie_block->ckc, 2943 (u64 __user *)reg->addr); 2944 break; 2945 case KVM_REG_S390_PFTOKEN: 2946 r = put_user(vcpu->arch.pfault_token, 2947 (u64 __user *)reg->addr); 2948 break; 2949 case KVM_REG_S390_PFCOMPARE: 2950 r = put_user(vcpu->arch.pfault_compare, 2951 (u64 __user *)reg->addr); 2952 break; 2953 case KVM_REG_S390_PFSELECT: 2954 r = put_user(vcpu->arch.pfault_select, 2955 (u64 __user *)reg->addr); 2956 break; 2957 case KVM_REG_S390_PP: 2958 r = put_user(vcpu->arch.sie_block->pp, 2959 (u64 __user *)reg->addr); 2960 break; 2961 case KVM_REG_S390_GBEA: 2962 r = put_user(vcpu->arch.sie_block->gbea, 2963 (u64 __user *)reg->addr); 2964 break; 2965 default: 2966 break; 2967 } 2968 2969 return r; 2970 } 2971 2972 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, 2973 struct kvm_one_reg *reg) 2974 { 2975 int r = -EINVAL; 2976 __u64 val; 2977 2978 switch (reg->id) { 2979 case KVM_REG_S390_TODPR: 2980 r = get_user(vcpu->arch.sie_block->todpr, 2981 (u32 __user *)reg->addr); 2982 break; 2983 case KVM_REG_S390_EPOCHDIFF: 2984 r = get_user(vcpu->arch.sie_block->epoch, 2985 (u64 __user *)reg->addr); 2986 break; 2987 case KVM_REG_S390_CPU_TIMER: 2988 r = get_user(val, (u64 __user *)reg->addr); 2989 if (!r) 2990 kvm_s390_set_cpu_timer(vcpu, val); 2991 break; 2992 case KVM_REG_S390_CLOCK_COMP: 2993 r = get_user(vcpu->arch.sie_block->ckc, 2994 (u64 __user *)reg->addr); 2995 break; 2996 case KVM_REG_S390_PFTOKEN: 2997 r = get_user(vcpu->arch.pfault_token, 2998 (u64 __user *)reg->addr); 2999 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 3000 kvm_clear_async_pf_completion_queue(vcpu); 3001 break; 3002 case 
KVM_REG_S390_PFCOMPARE:
		r = get_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = get_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = get_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = get_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}

static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	vcpu_load(vcpu);
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	vcpu_load(vcpu);
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	vcpu_load(vcpu);

	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));

	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	vcpu_load(vcpu);

	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));

	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	int ret = 0;

	vcpu_load(vcpu);

	if (test_fp_ctl(fpu->fpc)) {
		ret = -EINVAL;
		goto out;
	}
	vcpu->run->s.regs.fpc = fpu->fpc;
	if (MACHINE_HAS_VX)
		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
				 (freg_t *) fpu->fprs);
	else
		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));

out:
	vcpu_put(vcpu);
	return ret;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	vcpu_load(vcpu);

	/* make sure we have the latest values */
	save_fpu_regs();
	if (MACHINE_HAS_VX)
		convert_vx_to_fp((freg_t *) fpu->fprs,
				 (__vector128 *) vcpu->run->s.regs.vrs);
	else
		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->run->s.regs.fpc;

	vcpu_put(vcpu);
	return 0;
}

static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!is_vcpu_stopped(vcpu))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}

#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	int rc = 0;

	vcpu_load(vcpu);

	vcpu->guest_debug = 0;
kvm_s390_clear_bp_data(vcpu); 3142 3143 if (dbg->control & ~VALID_GUESTDBG_FLAGS) { 3144 rc = -EINVAL; 3145 goto out; 3146 } 3147 if (!sclp.has_gpere) { 3148 rc = -EINVAL; 3149 goto out; 3150 } 3151 3152 if (dbg->control & KVM_GUESTDBG_ENABLE) { 3153 vcpu->guest_debug = dbg->control; 3154 /* enforce guest PER */ 3155 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P); 3156 3157 if (dbg->control & KVM_GUESTDBG_USE_HW_BP) 3158 rc = kvm_s390_import_bp_data(vcpu, dbg); 3159 } else { 3160 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P); 3161 vcpu->arch.guestdbg.last_bp = 0; 3162 } 3163 3164 if (rc) { 3165 vcpu->guest_debug = 0; 3166 kvm_s390_clear_bp_data(vcpu); 3167 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P); 3168 } 3169 3170 out: 3171 vcpu_put(vcpu); 3172 return rc; 3173 } 3174 3175 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, 3176 struct kvm_mp_state *mp_state) 3177 { 3178 int ret; 3179 3180 vcpu_load(vcpu); 3181 3182 /* CHECK_STOP and LOAD are not supported yet */ 3183 ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED : 3184 KVM_MP_STATE_OPERATING; 3185 3186 vcpu_put(vcpu); 3187 return ret; 3188 } 3189 3190 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, 3191 struct kvm_mp_state *mp_state) 3192 { 3193 int rc = 0; 3194 3195 vcpu_load(vcpu); 3196 3197 /* user space knows about this interface - let it control the state */ 3198 vcpu->kvm->arch.user_cpu_state_ctrl = 1; 3199 3200 switch (mp_state->mp_state) { 3201 case KVM_MP_STATE_STOPPED: 3202 kvm_s390_vcpu_stop(vcpu); 3203 break; 3204 case KVM_MP_STATE_OPERATING: 3205 kvm_s390_vcpu_start(vcpu); 3206 break; 3207 case KVM_MP_STATE_LOAD: 3208 case KVM_MP_STATE_CHECK_STOP: 3209 /* fall through - CHECK_STOP and LOAD are not supported yet */ 3210 default: 3211 rc = -ENXIO; 3212 } 3213 3214 vcpu_put(vcpu); 3215 return rc; 3216 } 3217 3218 static bool ibs_enabled(struct kvm_vcpu *vcpu) 3219 { 3220 return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS); 3221 } 3222 3223 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) 3224 { 3225 retry: 3226 kvm_s390_vcpu_request_handled(vcpu); 3227 if (!kvm_request_pending(vcpu)) 3228 return 0; 3229 /* 3230 * We use MMU_RELOAD just to re-arm the ipte notifier for the 3231 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock. 3232 * This ensures that the ipte instruction for this request has 3233 * already finished. We might race against a second unmapper that 3234 * wants to set the blocking bit. Lets just retry the request loop. 
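	 * Note that both prefix pages are re-protected (PAGE_SIZE * 2) and
	 * that a failure re-arms the request and propagates the error to the
	 * caller.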
3235 */ 3236 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) { 3237 int rc; 3238 rc = gmap_mprotect_notify(vcpu->arch.gmap, 3239 kvm_s390_get_prefix(vcpu), 3240 PAGE_SIZE * 2, PROT_WRITE); 3241 if (rc) { 3242 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); 3243 return rc; 3244 } 3245 goto retry; 3246 } 3247 3248 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) { 3249 vcpu->arch.sie_block->ihcpu = 0xffff; 3250 goto retry; 3251 } 3252 3253 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) { 3254 if (!ibs_enabled(vcpu)) { 3255 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1); 3256 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS); 3257 } 3258 goto retry; 3259 } 3260 3261 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) { 3262 if (ibs_enabled(vcpu)) { 3263 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0); 3264 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS); 3265 } 3266 goto retry; 3267 } 3268 3269 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) { 3270 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC; 3271 goto retry; 3272 } 3273 3274 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) { 3275 /* 3276 * Disable CMM virtualization; we will emulate the ESSA 3277 * instruction manually, in order to provide additional 3278 * functionalities needed for live migration. 3279 */ 3280 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA; 3281 goto retry; 3282 } 3283 3284 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) { 3285 /* 3286 * Re-enable CMM virtualization if CMMA is available and 3287 * CMM has been used. 3288 */ 3289 if ((vcpu->kvm->arch.use_cmma) && 3290 (vcpu->kvm->mm->context.uses_cmm)) 3291 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA; 3292 goto retry; 3293 } 3294 3295 /* nothing to do, just clear the request */ 3296 kvm_clear_request(KVM_REQ_UNHALT, vcpu); 3297 /* we left the vsie handler, nothing to do, just clear the request */ 3298 kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu); 3299 3300 return 0; 3301 } 3302 3303 void kvm_s390_set_tod_clock(struct kvm *kvm, 3304 const struct kvm_s390_vm_tod_clock *gtod) 3305 { 3306 struct kvm_vcpu *vcpu; 3307 struct kvm_s390_tod_clock_ext htod; 3308 int i; 3309 3310 mutex_lock(&kvm->lock); 3311 preempt_disable(); 3312 3313 get_tod_clock_ext((char *)&htod); 3314 3315 kvm->arch.epoch = gtod->tod - htod.tod; 3316 kvm->arch.epdx = 0; 3317 if (test_kvm_facility(kvm, 139)) { 3318 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx; 3319 if (kvm->arch.epoch > gtod->tod) 3320 kvm->arch.epdx -= 1; 3321 } 3322 3323 kvm_s390_vcpu_block_all(kvm); 3324 kvm_for_each_vcpu(i, vcpu, kvm) { 3325 vcpu->arch.sie_block->epoch = kvm->arch.epoch; 3326 vcpu->arch.sie_block->epdx = kvm->arch.epdx; 3327 } 3328 3329 kvm_s390_vcpu_unblock_all(kvm); 3330 preempt_enable(); 3331 mutex_unlock(&kvm->lock); 3332 } 3333 3334 /** 3335 * kvm_arch_fault_in_page - fault-in guest page if necessary 3336 * @vcpu: The corresponding virtual cpu 3337 * @gpa: Guest physical address 3338 * @writable: Whether the page should be writable or not 3339 * 3340 * Make sure that a guest page has been faulted-in on the host. 3341 * 3342 * Return: Zero on success, negative error code otherwise. 3343 */ 3344 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable) 3345 { 3346 return gmap_fault(vcpu->arch.gmap, gpa, 3347 writable ? 
FAULT_FLAG_WRITE : 0); 3348 } 3349 3350 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token, 3351 unsigned long token) 3352 { 3353 struct kvm_s390_interrupt inti; 3354 struct kvm_s390_irq irq; 3355 3356 if (start_token) { 3357 irq.u.ext.ext_params2 = token; 3358 irq.type = KVM_S390_INT_PFAULT_INIT; 3359 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq)); 3360 } else { 3361 inti.type = KVM_S390_INT_PFAULT_DONE; 3362 inti.parm64 = token; 3363 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti)); 3364 } 3365 } 3366 3367 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, 3368 struct kvm_async_pf *work) 3369 { 3370 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token); 3371 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token); 3372 } 3373 3374 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, 3375 struct kvm_async_pf *work) 3376 { 3377 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token); 3378 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token); 3379 } 3380 3381 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, 3382 struct kvm_async_pf *work) 3383 { 3384 /* s390 will always inject the page directly */ 3385 } 3386 3387 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu) 3388 { 3389 /* 3390 * s390 will always inject the page directly, 3391 * but we still want check_async_completion to cleanup 3392 */ 3393 return true; 3394 } 3395 3396 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu) 3397 { 3398 hva_t hva; 3399 struct kvm_arch_async_pf arch; 3400 int rc; 3401 3402 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 3403 return 0; 3404 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) != 3405 vcpu->arch.pfault_compare) 3406 return 0; 3407 if (psw_extint_disabled(vcpu)) 3408 return 0; 3409 if (kvm_s390_vcpu_has_irq(vcpu, 0)) 3410 return 0; 3411 if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK)) 3412 return 0; 3413 if (!vcpu->arch.gmap->pfault_enabled) 3414 return 0; 3415 3416 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr)); 3417 hva += current->thread.gmap_addr & ~PAGE_MASK; 3418 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8)) 3419 return 0; 3420 3421 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch); 3422 return rc; 3423 } 3424 3425 static int vcpu_pre_run(struct kvm_vcpu *vcpu) 3426 { 3427 int rc, cpuflags; 3428 3429 /* 3430 * On s390 notifications for arriving pages will be delivered directly 3431 * to the guest but the house keeping for completed pfaults is 3432 * handled outside the worker. 
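	 * kvm_check_async_pf_completion() below performs that housekeeping
	 * from the VCPU thread before every guest entry.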
3433 */ 3434 kvm_check_async_pf_completion(vcpu); 3435 3436 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14]; 3437 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15]; 3438 3439 if (need_resched()) 3440 schedule(); 3441 3442 if (test_cpu_flag(CIF_MCCK_PENDING)) 3443 s390_handle_mcck(); 3444 3445 if (!kvm_is_ucontrol(vcpu->kvm)) { 3446 rc = kvm_s390_deliver_pending_interrupts(vcpu); 3447 if (rc) 3448 return rc; 3449 } 3450 3451 rc = kvm_s390_handle_requests(vcpu); 3452 if (rc) 3453 return rc; 3454 3455 if (guestdbg_enabled(vcpu)) { 3456 kvm_s390_backup_guest_per_regs(vcpu); 3457 kvm_s390_patch_guest_per_regs(vcpu); 3458 } 3459 3460 vcpu->arch.sie_block->icptcode = 0; 3461 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags); 3462 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags); 3463 trace_kvm_s390_sie_enter(vcpu, cpuflags); 3464 3465 return 0; 3466 } 3467 3468 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) 3469 { 3470 struct kvm_s390_pgm_info pgm_info = { 3471 .code = PGM_ADDRESSING, 3472 }; 3473 u8 opcode, ilen; 3474 int rc; 3475 3476 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); 3477 trace_kvm_s390_sie_fault(vcpu); 3478 3479 /* 3480 * We want to inject an addressing exception, which is defined as a 3481 * suppressing or terminating exception. However, since we came here 3482 * by a DAT access exception, the PSW still points to the faulting 3483 * instruction since DAT exceptions are nullifying. So we've got 3484 * to look up the current opcode to get the length of the instruction 3485 * to be able to forward the PSW. 3486 */ 3487 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1); 3488 ilen = insn_length(opcode); 3489 if (rc < 0) { 3490 return rc; 3491 } else if (rc) { 3492 /* Instruction-Fetching Exceptions - we can't detect the ilen. 3493 * Forward by arbitrary ilc, injection will take care of 3494 * nullification if necessary. 
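		 * An arbitrary ilen of 4 is used for that case below.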
3495 */ 3496 pgm_info = vcpu->arch.pgm; 3497 ilen = 4; 3498 } 3499 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID; 3500 kvm_s390_forward_psw(vcpu, ilen); 3501 return kvm_s390_inject_prog_irq(vcpu, &pgm_info); 3502 } 3503 3504 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) 3505 { 3506 struct mcck_volatile_info *mcck_info; 3507 struct sie_page *sie_page; 3508 3509 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", 3510 vcpu->arch.sie_block->icptcode); 3511 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); 3512 3513 if (guestdbg_enabled(vcpu)) 3514 kvm_s390_restore_guest_per_regs(vcpu); 3515 3516 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14; 3517 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15; 3518 3519 if (exit_reason == -EINTR) { 3520 VCPU_EVENT(vcpu, 3, "%s", "machine check"); 3521 sie_page = container_of(vcpu->arch.sie_block, 3522 struct sie_page, sie_block); 3523 mcck_info = &sie_page->mcck_info; 3524 kvm_s390_reinject_machine_check(vcpu, mcck_info); 3525 return 0; 3526 } 3527 3528 if (vcpu->arch.sie_block->icptcode > 0) { 3529 int rc = kvm_handle_sie_intercept(vcpu); 3530 3531 if (rc != -EOPNOTSUPP) 3532 return rc; 3533 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC; 3534 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; 3535 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; 3536 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; 3537 return -EREMOTE; 3538 } else if (exit_reason != -EFAULT) { 3539 vcpu->stat.exit_null++; 3540 return 0; 3541 } else if (kvm_is_ucontrol(vcpu->kvm)) { 3542 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; 3543 vcpu->run->s390_ucontrol.trans_exc_code = 3544 current->thread.gmap_addr; 3545 vcpu->run->s390_ucontrol.pgm_code = 0x10; 3546 return -EREMOTE; 3547 } else if (current->thread.gmap_pfault) { 3548 trace_kvm_s390_major_guest_pfault(vcpu); 3549 current->thread.gmap_pfault = 0; 3550 if (kvm_arch_setup_async_pf(vcpu)) 3551 return 0; 3552 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1); 3553 } 3554 return vcpu_post_run_fault_in_sie(vcpu); 3555 } 3556 3557 static int __vcpu_run(struct kvm_vcpu *vcpu) 3558 { 3559 int rc, exit_reason; 3560 3561 /* 3562 * We try to hold kvm->srcu during most of vcpu_run (except when run- 3563 * ning the guest), so that memslots (and other stuff) are protected 3564 */ 3565 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 3566 3567 do { 3568 rc = vcpu_pre_run(vcpu); 3569 if (rc) 3570 break; 3571 3572 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 3573 /* 3574 * As PF_VCPU will be used in fault handler, between 3575 * guest_enter and guest_exit should be no uaccess. 
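		 * (i.e. no copy_to_user()/copy_from_user() between the
		 * guest_enter_irqoff() and guest_exit_irqoff() calls below).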
3576 */ 3577 local_irq_disable(); 3578 guest_enter_irqoff(); 3579 __disable_cpu_timer_accounting(vcpu); 3580 local_irq_enable(); 3581 exit_reason = sie64a(vcpu->arch.sie_block, 3582 vcpu->run->s.regs.gprs); 3583 local_irq_disable(); 3584 __enable_cpu_timer_accounting(vcpu); 3585 guest_exit_irqoff(); 3586 local_irq_enable(); 3587 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 3588 3589 rc = vcpu_post_run(vcpu, exit_reason); 3590 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc); 3591 3592 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 3593 return rc; 3594 } 3595 3596 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3597 { 3598 struct runtime_instr_cb *riccb; 3599 struct gs_cb *gscb; 3600 3601 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb; 3602 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb; 3603 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; 3604 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; 3605 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) 3606 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); 3607 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) { 3608 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128); 3609 /* some control register changes require a tlb flush */ 3610 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 3611 } 3612 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) { 3613 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm); 3614 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc; 3615 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr; 3616 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp; 3617 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea; 3618 } 3619 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) { 3620 vcpu->arch.pfault_token = kvm_run->s.regs.pft; 3621 vcpu->arch.pfault_select = kvm_run->s.regs.pfs; 3622 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc; 3623 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 3624 kvm_clear_async_pf_completion_queue(vcpu); 3625 } 3626 /* 3627 * If userspace sets the riccb (e.g. after migration) to a valid state, 3628 * we should enable RI here instead of doing the lazy enablement. 3629 */ 3630 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) && 3631 test_kvm_facility(vcpu->kvm, 64) && 3632 riccb->v && 3633 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) { 3634 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)"); 3635 vcpu->arch.sie_block->ecb3 |= ECB3_RI; 3636 } 3637 /* 3638 * If userspace sets the gscb (e.g. after migration) to non-zero, 3639 * we should enable GS here instead of doing the lazy enablement. 3640 */ 3641 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) && 3642 test_kvm_facility(vcpu->kvm, 133) && 3643 gscb->gssm && 3644 !vcpu->arch.gs_enabled) { 3645 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)"); 3646 vcpu->arch.sie_block->ecb |= ECB_GS; 3647 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; 3648 vcpu->arch.gs_enabled = 1; 3649 } 3650 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) && 3651 test_kvm_facility(vcpu->kvm, 82)) { 3652 vcpu->arch.sie_block->fpf &= ~FPF_BPBC; 3653 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? 
FPF_BPBC : 0; 3654 } 3655 save_access_regs(vcpu->arch.host_acrs); 3656 restore_access_regs(vcpu->run->s.regs.acrs); 3657 /* save host (userspace) fprs/vrs */ 3658 save_fpu_regs(); 3659 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc; 3660 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs; 3661 if (MACHINE_HAS_VX) 3662 current->thread.fpu.regs = vcpu->run->s.regs.vrs; 3663 else 3664 current->thread.fpu.regs = vcpu->run->s.regs.fprs; 3665 current->thread.fpu.fpc = vcpu->run->s.regs.fpc; 3666 if (test_fp_ctl(current->thread.fpu.fpc)) 3667 /* User space provided an invalid FPC, let's clear it */ 3668 current->thread.fpu.fpc = 0; 3669 if (MACHINE_HAS_GS) { 3670 preempt_disable(); 3671 __ctl_set_bit(2, 4); 3672 if (current->thread.gs_cb) { 3673 vcpu->arch.host_gscb = current->thread.gs_cb; 3674 save_gs_cb(vcpu->arch.host_gscb); 3675 } 3676 if (vcpu->arch.gs_enabled) { 3677 current->thread.gs_cb = (struct gs_cb *) 3678 &vcpu->run->s.regs.gscb; 3679 restore_gs_cb(current->thread.gs_cb); 3680 } 3681 preempt_enable(); 3682 } 3683 /* SIE will load etoken directly from SDNX and therefore kvm_run */ 3684 3685 kvm_run->kvm_dirty_regs = 0; 3686 } 3687 3688 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3689 { 3690 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask; 3691 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; 3692 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu); 3693 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128); 3694 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu); 3695 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc; 3696 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr; 3697 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp; 3698 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea; 3699 kvm_run->s.regs.pft = vcpu->arch.pfault_token; 3700 kvm_run->s.regs.pfs = vcpu->arch.pfault_select; 3701 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare; 3702 kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC; 3703 save_access_regs(vcpu->run->s.regs.acrs); 3704 restore_access_regs(vcpu->arch.host_acrs); 3705 /* Save guest register state */ 3706 save_fpu_regs(); 3707 vcpu->run->s.regs.fpc = current->thread.fpu.fpc; 3708 /* Restore will be done lazily at return */ 3709 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc; 3710 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs; 3711 if (MACHINE_HAS_GS) { 3712 __ctl_set_bit(2, 4); 3713 if (vcpu->arch.gs_enabled) 3714 save_gs_cb(current->thread.gs_cb); 3715 preempt_disable(); 3716 current->thread.gs_cb = vcpu->arch.host_gscb; 3717 restore_gs_cb(vcpu->arch.host_gscb); 3718 preempt_enable(); 3719 if (!vcpu->arch.host_gscb) 3720 __ctl_clear_bit(2, 4); 3721 vcpu->arch.host_gscb = NULL; 3722 } 3723 /* SIE will save etoken directly into SDNX and therefore kvm_run */ 3724 } 3725 3726 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3727 { 3728 int rc; 3729 3730 if (kvm_run->immediate_exit) 3731 return -EINTR; 3732 3733 vcpu_load(vcpu); 3734 3735 if (guestdbg_exit_pending(vcpu)) { 3736 kvm_s390_prepare_debug_exit(vcpu); 3737 rc = 0; 3738 goto out; 3739 } 3740 3741 kvm_sigset_activate(vcpu); 3742 3743 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) { 3744 kvm_s390_vcpu_start(vcpu); 3745 } else if (is_vcpu_stopped(vcpu)) { 3746 pr_err_ratelimited("can't run stopped vcpu %d\n", 3747 vcpu->vcpu_id); 3748 rc = -EINVAL; 3749 goto out; 3750 } 3751 3752 sync_regs(vcpu, kvm_run); 3753 enable_cpu_timer_accounting(vcpu); 3754 3755 might_fault(); 3756 rc = 
__vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}

	disable_cpu_timer_accounting(vcpu);
	store_regs(vcpu, kvm_run);

	kvm_sigset_deactivate(vcpu);

	vcpu->stat.exit_userspace++;
out:
	vcpu_put(vcpu);
	return rc;
}

/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp, cputm;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else
		gpa -= __LC_FPREGS_SAVE_AREA;

	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	cputm = kvm_s390_get_cpu_timer(vcpu);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}

int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * switch in the run ioctl.
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * switch in the run ioctl. Let's update our copies before we save
	 * them into the save area.
	 */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}
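/*
 * Example (illustrative sketch only, hypothetical vcpu_fd): userspace reaches
 * the store above through the vcpu ioctl handled later in this file. The
 * ioctl argument is the guest absolute address, or one of the two special
 * values interpreted by kvm_s390_store_status_unloaded():
 *
 *	ioctl(vcpu_fd, KVM_S390_STORE_STATUS, KVM_S390_STORE_STATUS_PREFIXED);
 */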
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	if (!sclp.has_ibs)
		return;
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}

void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}

void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}

static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r, srcu_idx;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags)
		return -EINVAL;

	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;

	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_FETCH);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_STORE);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}
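/*
 * Example (illustrative sketch only, hypothetical vcpu_fd, untested): a
 * KVM_S390_MEM_OP request as served by kvm_s390_guest_mem_op() above,
 * reading 256 bytes from guest logical address 0x10000 via access
 * register 0:
 *
 *	unsigned char buf[256];
 *	struct kvm_s390_mem_op mop = {
 *		.gaddr = 0x10000,
 *		.size  = sizeof(buf),
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (unsigned long)buf,
 *		.ar    = 0,
 *	};
 *
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &mop);
 *
 * Setting KVM_S390_MEMOP_F_CHECK_ONLY in .flags only verifies that the
 * range is accessible; no data is copied in that case.
 */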
long kvm_arch_vcpu_async_ioctl(struct file *filp,
			       unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			return -EFAULT;
		return kvm_s390_inject_vcpu(vcpu, &s390irq);
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq;

		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			return -EFAULT;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		return kvm_s390_inject_vcpu(vcpu, &s390irq);
	}
	}
	return -ENOIOCTLCMD;
}

long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	vcpu_load(vcpu);

	switch (ioctl) {
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;
		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}

	vcpu_put(vcpu);
	return r;
}

vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
			 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/*
	 * A few sanity checks: memory slots have to start and end on a
	 * segment boundary (1 MB). The memory in userland may be fragmented
	 * into various different vmas. It is okay to mmap() and munmap()
	 * in this slot at any time after this call.
	 */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
		return -EINVAL;

	return 0;
}
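/*
 * Example (illustrative sketch only, hypothetical vm_fd, untested): a
 * KVM_SET_USER_MEMORY_REGION call that satisfies the 1 MB alignment checks
 * above. Both the size and the userspace address are 1 MB aligned:
 *
 *	size_t size = 256UL << 20;			// 256 MB, multiple of 1 MB
 *	void *mem = aligned_alloc(1UL << 20, size);	// 1 MB aligned address
 *	struct kvm_userspace_memory_region region = {
 *		.slot = 0,
 *		.guest_phys_addr = 0,
 *		.memory_size = size,
 *		.userspace_addr = (unsigned long)mem,
 *	};
 *
 *	ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
 */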
void kvm_arch_commit_memory_region(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				const struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new,
				enum kvm_mr_change change)
{
	int rc;

	/* If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
			      mem->guest_phys_addr, mem->memory_size);
	if (rc)
		pr_warn("failed to commit memory region\n");
	return;
}

static inline unsigned long nonhyp_mask(int i)
{
	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}

void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
	vcpu->valid_wakeup = false;
}

static int __init kvm_s390_init(void)
{
	int i;

	if (!sclp.has_sief2) {
		pr_info("SIE not available\n");
		return -ENODEV;
	}

	if (nested && hpage) {
		pr_info("nested (vSIE) and hpage (huge page backing) can currently not be activated concurrently\n");
		return -EINVAL;
	}

	for (i = 0; i < 16; i++)
		kvm_s390_fac_base[i] |=
			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");
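/*
 * Example (illustrative command lines only; parameter names are the nested
 * and hpage module parameters referenced in kvm_s390_init() above): the
 * init-time check makes the two parameters mutually exclusive at load time.
 *
 *	modprobe kvm nested=1		# vSIE, no huge page backing
 *	modprobe kvm hpage=1		# huge page backing, no vSIE
 *	modprobe kvm nested=1 hpage=1	# rejected with -EINVAL
 */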