1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * hosting IBM Z kernel virtual machines (s390x) 4 * 5 * Copyright IBM Corp. 2008, 2017 6 * 7 * Author(s): Carsten Otte <cotte@de.ibm.com> 8 * Christian Borntraeger <borntraeger@de.ibm.com> 9 * Heiko Carstens <heiko.carstens@de.ibm.com> 10 * Christian Ehrhardt <ehrhardt@de.ibm.com> 11 * Jason J. Herne <jjherne@us.ibm.com> 12 */ 13 14 #include <linux/compiler.h> 15 #include <linux/err.h> 16 #include <linux/fs.h> 17 #include <linux/hrtimer.h> 18 #include <linux/init.h> 19 #include <linux/kvm.h> 20 #include <linux/kvm_host.h> 21 #include <linux/mman.h> 22 #include <linux/module.h> 23 #include <linux/moduleparam.h> 24 #include <linux/random.h> 25 #include <linux/slab.h> 26 #include <linux/timer.h> 27 #include <linux/vmalloc.h> 28 #include <linux/bitmap.h> 29 #include <linux/sched/signal.h> 30 #include <linux/string.h> 31 32 #include <asm/asm-offsets.h> 33 #include <asm/lowcore.h> 34 #include <asm/stp.h> 35 #include <asm/pgtable.h> 36 #include <asm/gmap.h> 37 #include <asm/nmi.h> 38 #include <asm/switch_to.h> 39 #include <asm/isc.h> 40 #include <asm/sclp.h> 41 #include <asm/cpacf.h> 42 #include <asm/timex.h> 43 #include "kvm-s390.h" 44 #include "gaccess.h" 45 46 #define KMSG_COMPONENT "kvm-s390" 47 #undef pr_fmt 48 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 49 50 #define CREATE_TRACE_POINTS 51 #include "trace.h" 52 #include "trace-s390.h" 53 54 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */ 55 #define LOCAL_IRQS 32 56 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \ 57 (KVM_MAX_VCPUS + LOCAL_IRQS)) 58 59 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU 60 61 struct kvm_stats_debugfs_item debugfs_entries[] = { 62 { "userspace_handled", VCPU_STAT(exit_userspace) }, 63 { "exit_null", VCPU_STAT(exit_null) }, 64 { "exit_validity", VCPU_STAT(exit_validity) }, 65 { "exit_stop_request", VCPU_STAT(exit_stop_request) }, 66 { "exit_external_request", VCPU_STAT(exit_external_request) }, 67 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) }, 68 { "exit_instruction", VCPU_STAT(exit_instruction) }, 69 { "exit_pei", VCPU_STAT(exit_pei) }, 70 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) }, 71 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) }, 72 { "exit_operation_exception", VCPU_STAT(exit_operation_exception) }, 73 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) }, 74 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) }, 75 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) }, 76 { "halt_wakeup", VCPU_STAT(halt_wakeup) }, 77 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) }, 78 { "instruction_lctl", VCPU_STAT(instruction_lctl) }, 79 { "instruction_stctl", VCPU_STAT(instruction_stctl) }, 80 { "instruction_stctg", VCPU_STAT(instruction_stctg) }, 81 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) }, 82 { "deliver_external_call", VCPU_STAT(deliver_external_call) }, 83 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) }, 84 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) }, 85 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) }, 86 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) }, 87 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) }, 88 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) }, 89 { "exit_wait_state", VCPU_STAT(exit_wait_state) }, 90 { "instruction_pfmf", VCPU_STAT(instruction_pfmf) }, 91 { "instruction_stidp", 
VCPU_STAT(instruction_stidp) }, 92 { "instruction_spx", VCPU_STAT(instruction_spx) }, 93 { "instruction_stpx", VCPU_STAT(instruction_stpx) }, 94 { "instruction_stap", VCPU_STAT(instruction_stap) }, 95 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) }, 96 { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) }, 97 { "instruction_stsch", VCPU_STAT(instruction_stsch) }, 98 { "instruction_chsc", VCPU_STAT(instruction_chsc) }, 99 { "instruction_essa", VCPU_STAT(instruction_essa) }, 100 { "instruction_stsi", VCPU_STAT(instruction_stsi) }, 101 { "instruction_stfl", VCPU_STAT(instruction_stfl) }, 102 { "instruction_tprot", VCPU_STAT(instruction_tprot) }, 103 { "instruction_sthyi", VCPU_STAT(instruction_sthyi) }, 104 { "instruction_sie", VCPU_STAT(instruction_sie) }, 105 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) }, 106 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) }, 107 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) }, 108 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) }, 109 { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) }, 110 { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) }, 111 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) }, 112 { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) }, 113 { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) }, 114 { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) }, 115 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) }, 116 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) }, 117 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) }, 118 { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) }, 119 { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) }, 120 { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) }, 121 { "diagnose_10", VCPU_STAT(diagnose_10) }, 122 { "diagnose_44", VCPU_STAT(diagnose_44) }, 123 { "diagnose_9c", VCPU_STAT(diagnose_9c) }, 124 { "diagnose_258", VCPU_STAT(diagnose_258) }, 125 { "diagnose_308", VCPU_STAT(diagnose_308) }, 126 { "diagnose_500", VCPU_STAT(diagnose_500) }, 127 { NULL } 128 }; 129 130 struct kvm_s390_tod_clock_ext { 131 __u8 epoch_idx; 132 __u64 tod; 133 __u8 reserved[7]; 134 } __packed; 135 136 /* allow nested virtualization in KVM (if enabled by user space) */ 137 static int nested; 138 module_param(nested, int, S_IRUGO); 139 MODULE_PARM_DESC(nested, "Nested virtualization support"); 140 141 /* upper facilities limit for kvm */ 142 unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM }; 143 144 unsigned long kvm_s390_fac_list_mask_size(void) 145 { 146 BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64); 147 return ARRAY_SIZE(kvm_s390_fac_list_mask); 148 } 149 150 /* available cpu features supported by kvm */ 151 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS); 152 /* available subfunctions indicated via query / "test bit" */ 153 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc; 154 155 static struct gmap_notifier gmap_notifier; 156 static struct gmap_notifier vsie_gmap_notifier; 157 debug_info_t *kvm_s390_dbf; 158 159 /* Section: not file related */ 160 int kvm_arch_hardware_enable(void) 161 { 162 /* every s390 is virtualization enabled ;-) */ 163 
return 0; 164 } 165 166 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, 167 unsigned long end); 168 169 /* 170 * This callback is executed during stop_machine(). All CPUs are therefore 171 * temporarily stopped. In order not to change guest behavior, we have to 172 * disable preemption whenever we touch the epoch of kvm and the VCPUs, 173 * so a CPU won't be stopped while calculating with the epoch. 174 */ 175 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val, 176 void *v) 177 { 178 struct kvm *kvm; 179 struct kvm_vcpu *vcpu; 180 int i; 181 unsigned long long *delta = v; 182 183 list_for_each_entry(kvm, &vm_list, vm_list) { 184 kvm->arch.epoch -= *delta; 185 kvm_for_each_vcpu(i, vcpu, kvm) { 186 vcpu->arch.sie_block->epoch -= *delta; 187 if (vcpu->arch.cputm_enabled) 188 vcpu->arch.cputm_start += *delta; 189 if (vcpu->arch.vsie_block) 190 vcpu->arch.vsie_block->epoch -= *delta; 191 } 192 } 193 return NOTIFY_OK; 194 } 195 196 static struct notifier_block kvm_clock_notifier = { 197 .notifier_call = kvm_clock_sync, 198 }; 199 200 int kvm_arch_hardware_setup(void) 201 { 202 gmap_notifier.notifier_call = kvm_gmap_notifier; 203 gmap_register_pte_notifier(&gmap_notifier); 204 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier; 205 gmap_register_pte_notifier(&vsie_gmap_notifier); 206 atomic_notifier_chain_register(&s390_epoch_delta_notifier, 207 &kvm_clock_notifier); 208 return 0; 209 } 210 211 void kvm_arch_hardware_unsetup(void) 212 { 213 gmap_unregister_pte_notifier(&gmap_notifier); 214 gmap_unregister_pte_notifier(&vsie_gmap_notifier); 215 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier, 216 &kvm_clock_notifier); 217 } 218 219 static void allow_cpu_feat(unsigned long nr) 220 { 221 set_bit_inv(nr, kvm_s390_available_cpu_feat); 222 } 223 224 static inline int plo_test_bit(unsigned char nr) 225 { 226 register unsigned long r0 asm("0") = (unsigned long) nr | 0x100; 227 int cc; 228 229 asm volatile( 230 /* Parameter registers are ignored for "test bit" */ 231 " plo 0,0,0,0(0)\n" 232 " ipm %0\n" 233 " srl %0,28\n" 234 : "=d" (cc) 235 : "d" (r0) 236 : "cc"); 237 return cc == 0; 238 } 239 240 static void kvm_s390_cpu_feat_init(void) 241 { 242 int i; 243 244 for (i = 0; i < 256; ++i) { 245 if (plo_test_bit(i)) 246 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7); 247 } 248 249 if (test_facility(28)) /* TOD-clock steering */ 250 ptff(kvm_s390_available_subfunc.ptff, 251 sizeof(kvm_s390_available_subfunc.ptff), 252 PTFF_QAF); 253 254 if (test_facility(17)) { /* MSA */ 255 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *) 256 kvm_s390_available_subfunc.kmac); 257 __cpacf_query(CPACF_KMC, (cpacf_mask_t *) 258 kvm_s390_available_subfunc.kmc); 259 __cpacf_query(CPACF_KM, (cpacf_mask_t *) 260 kvm_s390_available_subfunc.km); 261 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *) 262 kvm_s390_available_subfunc.kimd); 263 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *) 264 kvm_s390_available_subfunc.klmd); 265 } 266 if (test_facility(76)) /* MSA3 */ 267 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *) 268 kvm_s390_available_subfunc.pckmo); 269 if (test_facility(77)) { /* MSA4 */ 270 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *) 271 kvm_s390_available_subfunc.kmctr); 272 __cpacf_query(CPACF_KMF, (cpacf_mask_t *) 273 kvm_s390_available_subfunc.kmf); 274 __cpacf_query(CPACF_KMO, (cpacf_mask_t *) 275 kvm_s390_available_subfunc.kmo); 276 __cpacf_query(CPACF_PCC, (cpacf_mask_t *) 277 kvm_s390_available_subfunc.pcc); 278 } 279 if (test_facility(57)) /* MSA5 */ 280 
__cpacf_query(CPACF_PRNO, (cpacf_mask_t *) 281 kvm_s390_available_subfunc.ppno); 282 283 if (test_facility(146)) /* MSA8 */ 284 __cpacf_query(CPACF_KMA, (cpacf_mask_t *) 285 kvm_s390_available_subfunc.kma); 286 287 if (MACHINE_HAS_ESOP) 288 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP); 289 /* 290 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow), 291 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing). 292 */ 293 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao || 294 !test_facility(3) || !nested) 295 return; 296 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2); 297 if (sclp.has_64bscao) 298 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO); 299 if (sclp.has_siif) 300 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF); 301 if (sclp.has_gpere) 302 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE); 303 if (sclp.has_gsls) 304 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS); 305 if (sclp.has_ib) 306 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB); 307 if (sclp.has_cei) 308 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI); 309 if (sclp.has_ibs) 310 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS); 311 if (sclp.has_kss) 312 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS); 313 /* 314 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make 315 * all skey handling functions read/set the skey from the PGSTE 316 * instead of the real storage key. 317 * 318 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make 319 * pages being detected as preserved although they are resident. 320 * 321 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will 322 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY. 323 * 324 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and 325 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be 326 * correctly shadowed. We can do that for the PGSTE but not for PTE.I. 327 * 328 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We 329 * cannot easily shadow the SCA because of the ipte lock. 330 */ 331 } 332 333 int kvm_arch_init(void *opaque) 334 { 335 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long)); 336 if (!kvm_s390_dbf) 337 return -ENOMEM; 338 339 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) { 340 debug_unregister(kvm_s390_dbf); 341 return -ENOMEM; 342 } 343 344 kvm_s390_cpu_feat_init(); 345 346 /* Register floating interrupt controller interface. 
*/ 347 return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC); 348 } 349 350 void kvm_arch_exit(void) 351 { 352 debug_unregister(kvm_s390_dbf); 353 } 354 355 /* Section: device related */ 356 long kvm_arch_dev_ioctl(struct file *filp, 357 unsigned int ioctl, unsigned long arg) 358 { 359 if (ioctl == KVM_S390_ENABLE_SIE) 360 return s390_enable_sie(); 361 return -EINVAL; 362 } 363 364 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) 365 { 366 int r; 367 368 switch (ext) { 369 case KVM_CAP_S390_PSW: 370 case KVM_CAP_S390_GMAP: 371 case KVM_CAP_SYNC_MMU: 372 #ifdef CONFIG_KVM_S390_UCONTROL 373 case KVM_CAP_S390_UCONTROL: 374 #endif 375 case KVM_CAP_ASYNC_PF: 376 case KVM_CAP_SYNC_REGS: 377 case KVM_CAP_ONE_REG: 378 case KVM_CAP_ENABLE_CAP: 379 case KVM_CAP_S390_CSS_SUPPORT: 380 case KVM_CAP_IOEVENTFD: 381 case KVM_CAP_DEVICE_CTRL: 382 case KVM_CAP_ENABLE_CAP_VM: 383 case KVM_CAP_S390_IRQCHIP: 384 case KVM_CAP_VM_ATTRIBUTES: 385 case KVM_CAP_MP_STATE: 386 case KVM_CAP_IMMEDIATE_EXIT: 387 case KVM_CAP_S390_INJECT_IRQ: 388 case KVM_CAP_S390_USER_SIGP: 389 case KVM_CAP_S390_USER_STSI: 390 case KVM_CAP_S390_SKEYS: 391 case KVM_CAP_S390_IRQ_STATE: 392 case KVM_CAP_S390_USER_INSTR0: 393 case KVM_CAP_S390_CMMA_MIGRATION: 394 case KVM_CAP_S390_AIS: 395 case KVM_CAP_S390_AIS_MIGRATION: 396 r = 1; 397 break; 398 case KVM_CAP_S390_MEM_OP: 399 r = MEM_OP_MAX_SIZE; 400 break; 401 case KVM_CAP_NR_VCPUS: 402 case KVM_CAP_MAX_VCPUS: 403 r = KVM_S390_BSCA_CPU_SLOTS; 404 if (!kvm_s390_use_sca_entries()) 405 r = KVM_MAX_VCPUS; 406 else if (sclp.has_esca && sclp.has_64bscao) 407 r = KVM_S390_ESCA_CPU_SLOTS; 408 break; 409 case KVM_CAP_NR_MEMSLOTS: 410 r = KVM_USER_MEM_SLOTS; 411 break; 412 case KVM_CAP_S390_COW: 413 r = MACHINE_HAS_ESOP; 414 break; 415 case KVM_CAP_S390_VECTOR_REGISTERS: 416 r = MACHINE_HAS_VX; 417 break; 418 case KVM_CAP_S390_RI: 419 r = test_facility(64); 420 break; 421 case KVM_CAP_S390_GS: 422 r = test_facility(133); 423 break; 424 default: 425 r = 0; 426 } 427 return r; 428 } 429 430 static void kvm_s390_sync_dirty_log(struct kvm *kvm, 431 struct kvm_memory_slot *memslot) 432 { 433 gfn_t cur_gfn, last_gfn; 434 unsigned long address; 435 struct gmap *gmap = kvm->arch.gmap; 436 437 /* Loop over all guest pages */ 438 last_gfn = memslot->base_gfn + memslot->npages; 439 for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) { 440 address = gfn_to_hva_memslot(memslot, cur_gfn); 441 442 if (test_and_clear_guest_dirty(gmap->mm, address)) 443 mark_page_dirty(kvm, cur_gfn); 444 if (fatal_signal_pending(current)) 445 return; 446 cond_resched(); 447 } 448 } 449 450 /* Section: vm related */ 451 static void sca_del_vcpu(struct kvm_vcpu *vcpu); 452 453 /* 454 * Get (and clear) the dirty memory log for a memory slot. 
455 */ 456 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, 457 struct kvm_dirty_log *log) 458 { 459 int r; 460 unsigned long n; 461 struct kvm_memslots *slots; 462 struct kvm_memory_slot *memslot; 463 int is_dirty = 0; 464 465 if (kvm_is_ucontrol(kvm)) 466 return -EINVAL; 467 468 mutex_lock(&kvm->slots_lock); 469 470 r = -EINVAL; 471 if (log->slot >= KVM_USER_MEM_SLOTS) 472 goto out; 473 474 slots = kvm_memslots(kvm); 475 memslot = id_to_memslot(slots, log->slot); 476 r = -ENOENT; 477 if (!memslot->dirty_bitmap) 478 goto out; 479 480 kvm_s390_sync_dirty_log(kvm, memslot); 481 r = kvm_get_dirty_log(kvm, log, &is_dirty); 482 if (r) 483 goto out; 484 485 /* Clear the dirty log */ 486 if (is_dirty) { 487 n = kvm_dirty_bitmap_bytes(memslot); 488 memset(memslot->dirty_bitmap, 0, n); 489 } 490 r = 0; 491 out: 492 mutex_unlock(&kvm->slots_lock); 493 return r; 494 } 495 496 static void icpt_operexc_on_all_vcpus(struct kvm *kvm) 497 { 498 unsigned int i; 499 struct kvm_vcpu *vcpu; 500 501 kvm_for_each_vcpu(i, vcpu, kvm) { 502 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu); 503 } 504 } 505 506 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) 507 { 508 int r; 509 510 if (cap->flags) 511 return -EINVAL; 512 513 switch (cap->cap) { 514 case KVM_CAP_S390_IRQCHIP: 515 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP"); 516 kvm->arch.use_irqchip = 1; 517 r = 0; 518 break; 519 case KVM_CAP_S390_USER_SIGP: 520 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP"); 521 kvm->arch.user_sigp = 1; 522 r = 0; 523 break; 524 case KVM_CAP_S390_VECTOR_REGISTERS: 525 mutex_lock(&kvm->lock); 526 if (kvm->created_vcpus) { 527 r = -EBUSY; 528 } else if (MACHINE_HAS_VX) { 529 set_kvm_facility(kvm->arch.model.fac_mask, 129); 530 set_kvm_facility(kvm->arch.model.fac_list, 129); 531 if (test_facility(134)) { 532 set_kvm_facility(kvm->arch.model.fac_mask, 134); 533 set_kvm_facility(kvm->arch.model.fac_list, 134); 534 } 535 if (test_facility(135)) { 536 set_kvm_facility(kvm->arch.model.fac_mask, 135); 537 set_kvm_facility(kvm->arch.model.fac_list, 135); 538 } 539 r = 0; 540 } else 541 r = -EINVAL; 542 mutex_unlock(&kvm->lock); 543 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s", 544 r ? "(not available)" : "(success)"); 545 break; 546 case KVM_CAP_S390_RI: 547 r = -EINVAL; 548 mutex_lock(&kvm->lock); 549 if (kvm->created_vcpus) { 550 r = -EBUSY; 551 } else if (test_facility(64)) { 552 set_kvm_facility(kvm->arch.model.fac_mask, 64); 553 set_kvm_facility(kvm->arch.model.fac_list, 64); 554 r = 0; 555 } 556 mutex_unlock(&kvm->lock); 557 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s", 558 r ? "(not available)" : "(success)"); 559 break; 560 case KVM_CAP_S390_AIS: 561 mutex_lock(&kvm->lock); 562 if (kvm->created_vcpus) { 563 r = -EBUSY; 564 } else { 565 set_kvm_facility(kvm->arch.model.fac_mask, 72); 566 set_kvm_facility(kvm->arch.model.fac_list, 72); 567 r = 0; 568 } 569 mutex_unlock(&kvm->lock); 570 VM_EVENT(kvm, 3, "ENABLE: AIS %s", 571 r ? "(not available)" : "(success)"); 572 break; 573 case KVM_CAP_S390_GS: 574 r = -EINVAL; 575 mutex_lock(&kvm->lock); 576 if (atomic_read(&kvm->online_vcpus)) { 577 r = -EBUSY; 578 } else if (test_facility(133)) { 579 set_kvm_facility(kvm->arch.model.fac_mask, 133); 580 set_kvm_facility(kvm->arch.model.fac_list, 133); 581 r = 0; 582 } 583 mutex_unlock(&kvm->lock); 584 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s", 585 r ? 
"(not available)" : "(success)"); 586 break; 587 case KVM_CAP_S390_USER_STSI: 588 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI"); 589 kvm->arch.user_stsi = 1; 590 r = 0; 591 break; 592 case KVM_CAP_S390_USER_INSTR0: 593 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0"); 594 kvm->arch.user_instr0 = 1; 595 icpt_operexc_on_all_vcpus(kvm); 596 r = 0; 597 break; 598 default: 599 r = -EINVAL; 600 break; 601 } 602 return r; 603 } 604 605 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) 606 { 607 int ret; 608 609 switch (attr->attr) { 610 case KVM_S390_VM_MEM_LIMIT_SIZE: 611 ret = 0; 612 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes", 613 kvm->arch.mem_limit); 614 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr)) 615 ret = -EFAULT; 616 break; 617 default: 618 ret = -ENXIO; 619 break; 620 } 621 return ret; 622 } 623 624 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) 625 { 626 int ret; 627 unsigned int idx; 628 switch (attr->attr) { 629 case KVM_S390_VM_MEM_ENABLE_CMMA: 630 ret = -ENXIO; 631 if (!sclp.has_cmma) 632 break; 633 634 ret = -EBUSY; 635 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support"); 636 mutex_lock(&kvm->lock); 637 if (!kvm->created_vcpus) { 638 kvm->arch.use_cmma = 1; 639 ret = 0; 640 } 641 mutex_unlock(&kvm->lock); 642 break; 643 case KVM_S390_VM_MEM_CLR_CMMA: 644 ret = -ENXIO; 645 if (!sclp.has_cmma) 646 break; 647 ret = -EINVAL; 648 if (!kvm->arch.use_cmma) 649 break; 650 651 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states"); 652 mutex_lock(&kvm->lock); 653 idx = srcu_read_lock(&kvm->srcu); 654 s390_reset_cmma(kvm->arch.gmap->mm); 655 srcu_read_unlock(&kvm->srcu, idx); 656 mutex_unlock(&kvm->lock); 657 ret = 0; 658 break; 659 case KVM_S390_VM_MEM_LIMIT_SIZE: { 660 unsigned long new_limit; 661 662 if (kvm_is_ucontrol(kvm)) 663 return -EINVAL; 664 665 if (get_user(new_limit, (u64 __user *)attr->addr)) 666 return -EFAULT; 667 668 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT && 669 new_limit > kvm->arch.mem_limit) 670 return -E2BIG; 671 672 if (!new_limit) 673 return -EINVAL; 674 675 /* gmap_create takes last usable address */ 676 if (new_limit != KVM_S390_NO_MEM_LIMIT) 677 new_limit -= 1; 678 679 ret = -EBUSY; 680 mutex_lock(&kvm->lock); 681 if (!kvm->created_vcpus) { 682 /* gmap_create will round the limit up */ 683 struct gmap *new = gmap_create(current->mm, new_limit); 684 685 if (!new) { 686 ret = -ENOMEM; 687 } else { 688 gmap_remove(kvm->arch.gmap); 689 new->private = kvm; 690 kvm->arch.gmap = new; 691 ret = 0; 692 } 693 } 694 mutex_unlock(&kvm->lock); 695 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit); 696 VM_EVENT(kvm, 3, "New guest asce: 0x%pK", 697 (void *) kvm->arch.gmap->asce); 698 break; 699 } 700 default: 701 ret = -ENXIO; 702 break; 703 } 704 return ret; 705 } 706 707 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu); 708 709 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr) 710 { 711 struct kvm_vcpu *vcpu; 712 int i; 713 714 if (!test_kvm_facility(kvm, 76)) 715 return -EINVAL; 716 717 mutex_lock(&kvm->lock); 718 switch (attr->attr) { 719 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW: 720 get_random_bytes( 721 kvm->arch.crypto.crycb->aes_wrapping_key_mask, 722 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); 723 kvm->arch.crypto.aes_kw = 1; 724 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support"); 725 break; 726 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW: 727 get_random_bytes( 728 
kvm->arch.crypto.crycb->dea_wrapping_key_mask, 729 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); 730 kvm->arch.crypto.dea_kw = 1; 731 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support"); 732 break; 733 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW: 734 kvm->arch.crypto.aes_kw = 0; 735 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0, 736 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); 737 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support"); 738 break; 739 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW: 740 kvm->arch.crypto.dea_kw = 0; 741 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0, 742 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); 743 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support"); 744 break; 745 default: 746 mutex_unlock(&kvm->lock); 747 return -ENXIO; 748 } 749 750 kvm_for_each_vcpu(i, vcpu, kvm) { 751 kvm_s390_vcpu_crypto_setup(vcpu); 752 exit_sie(vcpu); 753 } 754 mutex_unlock(&kvm->lock); 755 return 0; 756 } 757 758 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req) 759 { 760 int cx; 761 struct kvm_vcpu *vcpu; 762 763 kvm_for_each_vcpu(cx, vcpu, kvm) 764 kvm_s390_sync_request(req, vcpu); 765 } 766 767 /* 768 * Must be called with kvm->srcu held to avoid races on memslots, and with 769 * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration. 770 */ 771 static int kvm_s390_vm_start_migration(struct kvm *kvm) 772 { 773 struct kvm_s390_migration_state *mgs; 774 struct kvm_memory_slot *ms; 775 /* should be the only one */ 776 struct kvm_memslots *slots; 777 unsigned long ram_pages; 778 int slotnr; 779 780 /* migration mode already enabled */ 781 if (kvm->arch.migration_state) 782 return 0; 783 784 slots = kvm_memslots(kvm); 785 if (!slots || !slots->used_slots) 786 return -EINVAL; 787 788 mgs = kzalloc(sizeof(*mgs), GFP_KERNEL); 789 if (!mgs) 790 return -ENOMEM; 791 kvm->arch.migration_state = mgs; 792 793 if (kvm->arch.use_cmma) { 794 /* 795 * Get the last slot. They should be sorted by base_gfn, so the 796 * last slot is also the one at the end of the address space. 797 * We have verified above that at least one slot is present. 798 */ 799 ms = slots->memslots + slots->used_slots - 1; 800 /* round up so we only use full longs */ 801 ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG); 802 /* allocate enough bytes to store all the bits */ 803 mgs->pgste_bitmap = vmalloc(ram_pages / 8); 804 if (!mgs->pgste_bitmap) { 805 kfree(mgs); 806 kvm->arch.migration_state = NULL; 807 return -ENOMEM; 808 } 809 810 mgs->bitmap_size = ram_pages; 811 atomic64_set(&mgs->dirty_pages, ram_pages); 812 /* mark all the pages in active slots as dirty */ 813 for (slotnr = 0; slotnr < slots->used_slots; slotnr++) { 814 ms = slots->memslots + slotnr; 815 bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages); 816 } 817 818 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION); 819 } 820 return 0; 821 } 822 823 /* 824 * Must be called with kvm->lock to avoid races with ourselves and 825 * kvm_s390_vm_start_migration. 
826 */ 827 static int kvm_s390_vm_stop_migration(struct kvm *kvm) 828 { 829 struct kvm_s390_migration_state *mgs; 830 831 /* migration mode already disabled */ 832 if (!kvm->arch.migration_state) 833 return 0; 834 mgs = kvm->arch.migration_state; 835 kvm->arch.migration_state = NULL; 836 837 if (kvm->arch.use_cmma) { 838 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION); 839 vfree(mgs->pgste_bitmap); 840 } 841 kfree(mgs); 842 return 0; 843 } 844 845 static int kvm_s390_vm_set_migration(struct kvm *kvm, 846 struct kvm_device_attr *attr) 847 { 848 int idx, res = -ENXIO; 849 850 mutex_lock(&kvm->lock); 851 switch (attr->attr) { 852 case KVM_S390_VM_MIGRATION_START: 853 idx = srcu_read_lock(&kvm->srcu); 854 res = kvm_s390_vm_start_migration(kvm); 855 srcu_read_unlock(&kvm->srcu, idx); 856 break; 857 case KVM_S390_VM_MIGRATION_STOP: 858 res = kvm_s390_vm_stop_migration(kvm); 859 break; 860 default: 861 break; 862 } 863 mutex_unlock(&kvm->lock); 864 865 return res; 866 } 867 868 static int kvm_s390_vm_get_migration(struct kvm *kvm, 869 struct kvm_device_attr *attr) 870 { 871 u64 mig = (kvm->arch.migration_state != NULL); 872 873 if (attr->attr != KVM_S390_VM_MIGRATION_STATUS) 874 return -ENXIO; 875 876 if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig))) 877 return -EFAULT; 878 return 0; 879 } 880 881 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr) 882 { 883 struct kvm_s390_vm_tod_clock gtod; 884 885 if (copy_from_user(>od, (void __user *)attr->addr, sizeof(gtod))) 886 return -EFAULT; 887 888 if (test_kvm_facility(kvm, 139)) 889 kvm_s390_set_tod_clock_ext(kvm, >od); 890 else if (gtod.epoch_idx == 0) 891 kvm_s390_set_tod_clock(kvm, gtod.tod); 892 else 893 return -EINVAL; 894 895 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx", 896 gtod.epoch_idx, gtod.tod); 897 898 return 0; 899 } 900 901 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr) 902 { 903 u8 gtod_high; 904 905 if (copy_from_user(>od_high, (void __user *)attr->addr, 906 sizeof(gtod_high))) 907 return -EFAULT; 908 909 if (gtod_high != 0) 910 return -EINVAL; 911 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high); 912 913 return 0; 914 } 915 916 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr) 917 { 918 u64 gtod; 919 920 if (copy_from_user(>od, (void __user *)attr->addr, sizeof(gtod))) 921 return -EFAULT; 922 923 kvm_s390_set_tod_clock(kvm, gtod); 924 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod); 925 return 0; 926 } 927 928 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr) 929 { 930 int ret; 931 932 if (attr->flags) 933 return -EINVAL; 934 935 switch (attr->attr) { 936 case KVM_S390_VM_TOD_EXT: 937 ret = kvm_s390_set_tod_ext(kvm, attr); 938 break; 939 case KVM_S390_VM_TOD_HIGH: 940 ret = kvm_s390_set_tod_high(kvm, attr); 941 break; 942 case KVM_S390_VM_TOD_LOW: 943 ret = kvm_s390_set_tod_low(kvm, attr); 944 break; 945 default: 946 ret = -ENXIO; 947 break; 948 } 949 return ret; 950 } 951 952 static void kvm_s390_get_tod_clock_ext(struct kvm *kvm, 953 struct kvm_s390_vm_tod_clock *gtod) 954 { 955 struct kvm_s390_tod_clock_ext htod; 956 957 preempt_disable(); 958 959 get_tod_clock_ext((char *)&htod); 960 961 gtod->tod = htod.tod + kvm->arch.epoch; 962 gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx; 963 964 if (gtod->tod < htod.tod) 965 gtod->epoch_idx += 1; 966 967 preempt_enable(); 968 } 969 970 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr) 971 { 972 
struct kvm_s390_vm_tod_clock gtod; 973 974 memset(>od, 0, sizeof(gtod)); 975 976 if (test_kvm_facility(kvm, 139)) 977 kvm_s390_get_tod_clock_ext(kvm, >od); 978 else 979 gtod.tod = kvm_s390_get_tod_clock_fast(kvm); 980 981 if (copy_to_user((void __user *)attr->addr, >od, sizeof(gtod))) 982 return -EFAULT; 983 984 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx", 985 gtod.epoch_idx, gtod.tod); 986 return 0; 987 } 988 989 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr) 990 { 991 u8 gtod_high = 0; 992 993 if (copy_to_user((void __user *)attr->addr, >od_high, 994 sizeof(gtod_high))) 995 return -EFAULT; 996 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high); 997 998 return 0; 999 } 1000 1001 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr) 1002 { 1003 u64 gtod; 1004 1005 gtod = kvm_s390_get_tod_clock_fast(kvm); 1006 if (copy_to_user((void __user *)attr->addr, >od, sizeof(gtod))) 1007 return -EFAULT; 1008 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod); 1009 1010 return 0; 1011 } 1012 1013 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr) 1014 { 1015 int ret; 1016 1017 if (attr->flags) 1018 return -EINVAL; 1019 1020 switch (attr->attr) { 1021 case KVM_S390_VM_TOD_EXT: 1022 ret = kvm_s390_get_tod_ext(kvm, attr); 1023 break; 1024 case KVM_S390_VM_TOD_HIGH: 1025 ret = kvm_s390_get_tod_high(kvm, attr); 1026 break; 1027 case KVM_S390_VM_TOD_LOW: 1028 ret = kvm_s390_get_tod_low(kvm, attr); 1029 break; 1030 default: 1031 ret = -ENXIO; 1032 break; 1033 } 1034 return ret; 1035 } 1036 1037 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr) 1038 { 1039 struct kvm_s390_vm_cpu_processor *proc; 1040 u16 lowest_ibc, unblocked_ibc; 1041 int ret = 0; 1042 1043 mutex_lock(&kvm->lock); 1044 if (kvm->created_vcpus) { 1045 ret = -EBUSY; 1046 goto out; 1047 } 1048 proc = kzalloc(sizeof(*proc), GFP_KERNEL); 1049 if (!proc) { 1050 ret = -ENOMEM; 1051 goto out; 1052 } 1053 if (!copy_from_user(proc, (void __user *)attr->addr, 1054 sizeof(*proc))) { 1055 kvm->arch.model.cpuid = proc->cpuid; 1056 lowest_ibc = sclp.ibc >> 16 & 0xfff; 1057 unblocked_ibc = sclp.ibc & 0xfff; 1058 if (lowest_ibc && proc->ibc) { 1059 if (proc->ibc > unblocked_ibc) 1060 kvm->arch.model.ibc = unblocked_ibc; 1061 else if (proc->ibc < lowest_ibc) 1062 kvm->arch.model.ibc = lowest_ibc; 1063 else 1064 kvm->arch.model.ibc = proc->ibc; 1065 } 1066 memcpy(kvm->arch.model.fac_list, proc->fac_list, 1067 S390_ARCH_FAC_LIST_SIZE_BYTE); 1068 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx", 1069 kvm->arch.model.ibc, 1070 kvm->arch.model.cpuid); 1071 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx", 1072 kvm->arch.model.fac_list[0], 1073 kvm->arch.model.fac_list[1], 1074 kvm->arch.model.fac_list[2]); 1075 } else 1076 ret = -EFAULT; 1077 kfree(proc); 1078 out: 1079 mutex_unlock(&kvm->lock); 1080 return ret; 1081 } 1082 1083 static int kvm_s390_set_processor_feat(struct kvm *kvm, 1084 struct kvm_device_attr *attr) 1085 { 1086 struct kvm_s390_vm_cpu_feat data; 1087 int ret = -EBUSY; 1088 1089 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data))) 1090 return -EFAULT; 1091 if (!bitmap_subset((unsigned long *) data.feat, 1092 kvm_s390_available_cpu_feat, 1093 KVM_S390_VM_CPU_FEAT_NR_BITS)) 1094 return -EINVAL; 1095 1096 mutex_lock(&kvm->lock); 1097 if (!atomic_read(&kvm->online_vcpus)) { 1098 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat, 1099 
KVM_S390_VM_CPU_FEAT_NR_BITS); 1100 ret = 0; 1101 } 1102 mutex_unlock(&kvm->lock); 1103 return ret; 1104 } 1105 1106 static int kvm_s390_set_processor_subfunc(struct kvm *kvm, 1107 struct kvm_device_attr *attr) 1108 { 1109 /* 1110 * Once supported by kernel + hw, we have to store the subfunctions 1111 * in kvm->arch and remember that user space configured them. 1112 */ 1113 return -ENXIO; 1114 } 1115 1116 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) 1117 { 1118 int ret = -ENXIO; 1119 1120 switch (attr->attr) { 1121 case KVM_S390_VM_CPU_PROCESSOR: 1122 ret = kvm_s390_set_processor(kvm, attr); 1123 break; 1124 case KVM_S390_VM_CPU_PROCESSOR_FEAT: 1125 ret = kvm_s390_set_processor_feat(kvm, attr); 1126 break; 1127 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: 1128 ret = kvm_s390_set_processor_subfunc(kvm, attr); 1129 break; 1130 } 1131 return ret; 1132 } 1133 1134 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr) 1135 { 1136 struct kvm_s390_vm_cpu_processor *proc; 1137 int ret = 0; 1138 1139 proc = kzalloc(sizeof(*proc), GFP_KERNEL); 1140 if (!proc) { 1141 ret = -ENOMEM; 1142 goto out; 1143 } 1144 proc->cpuid = kvm->arch.model.cpuid; 1145 proc->ibc = kvm->arch.model.ibc; 1146 memcpy(&proc->fac_list, kvm->arch.model.fac_list, 1147 S390_ARCH_FAC_LIST_SIZE_BYTE); 1148 VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx", 1149 kvm->arch.model.ibc, 1150 kvm->arch.model.cpuid); 1151 VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx", 1152 kvm->arch.model.fac_list[0], 1153 kvm->arch.model.fac_list[1], 1154 kvm->arch.model.fac_list[2]); 1155 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc))) 1156 ret = -EFAULT; 1157 kfree(proc); 1158 out: 1159 return ret; 1160 } 1161 1162 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr) 1163 { 1164 struct kvm_s390_vm_cpu_machine *mach; 1165 int ret = 0; 1166 1167 mach = kzalloc(sizeof(*mach), GFP_KERNEL); 1168 if (!mach) { 1169 ret = -ENOMEM; 1170 goto out; 1171 } 1172 get_cpu_id((struct cpuid *) &mach->cpuid); 1173 mach->ibc = sclp.ibc; 1174 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask, 1175 S390_ARCH_FAC_LIST_SIZE_BYTE); 1176 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list, 1177 sizeof(S390_lowcore.stfle_fac_list)); 1178 VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx", 1179 kvm->arch.model.ibc, 1180 kvm->arch.model.cpuid); 1181 VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx", 1182 mach->fac_mask[0], 1183 mach->fac_mask[1], 1184 mach->fac_mask[2]); 1185 VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx", 1186 mach->fac_list[0], 1187 mach->fac_list[1], 1188 mach->fac_list[2]); 1189 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach))) 1190 ret = -EFAULT; 1191 kfree(mach); 1192 out: 1193 return ret; 1194 } 1195 1196 static int kvm_s390_get_processor_feat(struct kvm *kvm, 1197 struct kvm_device_attr *attr) 1198 { 1199 struct kvm_s390_vm_cpu_feat data; 1200 1201 bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat, 1202 KVM_S390_VM_CPU_FEAT_NR_BITS); 1203 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data))) 1204 return -EFAULT; 1205 return 0; 1206 } 1207 1208 static int kvm_s390_get_machine_feat(struct kvm *kvm, 1209 struct kvm_device_attr *attr) 1210 { 1211 struct kvm_s390_vm_cpu_feat data; 1212 1213 bitmap_copy((unsigned long *) data.feat, 1214 kvm_s390_available_cpu_feat, 1215 KVM_S390_VM_CPU_FEAT_NR_BITS); 1216 
if (copy_to_user((void __user *)attr->addr, &data, sizeof(data))) 1217 return -EFAULT; 1218 return 0; 1219 } 1220 1221 static int kvm_s390_get_processor_subfunc(struct kvm *kvm, 1222 struct kvm_device_attr *attr) 1223 { 1224 /* 1225 * Once we can actually configure subfunctions (kernel + hw support), 1226 * we have to check if they were already set by user space, if so copy 1227 * them from kvm->arch. 1228 */ 1229 return -ENXIO; 1230 } 1231 1232 static int kvm_s390_get_machine_subfunc(struct kvm *kvm, 1233 struct kvm_device_attr *attr) 1234 { 1235 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc, 1236 sizeof(struct kvm_s390_vm_cpu_subfunc))) 1237 return -EFAULT; 1238 return 0; 1239 } 1240 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) 1241 { 1242 int ret = -ENXIO; 1243 1244 switch (attr->attr) { 1245 case KVM_S390_VM_CPU_PROCESSOR: 1246 ret = kvm_s390_get_processor(kvm, attr); 1247 break; 1248 case KVM_S390_VM_CPU_MACHINE: 1249 ret = kvm_s390_get_machine(kvm, attr); 1250 break; 1251 case KVM_S390_VM_CPU_PROCESSOR_FEAT: 1252 ret = kvm_s390_get_processor_feat(kvm, attr); 1253 break; 1254 case KVM_S390_VM_CPU_MACHINE_FEAT: 1255 ret = kvm_s390_get_machine_feat(kvm, attr); 1256 break; 1257 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: 1258 ret = kvm_s390_get_processor_subfunc(kvm, attr); 1259 break; 1260 case KVM_S390_VM_CPU_MACHINE_SUBFUNC: 1261 ret = kvm_s390_get_machine_subfunc(kvm, attr); 1262 break; 1263 } 1264 return ret; 1265 } 1266 1267 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr) 1268 { 1269 int ret; 1270 1271 switch (attr->group) { 1272 case KVM_S390_VM_MEM_CTRL: 1273 ret = kvm_s390_set_mem_control(kvm, attr); 1274 break; 1275 case KVM_S390_VM_TOD: 1276 ret = kvm_s390_set_tod(kvm, attr); 1277 break; 1278 case KVM_S390_VM_CPU_MODEL: 1279 ret = kvm_s390_set_cpu_model(kvm, attr); 1280 break; 1281 case KVM_S390_VM_CRYPTO: 1282 ret = kvm_s390_vm_set_crypto(kvm, attr); 1283 break; 1284 case KVM_S390_VM_MIGRATION: 1285 ret = kvm_s390_vm_set_migration(kvm, attr); 1286 break; 1287 default: 1288 ret = -ENXIO; 1289 break; 1290 } 1291 1292 return ret; 1293 } 1294 1295 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr) 1296 { 1297 int ret; 1298 1299 switch (attr->group) { 1300 case KVM_S390_VM_MEM_CTRL: 1301 ret = kvm_s390_get_mem_control(kvm, attr); 1302 break; 1303 case KVM_S390_VM_TOD: 1304 ret = kvm_s390_get_tod(kvm, attr); 1305 break; 1306 case KVM_S390_VM_CPU_MODEL: 1307 ret = kvm_s390_get_cpu_model(kvm, attr); 1308 break; 1309 case KVM_S390_VM_MIGRATION: 1310 ret = kvm_s390_vm_get_migration(kvm, attr); 1311 break; 1312 default: 1313 ret = -ENXIO; 1314 break; 1315 } 1316 1317 return ret; 1318 } 1319 1320 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) 1321 { 1322 int ret; 1323 1324 switch (attr->group) { 1325 case KVM_S390_VM_MEM_CTRL: 1326 switch (attr->attr) { 1327 case KVM_S390_VM_MEM_ENABLE_CMMA: 1328 case KVM_S390_VM_MEM_CLR_CMMA: 1329 ret = sclp.has_cmma ? 
0 : -ENXIO; 1330 break; 1331 case KVM_S390_VM_MEM_LIMIT_SIZE: 1332 ret = 0; 1333 break; 1334 default: 1335 ret = -ENXIO; 1336 break; 1337 } 1338 break; 1339 case KVM_S390_VM_TOD: 1340 switch (attr->attr) { 1341 case KVM_S390_VM_TOD_LOW: 1342 case KVM_S390_VM_TOD_HIGH: 1343 ret = 0; 1344 break; 1345 default: 1346 ret = -ENXIO; 1347 break; 1348 } 1349 break; 1350 case KVM_S390_VM_CPU_MODEL: 1351 switch (attr->attr) { 1352 case KVM_S390_VM_CPU_PROCESSOR: 1353 case KVM_S390_VM_CPU_MACHINE: 1354 case KVM_S390_VM_CPU_PROCESSOR_FEAT: 1355 case KVM_S390_VM_CPU_MACHINE_FEAT: 1356 case KVM_S390_VM_CPU_MACHINE_SUBFUNC: 1357 ret = 0; 1358 break; 1359 /* configuring subfunctions is not supported yet */ 1360 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: 1361 default: 1362 ret = -ENXIO; 1363 break; 1364 } 1365 break; 1366 case KVM_S390_VM_CRYPTO: 1367 switch (attr->attr) { 1368 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW: 1369 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW: 1370 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW: 1371 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW: 1372 ret = 0; 1373 break; 1374 default: 1375 ret = -ENXIO; 1376 break; 1377 } 1378 break; 1379 case KVM_S390_VM_MIGRATION: 1380 ret = 0; 1381 break; 1382 default: 1383 ret = -ENXIO; 1384 break; 1385 } 1386 1387 return ret; 1388 } 1389 1390 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) 1391 { 1392 uint8_t *keys; 1393 uint64_t hva; 1394 int srcu_idx, i, r = 0; 1395 1396 if (args->flags != 0) 1397 return -EINVAL; 1398 1399 /* Is this guest using storage keys? */ 1400 if (!mm_use_skey(current->mm)) 1401 return KVM_S390_GET_SKEYS_NONE; 1402 1403 /* Enforce sane limit on memory allocation */ 1404 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX) 1405 return -EINVAL; 1406 1407 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL); 1408 if (!keys) 1409 return -ENOMEM; 1410 1411 down_read(¤t->mm->mmap_sem); 1412 srcu_idx = srcu_read_lock(&kvm->srcu); 1413 for (i = 0; i < args->count; i++) { 1414 hva = gfn_to_hva(kvm, args->start_gfn + i); 1415 if (kvm_is_error_hva(hva)) { 1416 r = -EFAULT; 1417 break; 1418 } 1419 1420 r = get_guest_storage_key(current->mm, hva, &keys[i]); 1421 if (r) 1422 break; 1423 } 1424 srcu_read_unlock(&kvm->srcu, srcu_idx); 1425 up_read(¤t->mm->mmap_sem); 1426 1427 if (!r) { 1428 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys, 1429 sizeof(uint8_t) * args->count); 1430 if (r) 1431 r = -EFAULT; 1432 } 1433 1434 kvfree(keys); 1435 return r; 1436 } 1437 1438 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) 1439 { 1440 uint8_t *keys; 1441 uint64_t hva; 1442 int srcu_idx, i, r = 0; 1443 1444 if (args->flags != 0) 1445 return -EINVAL; 1446 1447 /* Enforce sane limit on memory allocation */ 1448 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX) 1449 return -EINVAL; 1450 1451 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL); 1452 if (!keys) 1453 return -ENOMEM; 1454 1455 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr, 1456 sizeof(uint8_t) * args->count); 1457 if (r) { 1458 r = -EFAULT; 1459 goto out; 1460 } 1461 1462 /* Enable storage key handling for the guest */ 1463 r = s390_enable_skey(); 1464 if (r) 1465 goto out; 1466 1467 down_read(¤t->mm->mmap_sem); 1468 srcu_idx = srcu_read_lock(&kvm->srcu); 1469 for (i = 0; i < args->count; i++) { 1470 hva = gfn_to_hva(kvm, args->start_gfn + i); 1471 if (kvm_is_error_hva(hva)) { 1472 r = -EFAULT; 1473 break; 1474 } 1475 1476 /* Lowest order bit is reserved */ 1477 if (keys[i] & 0x01) { 1478 r 
= -EINVAL; 1479 break; 1480 } 1481 1482 r = set_guest_storage_key(current->mm, hva, keys[i], 0); 1483 if (r) 1484 break; 1485 } 1486 srcu_read_unlock(&kvm->srcu, srcu_idx); 1487 up_read(¤t->mm->mmap_sem); 1488 out: 1489 kvfree(keys); 1490 return r; 1491 } 1492 1493 /* 1494 * Base address and length must be sent at the start of each block, therefore 1495 * it's cheaper to send some clean data, as long as it's less than the size of 1496 * two longs. 1497 */ 1498 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *)) 1499 /* for consistency */ 1500 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX) 1501 1502 /* 1503 * This function searches for the next page with dirty CMMA attributes, and 1504 * saves the attributes in the buffer up to either the end of the buffer or 1505 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found; 1506 * no trailing clean bytes are saved. 1507 * In case no dirty bits were found, or if CMMA was not enabled or used, the 1508 * output buffer will indicate 0 as length. 1509 */ 1510 static int kvm_s390_get_cmma_bits(struct kvm *kvm, 1511 struct kvm_s390_cmma_log *args) 1512 { 1513 struct kvm_s390_migration_state *s = kvm->arch.migration_state; 1514 unsigned long bufsize, hva, pgstev, i, next, cur; 1515 int srcu_idx, peek, r = 0, rr; 1516 u8 *res; 1517 1518 cur = args->start_gfn; 1519 i = next = pgstev = 0; 1520 1521 if (unlikely(!kvm->arch.use_cmma)) 1522 return -ENXIO; 1523 /* Invalid/unsupported flags were specified */ 1524 if (args->flags & ~KVM_S390_CMMA_PEEK) 1525 return -EINVAL; 1526 /* Migration mode query, and we are not doing a migration */ 1527 peek = !!(args->flags & KVM_S390_CMMA_PEEK); 1528 if (!peek && !s) 1529 return -EINVAL; 1530 /* CMMA is disabled or was not used, or the buffer has length zero */ 1531 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX); 1532 if (!bufsize || !kvm->mm->context.use_cmma) { 1533 memset(args, 0, sizeof(*args)); 1534 return 0; 1535 } 1536 1537 if (!peek) { 1538 /* We are not peeking, and there are no dirty pages */ 1539 if (!atomic64_read(&s->dirty_pages)) { 1540 memset(args, 0, sizeof(*args)); 1541 return 0; 1542 } 1543 cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 1544 args->start_gfn); 1545 if (cur >= s->bitmap_size) /* nothing found, loop back */ 1546 cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0); 1547 if (cur >= s->bitmap_size) { /* again! (very unlikely) */ 1548 memset(args, 0, sizeof(*args)); 1549 return 0; 1550 } 1551 next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1); 1552 } 1553 1554 res = vmalloc(bufsize); 1555 if (!res) 1556 return -ENOMEM; 1557 1558 args->start_gfn = cur; 1559 1560 down_read(&kvm->mm->mmap_sem); 1561 srcu_idx = srcu_read_lock(&kvm->srcu); 1562 while (i < bufsize) { 1563 hva = gfn_to_hva(kvm, cur); 1564 if (kvm_is_error_hva(hva)) { 1565 r = -EFAULT; 1566 break; 1567 } 1568 /* decrement only if we actually flipped the bit to 0 */ 1569 if (!peek && test_and_clear_bit(cur, s->pgste_bitmap)) 1570 atomic64_dec(&s->dirty_pages); 1571 r = get_pgste(kvm->mm, hva, &pgstev); 1572 if (r < 0) 1573 pgstev = 0; 1574 /* save the value */ 1575 res[i++] = (pgstev >> 24) & 0x43; 1576 /* 1577 * if the next bit is too far away, stop. 
1578 * if we reached the previous "next", find the next one 1579 */ 1580 if (!peek) { 1581 if (next > cur + KVM_S390_MAX_BIT_DISTANCE) 1582 break; 1583 if (cur == next) 1584 next = find_next_bit(s->pgste_bitmap, 1585 s->bitmap_size, cur + 1); 1586 /* reached the end of the bitmap or of the buffer, stop */ 1587 if ((next >= s->bitmap_size) || 1588 (next >= args->start_gfn + bufsize)) 1589 break; 1590 } 1591 cur++; 1592 } 1593 srcu_read_unlock(&kvm->srcu, srcu_idx); 1594 up_read(&kvm->mm->mmap_sem); 1595 args->count = i; 1596 args->remaining = s ? atomic64_read(&s->dirty_pages) : 0; 1597 1598 rr = copy_to_user((void __user *)args->values, res, args->count); 1599 if (rr) 1600 r = -EFAULT; 1601 1602 vfree(res); 1603 return r; 1604 } 1605 1606 /* 1607 * This function sets the CMMA attributes for the given pages. If the input 1608 * buffer has zero length, no action is taken, otherwise the attributes are 1609 * set and the mm->context.use_cmma flag is set. 1610 */ 1611 static int kvm_s390_set_cmma_bits(struct kvm *kvm, 1612 const struct kvm_s390_cmma_log *args) 1613 { 1614 unsigned long hva, mask, pgstev, i; 1615 uint8_t *bits; 1616 int srcu_idx, r = 0; 1617 1618 mask = args->mask; 1619 1620 if (!kvm->arch.use_cmma) 1621 return -ENXIO; 1622 /* invalid/unsupported flags */ 1623 if (args->flags != 0) 1624 return -EINVAL; 1625 /* Enforce sane limit on memory allocation */ 1626 if (args->count > KVM_S390_CMMA_SIZE_MAX) 1627 return -EINVAL; 1628 /* Nothing to do */ 1629 if (args->count == 0) 1630 return 0; 1631 1632 bits = vmalloc(sizeof(*bits) * args->count); 1633 if (!bits) 1634 return -ENOMEM; 1635 1636 r = copy_from_user(bits, (void __user *)args->values, args->count); 1637 if (r) { 1638 r = -EFAULT; 1639 goto out; 1640 } 1641 1642 down_read(&kvm->mm->mmap_sem); 1643 srcu_idx = srcu_read_lock(&kvm->srcu); 1644 for (i = 0; i < args->count; i++) { 1645 hva = gfn_to_hva(kvm, args->start_gfn + i); 1646 if (kvm_is_error_hva(hva)) { 1647 r = -EFAULT; 1648 break; 1649 } 1650 1651 pgstev = bits[i]; 1652 pgstev = pgstev << 24; 1653 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT; 1654 set_pgste_bits(kvm->mm, hva, mask, pgstev); 1655 } 1656 srcu_read_unlock(&kvm->srcu, srcu_idx); 1657 up_read(&kvm->mm->mmap_sem); 1658 1659 if (!kvm->mm->context.use_cmma) { 1660 down_write(&kvm->mm->mmap_sem); 1661 kvm->mm->context.use_cmma = 1; 1662 up_write(&kvm->mm->mmap_sem); 1663 } 1664 out: 1665 vfree(bits); 1666 return r; 1667 } 1668 1669 long kvm_arch_vm_ioctl(struct file *filp, 1670 unsigned int ioctl, unsigned long arg) 1671 { 1672 struct kvm *kvm = filp->private_data; 1673 void __user *argp = (void __user *)arg; 1674 struct kvm_device_attr attr; 1675 int r; 1676 1677 switch (ioctl) { 1678 case KVM_S390_INTERRUPT: { 1679 struct kvm_s390_interrupt s390int; 1680 1681 r = -EFAULT; 1682 if (copy_from_user(&s390int, argp, sizeof(s390int))) 1683 break; 1684 r = kvm_s390_inject_vm(kvm, &s390int); 1685 break; 1686 } 1687 case KVM_ENABLE_CAP: { 1688 struct kvm_enable_cap cap; 1689 r = -EFAULT; 1690 if (copy_from_user(&cap, argp, sizeof(cap))) 1691 break; 1692 r = kvm_vm_ioctl_enable_cap(kvm, &cap); 1693 break; 1694 } 1695 case KVM_CREATE_IRQCHIP: { 1696 struct kvm_irq_routing_entry routing; 1697 1698 r = -EINVAL; 1699 if (kvm->arch.use_irqchip) { 1700 /* Set up dummy routing. 
*/ 1701 memset(&routing, 0, sizeof(routing)); 1702 r = kvm_set_irq_routing(kvm, &routing, 0, 0); 1703 } 1704 break; 1705 } 1706 case KVM_SET_DEVICE_ATTR: { 1707 r = -EFAULT; 1708 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 1709 break; 1710 r = kvm_s390_vm_set_attr(kvm, &attr); 1711 break; 1712 } 1713 case KVM_GET_DEVICE_ATTR: { 1714 r = -EFAULT; 1715 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 1716 break; 1717 r = kvm_s390_vm_get_attr(kvm, &attr); 1718 break; 1719 } 1720 case KVM_HAS_DEVICE_ATTR: { 1721 r = -EFAULT; 1722 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 1723 break; 1724 r = kvm_s390_vm_has_attr(kvm, &attr); 1725 break; 1726 } 1727 case KVM_S390_GET_SKEYS: { 1728 struct kvm_s390_skeys args; 1729 1730 r = -EFAULT; 1731 if (copy_from_user(&args, argp, 1732 sizeof(struct kvm_s390_skeys))) 1733 break; 1734 r = kvm_s390_get_skeys(kvm, &args); 1735 break; 1736 } 1737 case KVM_S390_SET_SKEYS: { 1738 struct kvm_s390_skeys args; 1739 1740 r = -EFAULT; 1741 if (copy_from_user(&args, argp, 1742 sizeof(struct kvm_s390_skeys))) 1743 break; 1744 r = kvm_s390_set_skeys(kvm, &args); 1745 break; 1746 } 1747 case KVM_S390_GET_CMMA_BITS: { 1748 struct kvm_s390_cmma_log args; 1749 1750 r = -EFAULT; 1751 if (copy_from_user(&args, argp, sizeof(args))) 1752 break; 1753 r = kvm_s390_get_cmma_bits(kvm, &args); 1754 if (!r) { 1755 r = copy_to_user(argp, &args, sizeof(args)); 1756 if (r) 1757 r = -EFAULT; 1758 } 1759 break; 1760 } 1761 case KVM_S390_SET_CMMA_BITS: { 1762 struct kvm_s390_cmma_log args; 1763 1764 r = -EFAULT; 1765 if (copy_from_user(&args, argp, sizeof(args))) 1766 break; 1767 r = kvm_s390_set_cmma_bits(kvm, &args); 1768 break; 1769 } 1770 default: 1771 r = -ENOTTY; 1772 } 1773 1774 return r; 1775 } 1776 1777 static int kvm_s390_query_ap_config(u8 *config) 1778 { 1779 u32 fcn_code = 0x04000000UL; 1780 u32 cc = 0; 1781 1782 memset(config, 0, 128); 1783 asm volatile( 1784 "lgr 0,%1\n" 1785 "lgr 2,%2\n" 1786 ".long 0xb2af0000\n" /* PQAP(QCI) */ 1787 "0: ipm %0\n" 1788 "srl %0,28\n" 1789 "1:\n" 1790 EX_TABLE(0b, 1b) 1791 : "+r" (cc) 1792 : "r" (fcn_code), "r" (config) 1793 : "cc", "0", "2", "memory" 1794 ); 1795 1796 return cc; 1797 } 1798 1799 static int kvm_s390_apxa_installed(void) 1800 { 1801 u8 config[128]; 1802 int cc; 1803 1804 if (test_facility(12)) { 1805 cc = kvm_s390_query_ap_config(config); 1806 1807 if (cc) 1808 pr_err("PQAP(QCI) failed with cc=%d", cc); 1809 else 1810 return config[0] & 0x40; 1811 } 1812 1813 return 0; 1814 } 1815 1816 static void kvm_s390_set_crycb_format(struct kvm *kvm) 1817 { 1818 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb; 1819 1820 if (kvm_s390_apxa_installed()) 1821 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2; 1822 else 1823 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1; 1824 } 1825 1826 static u64 kvm_s390_get_initial_cpuid(void) 1827 { 1828 struct cpuid cpuid; 1829 1830 get_cpu_id(&cpuid); 1831 cpuid.version = 0xff; 1832 return *((u64 *) &cpuid); 1833 } 1834 1835 static void kvm_s390_crypto_init(struct kvm *kvm) 1836 { 1837 if (!test_kvm_facility(kvm, 76)) 1838 return; 1839 1840 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb; 1841 kvm_s390_set_crycb_format(kvm); 1842 1843 /* Enable AES/DEA protected key functions by default */ 1844 kvm->arch.crypto.aes_kw = 1; 1845 kvm->arch.crypto.dea_kw = 1; 1846 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 1847 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); 1848 
get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 1849 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); 1850 } 1851 1852 static void sca_dispose(struct kvm *kvm) 1853 { 1854 if (kvm->arch.use_esca) 1855 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block)); 1856 else 1857 free_page((unsigned long)(kvm->arch.sca)); 1858 kvm->arch.sca = NULL; 1859 } 1860 1861 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) 1862 { 1863 gfp_t alloc_flags = GFP_KERNEL; 1864 int i, rc; 1865 char debug_name[16]; 1866 static unsigned long sca_offset; 1867 1868 rc = -EINVAL; 1869 #ifdef CONFIG_KVM_S390_UCONTROL 1870 if (type & ~KVM_VM_S390_UCONTROL) 1871 goto out_err; 1872 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN))) 1873 goto out_err; 1874 #else 1875 if (type) 1876 goto out_err; 1877 #endif 1878 1879 rc = s390_enable_sie(); 1880 if (rc) 1881 goto out_err; 1882 1883 rc = -ENOMEM; 1884 1885 kvm->arch.use_esca = 0; /* start with basic SCA */ 1886 if (!sclp.has_64bscao) 1887 alloc_flags |= GFP_DMA; 1888 rwlock_init(&kvm->arch.sca_lock); 1889 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags); 1890 if (!kvm->arch.sca) 1891 goto out_err; 1892 spin_lock(&kvm_lock); 1893 sca_offset += 16; 1894 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE) 1895 sca_offset = 0; 1896 kvm->arch.sca = (struct bsca_block *) 1897 ((char *) kvm->arch.sca + sca_offset); 1898 spin_unlock(&kvm_lock); 1899 1900 sprintf(debug_name, "kvm-%u", current->pid); 1901 1902 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long)); 1903 if (!kvm->arch.dbf) 1904 goto out_err; 1905 1906 kvm->arch.sie_page2 = 1907 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA); 1908 if (!kvm->arch.sie_page2) 1909 goto out_err; 1910 1911 /* Populate the facility mask initially. */ 1912 memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list, 1913 sizeof(S390_lowcore.stfle_fac_list)); 1914 for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) { 1915 if (i < kvm_s390_fac_list_mask_size()) 1916 kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i]; 1917 else 1918 kvm->arch.model.fac_mask[i] = 0UL; 1919 } 1920 1921 /* Populate the facility list initially. 
*/ 1922 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list; 1923 memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask, 1924 S390_ARCH_FAC_LIST_SIZE_BYTE); 1925 1926 /* we are always in czam mode - even on pre z14 machines */ 1927 set_kvm_facility(kvm->arch.model.fac_mask, 138); 1928 set_kvm_facility(kvm->arch.model.fac_list, 138); 1929 /* we emulate STHYI in kvm */ 1930 set_kvm_facility(kvm->arch.model.fac_mask, 74); 1931 set_kvm_facility(kvm->arch.model.fac_list, 74); 1932 if (MACHINE_HAS_TLB_GUEST) { 1933 set_kvm_facility(kvm->arch.model.fac_mask, 147); 1934 set_kvm_facility(kvm->arch.model.fac_list, 147); 1935 } 1936 1937 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid(); 1938 kvm->arch.model.ibc = sclp.ibc & 0x0fff; 1939 1940 kvm_s390_crypto_init(kvm); 1941 1942 mutex_init(&kvm->arch.float_int.ais_lock); 1943 kvm->arch.float_int.simm = 0; 1944 kvm->arch.float_int.nimm = 0; 1945 spin_lock_init(&kvm->arch.float_int.lock); 1946 for (i = 0; i < FIRQ_LIST_COUNT; i++) 1947 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]); 1948 init_waitqueue_head(&kvm->arch.ipte_wq); 1949 mutex_init(&kvm->arch.ipte_mutex); 1950 1951 debug_register_view(kvm->arch.dbf, &debug_sprintf_view); 1952 VM_EVENT(kvm, 3, "vm created with type %lu", type); 1953 1954 if (type & KVM_VM_S390_UCONTROL) { 1955 kvm->arch.gmap = NULL; 1956 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT; 1957 } else { 1958 if (sclp.hamax == U64_MAX) 1959 kvm->arch.mem_limit = TASK_SIZE_MAX; 1960 else 1961 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX, 1962 sclp.hamax + 1); 1963 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1); 1964 if (!kvm->arch.gmap) 1965 goto out_err; 1966 kvm->arch.gmap->private = kvm; 1967 kvm->arch.gmap->pfault_enabled = 0; 1968 } 1969 1970 kvm->arch.css_support = 0; 1971 kvm->arch.use_irqchip = 0; 1972 kvm->arch.epoch = 0; 1973 1974 spin_lock_init(&kvm->arch.start_stop_lock); 1975 kvm_s390_vsie_init(kvm); 1976 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid); 1977 1978 return 0; 1979 out_err: 1980 free_page((unsigned long)kvm->arch.sie_page2); 1981 debug_unregister(kvm->arch.dbf); 1982 sca_dispose(kvm); 1983 KVM_EVENT(3, "creation of vm failed: %d", rc); 1984 return rc; 1985 } 1986 1987 bool kvm_arch_has_vcpu_debugfs(void) 1988 { 1989 return false; 1990 } 1991 1992 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu) 1993 { 1994 return 0; 1995 } 1996 1997 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) 1998 { 1999 VCPU_EVENT(vcpu, 3, "%s", "free cpu"); 2000 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id); 2001 kvm_s390_clear_local_irqs(vcpu); 2002 kvm_clear_async_pf_completion_queue(vcpu); 2003 if (!kvm_is_ucontrol(vcpu->kvm)) 2004 sca_del_vcpu(vcpu); 2005 2006 if (kvm_is_ucontrol(vcpu->kvm)) 2007 gmap_remove(vcpu->arch.gmap); 2008 2009 if (vcpu->kvm->arch.use_cmma) 2010 kvm_s390_vcpu_unsetup_cmma(vcpu); 2011 free_page((unsigned long)(vcpu->arch.sie_block)); 2012 2013 kvm_vcpu_uninit(vcpu); 2014 kmem_cache_free(kvm_vcpu_cache, vcpu); 2015 } 2016 2017 static void kvm_free_vcpus(struct kvm *kvm) 2018 { 2019 unsigned int i; 2020 struct kvm_vcpu *vcpu; 2021 2022 kvm_for_each_vcpu(i, vcpu, kvm) 2023 kvm_arch_vcpu_destroy(vcpu); 2024 2025 mutex_lock(&kvm->lock); 2026 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) 2027 kvm->vcpus[i] = NULL; 2028 2029 atomic_set(&kvm->online_vcpus, 0); 2030 mutex_unlock(&kvm->lock); 2031 } 2032 2033 void kvm_arch_destroy_vm(struct kvm *kvm) 2034 { 2035 kvm_free_vcpus(kvm); 2036 sca_dispose(kvm); 2037 debug_unregister(kvm->arch.dbf); 2038 
free_page((unsigned long)kvm->arch.sie_page2); 2039 if (!kvm_is_ucontrol(kvm)) 2040 gmap_remove(kvm->arch.gmap); 2041 kvm_s390_destroy_adapters(kvm); 2042 kvm_s390_clear_float_irqs(kvm); 2043 kvm_s390_vsie_destroy(kvm); 2044 if (kvm->arch.migration_state) { 2045 vfree(kvm->arch.migration_state->pgste_bitmap); 2046 kfree(kvm->arch.migration_state); 2047 } 2048 KVM_EVENT(3, "vm 0x%pK destroyed", kvm); 2049 } 2050 2051 /* Section: vcpu related */ 2052 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu) 2053 { 2054 vcpu->arch.gmap = gmap_create(current->mm, -1UL); 2055 if (!vcpu->arch.gmap) 2056 return -ENOMEM; 2057 vcpu->arch.gmap->private = vcpu->kvm; 2058 2059 return 0; 2060 } 2061 2062 static void sca_del_vcpu(struct kvm_vcpu *vcpu) 2063 { 2064 if (!kvm_s390_use_sca_entries()) 2065 return; 2066 read_lock(&vcpu->kvm->arch.sca_lock); 2067 if (vcpu->kvm->arch.use_esca) { 2068 struct esca_block *sca = vcpu->kvm->arch.sca; 2069 2070 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); 2071 sca->cpu[vcpu->vcpu_id].sda = 0; 2072 } else { 2073 struct bsca_block *sca = vcpu->kvm->arch.sca; 2074 2075 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); 2076 sca->cpu[vcpu->vcpu_id].sda = 0; 2077 } 2078 read_unlock(&vcpu->kvm->arch.sca_lock); 2079 } 2080 2081 static void sca_add_vcpu(struct kvm_vcpu *vcpu) 2082 { 2083 if (!kvm_s390_use_sca_entries()) { 2084 struct bsca_block *sca = vcpu->kvm->arch.sca; 2085 2086 /* we still need the basic sca for the ipte control */ 2087 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2088 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; 2089 } 2090 read_lock(&vcpu->kvm->arch.sca_lock); 2091 if (vcpu->kvm->arch.use_esca) { 2092 struct esca_block *sca = vcpu->kvm->arch.sca; 2093 2094 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; 2095 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2096 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU; 2097 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; 2098 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); 2099 } else { 2100 struct bsca_block *sca = vcpu->kvm->arch.sca; 2101 2102 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; 2103 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2104 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; 2105 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); 2106 } 2107 read_unlock(&vcpu->kvm->arch.sca_lock); 2108 } 2109 2110 /* Basic SCA to Extended SCA data copy routines */ 2111 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s) 2112 { 2113 d->sda = s->sda; 2114 d->sigp_ctrl.c = s->sigp_ctrl.c; 2115 d->sigp_ctrl.scn = s->sigp_ctrl.scn; 2116 } 2117 2118 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s) 2119 { 2120 int i; 2121 2122 d->ipte_control = s->ipte_control; 2123 d->mcn[0] = s->mcn; 2124 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++) 2125 sca_copy_entry(&d->cpu[i], &s->cpu[i]); 2126 } 2127 2128 static int sca_switch_to_extended(struct kvm *kvm) 2129 { 2130 struct bsca_block *old_sca = kvm->arch.sca; 2131 struct esca_block *new_sca; 2132 struct kvm_vcpu *vcpu; 2133 unsigned int vcpu_idx; 2134 u32 scaol, scaoh; 2135 2136 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO); 2137 if (!new_sca) 2138 return -ENOMEM; 2139 2140 scaoh = (u32)((u64)(new_sca) >> 32); 2141 scaol = (u32)(u64)(new_sca) & ~0x3fU; 2142 2143 kvm_s390_vcpu_block_all(kvm); 2144 write_lock(&kvm->arch.sca_lock); 2145 2146 sca_copy_b_to_e(new_sca, old_sca); 2147 2148 
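	/*
	 * All VCPUs are blocked and the SCA lock is held for writing, so it
	 * is safe to re-point every SIE control block at the new ESCA.
	 */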
kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) { 2149 vcpu->arch.sie_block->scaoh = scaoh; 2150 vcpu->arch.sie_block->scaol = scaol; 2151 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; 2152 } 2153 kvm->arch.sca = new_sca; 2154 kvm->arch.use_esca = 1; 2155 2156 write_unlock(&kvm->arch.sca_lock); 2157 kvm_s390_vcpu_unblock_all(kvm); 2158 2159 free_page((unsigned long)old_sca); 2160 2161 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)", 2162 old_sca, kvm->arch.sca); 2163 return 0; 2164 } 2165 2166 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id) 2167 { 2168 int rc; 2169 2170 if (!kvm_s390_use_sca_entries()) { 2171 if (id < KVM_MAX_VCPUS) 2172 return true; 2173 return false; 2174 } 2175 if (id < KVM_S390_BSCA_CPU_SLOTS) 2176 return true; 2177 if (!sclp.has_esca || !sclp.has_64bscao) 2178 return false; 2179 2180 mutex_lock(&kvm->lock); 2181 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm); 2182 mutex_unlock(&kvm->lock); 2183 2184 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS; 2185 } 2186 2187 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) 2188 { 2189 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 2190 kvm_clear_async_pf_completion_queue(vcpu); 2191 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX | 2192 KVM_SYNC_GPRS | 2193 KVM_SYNC_ACRS | 2194 KVM_SYNC_CRS | 2195 KVM_SYNC_ARCH0 | 2196 KVM_SYNC_PFAULT; 2197 kvm_s390_set_prefix(vcpu, 0); 2198 if (test_kvm_facility(vcpu->kvm, 64)) 2199 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; 2200 if (test_kvm_facility(vcpu->kvm, 133)) 2201 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB; 2202 /* fprs can be synchronized via vrs, even if the guest has no vx. With 2203 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format. 2204 */ 2205 if (MACHINE_HAS_VX) 2206 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS; 2207 else 2208 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS; 2209 2210 if (kvm_is_ucontrol(vcpu->kvm)) 2211 return __kvm_ucontrol_vcpu_init(vcpu); 2212 2213 return 0; 2214 } 2215 2216 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 2217 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2218 { 2219 WARN_ON_ONCE(vcpu->arch.cputm_start != 0); 2220 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 2221 vcpu->arch.cputm_start = get_tod_clock_fast(); 2222 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 2223 } 2224 2225 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 2226 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2227 { 2228 WARN_ON_ONCE(vcpu->arch.cputm_start == 0); 2229 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 2230 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start; 2231 vcpu->arch.cputm_start = 0; 2232 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 2233 } 2234 2235 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 2236 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2237 { 2238 WARN_ON_ONCE(vcpu->arch.cputm_enabled); 2239 vcpu->arch.cputm_enabled = true; 2240 __start_cpu_timer_accounting(vcpu); 2241 } 2242 2243 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 2244 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2245 { 2246 WARN_ON_ONCE(!vcpu->arch.cputm_enabled); 2247 __stop_cpu_timer_accounting(vcpu); 2248 vcpu->arch.cputm_enabled = false; 2249 } 2250 2251 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2252 { 2253 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 2254 
__enable_cpu_timer_accounting(vcpu); 2255 preempt_enable(); 2256 } 2257 2258 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2259 { 2260 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 2261 __disable_cpu_timer_accounting(vcpu); 2262 preempt_enable(); 2263 } 2264 2265 /* set the cpu timer - may only be called from the VCPU thread itself */ 2266 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm) 2267 { 2268 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 2269 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 2270 if (vcpu->arch.cputm_enabled) 2271 vcpu->arch.cputm_start = get_tod_clock_fast(); 2272 vcpu->arch.sie_block->cputm = cputm; 2273 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 2274 preempt_enable(); 2275 } 2276 2277 /* update and get the cpu timer - can also be called from other VCPU threads */ 2278 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu) 2279 { 2280 unsigned int seq; 2281 __u64 value; 2282 2283 if (unlikely(!vcpu->arch.cputm_enabled)) 2284 return vcpu->arch.sie_block->cputm; 2285 2286 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 2287 do { 2288 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount); 2289 /* 2290 * If the writer would ever execute a read in the critical 2291 * section, e.g. in irq context, we have a deadlock. 2292 */ 2293 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu); 2294 value = vcpu->arch.sie_block->cputm; 2295 /* if cputm_start is 0, accounting is being started/stopped */ 2296 if (likely(vcpu->arch.cputm_start)) 2297 value -= get_tod_clock_fast() - vcpu->arch.cputm_start; 2298 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1)); 2299 preempt_enable(); 2300 return value; 2301 } 2302 2303 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 2304 { 2305 2306 gmap_enable(vcpu->arch.enabled_gmap); 2307 atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); 2308 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) 2309 __start_cpu_timer_accounting(vcpu); 2310 vcpu->cpu = cpu; 2311 } 2312 2313 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 2314 { 2315 vcpu->cpu = -1; 2316 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) 2317 __stop_cpu_timer_accounting(vcpu); 2318 atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); 2319 vcpu->arch.enabled_gmap = gmap_get_enabled(); 2320 gmap_disable(vcpu->arch.enabled_gmap); 2321 2322 } 2323 2324 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) 2325 { 2326 /* this equals initial cpu reset in pop, but we don't switch to ESA */ 2327 vcpu->arch.sie_block->gpsw.mask = 0UL; 2328 vcpu->arch.sie_block->gpsw.addr = 0UL; 2329 kvm_s390_set_prefix(vcpu, 0); 2330 kvm_s390_set_cpu_timer(vcpu, 0); 2331 vcpu->arch.sie_block->ckc = 0UL; 2332 vcpu->arch.sie_block->todpr = 0; 2333 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64)); 2334 vcpu->arch.sie_block->gcr[0] = 0xE0UL; 2335 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL; 2336 /* make sure the new fpc will be lazily loaded */ 2337 save_fpu_regs(); 2338 current->thread.fpu.fpc = 0; 2339 vcpu->arch.sie_block->gbea = 1; 2340 vcpu->arch.sie_block->pp = 0; 2341 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 2342 kvm_clear_async_pf_completion_queue(vcpu); 2343 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) 2344 kvm_s390_vcpu_stop(vcpu); 2345 kvm_s390_clear_local_irqs(vcpu); 2346 } 2347 2348 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) 2349 { 2350 mutex_lock(&vcpu->kvm->lock); 2351 preempt_disable(); 2352 
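	/* propagate the VM-wide TOD epoch to this vcpu's SIE control block */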
vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch; 2353 preempt_enable(); 2354 mutex_unlock(&vcpu->kvm->lock); 2355 if (!kvm_is_ucontrol(vcpu->kvm)) { 2356 vcpu->arch.gmap = vcpu->kvm->arch.gmap; 2357 sca_add_vcpu(vcpu); 2358 } 2359 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0) 2360 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC; 2361 /* make vcpu_load load the right gmap on the first trigger */ 2362 vcpu->arch.enabled_gmap = vcpu->arch.gmap; 2363 } 2364 2365 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu) 2366 { 2367 if (!test_kvm_facility(vcpu->kvm, 76)) 2368 return; 2369 2370 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA); 2371 2372 if (vcpu->kvm->arch.crypto.aes_kw) 2373 vcpu->arch.sie_block->ecb3 |= ECB3_AES; 2374 if (vcpu->kvm->arch.crypto.dea_kw) 2375 vcpu->arch.sie_block->ecb3 |= ECB3_DEA; 2376 2377 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd; 2378 } 2379 2380 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu) 2381 { 2382 free_page(vcpu->arch.sie_block->cbrlo); 2383 vcpu->arch.sie_block->cbrlo = 0; 2384 } 2385 2386 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu) 2387 { 2388 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL); 2389 if (!vcpu->arch.sie_block->cbrlo) 2390 return -ENOMEM; 2391 2392 vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI; 2393 return 0; 2394 } 2395 2396 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu) 2397 { 2398 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model; 2399 2400 vcpu->arch.sie_block->ibc = model->ibc; 2401 if (test_kvm_facility(vcpu->kvm, 7)) 2402 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list; 2403 } 2404 2405 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) 2406 { 2407 int rc = 0; 2408 2409 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | 2410 CPUSTAT_SM | 2411 CPUSTAT_STOPPED); 2412 2413 if (test_kvm_facility(vcpu->kvm, 78)) 2414 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags); 2415 else if (test_kvm_facility(vcpu->kvm, 8)) 2416 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags); 2417 2418 kvm_s390_vcpu_setup_model(vcpu); 2419 2420 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */ 2421 if (MACHINE_HAS_ESOP) 2422 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT; 2423 if (test_kvm_facility(vcpu->kvm, 9)) 2424 vcpu->arch.sie_block->ecb |= ECB_SRSI; 2425 if (test_kvm_facility(vcpu->kvm, 73)) 2426 vcpu->arch.sie_block->ecb |= ECB_TE; 2427 2428 if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi) 2429 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI; 2430 if (test_kvm_facility(vcpu->kvm, 130)) 2431 vcpu->arch.sie_block->ecb2 |= ECB2_IEP; 2432 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI; 2433 if (sclp.has_cei) 2434 vcpu->arch.sie_block->eca |= ECA_CEI; 2435 if (sclp.has_ib) 2436 vcpu->arch.sie_block->eca |= ECA_IB; 2437 if (sclp.has_siif) 2438 vcpu->arch.sie_block->eca |= ECA_SII; 2439 if (sclp.has_sigpif) 2440 vcpu->arch.sie_block->eca |= ECA_SIGPI; 2441 if (test_kvm_facility(vcpu->kvm, 129)) { 2442 vcpu->arch.sie_block->eca |= ECA_VX; 2443 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; 2444 } 2445 if (test_kvm_facility(vcpu->kvm, 139)) 2446 vcpu->arch.sie_block->ecd |= ECD_MEF; 2447 2448 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx) 2449 | SDNXC; 2450 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb; 2451 2452 if (sclp.has_kss) 2453 atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags); 2454 else 2455 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; 2456 2457 
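	/* CMMA needs a per-vcpu page for the CBRL (see kvm_s390_vcpu_setup_cmma()) */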
if (vcpu->kvm->arch.use_cmma) { 2458 rc = kvm_s390_vcpu_setup_cmma(vcpu); 2459 if (rc) 2460 return rc; 2461 } 2462 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 2463 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; 2464 2465 kvm_s390_vcpu_crypto_setup(vcpu); 2466 2467 return rc; 2468 } 2469 2470 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, 2471 unsigned int id) 2472 { 2473 struct kvm_vcpu *vcpu; 2474 struct sie_page *sie_page; 2475 int rc = -EINVAL; 2476 2477 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id)) 2478 goto out; 2479 2480 rc = -ENOMEM; 2481 2482 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); 2483 if (!vcpu) 2484 goto out; 2485 2486 BUILD_BUG_ON(sizeof(struct sie_page) != 4096); 2487 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL); 2488 if (!sie_page) 2489 goto out_free_cpu; 2490 2491 vcpu->arch.sie_block = &sie_page->sie_block; 2492 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb; 2493 2494 /* the real guest size will always be smaller than msl */ 2495 vcpu->arch.sie_block->mso = 0; 2496 vcpu->arch.sie_block->msl = sclp.hamax; 2497 2498 vcpu->arch.sie_block->icpua = id; 2499 spin_lock_init(&vcpu->arch.local_int.lock); 2500 vcpu->arch.local_int.float_int = &kvm->arch.float_int; 2501 vcpu->arch.local_int.wq = &vcpu->wq; 2502 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags; 2503 seqcount_init(&vcpu->arch.cputm_seqcount); 2504 2505 rc = kvm_vcpu_init(vcpu, kvm, id); 2506 if (rc) 2507 goto out_free_sie_block; 2508 VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu, 2509 vcpu->arch.sie_block); 2510 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block); 2511 2512 return vcpu; 2513 out_free_sie_block: 2514 free_page((unsigned long)(vcpu->arch.sie_block)); 2515 out_free_cpu: 2516 kmem_cache_free(kvm_vcpu_cache, vcpu); 2517 out: 2518 return ERR_PTR(rc); 2519 } 2520 2521 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 2522 { 2523 return kvm_s390_vcpu_has_irq(vcpu, 0); 2524 } 2525 2526 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) 2527 { 2528 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE); 2529 } 2530 2531 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu) 2532 { 2533 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); 2534 exit_sie(vcpu); 2535 } 2536 2537 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu) 2538 { 2539 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); 2540 } 2541 2542 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu) 2543 { 2544 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20); 2545 exit_sie(vcpu); 2546 } 2547 2548 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu) 2549 { 2550 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20); 2551 } 2552 2553 /* 2554 * Kick a guest cpu out of SIE and wait until SIE is not running. 2555 * If the CPU is not running (e.g. waiting as idle) the function will 2556 * return immediately. 
*/ 2557 void exit_sie(struct kvm_vcpu *vcpu) 2558 { 2559 atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags); 2560 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE) 2561 cpu_relax(); 2562 } 2563 2564 /* Kick a guest cpu out of SIE to process a request synchronously */ 2565 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu) 2566 { 2567 kvm_make_request(req, vcpu); 2568 kvm_s390_vcpu_request(vcpu); 2569 } 2570 2571 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, 2572 unsigned long end) 2573 { 2574 struct kvm *kvm = gmap->private; 2575 struct kvm_vcpu *vcpu; 2576 unsigned long prefix; 2577 int i; 2578 2579 if (gmap_is_shadow(gmap)) 2580 return; 2581 if (start >= 1UL << 31) 2582 /* We are only interested in prefix pages */ 2583 return; 2584 kvm_for_each_vcpu(i, vcpu, kvm) { 2585 /* match against both prefix pages */ 2586 prefix = kvm_s390_get_prefix(vcpu); 2587 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) { 2588 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx", 2589 start, end); 2590 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu); 2591 } 2592 } 2593 } 2594 2595 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) 2596 { 2597 /* kvm common code refers to this, but never calls it */ 2598 BUG(); 2599 return 0; 2600 } 2601 2602 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, 2603 struct kvm_one_reg *reg) 2604 { 2605 int r = -EINVAL; 2606 2607 switch (reg->id) { 2608 case KVM_REG_S390_TODPR: 2609 r = put_user(vcpu->arch.sie_block->todpr, 2610 (u32 __user *)reg->addr); 2611 break; 2612 case KVM_REG_S390_EPOCHDIFF: 2613 r = put_user(vcpu->arch.sie_block->epoch, 2614 (u64 __user *)reg->addr); 2615 break; 2616 case KVM_REG_S390_CPU_TIMER: 2617 r = put_user(kvm_s390_get_cpu_timer(vcpu), 2618 (u64 __user *)reg->addr); 2619 break; 2620 case KVM_REG_S390_CLOCK_COMP: 2621 r = put_user(vcpu->arch.sie_block->ckc, 2622 (u64 __user *)reg->addr); 2623 break; 2624 case KVM_REG_S390_PFTOKEN: 2625 r = put_user(vcpu->arch.pfault_token, 2626 (u64 __user *)reg->addr); 2627 break; 2628 case KVM_REG_S390_PFCOMPARE: 2629 r = put_user(vcpu->arch.pfault_compare, 2630 (u64 __user *)reg->addr); 2631 break; 2632 case KVM_REG_S390_PFSELECT: 2633 r = put_user(vcpu->arch.pfault_select, 2634 (u64 __user *)reg->addr); 2635 break; 2636 case KVM_REG_S390_PP: 2637 r = put_user(vcpu->arch.sie_block->pp, 2638 (u64 __user *)reg->addr); 2639 break; 2640 case KVM_REG_S390_GBEA: 2641 r = put_user(vcpu->arch.sie_block->gbea, 2642 (u64 __user *)reg->addr); 2643 break; 2644 default: 2645 break; 2646 } 2647 2648 return r; 2649 } 2650 2651 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, 2652 struct kvm_one_reg *reg) 2653 { 2654 int r = -EINVAL; 2655 __u64 val; 2656 2657 switch (reg->id) { 2658 case KVM_REG_S390_TODPR: 2659 r = get_user(vcpu->arch.sie_block->todpr, 2660 (u32 __user *)reg->addr); 2661 break; 2662 case KVM_REG_S390_EPOCHDIFF: 2663 r = get_user(vcpu->arch.sie_block->epoch, 2664 (u64 __user *)reg->addr); 2665 break; 2666 case KVM_REG_S390_CPU_TIMER: 2667 r = get_user(val, (u64 __user *)reg->addr); 2668 if (!r) 2669 kvm_s390_set_cpu_timer(vcpu, val); 2670 break; 2671 case KVM_REG_S390_CLOCK_COMP: 2672 r = get_user(vcpu->arch.sie_block->ckc, 2673 (u64 __user *)reg->addr); 2674 break; 2675 case KVM_REG_S390_PFTOKEN: 2676 r = get_user(vcpu->arch.pfault_token, 2677 (u64 __user *)reg->addr); 2678 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 2679 kvm_clear_async_pf_completion_queue(vcpu); 2680 break; 2681 case KVM_REG_S390_PFCOMPARE: 
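		/*
		 * pfault compare/select are matched against the guest PSW mask
		 * in kvm_arch_setup_async_pf() before a pfault token is used.
		 */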
		r = get_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = get_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = get_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = get_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}

static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
	return 0;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	if (test_fp_ctl(fpu->fpc))
		return -EINVAL;
	vcpu->run->s.regs.fpc = fpu->fpc;
	if (MACHINE_HAS_VX)
		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
				 (freg_t *) fpu->fprs);
	else
		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	/* make sure we have the latest values */
	save_fpu_regs();
	if (MACHINE_HAS_VX)
		convert_vx_to_fp((freg_t *) fpu->fprs,
				 (__vector128 *) vcpu->run->s.regs.vrs);
	else
		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->run->s.regs.fpc;
	return 0;
}

static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!is_vcpu_stopped(vcpu))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}

#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	int rc = 0;

	vcpu->guest_debug = 0;
	kvm_s390_clear_bp_data(vcpu);

	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
		return -EINVAL;
	if (!sclp.has_gpere)
		return -EINVAL;

	if (dbg->control & KVM_GUESTDBG_ENABLE) {
		vcpu->guest_debug = dbg->control;
		/* enforce guest PER */
		atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);

		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
			rc = kvm_s390_import_bp_data(vcpu, dbg);
} else { 2808 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags); 2809 vcpu->arch.guestdbg.last_bp = 0; 2810 } 2811 2812 if (rc) { 2813 vcpu->guest_debug = 0; 2814 kvm_s390_clear_bp_data(vcpu); 2815 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags); 2816 } 2817 2818 return rc; 2819 } 2820 2821 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, 2822 struct kvm_mp_state *mp_state) 2823 { 2824 /* CHECK_STOP and LOAD are not supported yet */ 2825 return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED : 2826 KVM_MP_STATE_OPERATING; 2827 } 2828 2829 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, 2830 struct kvm_mp_state *mp_state) 2831 { 2832 int rc = 0; 2833 2834 /* user space knows about this interface - let it control the state */ 2835 vcpu->kvm->arch.user_cpu_state_ctrl = 1; 2836 2837 switch (mp_state->mp_state) { 2838 case KVM_MP_STATE_STOPPED: 2839 kvm_s390_vcpu_stop(vcpu); 2840 break; 2841 case KVM_MP_STATE_OPERATING: 2842 kvm_s390_vcpu_start(vcpu); 2843 break; 2844 case KVM_MP_STATE_LOAD: 2845 case KVM_MP_STATE_CHECK_STOP: 2846 /* fall through - CHECK_STOP and LOAD are not supported yet */ 2847 default: 2848 rc = -ENXIO; 2849 } 2850 2851 return rc; 2852 } 2853 2854 static bool ibs_enabled(struct kvm_vcpu *vcpu) 2855 { 2856 return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS; 2857 } 2858 2859 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) 2860 { 2861 retry: 2862 kvm_s390_vcpu_request_handled(vcpu); 2863 if (!kvm_request_pending(vcpu)) 2864 return 0; 2865 /* 2866 * We use MMU_RELOAD just to re-arm the ipte notifier for the 2867 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock. 2868 * This ensures that the ipte instruction for this request has 2869 * already finished. We might race against a second unmapper that 2870 * wants to set the blocking bit. Lets just retry the request loop. 2871 */ 2872 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) { 2873 int rc; 2874 rc = gmap_mprotect_notify(vcpu->arch.gmap, 2875 kvm_s390_get_prefix(vcpu), 2876 PAGE_SIZE * 2, PROT_WRITE); 2877 if (rc) { 2878 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); 2879 return rc; 2880 } 2881 goto retry; 2882 } 2883 2884 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) { 2885 vcpu->arch.sie_block->ihcpu = 0xffff; 2886 goto retry; 2887 } 2888 2889 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) { 2890 if (!ibs_enabled(vcpu)) { 2891 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1); 2892 atomic_or(CPUSTAT_IBS, 2893 &vcpu->arch.sie_block->cpuflags); 2894 } 2895 goto retry; 2896 } 2897 2898 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) { 2899 if (ibs_enabled(vcpu)) { 2900 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0); 2901 atomic_andnot(CPUSTAT_IBS, 2902 &vcpu->arch.sie_block->cpuflags); 2903 } 2904 goto retry; 2905 } 2906 2907 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) { 2908 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC; 2909 goto retry; 2910 } 2911 2912 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) { 2913 /* 2914 * Disable CMMA virtualization; we will emulate the ESSA 2915 * instruction manually, in order to provide additional 2916 * functionalities needed for live migration. 2917 */ 2918 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA; 2919 goto retry; 2920 } 2921 2922 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) { 2923 /* 2924 * Re-enable CMMA virtualization if CMMA is available and 2925 * was used. 
2926 */ 2927 if ((vcpu->kvm->arch.use_cmma) && 2928 (vcpu->kvm->mm->context.use_cmma)) 2929 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA; 2930 goto retry; 2931 } 2932 2933 /* nothing to do, just clear the request */ 2934 kvm_clear_request(KVM_REQ_UNHALT, vcpu); 2935 2936 return 0; 2937 } 2938 2939 void kvm_s390_set_tod_clock_ext(struct kvm *kvm, 2940 const struct kvm_s390_vm_tod_clock *gtod) 2941 { 2942 struct kvm_vcpu *vcpu; 2943 struct kvm_s390_tod_clock_ext htod; 2944 int i; 2945 2946 mutex_lock(&kvm->lock); 2947 preempt_disable(); 2948 2949 get_tod_clock_ext((char *)&htod); 2950 2951 kvm->arch.epoch = gtod->tod - htod.tod; 2952 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx; 2953 2954 if (kvm->arch.epoch > gtod->tod) 2955 kvm->arch.epdx -= 1; 2956 2957 kvm_s390_vcpu_block_all(kvm); 2958 kvm_for_each_vcpu(i, vcpu, kvm) { 2959 vcpu->arch.sie_block->epoch = kvm->arch.epoch; 2960 vcpu->arch.sie_block->epdx = kvm->arch.epdx; 2961 } 2962 2963 kvm_s390_vcpu_unblock_all(kvm); 2964 preempt_enable(); 2965 mutex_unlock(&kvm->lock); 2966 } 2967 2968 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod) 2969 { 2970 struct kvm_vcpu *vcpu; 2971 int i; 2972 2973 mutex_lock(&kvm->lock); 2974 preempt_disable(); 2975 kvm->arch.epoch = tod - get_tod_clock(); 2976 kvm_s390_vcpu_block_all(kvm); 2977 kvm_for_each_vcpu(i, vcpu, kvm) 2978 vcpu->arch.sie_block->epoch = kvm->arch.epoch; 2979 kvm_s390_vcpu_unblock_all(kvm); 2980 preempt_enable(); 2981 mutex_unlock(&kvm->lock); 2982 } 2983 2984 /** 2985 * kvm_arch_fault_in_page - fault-in guest page if necessary 2986 * @vcpu: The corresponding virtual cpu 2987 * @gpa: Guest physical address 2988 * @writable: Whether the page should be writable or not 2989 * 2990 * Make sure that a guest page has been faulted-in on the host. 2991 * 2992 * Return: Zero on success, negative error code otherwise. 2993 */ 2994 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable) 2995 { 2996 return gmap_fault(vcpu->arch.gmap, gpa, 2997 writable ? 
FAULT_FLAG_WRITE : 0); 2998 } 2999 3000 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token, 3001 unsigned long token) 3002 { 3003 struct kvm_s390_interrupt inti; 3004 struct kvm_s390_irq irq; 3005 3006 if (start_token) { 3007 irq.u.ext.ext_params2 = token; 3008 irq.type = KVM_S390_INT_PFAULT_INIT; 3009 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq)); 3010 } else { 3011 inti.type = KVM_S390_INT_PFAULT_DONE; 3012 inti.parm64 = token; 3013 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti)); 3014 } 3015 } 3016 3017 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, 3018 struct kvm_async_pf *work) 3019 { 3020 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token); 3021 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token); 3022 } 3023 3024 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, 3025 struct kvm_async_pf *work) 3026 { 3027 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token); 3028 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token); 3029 } 3030 3031 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, 3032 struct kvm_async_pf *work) 3033 { 3034 /* s390 will always inject the page directly */ 3035 } 3036 3037 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu) 3038 { 3039 /* 3040 * s390 will always inject the page directly, 3041 * but we still want check_async_completion to cleanup 3042 */ 3043 return true; 3044 } 3045 3046 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu) 3047 { 3048 hva_t hva; 3049 struct kvm_arch_async_pf arch; 3050 int rc; 3051 3052 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 3053 return 0; 3054 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) != 3055 vcpu->arch.pfault_compare) 3056 return 0; 3057 if (psw_extint_disabled(vcpu)) 3058 return 0; 3059 if (kvm_s390_vcpu_has_irq(vcpu, 0)) 3060 return 0; 3061 if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul)) 3062 return 0; 3063 if (!vcpu->arch.gmap->pfault_enabled) 3064 return 0; 3065 3066 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr)); 3067 hva += current->thread.gmap_addr & ~PAGE_MASK; 3068 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8)) 3069 return 0; 3070 3071 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch); 3072 return rc; 3073 } 3074 3075 static int vcpu_pre_run(struct kvm_vcpu *vcpu) 3076 { 3077 int rc, cpuflags; 3078 3079 /* 3080 * On s390 notifications for arriving pages will be delivered directly 3081 * to the guest but the house keeping for completed pfaults is 3082 * handled outside the worker. 
3083 */ 3084 kvm_check_async_pf_completion(vcpu); 3085 3086 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14]; 3087 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15]; 3088 3089 if (need_resched()) 3090 schedule(); 3091 3092 if (test_cpu_flag(CIF_MCCK_PENDING)) 3093 s390_handle_mcck(); 3094 3095 if (!kvm_is_ucontrol(vcpu->kvm)) { 3096 rc = kvm_s390_deliver_pending_interrupts(vcpu); 3097 if (rc) 3098 return rc; 3099 } 3100 3101 rc = kvm_s390_handle_requests(vcpu); 3102 if (rc) 3103 return rc; 3104 3105 if (guestdbg_enabled(vcpu)) { 3106 kvm_s390_backup_guest_per_regs(vcpu); 3107 kvm_s390_patch_guest_per_regs(vcpu); 3108 } 3109 3110 vcpu->arch.sie_block->icptcode = 0; 3111 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags); 3112 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags); 3113 trace_kvm_s390_sie_enter(vcpu, cpuflags); 3114 3115 return 0; 3116 } 3117 3118 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) 3119 { 3120 struct kvm_s390_pgm_info pgm_info = { 3121 .code = PGM_ADDRESSING, 3122 }; 3123 u8 opcode, ilen; 3124 int rc; 3125 3126 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); 3127 trace_kvm_s390_sie_fault(vcpu); 3128 3129 /* 3130 * We want to inject an addressing exception, which is defined as a 3131 * suppressing or terminating exception. However, since we came here 3132 * by a DAT access exception, the PSW still points to the faulting 3133 * instruction since DAT exceptions are nullifying. So we've got 3134 * to look up the current opcode to get the length of the instruction 3135 * to be able to forward the PSW. 3136 */ 3137 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1); 3138 ilen = insn_length(opcode); 3139 if (rc < 0) { 3140 return rc; 3141 } else if (rc) { 3142 /* Instruction-Fetching Exceptions - we can't detect the ilen. 3143 * Forward by arbitrary ilc, injection will take care of 3144 * nullification if necessary. 
3145 */ 3146 pgm_info = vcpu->arch.pgm; 3147 ilen = 4; 3148 } 3149 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID; 3150 kvm_s390_forward_psw(vcpu, ilen); 3151 return kvm_s390_inject_prog_irq(vcpu, &pgm_info); 3152 } 3153 3154 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) 3155 { 3156 struct mcck_volatile_info *mcck_info; 3157 struct sie_page *sie_page; 3158 3159 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", 3160 vcpu->arch.sie_block->icptcode); 3161 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); 3162 3163 if (guestdbg_enabled(vcpu)) 3164 kvm_s390_restore_guest_per_regs(vcpu); 3165 3166 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14; 3167 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15; 3168 3169 if (exit_reason == -EINTR) { 3170 VCPU_EVENT(vcpu, 3, "%s", "machine check"); 3171 sie_page = container_of(vcpu->arch.sie_block, 3172 struct sie_page, sie_block); 3173 mcck_info = &sie_page->mcck_info; 3174 kvm_s390_reinject_machine_check(vcpu, mcck_info); 3175 return 0; 3176 } 3177 3178 if (vcpu->arch.sie_block->icptcode > 0) { 3179 int rc = kvm_handle_sie_intercept(vcpu); 3180 3181 if (rc != -EOPNOTSUPP) 3182 return rc; 3183 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC; 3184 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; 3185 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; 3186 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; 3187 return -EREMOTE; 3188 } else if (exit_reason != -EFAULT) { 3189 vcpu->stat.exit_null++; 3190 return 0; 3191 } else if (kvm_is_ucontrol(vcpu->kvm)) { 3192 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; 3193 vcpu->run->s390_ucontrol.trans_exc_code = 3194 current->thread.gmap_addr; 3195 vcpu->run->s390_ucontrol.pgm_code = 0x10; 3196 return -EREMOTE; 3197 } else if (current->thread.gmap_pfault) { 3198 trace_kvm_s390_major_guest_pfault(vcpu); 3199 current->thread.gmap_pfault = 0; 3200 if (kvm_arch_setup_async_pf(vcpu)) 3201 return 0; 3202 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1); 3203 } 3204 return vcpu_post_run_fault_in_sie(vcpu); 3205 } 3206 3207 static int __vcpu_run(struct kvm_vcpu *vcpu) 3208 { 3209 int rc, exit_reason; 3210 3211 /* 3212 * We try to hold kvm->srcu during most of vcpu_run (except when run- 3213 * ning the guest), so that memslots (and other stuff) are protected 3214 */ 3215 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 3216 3217 do { 3218 rc = vcpu_pre_run(vcpu); 3219 if (rc) 3220 break; 3221 3222 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 3223 /* 3224 * As PF_VCPU will be used in fault handler, between 3225 * guest_enter and guest_exit should be no uaccess. 
3226 */ 3227 local_irq_disable(); 3228 guest_enter_irqoff(); 3229 __disable_cpu_timer_accounting(vcpu); 3230 local_irq_enable(); 3231 exit_reason = sie64a(vcpu->arch.sie_block, 3232 vcpu->run->s.regs.gprs); 3233 local_irq_disable(); 3234 __enable_cpu_timer_accounting(vcpu); 3235 guest_exit_irqoff(); 3236 local_irq_enable(); 3237 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 3238 3239 rc = vcpu_post_run(vcpu, exit_reason); 3240 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc); 3241 3242 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 3243 return rc; 3244 } 3245 3246 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3247 { 3248 struct runtime_instr_cb *riccb; 3249 struct gs_cb *gscb; 3250 3251 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb; 3252 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb; 3253 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; 3254 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; 3255 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) 3256 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); 3257 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) { 3258 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128); 3259 /* some control register changes require a tlb flush */ 3260 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 3261 } 3262 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) { 3263 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm); 3264 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc; 3265 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr; 3266 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp; 3267 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea; 3268 } 3269 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) { 3270 vcpu->arch.pfault_token = kvm_run->s.regs.pft; 3271 vcpu->arch.pfault_select = kvm_run->s.regs.pfs; 3272 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc; 3273 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 3274 kvm_clear_async_pf_completion_queue(vcpu); 3275 } 3276 /* 3277 * If userspace sets the riccb (e.g. after migration) to a valid state, 3278 * we should enable RI here instead of doing the lazy enablement. 3279 */ 3280 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) && 3281 test_kvm_facility(vcpu->kvm, 64) && 3282 riccb->v && 3283 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) { 3284 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)"); 3285 vcpu->arch.sie_block->ecb3 |= ECB3_RI; 3286 } 3287 /* 3288 * If userspace sets the gscb (e.g. after migration) to non-zero, 3289 * we should enable GS here instead of doing the lazy enablement. 
3290 */ 3291 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) && 3292 test_kvm_facility(vcpu->kvm, 133) && 3293 gscb->gssm && 3294 !vcpu->arch.gs_enabled) { 3295 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)"); 3296 vcpu->arch.sie_block->ecb |= ECB_GS; 3297 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; 3298 vcpu->arch.gs_enabled = 1; 3299 } 3300 save_access_regs(vcpu->arch.host_acrs); 3301 restore_access_regs(vcpu->run->s.regs.acrs); 3302 /* save host (userspace) fprs/vrs */ 3303 save_fpu_regs(); 3304 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc; 3305 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs; 3306 if (MACHINE_HAS_VX) 3307 current->thread.fpu.regs = vcpu->run->s.regs.vrs; 3308 else 3309 current->thread.fpu.regs = vcpu->run->s.regs.fprs; 3310 current->thread.fpu.fpc = vcpu->run->s.regs.fpc; 3311 if (test_fp_ctl(current->thread.fpu.fpc)) 3312 /* User space provided an invalid FPC, let's clear it */ 3313 current->thread.fpu.fpc = 0; 3314 if (MACHINE_HAS_GS) { 3315 preempt_disable(); 3316 __ctl_set_bit(2, 4); 3317 if (current->thread.gs_cb) { 3318 vcpu->arch.host_gscb = current->thread.gs_cb; 3319 save_gs_cb(vcpu->arch.host_gscb); 3320 } 3321 if (vcpu->arch.gs_enabled) { 3322 current->thread.gs_cb = (struct gs_cb *) 3323 &vcpu->run->s.regs.gscb; 3324 restore_gs_cb(current->thread.gs_cb); 3325 } 3326 preempt_enable(); 3327 } 3328 3329 kvm_run->kvm_dirty_regs = 0; 3330 } 3331 3332 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3333 { 3334 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask; 3335 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; 3336 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu); 3337 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128); 3338 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu); 3339 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc; 3340 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr; 3341 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp; 3342 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea; 3343 kvm_run->s.regs.pft = vcpu->arch.pfault_token; 3344 kvm_run->s.regs.pfs = vcpu->arch.pfault_select; 3345 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare; 3346 save_access_regs(vcpu->run->s.regs.acrs); 3347 restore_access_regs(vcpu->arch.host_acrs); 3348 /* Save guest register state */ 3349 save_fpu_regs(); 3350 vcpu->run->s.regs.fpc = current->thread.fpu.fpc; 3351 /* Restore will be done lazily at return */ 3352 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc; 3353 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs; 3354 if (MACHINE_HAS_GS) { 3355 __ctl_set_bit(2, 4); 3356 if (vcpu->arch.gs_enabled) 3357 save_gs_cb(current->thread.gs_cb); 3358 preempt_disable(); 3359 current->thread.gs_cb = vcpu->arch.host_gscb; 3360 restore_gs_cb(vcpu->arch.host_gscb); 3361 preempt_enable(); 3362 if (!vcpu->arch.host_gscb) 3363 __ctl_clear_bit(2, 4); 3364 vcpu->arch.host_gscb = NULL; 3365 } 3366 3367 } 3368 3369 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3370 { 3371 int rc; 3372 3373 if (kvm_run->immediate_exit) 3374 return -EINTR; 3375 3376 if (guestdbg_exit_pending(vcpu)) { 3377 kvm_s390_prepare_debug_exit(vcpu); 3378 return 0; 3379 } 3380 3381 kvm_sigset_activate(vcpu); 3382 3383 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) { 3384 kvm_s390_vcpu_start(vcpu); 3385 } else if (is_vcpu_stopped(vcpu)) { 3386 pr_err_ratelimited("can't run stopped vcpu %d\n", 3387 vcpu->vcpu_id); 3388 return -EINVAL; 3389 } 3390 3391 sync_regs(vcpu, kvm_run); 3392 enable_cpu_timer_accounting(vcpu); 
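
	/* all guest state is synced in; run the guest until it needs userspace */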
	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}

	disable_cpu_timer_accounting(vcpu);
	store_regs(vcpu, kvm_run);

	kvm_sigset_deactivate(vcpu);

	vcpu->stat.exit_userspace++;
	return rc;
}

/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp, cputm;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else
		gpa -= __LC_FPREGS_SAVE_AREA;

	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	cputm = kvm_s390_get_cpu_timer(vcpu);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}

int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * switch in the run ioctl. Let's update our copies before we save
	 * it into the save area
	 */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}

static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	if (!sclp.has_ibs)
		return;
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}

void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}

void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
3586 */ 3587 __enable_ibs_on_vcpu(started_vcpu); 3588 } 3589 3590 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 3591 return; 3592 } 3593 3594 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, 3595 struct kvm_enable_cap *cap) 3596 { 3597 int r; 3598 3599 if (cap->flags) 3600 return -EINVAL; 3601 3602 switch (cap->cap) { 3603 case KVM_CAP_S390_CSS_SUPPORT: 3604 if (!vcpu->kvm->arch.css_support) { 3605 vcpu->kvm->arch.css_support = 1; 3606 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support"); 3607 trace_kvm_s390_enable_css(vcpu->kvm); 3608 } 3609 r = 0; 3610 break; 3611 default: 3612 r = -EINVAL; 3613 break; 3614 } 3615 return r; 3616 } 3617 3618 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, 3619 struct kvm_s390_mem_op *mop) 3620 { 3621 void __user *uaddr = (void __user *)mop->buf; 3622 void *tmpbuf = NULL; 3623 int r, srcu_idx; 3624 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION 3625 | KVM_S390_MEMOP_F_CHECK_ONLY; 3626 3627 if (mop->flags & ~supported_flags) 3628 return -EINVAL; 3629 3630 if (mop->size > MEM_OP_MAX_SIZE) 3631 return -E2BIG; 3632 3633 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) { 3634 tmpbuf = vmalloc(mop->size); 3635 if (!tmpbuf) 3636 return -ENOMEM; 3637 } 3638 3639 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 3640 3641 switch (mop->op) { 3642 case KVM_S390_MEMOP_LOGICAL_READ: 3643 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { 3644 r = check_gva_range(vcpu, mop->gaddr, mop->ar, 3645 mop->size, GACC_FETCH); 3646 break; 3647 } 3648 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size); 3649 if (r == 0) { 3650 if (copy_to_user(uaddr, tmpbuf, mop->size)) 3651 r = -EFAULT; 3652 } 3653 break; 3654 case KVM_S390_MEMOP_LOGICAL_WRITE: 3655 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { 3656 r = check_gva_range(vcpu, mop->gaddr, mop->ar, 3657 mop->size, GACC_STORE); 3658 break; 3659 } 3660 if (copy_from_user(tmpbuf, uaddr, mop->size)) { 3661 r = -EFAULT; 3662 break; 3663 } 3664 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size); 3665 break; 3666 default: 3667 r = -EINVAL; 3668 } 3669 3670 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); 3671 3672 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0) 3673 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); 3674 3675 vfree(tmpbuf); 3676 return r; 3677 } 3678 3679 long kvm_arch_vcpu_ioctl(struct file *filp, 3680 unsigned int ioctl, unsigned long arg) 3681 { 3682 struct kvm_vcpu *vcpu = filp->private_data; 3683 void __user *argp = (void __user *)arg; 3684 int idx; 3685 long r; 3686 3687 switch (ioctl) { 3688 case KVM_S390_IRQ: { 3689 struct kvm_s390_irq s390irq; 3690 3691 r = -EFAULT; 3692 if (copy_from_user(&s390irq, argp, sizeof(s390irq))) 3693 break; 3694 r = kvm_s390_inject_vcpu(vcpu, &s390irq); 3695 break; 3696 } 3697 case KVM_S390_INTERRUPT: { 3698 struct kvm_s390_interrupt s390int; 3699 struct kvm_s390_irq s390irq; 3700 3701 r = -EFAULT; 3702 if (copy_from_user(&s390int, argp, sizeof(s390int))) 3703 break; 3704 if (s390int_to_s390irq(&s390int, &s390irq)) 3705 return -EINVAL; 3706 r = kvm_s390_inject_vcpu(vcpu, &s390irq); 3707 break; 3708 } 3709 case KVM_S390_STORE_STATUS: 3710 idx = srcu_read_lock(&vcpu->kvm->srcu); 3711 r = kvm_s390_vcpu_store_status(vcpu, arg); 3712 srcu_read_unlock(&vcpu->kvm->srcu, idx); 3713 break; 3714 case KVM_S390_SET_INITIAL_PSW: { 3715 psw_t psw; 3716 3717 r = -EFAULT; 3718 if (copy_from_user(&psw, argp, sizeof(psw))) 3719 break; 3720 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw); 3721 break; 3722 } 3723 case 
KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;
		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}

int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks.
We can have memory slots which have to be 3862 located/ended at a segment boundary (1MB). The memory in userland is 3863 ok to be fragmented into various different vmas. It is okay to mmap() 3864 and munmap() stuff in this slot after doing this call at any time */ 3865 3866 if (mem->userspace_addr & 0xffffful) 3867 return -EINVAL; 3868 3869 if (mem->memory_size & 0xffffful) 3870 return -EINVAL; 3871 3872 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit) 3873 return -EINVAL; 3874 3875 return 0; 3876 } 3877 3878 void kvm_arch_commit_memory_region(struct kvm *kvm, 3879 const struct kvm_userspace_memory_region *mem, 3880 const struct kvm_memory_slot *old, 3881 const struct kvm_memory_slot *new, 3882 enum kvm_mr_change change) 3883 { 3884 int rc; 3885 3886 /* If the basics of the memslot do not change, we do not want 3887 * to update the gmap. Every update causes several unnecessary 3888 * segment translation exceptions. This is usually handled just 3889 * fine by the normal fault handler + gmap, but it will also 3890 * cause faults on the prefix page of running guest CPUs. 3891 */ 3892 if (old->userspace_addr == mem->userspace_addr && 3893 old->base_gfn * PAGE_SIZE == mem->guest_phys_addr && 3894 old->npages * PAGE_SIZE == mem->memory_size) 3895 return; 3896 3897 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr, 3898 mem->guest_phys_addr, mem->memory_size); 3899 if (rc) 3900 pr_warn("failed to commit memory region\n"); 3901 return; 3902 } 3903 3904 static inline unsigned long nonhyp_mask(int i) 3905 { 3906 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30; 3907 3908 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4); 3909 } 3910 3911 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) 3912 { 3913 vcpu->valid_wakeup = false; 3914 } 3915 3916 static int __init kvm_s390_init(void) 3917 { 3918 int i; 3919 3920 if (!sclp.has_sief2) { 3921 pr_info("SIE not available\n"); 3922 return -ENODEV; 3923 } 3924 3925 for (i = 0; i < 16; i++) 3926 kvm_s390_fac_list_mask[i] |= 3927 S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i); 3928 3929 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); 3930 } 3931 3932 static void __exit kvm_s390_exit(void) 3933 { 3934 kvm_exit(); 3935 } 3936 3937 module_init(kvm_s390_init); 3938 module_exit(kvm_s390_exit); 3939 3940 /* 3941 * Enable autoloading of the kvm module. 3942 * Note that we add the module alias here instead of virt/kvm/kvm_main.c 3943 * since x86 takes a different approach. 3944 */ 3945 #include <linux/miscdevice.h> 3946 MODULE_ALIAS_MISCDEV(KVM_MINOR); 3947 MODULE_ALIAS("devname:kvm"); 3948