1 /* 2 * hosting zSeries kernel virtual machines 3 * 4 * Copyright IBM Corp. 2008, 2009 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License (version 2 only) 8 * as published by the Free Software Foundation. 9 * 10 * Author(s): Carsten Otte <cotte@de.ibm.com> 11 * Christian Borntraeger <borntraeger@de.ibm.com> 12 * Heiko Carstens <heiko.carstens@de.ibm.com> 13 * Christian Ehrhardt <ehrhardt@de.ibm.com> 14 * Jason J. Herne <jjherne@us.ibm.com> 15 */ 16 17 #include <linux/compiler.h> 18 #include <linux/err.h> 19 #include <linux/fs.h> 20 #include <linux/hrtimer.h> 21 #include <linux/init.h> 22 #include <linux/kvm.h> 23 #include <linux/kvm_host.h> 24 #include <linux/mman.h> 25 #include <linux/module.h> 26 #include <linux/moduleparam.h> 27 #include <linux/random.h> 28 #include <linux/slab.h> 29 #include <linux/timer.h> 30 #include <linux/vmalloc.h> 31 #include <linux/bitmap.h> 32 #include <linux/sched/signal.h> 33 #include <linux/string.h> 34 35 #include <asm/asm-offsets.h> 36 #include <asm/lowcore.h> 37 #include <asm/stp.h> 38 #include <asm/pgtable.h> 39 #include <asm/gmap.h> 40 #include <asm/nmi.h> 41 #include <asm/switch_to.h> 42 #include <asm/isc.h> 43 #include <asm/sclp.h> 44 #include <asm/cpacf.h> 45 #include <asm/timex.h> 46 #include "kvm-s390.h" 47 #include "gaccess.h" 48 49 #define KMSG_COMPONENT "kvm-s390" 50 #undef pr_fmt 51 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 52 53 #define CREATE_TRACE_POINTS 54 #include "trace.h" 55 #include "trace-s390.h" 56 57 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */ 58 #define LOCAL_IRQS 32 59 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \ 60 (KVM_MAX_VCPUS + LOCAL_IRQS)) 61 62 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU 63 64 struct kvm_stats_debugfs_item debugfs_entries[] = { 65 { "userspace_handled", VCPU_STAT(exit_userspace) }, 66 { "exit_null", VCPU_STAT(exit_null) }, 67 { "exit_validity", VCPU_STAT(exit_validity) }, 68 { "exit_stop_request", VCPU_STAT(exit_stop_request) }, 69 { "exit_external_request", VCPU_STAT(exit_external_request) }, 70 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) }, 71 { "exit_instruction", VCPU_STAT(exit_instruction) }, 72 { "exit_pei", VCPU_STAT(exit_pei) }, 73 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) }, 74 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) }, 75 { "exit_operation_exception", VCPU_STAT(exit_operation_exception) }, 76 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) }, 77 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) }, 78 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) }, 79 { "halt_wakeup", VCPU_STAT(halt_wakeup) }, 80 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) }, 81 { "instruction_lctl", VCPU_STAT(instruction_lctl) }, 82 { "instruction_stctl", VCPU_STAT(instruction_stctl) }, 83 { "instruction_stctg", VCPU_STAT(instruction_stctg) }, 84 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) }, 85 { "deliver_external_call", VCPU_STAT(deliver_external_call) }, 86 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) }, 87 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) }, 88 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) }, 89 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) }, 90 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) }, 91 { "deliver_program_interruption", 
VCPU_STAT(deliver_program_int) }, 92 { "exit_wait_state", VCPU_STAT(exit_wait_state) }, 93 { "instruction_pfmf", VCPU_STAT(instruction_pfmf) }, 94 { "instruction_stidp", VCPU_STAT(instruction_stidp) }, 95 { "instruction_spx", VCPU_STAT(instruction_spx) }, 96 { "instruction_stpx", VCPU_STAT(instruction_stpx) }, 97 { "instruction_stap", VCPU_STAT(instruction_stap) }, 98 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) }, 99 { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) }, 100 { "instruction_stsch", VCPU_STAT(instruction_stsch) }, 101 { "instruction_chsc", VCPU_STAT(instruction_chsc) }, 102 { "instruction_essa", VCPU_STAT(instruction_essa) }, 103 { "instruction_stsi", VCPU_STAT(instruction_stsi) }, 104 { "instruction_stfl", VCPU_STAT(instruction_stfl) }, 105 { "instruction_tprot", VCPU_STAT(instruction_tprot) }, 106 { "instruction_sthyi", VCPU_STAT(instruction_sthyi) }, 107 { "instruction_sie", VCPU_STAT(instruction_sie) }, 108 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) }, 109 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) }, 110 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) }, 111 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) }, 112 { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) }, 113 { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) }, 114 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) }, 115 { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) }, 116 { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) }, 117 { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) }, 118 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) }, 119 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) }, 120 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) }, 121 { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) }, 122 { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) }, 123 { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) }, 124 { "diagnose_10", VCPU_STAT(diagnose_10) }, 125 { "diagnose_44", VCPU_STAT(diagnose_44) }, 126 { "diagnose_9c", VCPU_STAT(diagnose_9c) }, 127 { "diagnose_258", VCPU_STAT(diagnose_258) }, 128 { "diagnose_308", VCPU_STAT(diagnose_308) }, 129 { "diagnose_500", VCPU_STAT(diagnose_500) }, 130 { NULL } 131 }; 132 133 struct kvm_s390_tod_clock_ext { 134 __u8 epoch_idx; 135 __u64 tod; 136 __u8 reserved[7]; 137 } __packed; 138 139 /* allow nested virtualization in KVM (if enabled by user space) */ 140 static int nested; 141 module_param(nested, int, S_IRUGO); 142 MODULE_PARM_DESC(nested, "Nested virtualization support"); 143 144 /* upper facilities limit for kvm */ 145 unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM }; 146 147 unsigned long kvm_s390_fac_list_mask_size(void) 148 { 149 BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64); 150 return ARRAY_SIZE(kvm_s390_fac_list_mask); 151 } 152 153 /* available cpu features supported by kvm */ 154 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS); 155 /* available subfunctions indicated via query / "test bit" */ 156 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc; 157 158 static struct gmap_notifier gmap_notifier; 159 static struct gmap_notifier vsie_gmap_notifier; 160 
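/*
 * Usage sketch (assuming this code is built as the "kvm" module): nested
 * virtualization (vSIE) is off by default and, since "nested" above is a
 * read-only module parameter, it can only be selected at load time, e.g.
 *
 *	modprobe kvm nested=1
 *
 * or via the kernel command line as kvm.nested=1; the current setting is
 * visible under /sys/module/kvm/parameters/nested.
 */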
debug_info_t *kvm_s390_dbf; 161 162 /* Section: not file related */ 163 int kvm_arch_hardware_enable(void) 164 { 165 /* every s390 is virtualization enabled ;-) */ 166 return 0; 167 } 168 169 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, 170 unsigned long end); 171 172 /* 173 * This callback is executed during stop_machine(). All CPUs are therefore 174 * temporarily stopped. In order not to change guest behavior, we have to 175 * disable preemption whenever we touch the epoch of kvm and the VCPUs, 176 * so a CPU won't be stopped while calculating with the epoch. 177 */ 178 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val, 179 void *v) 180 { 181 struct kvm *kvm; 182 struct kvm_vcpu *vcpu; 183 int i; 184 unsigned long long *delta = v; 185 186 list_for_each_entry(kvm, &vm_list, vm_list) { 187 kvm->arch.epoch -= *delta; 188 kvm_for_each_vcpu(i, vcpu, kvm) { 189 vcpu->arch.sie_block->epoch -= *delta; 190 if (vcpu->arch.cputm_enabled) 191 vcpu->arch.cputm_start += *delta; 192 if (vcpu->arch.vsie_block) 193 vcpu->arch.vsie_block->epoch -= *delta; 194 } 195 } 196 return NOTIFY_OK; 197 } 198 199 static struct notifier_block kvm_clock_notifier = { 200 .notifier_call = kvm_clock_sync, 201 }; 202 203 int kvm_arch_hardware_setup(void) 204 { 205 gmap_notifier.notifier_call = kvm_gmap_notifier; 206 gmap_register_pte_notifier(&gmap_notifier); 207 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier; 208 gmap_register_pte_notifier(&vsie_gmap_notifier); 209 atomic_notifier_chain_register(&s390_epoch_delta_notifier, 210 &kvm_clock_notifier); 211 return 0; 212 } 213 214 void kvm_arch_hardware_unsetup(void) 215 { 216 gmap_unregister_pte_notifier(&gmap_notifier); 217 gmap_unregister_pte_notifier(&vsie_gmap_notifier); 218 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier, 219 &kvm_clock_notifier); 220 } 221 222 static void allow_cpu_feat(unsigned long nr) 223 { 224 set_bit_inv(nr, kvm_s390_available_cpu_feat); 225 } 226 227 static inline int plo_test_bit(unsigned char nr) 228 { 229 register unsigned long r0 asm("0") = (unsigned long) nr | 0x100; 230 int cc; 231 232 asm volatile( 233 /* Parameter registers are ignored for "test bit" */ 234 " plo 0,0,0,0(0)\n" 235 " ipm %0\n" 236 " srl %0,28\n" 237 : "=d" (cc) 238 : "d" (r0) 239 : "cc"); 240 return cc == 0; 241 } 242 243 static void kvm_s390_cpu_feat_init(void) 244 { 245 int i; 246 247 for (i = 0; i < 256; ++i) { 248 if (plo_test_bit(i)) 249 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7); 250 } 251 252 if (test_facility(28)) /* TOD-clock steering */ 253 ptff(kvm_s390_available_subfunc.ptff, 254 sizeof(kvm_s390_available_subfunc.ptff), 255 PTFF_QAF); 256 257 if (test_facility(17)) { /* MSA */ 258 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *) 259 kvm_s390_available_subfunc.kmac); 260 __cpacf_query(CPACF_KMC, (cpacf_mask_t *) 261 kvm_s390_available_subfunc.kmc); 262 __cpacf_query(CPACF_KM, (cpacf_mask_t *) 263 kvm_s390_available_subfunc.km); 264 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *) 265 kvm_s390_available_subfunc.kimd); 266 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *) 267 kvm_s390_available_subfunc.klmd); 268 } 269 if (test_facility(76)) /* MSA3 */ 270 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *) 271 kvm_s390_available_subfunc.pckmo); 272 if (test_facility(77)) { /* MSA4 */ 273 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *) 274 kvm_s390_available_subfunc.kmctr); 275 __cpacf_query(CPACF_KMF, (cpacf_mask_t *) 276 kvm_s390_available_subfunc.kmf); 277 __cpacf_query(CPACF_KMO, (cpacf_mask_t *) 
278 kvm_s390_available_subfunc.kmo); 279 __cpacf_query(CPACF_PCC, (cpacf_mask_t *) 280 kvm_s390_available_subfunc.pcc); 281 } 282 if (test_facility(57)) /* MSA5 */ 283 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *) 284 kvm_s390_available_subfunc.ppno); 285 286 if (test_facility(146)) /* MSA8 */ 287 __cpacf_query(CPACF_KMA, (cpacf_mask_t *) 288 kvm_s390_available_subfunc.kma); 289 290 if (MACHINE_HAS_ESOP) 291 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP); 292 /* 293 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow), 294 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing). 295 */ 296 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao || 297 !test_facility(3) || !nested) 298 return; 299 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2); 300 if (sclp.has_64bscao) 301 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO); 302 if (sclp.has_siif) 303 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF); 304 if (sclp.has_gpere) 305 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE); 306 if (sclp.has_gsls) 307 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS); 308 if (sclp.has_ib) 309 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB); 310 if (sclp.has_cei) 311 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI); 312 if (sclp.has_ibs) 313 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS); 314 if (sclp.has_kss) 315 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS); 316 /* 317 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make 318 * all skey handling functions read/set the skey from the PGSTE 319 * instead of the real storage key. 320 * 321 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make 322 * pages being detected as preserved although they are resident. 323 * 324 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will 325 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY. 326 * 327 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and 328 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be 329 * correctly shadowed. We can do that for the PGSTE but not for PTE.I. 330 * 331 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We 332 * cannot easily shadow the SCA because of the ipte lock. 333 */ 334 } 335 336 int kvm_arch_init(void *opaque) 337 { 338 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long)); 339 if (!kvm_s390_dbf) 340 return -ENOMEM; 341 342 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) { 343 debug_unregister(kvm_s390_dbf); 344 return -ENOMEM; 345 } 346 347 kvm_s390_cpu_feat_init(); 348 349 /* Register floating interrupt controller interface. 
*/ 350 return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC); 351 } 352 353 void kvm_arch_exit(void) 354 { 355 debug_unregister(kvm_s390_dbf); 356 } 357 358 /* Section: device related */ 359 long kvm_arch_dev_ioctl(struct file *filp, 360 unsigned int ioctl, unsigned long arg) 361 { 362 if (ioctl == KVM_S390_ENABLE_SIE) 363 return s390_enable_sie(); 364 return -EINVAL; 365 } 366 367 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) 368 { 369 int r; 370 371 switch (ext) { 372 case KVM_CAP_S390_PSW: 373 case KVM_CAP_S390_GMAP: 374 case KVM_CAP_SYNC_MMU: 375 #ifdef CONFIG_KVM_S390_UCONTROL 376 case KVM_CAP_S390_UCONTROL: 377 #endif 378 case KVM_CAP_ASYNC_PF: 379 case KVM_CAP_SYNC_REGS: 380 case KVM_CAP_ONE_REG: 381 case KVM_CAP_ENABLE_CAP: 382 case KVM_CAP_S390_CSS_SUPPORT: 383 case KVM_CAP_IOEVENTFD: 384 case KVM_CAP_DEVICE_CTRL: 385 case KVM_CAP_ENABLE_CAP_VM: 386 case KVM_CAP_S390_IRQCHIP: 387 case KVM_CAP_VM_ATTRIBUTES: 388 case KVM_CAP_MP_STATE: 389 case KVM_CAP_IMMEDIATE_EXIT: 390 case KVM_CAP_S390_INJECT_IRQ: 391 case KVM_CAP_S390_USER_SIGP: 392 case KVM_CAP_S390_USER_STSI: 393 case KVM_CAP_S390_SKEYS: 394 case KVM_CAP_S390_IRQ_STATE: 395 case KVM_CAP_S390_USER_INSTR0: 396 case KVM_CAP_S390_CMMA_MIGRATION: 397 case KVM_CAP_S390_AIS: 398 case KVM_CAP_S390_AIS_MIGRATION: 399 r = 1; 400 break; 401 case KVM_CAP_S390_MEM_OP: 402 r = MEM_OP_MAX_SIZE; 403 break; 404 case KVM_CAP_NR_VCPUS: 405 case KVM_CAP_MAX_VCPUS: 406 r = KVM_S390_BSCA_CPU_SLOTS; 407 if (!kvm_s390_use_sca_entries()) 408 r = KVM_MAX_VCPUS; 409 else if (sclp.has_esca && sclp.has_64bscao) 410 r = KVM_S390_ESCA_CPU_SLOTS; 411 break; 412 case KVM_CAP_NR_MEMSLOTS: 413 r = KVM_USER_MEM_SLOTS; 414 break; 415 case KVM_CAP_S390_COW: 416 r = MACHINE_HAS_ESOP; 417 break; 418 case KVM_CAP_S390_VECTOR_REGISTERS: 419 r = MACHINE_HAS_VX; 420 break; 421 case KVM_CAP_S390_RI: 422 r = test_facility(64); 423 break; 424 case KVM_CAP_S390_GS: 425 r = test_facility(133); 426 break; 427 default: 428 r = 0; 429 } 430 return r; 431 } 432 433 static void kvm_s390_sync_dirty_log(struct kvm *kvm, 434 struct kvm_memory_slot *memslot) 435 { 436 gfn_t cur_gfn, last_gfn; 437 unsigned long address; 438 struct gmap *gmap = kvm->arch.gmap; 439 440 /* Loop over all guest pages */ 441 last_gfn = memslot->base_gfn + memslot->npages; 442 for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) { 443 address = gfn_to_hva_memslot(memslot, cur_gfn); 444 445 if (test_and_clear_guest_dirty(gmap->mm, address)) 446 mark_page_dirty(kvm, cur_gfn); 447 if (fatal_signal_pending(current)) 448 return; 449 cond_resched(); 450 } 451 } 452 453 /* Section: vm related */ 454 static void sca_del_vcpu(struct kvm_vcpu *vcpu); 455 456 /* 457 * Get (and clear) the dirty memory log for a memory slot. 
458 */ 459 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, 460 struct kvm_dirty_log *log) 461 { 462 int r; 463 unsigned long n; 464 struct kvm_memslots *slots; 465 struct kvm_memory_slot *memslot; 466 int is_dirty = 0; 467 468 if (kvm_is_ucontrol(kvm)) 469 return -EINVAL; 470 471 mutex_lock(&kvm->slots_lock); 472 473 r = -EINVAL; 474 if (log->slot >= KVM_USER_MEM_SLOTS) 475 goto out; 476 477 slots = kvm_memslots(kvm); 478 memslot = id_to_memslot(slots, log->slot); 479 r = -ENOENT; 480 if (!memslot->dirty_bitmap) 481 goto out; 482 483 kvm_s390_sync_dirty_log(kvm, memslot); 484 r = kvm_get_dirty_log(kvm, log, &is_dirty); 485 if (r) 486 goto out; 487 488 /* Clear the dirty log */ 489 if (is_dirty) { 490 n = kvm_dirty_bitmap_bytes(memslot); 491 memset(memslot->dirty_bitmap, 0, n); 492 } 493 r = 0; 494 out: 495 mutex_unlock(&kvm->slots_lock); 496 return r; 497 } 498 499 static void icpt_operexc_on_all_vcpus(struct kvm *kvm) 500 { 501 unsigned int i; 502 struct kvm_vcpu *vcpu; 503 504 kvm_for_each_vcpu(i, vcpu, kvm) { 505 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu); 506 } 507 } 508 509 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) 510 { 511 int r; 512 513 if (cap->flags) 514 return -EINVAL; 515 516 switch (cap->cap) { 517 case KVM_CAP_S390_IRQCHIP: 518 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP"); 519 kvm->arch.use_irqchip = 1; 520 r = 0; 521 break; 522 case KVM_CAP_S390_USER_SIGP: 523 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP"); 524 kvm->arch.user_sigp = 1; 525 r = 0; 526 break; 527 case KVM_CAP_S390_VECTOR_REGISTERS: 528 mutex_lock(&kvm->lock); 529 if (kvm->created_vcpus) { 530 r = -EBUSY; 531 } else if (MACHINE_HAS_VX) { 532 set_kvm_facility(kvm->arch.model.fac_mask, 129); 533 set_kvm_facility(kvm->arch.model.fac_list, 129); 534 if (test_facility(134)) { 535 set_kvm_facility(kvm->arch.model.fac_mask, 134); 536 set_kvm_facility(kvm->arch.model.fac_list, 134); 537 } 538 if (test_facility(135)) { 539 set_kvm_facility(kvm->arch.model.fac_mask, 135); 540 set_kvm_facility(kvm->arch.model.fac_list, 135); 541 } 542 r = 0; 543 } else 544 r = -EINVAL; 545 mutex_unlock(&kvm->lock); 546 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s", 547 r ? "(not available)" : "(success)"); 548 break; 549 case KVM_CAP_S390_RI: 550 r = -EINVAL; 551 mutex_lock(&kvm->lock); 552 if (kvm->created_vcpus) { 553 r = -EBUSY; 554 } else if (test_facility(64)) { 555 set_kvm_facility(kvm->arch.model.fac_mask, 64); 556 set_kvm_facility(kvm->arch.model.fac_list, 64); 557 r = 0; 558 } 559 mutex_unlock(&kvm->lock); 560 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s", 561 r ? "(not available)" : "(success)"); 562 break; 563 case KVM_CAP_S390_AIS: 564 mutex_lock(&kvm->lock); 565 if (kvm->created_vcpus) { 566 r = -EBUSY; 567 } else { 568 set_kvm_facility(kvm->arch.model.fac_mask, 72); 569 set_kvm_facility(kvm->arch.model.fac_list, 72); 570 r = 0; 571 } 572 mutex_unlock(&kvm->lock); 573 VM_EVENT(kvm, 3, "ENABLE: AIS %s", 574 r ? "(not available)" : "(success)"); 575 break; 576 case KVM_CAP_S390_GS: 577 r = -EINVAL; 578 mutex_lock(&kvm->lock); 579 if (atomic_read(&kvm->online_vcpus)) { 580 r = -EBUSY; 581 } else if (test_facility(133)) { 582 set_kvm_facility(kvm->arch.model.fac_mask, 133); 583 set_kvm_facility(kvm->arch.model.fac_list, 133); 584 r = 0; 585 } 586 mutex_unlock(&kvm->lock); 587 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s", 588 r ? 
"(not available)" : "(success)"); 589 break; 590 case KVM_CAP_S390_USER_STSI: 591 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI"); 592 kvm->arch.user_stsi = 1; 593 r = 0; 594 break; 595 case KVM_CAP_S390_USER_INSTR0: 596 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0"); 597 kvm->arch.user_instr0 = 1; 598 icpt_operexc_on_all_vcpus(kvm); 599 r = 0; 600 break; 601 default: 602 r = -EINVAL; 603 break; 604 } 605 return r; 606 } 607 608 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) 609 { 610 int ret; 611 612 switch (attr->attr) { 613 case KVM_S390_VM_MEM_LIMIT_SIZE: 614 ret = 0; 615 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes", 616 kvm->arch.mem_limit); 617 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr)) 618 ret = -EFAULT; 619 break; 620 default: 621 ret = -ENXIO; 622 break; 623 } 624 return ret; 625 } 626 627 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) 628 { 629 int ret; 630 unsigned int idx; 631 switch (attr->attr) { 632 case KVM_S390_VM_MEM_ENABLE_CMMA: 633 ret = -ENXIO; 634 if (!sclp.has_cmma) 635 break; 636 637 ret = -EBUSY; 638 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support"); 639 mutex_lock(&kvm->lock); 640 if (!kvm->created_vcpus) { 641 kvm->arch.use_cmma = 1; 642 ret = 0; 643 } 644 mutex_unlock(&kvm->lock); 645 break; 646 case KVM_S390_VM_MEM_CLR_CMMA: 647 ret = -ENXIO; 648 if (!sclp.has_cmma) 649 break; 650 ret = -EINVAL; 651 if (!kvm->arch.use_cmma) 652 break; 653 654 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states"); 655 mutex_lock(&kvm->lock); 656 idx = srcu_read_lock(&kvm->srcu); 657 s390_reset_cmma(kvm->arch.gmap->mm); 658 srcu_read_unlock(&kvm->srcu, idx); 659 mutex_unlock(&kvm->lock); 660 ret = 0; 661 break; 662 case KVM_S390_VM_MEM_LIMIT_SIZE: { 663 unsigned long new_limit; 664 665 if (kvm_is_ucontrol(kvm)) 666 return -EINVAL; 667 668 if (get_user(new_limit, (u64 __user *)attr->addr)) 669 return -EFAULT; 670 671 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT && 672 new_limit > kvm->arch.mem_limit) 673 return -E2BIG; 674 675 if (!new_limit) 676 return -EINVAL; 677 678 /* gmap_create takes last usable address */ 679 if (new_limit != KVM_S390_NO_MEM_LIMIT) 680 new_limit -= 1; 681 682 ret = -EBUSY; 683 mutex_lock(&kvm->lock); 684 if (!kvm->created_vcpus) { 685 /* gmap_create will round the limit up */ 686 struct gmap *new = gmap_create(current->mm, new_limit); 687 688 if (!new) { 689 ret = -ENOMEM; 690 } else { 691 gmap_remove(kvm->arch.gmap); 692 new->private = kvm; 693 kvm->arch.gmap = new; 694 ret = 0; 695 } 696 } 697 mutex_unlock(&kvm->lock); 698 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit); 699 VM_EVENT(kvm, 3, "New guest asce: 0x%pK", 700 (void *) kvm->arch.gmap->asce); 701 break; 702 } 703 default: 704 ret = -ENXIO; 705 break; 706 } 707 return ret; 708 } 709 710 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu); 711 712 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr) 713 { 714 struct kvm_vcpu *vcpu; 715 int i; 716 717 if (!test_kvm_facility(kvm, 76)) 718 return -EINVAL; 719 720 mutex_lock(&kvm->lock); 721 switch (attr->attr) { 722 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW: 723 get_random_bytes( 724 kvm->arch.crypto.crycb->aes_wrapping_key_mask, 725 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); 726 kvm->arch.crypto.aes_kw = 1; 727 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support"); 728 break; 729 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW: 730 get_random_bytes( 731 
kvm->arch.crypto.crycb->dea_wrapping_key_mask, 732 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); 733 kvm->arch.crypto.dea_kw = 1; 734 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support"); 735 break; 736 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW: 737 kvm->arch.crypto.aes_kw = 0; 738 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0, 739 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); 740 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support"); 741 break; 742 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW: 743 kvm->arch.crypto.dea_kw = 0; 744 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0, 745 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); 746 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support"); 747 break; 748 default: 749 mutex_unlock(&kvm->lock); 750 return -ENXIO; 751 } 752 753 kvm_for_each_vcpu(i, vcpu, kvm) { 754 kvm_s390_vcpu_crypto_setup(vcpu); 755 exit_sie(vcpu); 756 } 757 mutex_unlock(&kvm->lock); 758 return 0; 759 } 760 761 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req) 762 { 763 int cx; 764 struct kvm_vcpu *vcpu; 765 766 kvm_for_each_vcpu(cx, vcpu, kvm) 767 kvm_s390_sync_request(req, vcpu); 768 } 769 770 /* 771 * Must be called with kvm->srcu held to avoid races on memslots, and with 772 * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration. 773 */ 774 static int kvm_s390_vm_start_migration(struct kvm *kvm) 775 { 776 struct kvm_s390_migration_state *mgs; 777 struct kvm_memory_slot *ms; 778 /* should be the only one */ 779 struct kvm_memslots *slots; 780 unsigned long ram_pages; 781 int slotnr; 782 783 /* migration mode already enabled */ 784 if (kvm->arch.migration_state) 785 return 0; 786 787 slots = kvm_memslots(kvm); 788 if (!slots || !slots->used_slots) 789 return -EINVAL; 790 791 mgs = kzalloc(sizeof(*mgs), GFP_KERNEL); 792 if (!mgs) 793 return -ENOMEM; 794 kvm->arch.migration_state = mgs; 795 796 if (kvm->arch.use_cmma) { 797 /* 798 * Get the last slot. They should be sorted by base_gfn, so the 799 * last slot is also the one at the end of the address space. 800 * We have verified above that at least one slot is present. 801 */ 802 ms = slots->memslots + slots->used_slots - 1; 803 /* round up so we only use full longs */ 804 ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG); 805 /* allocate enough bytes to store all the bits */ 806 mgs->pgste_bitmap = vmalloc(ram_pages / 8); 807 if (!mgs->pgste_bitmap) { 808 kfree(mgs); 809 kvm->arch.migration_state = NULL; 810 return -ENOMEM; 811 } 812 813 mgs->bitmap_size = ram_pages; 814 atomic64_set(&mgs->dirty_pages, ram_pages); 815 /* mark all the pages in active slots as dirty */ 816 for (slotnr = 0; slotnr < slots->used_slots; slotnr++) { 817 ms = slots->memslots + slotnr; 818 bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages); 819 } 820 821 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION); 822 } 823 return 0; 824 } 825 826 /* 827 * Must be called with kvm->lock to avoid races with ourselves and 828 * kvm_s390_vm_start_migration. 
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	struct kvm_s390_migration_state *mgs;

	/* migration mode already disabled */
	if (!kvm->arch.migration_state)
		return 0;
	mgs = kvm->arch.migration_state;
	kvm->arch.migration_state = NULL;

	if (kvm->arch.use_cmma) {
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
		vfree(mgs->pgste_bitmap);
	}
	kfree(mgs);
	return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int idx, res = -ENXIO;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		idx = srcu_read_lock(&kvm->srcu);
		res = kvm_s390_vm_start_migration(kvm);
		srcu_read_unlock(&kvm->srcu, idx);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->lock);

	return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = (kvm->arch.migration_state != NULL);

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (test_kvm_facility(kvm, 139))
		kvm_s390_set_tod_clock_ext(kvm, &gtod);
	else if (gtod.epoch_idx == 0)
		kvm_s390_set_tod_clock(kvm, gtod.tod);
	else
		return -EINVAL;

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);

	return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
				       struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_s390_tod_clock_ext htod;

	preempt_disable();

	get_tod_clock_ext((char *)&htod);

	gtod->tod = htod.tod + kvm->arch.epoch;
	gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;

	if (gtod->tod < htod.tod)
		gtod->epoch_idx += 1;

	preempt_enable();
}

static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));

	if (test_kvm_facility(kvm, 139))
		kvm_s390_get_tod_clock_ext(kvm, &gtod);
	else
		gtod.tod = kvm_s390_get_tod_clock_fast(kvm);

	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);
	return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;
	int ret = -EBUSY;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (!atomic_read(&kvm->online_vcpus)) {
		bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
			    KVM_S390_VM_CPU_FEAT_NR_BITS);
		ret = 0;
	}
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once supported by kernel + hw, we have to store the subfunctions
	 * in kvm->arch and remember that user space configured them.
	 */
	return -ENXIO;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_list[0],
		 kvm->arch.model.fac_list[1],
		 kvm->arch.model.fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_mask[0],
		 mach->fac_mask[1],
		 mach->fac_mask[2]);
	VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_list[0],
		 mach->fac_list[1],
		 mach->fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}

static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once we can actually configure subfunctions (kernel + hw support),
	 * we have to check if they were already set by user space, if so copy
	 * them from kvm->arch.
	 */
	return -ENXIO;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;
	return 0;
}
static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_set_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_get_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = sclp.has_cmma ?
				0 : -ENXIO;
			break;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
			ret = 0;
			break;
		/* configuring subfunctions is not supported yet */
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_MIGRATION:
		ret = 0;
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_use_skey(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		r = get_guest_storage_key(current->mm, hva, &keys[i]);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);

	if (!r) {
		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
				 sizeof(uint8_t) * args->count);
		if (r)
			r = -EFAULT;
	}

	kvfree(keys);
	return r;
}

static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			break;
		}

		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);
out:
	kvfree(keys);
	return r;
}

/*
 * Base address and length must be sent at the start of each block, therefore
 * it's cheaper to send some clean data, as long as it's less than the size of
 * two longs.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/* for consistency */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)

/*
 * This function searches for the next page with dirty CMMA attributes, and
 * saves the attributes in the buffer up to either the end of the buffer or
 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
 * no trailing clean bytes are saved.
 * In case no dirty bits were found, or if CMMA was not enabled or used, the
 * output buffer will indicate 0 as length.
 */
static int kvm_s390_get_cmma_bits(struct kvm *kvm,
				  struct kvm_s390_cmma_log *args)
{
	struct kvm_s390_migration_state *s = kvm->arch.migration_state;
	unsigned long bufsize, hva, pgstev, i, next, cur;
	int srcu_idx, peek, r = 0, rr;
	u8 *res;

	cur = args->start_gfn;
	i = next = pgstev = 0;

	if (unlikely(!kvm->arch.use_cmma))
		return -ENXIO;
	/* Invalid/unsupported flags were specified */
	if (args->flags & ~KVM_S390_CMMA_PEEK)
		return -EINVAL;
	/* Migration mode query, and we are not doing a migration */
	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
	if (!peek && !s)
		return -EINVAL;
	/* CMMA is disabled or was not used, or the buffer has length zero */
	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
	if (!bufsize || !kvm->mm->context.use_cmma) {
		memset(args, 0, sizeof(*args));
		return 0;
	}

	if (!peek) {
		/* We are not peeking, and there are no dirty pages */
		if (!atomic64_read(&s->dirty_pages)) {
			memset(args, 0, sizeof(*args));
			return 0;
		}
		cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
				    args->start_gfn);
		if (cur >= s->bitmap_size)	/* nothing found, loop back */
			cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
		if (cur >= s->bitmap_size) {	/* again! (very unlikely) */
			memset(args, 0, sizeof(*args));
			return 0;
		}
		next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
	}

	res = vmalloc(bufsize);
	if (!res)
		return -ENOMEM;

	args->start_gfn = cur;

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	while (i < bufsize) {
		hva = gfn_to_hva(kvm, cur);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}
		/* decrement only if we actually flipped the bit to 0 */
		if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
			atomic64_dec(&s->dirty_pages);
		r = get_pgste(kvm->mm, hva, &pgstev);
		if (r < 0)
			pgstev = 0;
		/* save the value */
		res[i++] = (pgstev >> 24) & 0x43;
		/*
		 * if the next bit is too far away, stop.
1581 * if we reached the previous "next", find the next one 1582 */ 1583 if (!peek) { 1584 if (next > cur + KVM_S390_MAX_BIT_DISTANCE) 1585 break; 1586 if (cur == next) 1587 next = find_next_bit(s->pgste_bitmap, 1588 s->bitmap_size, cur + 1); 1589 /* reached the end of the bitmap or of the buffer, stop */ 1590 if ((next >= s->bitmap_size) || 1591 (next >= args->start_gfn + bufsize)) 1592 break; 1593 } 1594 cur++; 1595 } 1596 srcu_read_unlock(&kvm->srcu, srcu_idx); 1597 up_read(&kvm->mm->mmap_sem); 1598 args->count = i; 1599 args->remaining = s ? atomic64_read(&s->dirty_pages) : 0; 1600 1601 rr = copy_to_user((void __user *)args->values, res, args->count); 1602 if (rr) 1603 r = -EFAULT; 1604 1605 vfree(res); 1606 return r; 1607 } 1608 1609 /* 1610 * This function sets the CMMA attributes for the given pages. If the input 1611 * buffer has zero length, no action is taken, otherwise the attributes are 1612 * set and the mm->context.use_cmma flag is set. 1613 */ 1614 static int kvm_s390_set_cmma_bits(struct kvm *kvm, 1615 const struct kvm_s390_cmma_log *args) 1616 { 1617 unsigned long hva, mask, pgstev, i; 1618 uint8_t *bits; 1619 int srcu_idx, r = 0; 1620 1621 mask = args->mask; 1622 1623 if (!kvm->arch.use_cmma) 1624 return -ENXIO; 1625 /* invalid/unsupported flags */ 1626 if (args->flags != 0) 1627 return -EINVAL; 1628 /* Enforce sane limit on memory allocation */ 1629 if (args->count > KVM_S390_CMMA_SIZE_MAX) 1630 return -EINVAL; 1631 /* Nothing to do */ 1632 if (args->count == 0) 1633 return 0; 1634 1635 bits = vmalloc(sizeof(*bits) * args->count); 1636 if (!bits) 1637 return -ENOMEM; 1638 1639 r = copy_from_user(bits, (void __user *)args->values, args->count); 1640 if (r) { 1641 r = -EFAULT; 1642 goto out; 1643 } 1644 1645 down_read(&kvm->mm->mmap_sem); 1646 srcu_idx = srcu_read_lock(&kvm->srcu); 1647 for (i = 0; i < args->count; i++) { 1648 hva = gfn_to_hva(kvm, args->start_gfn + i); 1649 if (kvm_is_error_hva(hva)) { 1650 r = -EFAULT; 1651 break; 1652 } 1653 1654 pgstev = bits[i]; 1655 pgstev = pgstev << 24; 1656 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT; 1657 set_pgste_bits(kvm->mm, hva, mask, pgstev); 1658 } 1659 srcu_read_unlock(&kvm->srcu, srcu_idx); 1660 up_read(&kvm->mm->mmap_sem); 1661 1662 if (!kvm->mm->context.use_cmma) { 1663 down_write(&kvm->mm->mmap_sem); 1664 kvm->mm->context.use_cmma = 1; 1665 up_write(&kvm->mm->mmap_sem); 1666 } 1667 out: 1668 vfree(bits); 1669 return r; 1670 } 1671 1672 long kvm_arch_vm_ioctl(struct file *filp, 1673 unsigned int ioctl, unsigned long arg) 1674 { 1675 struct kvm *kvm = filp->private_data; 1676 void __user *argp = (void __user *)arg; 1677 struct kvm_device_attr attr; 1678 int r; 1679 1680 switch (ioctl) { 1681 case KVM_S390_INTERRUPT: { 1682 struct kvm_s390_interrupt s390int; 1683 1684 r = -EFAULT; 1685 if (copy_from_user(&s390int, argp, sizeof(s390int))) 1686 break; 1687 r = kvm_s390_inject_vm(kvm, &s390int); 1688 break; 1689 } 1690 case KVM_ENABLE_CAP: { 1691 struct kvm_enable_cap cap; 1692 r = -EFAULT; 1693 if (copy_from_user(&cap, argp, sizeof(cap))) 1694 break; 1695 r = kvm_vm_ioctl_enable_cap(kvm, &cap); 1696 break; 1697 } 1698 case KVM_CREATE_IRQCHIP: { 1699 struct kvm_irq_routing_entry routing; 1700 1701 r = -EINVAL; 1702 if (kvm->arch.use_irqchip) { 1703 /* Set up dummy routing. 
*/ 1704 memset(&routing, 0, sizeof(routing)); 1705 r = kvm_set_irq_routing(kvm, &routing, 0, 0); 1706 } 1707 break; 1708 } 1709 case KVM_SET_DEVICE_ATTR: { 1710 r = -EFAULT; 1711 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 1712 break; 1713 r = kvm_s390_vm_set_attr(kvm, &attr); 1714 break; 1715 } 1716 case KVM_GET_DEVICE_ATTR: { 1717 r = -EFAULT; 1718 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 1719 break; 1720 r = kvm_s390_vm_get_attr(kvm, &attr); 1721 break; 1722 } 1723 case KVM_HAS_DEVICE_ATTR: { 1724 r = -EFAULT; 1725 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 1726 break; 1727 r = kvm_s390_vm_has_attr(kvm, &attr); 1728 break; 1729 } 1730 case KVM_S390_GET_SKEYS: { 1731 struct kvm_s390_skeys args; 1732 1733 r = -EFAULT; 1734 if (copy_from_user(&args, argp, 1735 sizeof(struct kvm_s390_skeys))) 1736 break; 1737 r = kvm_s390_get_skeys(kvm, &args); 1738 break; 1739 } 1740 case KVM_S390_SET_SKEYS: { 1741 struct kvm_s390_skeys args; 1742 1743 r = -EFAULT; 1744 if (copy_from_user(&args, argp, 1745 sizeof(struct kvm_s390_skeys))) 1746 break; 1747 r = kvm_s390_set_skeys(kvm, &args); 1748 break; 1749 } 1750 case KVM_S390_GET_CMMA_BITS: { 1751 struct kvm_s390_cmma_log args; 1752 1753 r = -EFAULT; 1754 if (copy_from_user(&args, argp, sizeof(args))) 1755 break; 1756 r = kvm_s390_get_cmma_bits(kvm, &args); 1757 if (!r) { 1758 r = copy_to_user(argp, &args, sizeof(args)); 1759 if (r) 1760 r = -EFAULT; 1761 } 1762 break; 1763 } 1764 case KVM_S390_SET_CMMA_BITS: { 1765 struct kvm_s390_cmma_log args; 1766 1767 r = -EFAULT; 1768 if (copy_from_user(&args, argp, sizeof(args))) 1769 break; 1770 r = kvm_s390_set_cmma_bits(kvm, &args); 1771 break; 1772 } 1773 default: 1774 r = -ENOTTY; 1775 } 1776 1777 return r; 1778 } 1779 1780 static int kvm_s390_query_ap_config(u8 *config) 1781 { 1782 u32 fcn_code = 0x04000000UL; 1783 u32 cc = 0; 1784 1785 memset(config, 0, 128); 1786 asm volatile( 1787 "lgr 0,%1\n" 1788 "lgr 2,%2\n" 1789 ".long 0xb2af0000\n" /* PQAP(QCI) */ 1790 "0: ipm %0\n" 1791 "srl %0,28\n" 1792 "1:\n" 1793 EX_TABLE(0b, 1b) 1794 : "+r" (cc) 1795 : "r" (fcn_code), "r" (config) 1796 : "cc", "0", "2", "memory" 1797 ); 1798 1799 return cc; 1800 } 1801 1802 static int kvm_s390_apxa_installed(void) 1803 { 1804 u8 config[128]; 1805 int cc; 1806 1807 if (test_facility(12)) { 1808 cc = kvm_s390_query_ap_config(config); 1809 1810 if (cc) 1811 pr_err("PQAP(QCI) failed with cc=%d", cc); 1812 else 1813 return config[0] & 0x40; 1814 } 1815 1816 return 0; 1817 } 1818 1819 static void kvm_s390_set_crycb_format(struct kvm *kvm) 1820 { 1821 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb; 1822 1823 if (kvm_s390_apxa_installed()) 1824 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2; 1825 else 1826 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1; 1827 } 1828 1829 static u64 kvm_s390_get_initial_cpuid(void) 1830 { 1831 struct cpuid cpuid; 1832 1833 get_cpu_id(&cpuid); 1834 cpuid.version = 0xff; 1835 return *((u64 *) &cpuid); 1836 } 1837 1838 static void kvm_s390_crypto_init(struct kvm *kvm) 1839 { 1840 if (!test_kvm_facility(kvm, 76)) 1841 return; 1842 1843 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb; 1844 kvm_s390_set_crycb_format(kvm); 1845 1846 /* Enable AES/DEA protected key functions by default */ 1847 kvm->arch.crypto.aes_kw = 1; 1848 kvm->arch.crypto.dea_kw = 1; 1849 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 1850 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); 1851 
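	/*
	 * Likewise seed the DEA wrapping key mask with fresh random data;
	 * per-VM random wrapping keys mean that protected keys generated
	 * inside this VM are specific to it.
	 */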
get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 1852 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); 1853 } 1854 1855 static void sca_dispose(struct kvm *kvm) 1856 { 1857 if (kvm->arch.use_esca) 1858 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block)); 1859 else 1860 free_page((unsigned long)(kvm->arch.sca)); 1861 kvm->arch.sca = NULL; 1862 } 1863 1864 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) 1865 { 1866 gfp_t alloc_flags = GFP_KERNEL; 1867 int i, rc; 1868 char debug_name[16]; 1869 static unsigned long sca_offset; 1870 1871 rc = -EINVAL; 1872 #ifdef CONFIG_KVM_S390_UCONTROL 1873 if (type & ~KVM_VM_S390_UCONTROL) 1874 goto out_err; 1875 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN))) 1876 goto out_err; 1877 #else 1878 if (type) 1879 goto out_err; 1880 #endif 1881 1882 rc = s390_enable_sie(); 1883 if (rc) 1884 goto out_err; 1885 1886 rc = -ENOMEM; 1887 1888 kvm->arch.use_esca = 0; /* start with basic SCA */ 1889 if (!sclp.has_64bscao) 1890 alloc_flags |= GFP_DMA; 1891 rwlock_init(&kvm->arch.sca_lock); 1892 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags); 1893 if (!kvm->arch.sca) 1894 goto out_err; 1895 spin_lock(&kvm_lock); 1896 sca_offset += 16; 1897 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE) 1898 sca_offset = 0; 1899 kvm->arch.sca = (struct bsca_block *) 1900 ((char *) kvm->arch.sca + sca_offset); 1901 spin_unlock(&kvm_lock); 1902 1903 sprintf(debug_name, "kvm-%u", current->pid); 1904 1905 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long)); 1906 if (!kvm->arch.dbf) 1907 goto out_err; 1908 1909 kvm->arch.sie_page2 = 1910 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA); 1911 if (!kvm->arch.sie_page2) 1912 goto out_err; 1913 1914 /* Populate the facility mask initially. */ 1915 memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list, 1916 sizeof(S390_lowcore.stfle_fac_list)); 1917 for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) { 1918 if (i < kvm_s390_fac_list_mask_size()) 1919 kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i]; 1920 else 1921 kvm->arch.model.fac_mask[i] = 0UL; 1922 } 1923 1924 /* Populate the facility list initially. 
*/ 1925 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list; 1926 memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask, 1927 S390_ARCH_FAC_LIST_SIZE_BYTE); 1928 1929 /* we are always in czam mode - even on pre z14 machines */ 1930 set_kvm_facility(kvm->arch.model.fac_mask, 138); 1931 set_kvm_facility(kvm->arch.model.fac_list, 138); 1932 /* we emulate STHYI in kvm */ 1933 set_kvm_facility(kvm->arch.model.fac_mask, 74); 1934 set_kvm_facility(kvm->arch.model.fac_list, 74); 1935 if (MACHINE_HAS_TLB_GUEST) { 1936 set_kvm_facility(kvm->arch.model.fac_mask, 147); 1937 set_kvm_facility(kvm->arch.model.fac_list, 147); 1938 } 1939 1940 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid(); 1941 kvm->arch.model.ibc = sclp.ibc & 0x0fff; 1942 1943 kvm_s390_crypto_init(kvm); 1944 1945 mutex_init(&kvm->arch.float_int.ais_lock); 1946 kvm->arch.float_int.simm = 0; 1947 kvm->arch.float_int.nimm = 0; 1948 spin_lock_init(&kvm->arch.float_int.lock); 1949 for (i = 0; i < FIRQ_LIST_COUNT; i++) 1950 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]); 1951 init_waitqueue_head(&kvm->arch.ipte_wq); 1952 mutex_init(&kvm->arch.ipte_mutex); 1953 1954 debug_register_view(kvm->arch.dbf, &debug_sprintf_view); 1955 VM_EVENT(kvm, 3, "vm created with type %lu", type); 1956 1957 if (type & KVM_VM_S390_UCONTROL) { 1958 kvm->arch.gmap = NULL; 1959 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT; 1960 } else { 1961 if (sclp.hamax == U64_MAX) 1962 kvm->arch.mem_limit = TASK_SIZE_MAX; 1963 else 1964 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX, 1965 sclp.hamax + 1); 1966 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1); 1967 if (!kvm->arch.gmap) 1968 goto out_err; 1969 kvm->arch.gmap->private = kvm; 1970 kvm->arch.gmap->pfault_enabled = 0; 1971 } 1972 1973 kvm->arch.css_support = 0; 1974 kvm->arch.use_irqchip = 0; 1975 kvm->arch.epoch = 0; 1976 1977 spin_lock_init(&kvm->arch.start_stop_lock); 1978 kvm_s390_vsie_init(kvm); 1979 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid); 1980 1981 return 0; 1982 out_err: 1983 free_page((unsigned long)kvm->arch.sie_page2); 1984 debug_unregister(kvm->arch.dbf); 1985 sca_dispose(kvm); 1986 KVM_EVENT(3, "creation of vm failed: %d", rc); 1987 return rc; 1988 } 1989 1990 bool kvm_arch_has_vcpu_debugfs(void) 1991 { 1992 return false; 1993 } 1994 1995 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu) 1996 { 1997 return 0; 1998 } 1999 2000 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) 2001 { 2002 VCPU_EVENT(vcpu, 3, "%s", "free cpu"); 2003 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id); 2004 kvm_s390_clear_local_irqs(vcpu); 2005 kvm_clear_async_pf_completion_queue(vcpu); 2006 if (!kvm_is_ucontrol(vcpu->kvm)) 2007 sca_del_vcpu(vcpu); 2008 2009 if (kvm_is_ucontrol(vcpu->kvm)) 2010 gmap_remove(vcpu->arch.gmap); 2011 2012 if (vcpu->kvm->arch.use_cmma) 2013 kvm_s390_vcpu_unsetup_cmma(vcpu); 2014 free_page((unsigned long)(vcpu->arch.sie_block)); 2015 2016 kvm_vcpu_uninit(vcpu); 2017 kmem_cache_free(kvm_vcpu_cache, vcpu); 2018 } 2019 2020 static void kvm_free_vcpus(struct kvm *kvm) 2021 { 2022 unsigned int i; 2023 struct kvm_vcpu *vcpu; 2024 2025 kvm_for_each_vcpu(i, vcpu, kvm) 2026 kvm_arch_vcpu_destroy(vcpu); 2027 2028 mutex_lock(&kvm->lock); 2029 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) 2030 kvm->vcpus[i] = NULL; 2031 2032 atomic_set(&kvm->online_vcpus, 0); 2033 mutex_unlock(&kvm->lock); 2034 } 2035 2036 void kvm_arch_destroy_vm(struct kvm *kvm) 2037 { 2038 kvm_free_vcpus(kvm); 2039 sca_dispose(kvm); 2040 debug_unregister(kvm->arch.dbf); 2041 
free_page((unsigned long)kvm->arch.sie_page2); 2042 if (!kvm_is_ucontrol(kvm)) 2043 gmap_remove(kvm->arch.gmap); 2044 kvm_s390_destroy_adapters(kvm); 2045 kvm_s390_clear_float_irqs(kvm); 2046 kvm_s390_vsie_destroy(kvm); 2047 if (kvm->arch.migration_state) { 2048 vfree(kvm->arch.migration_state->pgste_bitmap); 2049 kfree(kvm->arch.migration_state); 2050 } 2051 KVM_EVENT(3, "vm 0x%pK destroyed", kvm); 2052 } 2053 2054 /* Section: vcpu related */ 2055 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu) 2056 { 2057 vcpu->arch.gmap = gmap_create(current->mm, -1UL); 2058 if (!vcpu->arch.gmap) 2059 return -ENOMEM; 2060 vcpu->arch.gmap->private = vcpu->kvm; 2061 2062 return 0; 2063 } 2064 2065 static void sca_del_vcpu(struct kvm_vcpu *vcpu) 2066 { 2067 if (!kvm_s390_use_sca_entries()) 2068 return; 2069 read_lock(&vcpu->kvm->arch.sca_lock); 2070 if (vcpu->kvm->arch.use_esca) { 2071 struct esca_block *sca = vcpu->kvm->arch.sca; 2072 2073 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); 2074 sca->cpu[vcpu->vcpu_id].sda = 0; 2075 } else { 2076 struct bsca_block *sca = vcpu->kvm->arch.sca; 2077 2078 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); 2079 sca->cpu[vcpu->vcpu_id].sda = 0; 2080 } 2081 read_unlock(&vcpu->kvm->arch.sca_lock); 2082 } 2083 2084 static void sca_add_vcpu(struct kvm_vcpu *vcpu) 2085 { 2086 if (!kvm_s390_use_sca_entries()) { 2087 struct bsca_block *sca = vcpu->kvm->arch.sca; 2088 2089 /* we still need the basic sca for the ipte control */ 2090 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2091 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; 2092 } 2093 read_lock(&vcpu->kvm->arch.sca_lock); 2094 if (vcpu->kvm->arch.use_esca) { 2095 struct esca_block *sca = vcpu->kvm->arch.sca; 2096 2097 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; 2098 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2099 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU; 2100 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; 2101 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); 2102 } else { 2103 struct bsca_block *sca = vcpu->kvm->arch.sca; 2104 2105 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; 2106 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2107 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; 2108 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); 2109 } 2110 read_unlock(&vcpu->kvm->arch.sca_lock); 2111 } 2112 2113 /* Basic SCA to Extended SCA data copy routines */ 2114 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s) 2115 { 2116 d->sda = s->sda; 2117 d->sigp_ctrl.c = s->sigp_ctrl.c; 2118 d->sigp_ctrl.scn = s->sigp_ctrl.scn; 2119 } 2120 2121 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s) 2122 { 2123 int i; 2124 2125 d->ipte_control = s->ipte_control; 2126 d->mcn[0] = s->mcn; 2127 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++) 2128 sca_copy_entry(&d->cpu[i], &s->cpu[i]); 2129 } 2130 2131 static int sca_switch_to_extended(struct kvm *kvm) 2132 { 2133 struct bsca_block *old_sca = kvm->arch.sca; 2134 struct esca_block *new_sca; 2135 struct kvm_vcpu *vcpu; 2136 unsigned int vcpu_idx; 2137 u32 scaol, scaoh; 2138 2139 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO); 2140 if (!new_sca) 2141 return -ENOMEM; 2142 2143 scaoh = (u32)((u64)(new_sca) >> 32); 2144 scaol = (u32)(u64)(new_sca) & ~0x3fU; 2145 2146 kvm_s390_vcpu_block_all(kvm); 2147 write_lock(&kvm->arch.sca_lock); 2148 2149 sca_copy_b_to_e(new_sca, old_sca); 2150 2151 
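	/*
	 * With all VCPUs blocked and sca_lock held for writing, no CPU can be
	 * running SIE against the old basic SCA while it is swapped out.  The
	 * loop below repoints every SIE control block at the new extended SCA:
	 * the 64-byte aligned origin is split into scaoh (upper 32 bits) and
	 * scaol (lower 32 bits), and ECB2_ESCA tells the hardware to interpret
	 * the SCA in extended format.
	 */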
kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) { 2152 vcpu->arch.sie_block->scaoh = scaoh; 2153 vcpu->arch.sie_block->scaol = scaol; 2154 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; 2155 } 2156 kvm->arch.sca = new_sca; 2157 kvm->arch.use_esca = 1; 2158 2159 write_unlock(&kvm->arch.sca_lock); 2160 kvm_s390_vcpu_unblock_all(kvm); 2161 2162 free_page((unsigned long)old_sca); 2163 2164 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)", 2165 old_sca, kvm->arch.sca); 2166 return 0; 2167 } 2168 2169 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id) 2170 { 2171 int rc; 2172 2173 if (!kvm_s390_use_sca_entries()) { 2174 if (id < KVM_MAX_VCPUS) 2175 return true; 2176 return false; 2177 } 2178 if (id < KVM_S390_BSCA_CPU_SLOTS) 2179 return true; 2180 if (!sclp.has_esca || !sclp.has_64bscao) 2181 return false; 2182 2183 mutex_lock(&kvm->lock); 2184 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm); 2185 mutex_unlock(&kvm->lock); 2186 2187 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS; 2188 } 2189 2190 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) 2191 { 2192 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 2193 kvm_clear_async_pf_completion_queue(vcpu); 2194 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX | 2195 KVM_SYNC_GPRS | 2196 KVM_SYNC_ACRS | 2197 KVM_SYNC_CRS | 2198 KVM_SYNC_ARCH0 | 2199 KVM_SYNC_PFAULT; 2200 kvm_s390_set_prefix(vcpu, 0); 2201 if (test_kvm_facility(vcpu->kvm, 64)) 2202 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; 2203 if (test_kvm_facility(vcpu->kvm, 133)) 2204 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB; 2205 /* fprs can be synchronized via vrs, even if the guest has no vx. With 2206 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format. 2207 */ 2208 if (MACHINE_HAS_VX) 2209 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS; 2210 else 2211 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS; 2212 2213 if (kvm_is_ucontrol(vcpu->kvm)) 2214 return __kvm_ucontrol_vcpu_init(vcpu); 2215 2216 return 0; 2217 } 2218 2219 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 2220 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2221 { 2222 WARN_ON_ONCE(vcpu->arch.cputm_start != 0); 2223 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 2224 vcpu->arch.cputm_start = get_tod_clock_fast(); 2225 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 2226 } 2227 2228 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 2229 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2230 { 2231 WARN_ON_ONCE(vcpu->arch.cputm_start == 0); 2232 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 2233 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start; 2234 vcpu->arch.cputm_start = 0; 2235 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 2236 } 2237 2238 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 2239 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2240 { 2241 WARN_ON_ONCE(vcpu->arch.cputm_enabled); 2242 vcpu->arch.cputm_enabled = true; 2243 __start_cpu_timer_accounting(vcpu); 2244 } 2245 2246 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 2247 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2248 { 2249 WARN_ON_ONCE(!vcpu->arch.cputm_enabled); 2250 __stop_cpu_timer_accounting(vcpu); 2251 vcpu->arch.cputm_enabled = false; 2252 } 2253 2254 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2255 { 2256 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 2257 
__enable_cpu_timer_accounting(vcpu); 2258 preempt_enable(); 2259 } 2260 2261 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2262 { 2263 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 2264 __disable_cpu_timer_accounting(vcpu); 2265 preempt_enable(); 2266 } 2267 2268 /* set the cpu timer - may only be called from the VCPU thread itself */ 2269 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm) 2270 { 2271 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 2272 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 2273 if (vcpu->arch.cputm_enabled) 2274 vcpu->arch.cputm_start = get_tod_clock_fast(); 2275 vcpu->arch.sie_block->cputm = cputm; 2276 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 2277 preempt_enable(); 2278 } 2279 2280 /* update and get the cpu timer - can also be called from other VCPU threads */ 2281 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu) 2282 { 2283 unsigned int seq; 2284 __u64 value; 2285 2286 if (unlikely(!vcpu->arch.cputm_enabled)) 2287 return vcpu->arch.sie_block->cputm; 2288 2289 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 2290 do { 2291 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount); 2292 /* 2293 * If the writer would ever execute a read in the critical 2294 * section, e.g. in irq context, we have a deadlock. 2295 */ 2296 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu); 2297 value = vcpu->arch.sie_block->cputm; 2298 /* if cputm_start is 0, accounting is being started/stopped */ 2299 if (likely(vcpu->arch.cputm_start)) 2300 value -= get_tod_clock_fast() - vcpu->arch.cputm_start; 2301 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1)); 2302 preempt_enable(); 2303 return value; 2304 } 2305 2306 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 2307 { 2308 2309 gmap_enable(vcpu->arch.enabled_gmap); 2310 atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); 2311 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) 2312 __start_cpu_timer_accounting(vcpu); 2313 vcpu->cpu = cpu; 2314 } 2315 2316 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 2317 { 2318 vcpu->cpu = -1; 2319 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) 2320 __stop_cpu_timer_accounting(vcpu); 2321 atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); 2322 vcpu->arch.enabled_gmap = gmap_get_enabled(); 2323 gmap_disable(vcpu->arch.enabled_gmap); 2324 2325 } 2326 2327 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) 2328 { 2329 /* this equals initial cpu reset in pop, but we don't switch to ESA */ 2330 vcpu->arch.sie_block->gpsw.mask = 0UL; 2331 vcpu->arch.sie_block->gpsw.addr = 0UL; 2332 kvm_s390_set_prefix(vcpu, 0); 2333 kvm_s390_set_cpu_timer(vcpu, 0); 2334 vcpu->arch.sie_block->ckc = 0UL; 2335 vcpu->arch.sie_block->todpr = 0; 2336 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64)); 2337 vcpu->arch.sie_block->gcr[0] = 0xE0UL; 2338 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL; 2339 /* make sure the new fpc will be lazily loaded */ 2340 save_fpu_regs(); 2341 current->thread.fpu.fpc = 0; 2342 vcpu->arch.sie_block->gbea = 1; 2343 vcpu->arch.sie_block->pp = 0; 2344 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 2345 kvm_clear_async_pf_completion_queue(vcpu); 2346 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) 2347 kvm_s390_vcpu_stop(vcpu); 2348 kvm_s390_clear_local_irqs(vcpu); 2349 } 2350 2351 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) 2352 { 2353 mutex_lock(&vcpu->kvm->lock); 2354 preempt_disable(); 2355 
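	/*
	 * The epoch is the signed offset that SIE adds to the host TOD clock
	 * whenever the guest reads its clock, i.e. conceptually
	 *
	 *	guest_tod = host_tod + kvm->arch.epoch
	 *
	 * kvm_s390_set_tod_clock() computes it as the requested guest TOD
	 * minus the current host TOD.  Copying the VM-wide value here makes a
	 * freshly created VCPU agree with the VCPUs that already exist; the
	 * lock and preemption protection above keep it stable while we do so.
	 */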
vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch; 2356 preempt_enable(); 2357 mutex_unlock(&vcpu->kvm->lock); 2358 if (!kvm_is_ucontrol(vcpu->kvm)) { 2359 vcpu->arch.gmap = vcpu->kvm->arch.gmap; 2360 sca_add_vcpu(vcpu); 2361 } 2362 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0) 2363 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC; 2364 /* make vcpu_load load the right gmap on the first trigger */ 2365 vcpu->arch.enabled_gmap = vcpu->arch.gmap; 2366 } 2367 2368 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu) 2369 { 2370 if (!test_kvm_facility(vcpu->kvm, 76)) 2371 return; 2372 2373 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA); 2374 2375 if (vcpu->kvm->arch.crypto.aes_kw) 2376 vcpu->arch.sie_block->ecb3 |= ECB3_AES; 2377 if (vcpu->kvm->arch.crypto.dea_kw) 2378 vcpu->arch.sie_block->ecb3 |= ECB3_DEA; 2379 2380 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd; 2381 } 2382 2383 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu) 2384 { 2385 free_page(vcpu->arch.sie_block->cbrlo); 2386 vcpu->arch.sie_block->cbrlo = 0; 2387 } 2388 2389 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu) 2390 { 2391 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL); 2392 if (!vcpu->arch.sie_block->cbrlo) 2393 return -ENOMEM; 2394 2395 vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI; 2396 return 0; 2397 } 2398 2399 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu) 2400 { 2401 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model; 2402 2403 vcpu->arch.sie_block->ibc = model->ibc; 2404 if (test_kvm_facility(vcpu->kvm, 7)) 2405 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list; 2406 } 2407 2408 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) 2409 { 2410 int rc = 0; 2411 2412 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | 2413 CPUSTAT_SM | 2414 CPUSTAT_STOPPED); 2415 2416 if (test_kvm_facility(vcpu->kvm, 78)) 2417 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags); 2418 else if (test_kvm_facility(vcpu->kvm, 8)) 2419 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags); 2420 2421 kvm_s390_vcpu_setup_model(vcpu); 2422 2423 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */ 2424 if (MACHINE_HAS_ESOP) 2425 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT; 2426 if (test_kvm_facility(vcpu->kvm, 9)) 2427 vcpu->arch.sie_block->ecb |= ECB_SRSI; 2428 if (test_kvm_facility(vcpu->kvm, 73)) 2429 vcpu->arch.sie_block->ecb |= ECB_TE; 2430 2431 if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi) 2432 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI; 2433 if (test_kvm_facility(vcpu->kvm, 130)) 2434 vcpu->arch.sie_block->ecb2 |= ECB2_IEP; 2435 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI; 2436 if (sclp.has_cei) 2437 vcpu->arch.sie_block->eca |= ECA_CEI; 2438 if (sclp.has_ib) 2439 vcpu->arch.sie_block->eca |= ECA_IB; 2440 if (sclp.has_siif) 2441 vcpu->arch.sie_block->eca |= ECA_SII; 2442 if (sclp.has_sigpif) 2443 vcpu->arch.sie_block->eca |= ECA_SIGPI; 2444 if (test_kvm_facility(vcpu->kvm, 129)) { 2445 vcpu->arch.sie_block->eca |= ECA_VX; 2446 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; 2447 } 2448 if (test_kvm_facility(vcpu->kvm, 139)) 2449 vcpu->arch.sie_block->ecd |= ECD_MEF; 2450 2451 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx) 2452 | SDNXC; 2453 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb; 2454 2455 if (sclp.has_kss) 2456 atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags); 2457 else 2458 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; 2459 2460 
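	/*
	 * Storage-key handling: with the keyless subset facility the VCPU
	 * starts in KSS mode and leaves it only once the guest actually uses
	 * storage keys; without it, ISKE, SSKE and RRBE are intercepted so
	 * that key handling can be enabled lazily on first use.  The CMMA
	 * setup below allocates the buffer (cbrlo) in which SIE logs the
	 * guest pages released via ESSA.
	 */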
if (vcpu->kvm->arch.use_cmma) { 2461 rc = kvm_s390_vcpu_setup_cmma(vcpu); 2462 if (rc) 2463 return rc; 2464 } 2465 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 2466 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; 2467 2468 kvm_s390_vcpu_crypto_setup(vcpu); 2469 2470 return rc; 2471 } 2472 2473 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, 2474 unsigned int id) 2475 { 2476 struct kvm_vcpu *vcpu; 2477 struct sie_page *sie_page; 2478 int rc = -EINVAL; 2479 2480 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id)) 2481 goto out; 2482 2483 rc = -ENOMEM; 2484 2485 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); 2486 if (!vcpu) 2487 goto out; 2488 2489 BUILD_BUG_ON(sizeof(struct sie_page) != 4096); 2490 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL); 2491 if (!sie_page) 2492 goto out_free_cpu; 2493 2494 vcpu->arch.sie_block = &sie_page->sie_block; 2495 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb; 2496 2497 /* the real guest size will always be smaller than msl */ 2498 vcpu->arch.sie_block->mso = 0; 2499 vcpu->arch.sie_block->msl = sclp.hamax; 2500 2501 vcpu->arch.sie_block->icpua = id; 2502 spin_lock_init(&vcpu->arch.local_int.lock); 2503 vcpu->arch.local_int.float_int = &kvm->arch.float_int; 2504 vcpu->arch.local_int.wq = &vcpu->wq; 2505 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags; 2506 seqcount_init(&vcpu->arch.cputm_seqcount); 2507 2508 rc = kvm_vcpu_init(vcpu, kvm, id); 2509 if (rc) 2510 goto out_free_sie_block; 2511 VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu, 2512 vcpu->arch.sie_block); 2513 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block); 2514 2515 return vcpu; 2516 out_free_sie_block: 2517 free_page((unsigned long)(vcpu->arch.sie_block)); 2518 out_free_cpu: 2519 kmem_cache_free(kvm_vcpu_cache, vcpu); 2520 out: 2521 return ERR_PTR(rc); 2522 } 2523 2524 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 2525 { 2526 return kvm_s390_vcpu_has_irq(vcpu, 0); 2527 } 2528 2529 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) 2530 { 2531 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE); 2532 } 2533 2534 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu) 2535 { 2536 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); 2537 exit_sie(vcpu); 2538 } 2539 2540 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu) 2541 { 2542 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); 2543 } 2544 2545 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu) 2546 { 2547 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20); 2548 exit_sie(vcpu); 2549 } 2550 2551 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu) 2552 { 2553 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20); 2554 } 2555 2556 /* 2557 * Kick a guest cpu out of SIE and wait until SIE is not running. 2558 * If the CPU is not running (e.g. waiting as idle) the function will 2559 * return immediately. 
*/ 2560 void exit_sie(struct kvm_vcpu *vcpu) 2561 { 2562 atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags); 2563 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE) 2564 cpu_relax(); 2565 } 2566 2567 /* Kick a guest cpu out of SIE to process a request synchronously */ 2568 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu) 2569 { 2570 kvm_make_request(req, vcpu); 2571 kvm_s390_vcpu_request(vcpu); 2572 } 2573 2574 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, 2575 unsigned long end) 2576 { 2577 struct kvm *kvm = gmap->private; 2578 struct kvm_vcpu *vcpu; 2579 unsigned long prefix; 2580 int i; 2581 2582 if (gmap_is_shadow(gmap)) 2583 return; 2584 if (start >= 1UL << 31) 2585 /* We are only interested in prefix pages */ 2586 return; 2587 kvm_for_each_vcpu(i, vcpu, kvm) { 2588 /* match against both prefix pages */ 2589 prefix = kvm_s390_get_prefix(vcpu); 2590 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) { 2591 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx", 2592 start, end); 2593 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu); 2594 } 2595 } 2596 } 2597 2598 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) 2599 { 2600 /* kvm common code refers to this, but never calls it */ 2601 BUG(); 2602 return 0; 2603 } 2604 2605 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, 2606 struct kvm_one_reg *reg) 2607 { 2608 int r = -EINVAL; 2609 2610 switch (reg->id) { 2611 case KVM_REG_S390_TODPR: 2612 r = put_user(vcpu->arch.sie_block->todpr, 2613 (u32 __user *)reg->addr); 2614 break; 2615 case KVM_REG_S390_EPOCHDIFF: 2616 r = put_user(vcpu->arch.sie_block->epoch, 2617 (u64 __user *)reg->addr); 2618 break; 2619 case KVM_REG_S390_CPU_TIMER: 2620 r = put_user(kvm_s390_get_cpu_timer(vcpu), 2621 (u64 __user *)reg->addr); 2622 break; 2623 case KVM_REG_S390_CLOCK_COMP: 2624 r = put_user(vcpu->arch.sie_block->ckc, 2625 (u64 __user *)reg->addr); 2626 break; 2627 case KVM_REG_S390_PFTOKEN: 2628 r = put_user(vcpu->arch.pfault_token, 2629 (u64 __user *)reg->addr); 2630 break; 2631 case KVM_REG_S390_PFCOMPARE: 2632 r = put_user(vcpu->arch.pfault_compare, 2633 (u64 __user *)reg->addr); 2634 break; 2635 case KVM_REG_S390_PFSELECT: 2636 r = put_user(vcpu->arch.pfault_select, 2637 (u64 __user *)reg->addr); 2638 break; 2639 case KVM_REG_S390_PP: 2640 r = put_user(vcpu->arch.sie_block->pp, 2641 (u64 __user *)reg->addr); 2642 break; 2643 case KVM_REG_S390_GBEA: 2644 r = put_user(vcpu->arch.sie_block->gbea, 2645 (u64 __user *)reg->addr); 2646 break; 2647 default: 2648 break; 2649 } 2650 2651 return r; 2652 } 2653 2654 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, 2655 struct kvm_one_reg *reg) 2656 { 2657 int r = -EINVAL; 2658 __u64 val; 2659 2660 switch (reg->id) { 2661 case KVM_REG_S390_TODPR: 2662 r = get_user(vcpu->arch.sie_block->todpr, 2663 (u32 __user *)reg->addr); 2664 break; 2665 case KVM_REG_S390_EPOCHDIFF: 2666 r = get_user(vcpu->arch.sie_block->epoch, 2667 (u64 __user *)reg->addr); 2668 break; 2669 case KVM_REG_S390_CPU_TIMER: 2670 r = get_user(val, (u64 __user *)reg->addr); 2671 if (!r) 2672 kvm_s390_set_cpu_timer(vcpu, val); 2673 break; 2674 case KVM_REG_S390_CLOCK_COMP: 2675 r = get_user(vcpu->arch.sie_block->ckc, 2676 (u64 __user *)reg->addr); 2677 break; 2678 case KVM_REG_S390_PFTOKEN: 2679 r = get_user(vcpu->arch.pfault_token, 2680 (u64 __user *)reg->addr); 2681 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 2682 kvm_clear_async_pf_completion_queue(vcpu); 2683 break; 2684 case KVM_REG_S390_PFCOMPARE: 
2685 r = get_user(vcpu->arch.pfault_compare, 2686 (u64 __user *)reg->addr); 2687 break; 2688 case KVM_REG_S390_PFSELECT: 2689 r = get_user(vcpu->arch.pfault_select, 2690 (u64 __user *)reg->addr); 2691 break; 2692 case KVM_REG_S390_PP: 2693 r = get_user(vcpu->arch.sie_block->pp, 2694 (u64 __user *)reg->addr); 2695 break; 2696 case KVM_REG_S390_GBEA: 2697 r = get_user(vcpu->arch.sie_block->gbea, 2698 (u64 __user *)reg->addr); 2699 break; 2700 default: 2701 break; 2702 } 2703 2704 return r; 2705 } 2706 2707 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) 2708 { 2709 kvm_s390_vcpu_initial_reset(vcpu); 2710 return 0; 2711 } 2712 2713 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 2714 { 2715 memcpy(&vcpu->run->s.regs.gprs, ®s->gprs, sizeof(regs->gprs)); 2716 return 0; 2717 } 2718 2719 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 2720 { 2721 memcpy(®s->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs)); 2722 return 0; 2723 } 2724 2725 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 2726 struct kvm_sregs *sregs) 2727 { 2728 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs)); 2729 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs)); 2730 return 0; 2731 } 2732 2733 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, 2734 struct kvm_sregs *sregs) 2735 { 2736 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs)); 2737 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs)); 2738 return 0; 2739 } 2740 2741 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 2742 { 2743 if (test_fp_ctl(fpu->fpc)) 2744 return -EINVAL; 2745 vcpu->run->s.regs.fpc = fpu->fpc; 2746 if (MACHINE_HAS_VX) 2747 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs, 2748 (freg_t *) fpu->fprs); 2749 else 2750 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs)); 2751 return 0; 2752 } 2753 2754 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 2755 { 2756 /* make sure we have the latest values */ 2757 save_fpu_regs(); 2758 if (MACHINE_HAS_VX) 2759 convert_vx_to_fp((freg_t *) fpu->fprs, 2760 (__vector128 *) vcpu->run->s.regs.vrs); 2761 else 2762 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs)); 2763 fpu->fpc = vcpu->run->s.regs.fpc; 2764 return 0; 2765 } 2766 2767 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw) 2768 { 2769 int rc = 0; 2770 2771 if (!is_vcpu_stopped(vcpu)) 2772 rc = -EBUSY; 2773 else { 2774 vcpu->run->psw_mask = psw.mask; 2775 vcpu->run->psw_addr = psw.addr; 2776 } 2777 return rc; 2778 } 2779 2780 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, 2781 struct kvm_translation *tr) 2782 { 2783 return -EINVAL; /* not implemented yet */ 2784 } 2785 2786 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \ 2787 KVM_GUESTDBG_USE_HW_BP | \ 2788 KVM_GUESTDBG_ENABLE) 2789 2790 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, 2791 struct kvm_guest_debug *dbg) 2792 { 2793 int rc = 0; 2794 2795 vcpu->guest_debug = 0; 2796 kvm_s390_clear_bp_data(vcpu); 2797 2798 if (dbg->control & ~VALID_GUESTDBG_FLAGS) 2799 return -EINVAL; 2800 if (!sclp.has_gpere) 2801 return -EINVAL; 2802 2803 if (dbg->control & KVM_GUESTDBG_ENABLE) { 2804 vcpu->guest_debug = dbg->control; 2805 /* enforce guest PER */ 2806 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags); 2807 2808 if (dbg->control & KVM_GUESTDBG_USE_HW_BP) 2809 rc = kvm_s390_import_bp_data(vcpu, dbg); 2810 
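		/* a failure here is rolled back by the common rc check below */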
} else { 2811 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags); 2812 vcpu->arch.guestdbg.last_bp = 0; 2813 } 2814 2815 if (rc) { 2816 vcpu->guest_debug = 0; 2817 kvm_s390_clear_bp_data(vcpu); 2818 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags); 2819 } 2820 2821 return rc; 2822 } 2823 2824 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, 2825 struct kvm_mp_state *mp_state) 2826 { 2827 /* CHECK_STOP and LOAD are not supported yet */ 2828 return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED : 2829 KVM_MP_STATE_OPERATING; 2830 } 2831 2832 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, 2833 struct kvm_mp_state *mp_state) 2834 { 2835 int rc = 0; 2836 2837 /* user space knows about this interface - let it control the state */ 2838 vcpu->kvm->arch.user_cpu_state_ctrl = 1; 2839 2840 switch (mp_state->mp_state) { 2841 case KVM_MP_STATE_STOPPED: 2842 kvm_s390_vcpu_stop(vcpu); 2843 break; 2844 case KVM_MP_STATE_OPERATING: 2845 kvm_s390_vcpu_start(vcpu); 2846 break; 2847 case KVM_MP_STATE_LOAD: 2848 case KVM_MP_STATE_CHECK_STOP: 2849 /* fall through - CHECK_STOP and LOAD are not supported yet */ 2850 default: 2851 rc = -ENXIO; 2852 } 2853 2854 return rc; 2855 } 2856 2857 static bool ibs_enabled(struct kvm_vcpu *vcpu) 2858 { 2859 return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS; 2860 } 2861 2862 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) 2863 { 2864 retry: 2865 kvm_s390_vcpu_request_handled(vcpu); 2866 if (!kvm_request_pending(vcpu)) 2867 return 0; 2868 /* 2869 * We use MMU_RELOAD just to re-arm the ipte notifier for the 2870 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock. 2871 * This ensures that the ipte instruction for this request has 2872 * already finished. We might race against a second unmapper that 2873 * wants to set the blocking bit. Lets just retry the request loop. 2874 */ 2875 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) { 2876 int rc; 2877 rc = gmap_mprotect_notify(vcpu->arch.gmap, 2878 kvm_s390_get_prefix(vcpu), 2879 PAGE_SIZE * 2, PROT_WRITE); 2880 if (rc) { 2881 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); 2882 return rc; 2883 } 2884 goto retry; 2885 } 2886 2887 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) { 2888 vcpu->arch.sie_block->ihcpu = 0xffff; 2889 goto retry; 2890 } 2891 2892 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) { 2893 if (!ibs_enabled(vcpu)) { 2894 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1); 2895 atomic_or(CPUSTAT_IBS, 2896 &vcpu->arch.sie_block->cpuflags); 2897 } 2898 goto retry; 2899 } 2900 2901 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) { 2902 if (ibs_enabled(vcpu)) { 2903 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0); 2904 atomic_andnot(CPUSTAT_IBS, 2905 &vcpu->arch.sie_block->cpuflags); 2906 } 2907 goto retry; 2908 } 2909 2910 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) { 2911 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC; 2912 goto retry; 2913 } 2914 2915 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) { 2916 /* 2917 * Disable CMMA virtualization; we will emulate the ESSA 2918 * instruction manually, in order to provide additional 2919 * functionalities needed for live migration. 2920 */ 2921 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA; 2922 goto retry; 2923 } 2924 2925 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) { 2926 /* 2927 * Re-enable CMMA virtualization if CMMA is available and 2928 * was used. 
2929 */ 2930 if ((vcpu->kvm->arch.use_cmma) && 2931 (vcpu->kvm->mm->context.use_cmma)) 2932 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA; 2933 goto retry; 2934 } 2935 2936 /* nothing to do, just clear the request */ 2937 kvm_clear_request(KVM_REQ_UNHALT, vcpu); 2938 2939 return 0; 2940 } 2941 2942 void kvm_s390_set_tod_clock_ext(struct kvm *kvm, 2943 const struct kvm_s390_vm_tod_clock *gtod) 2944 { 2945 struct kvm_vcpu *vcpu; 2946 struct kvm_s390_tod_clock_ext htod; 2947 int i; 2948 2949 mutex_lock(&kvm->lock); 2950 preempt_disable(); 2951 2952 get_tod_clock_ext((char *)&htod); 2953 2954 kvm->arch.epoch = gtod->tod - htod.tod; 2955 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx; 2956 2957 if (kvm->arch.epoch > gtod->tod) 2958 kvm->arch.epdx -= 1; 2959 2960 kvm_s390_vcpu_block_all(kvm); 2961 kvm_for_each_vcpu(i, vcpu, kvm) { 2962 vcpu->arch.sie_block->epoch = kvm->arch.epoch; 2963 vcpu->arch.sie_block->epdx = kvm->arch.epdx; 2964 } 2965 2966 kvm_s390_vcpu_unblock_all(kvm); 2967 preempt_enable(); 2968 mutex_unlock(&kvm->lock); 2969 } 2970 2971 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod) 2972 { 2973 struct kvm_vcpu *vcpu; 2974 int i; 2975 2976 mutex_lock(&kvm->lock); 2977 preempt_disable(); 2978 kvm->arch.epoch = tod - get_tod_clock(); 2979 kvm_s390_vcpu_block_all(kvm); 2980 kvm_for_each_vcpu(i, vcpu, kvm) 2981 vcpu->arch.sie_block->epoch = kvm->arch.epoch; 2982 kvm_s390_vcpu_unblock_all(kvm); 2983 preempt_enable(); 2984 mutex_unlock(&kvm->lock); 2985 } 2986 2987 /** 2988 * kvm_arch_fault_in_page - fault-in guest page if necessary 2989 * @vcpu: The corresponding virtual cpu 2990 * @gpa: Guest physical address 2991 * @writable: Whether the page should be writable or not 2992 * 2993 * Make sure that a guest page has been faulted-in on the host. 2994 * 2995 * Return: Zero on success, negative error code otherwise. 2996 */ 2997 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable) 2998 { 2999 return gmap_fault(vcpu->arch.gmap, gpa, 3000 writable ? 
FAULT_FLAG_WRITE : 0); 3001 } 3002 3003 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token, 3004 unsigned long token) 3005 { 3006 struct kvm_s390_interrupt inti; 3007 struct kvm_s390_irq irq; 3008 3009 if (start_token) { 3010 irq.u.ext.ext_params2 = token; 3011 irq.type = KVM_S390_INT_PFAULT_INIT; 3012 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq)); 3013 } else { 3014 inti.type = KVM_S390_INT_PFAULT_DONE; 3015 inti.parm64 = token; 3016 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti)); 3017 } 3018 } 3019 3020 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, 3021 struct kvm_async_pf *work) 3022 { 3023 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token); 3024 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token); 3025 } 3026 3027 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, 3028 struct kvm_async_pf *work) 3029 { 3030 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token); 3031 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token); 3032 } 3033 3034 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, 3035 struct kvm_async_pf *work) 3036 { 3037 /* s390 will always inject the page directly */ 3038 } 3039 3040 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu) 3041 { 3042 /* 3043 * s390 will always inject the page directly, 3044 * but we still want check_async_completion to cleanup 3045 */ 3046 return true; 3047 } 3048 3049 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu) 3050 { 3051 hva_t hva; 3052 struct kvm_arch_async_pf arch; 3053 int rc; 3054 3055 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 3056 return 0; 3057 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) != 3058 vcpu->arch.pfault_compare) 3059 return 0; 3060 if (psw_extint_disabled(vcpu)) 3061 return 0; 3062 if (kvm_s390_vcpu_has_irq(vcpu, 0)) 3063 return 0; 3064 if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul)) 3065 return 0; 3066 if (!vcpu->arch.gmap->pfault_enabled) 3067 return 0; 3068 3069 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr)); 3070 hva += current->thread.gmap_addr & ~PAGE_MASK; 3071 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8)) 3072 return 0; 3073 3074 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch); 3075 return rc; 3076 } 3077 3078 static int vcpu_pre_run(struct kvm_vcpu *vcpu) 3079 { 3080 int rc, cpuflags; 3081 3082 /* 3083 * On s390 notifications for arriving pages will be delivered directly 3084 * to the guest but the house keeping for completed pfaults is 3085 * handled outside the worker. 
3086 */ 3087 kvm_check_async_pf_completion(vcpu); 3088 3089 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14]; 3090 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15]; 3091 3092 if (need_resched()) 3093 schedule(); 3094 3095 if (test_cpu_flag(CIF_MCCK_PENDING)) 3096 s390_handle_mcck(); 3097 3098 if (!kvm_is_ucontrol(vcpu->kvm)) { 3099 rc = kvm_s390_deliver_pending_interrupts(vcpu); 3100 if (rc) 3101 return rc; 3102 } 3103 3104 rc = kvm_s390_handle_requests(vcpu); 3105 if (rc) 3106 return rc; 3107 3108 if (guestdbg_enabled(vcpu)) { 3109 kvm_s390_backup_guest_per_regs(vcpu); 3110 kvm_s390_patch_guest_per_regs(vcpu); 3111 } 3112 3113 vcpu->arch.sie_block->icptcode = 0; 3114 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags); 3115 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags); 3116 trace_kvm_s390_sie_enter(vcpu, cpuflags); 3117 3118 return 0; 3119 } 3120 3121 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) 3122 { 3123 struct kvm_s390_pgm_info pgm_info = { 3124 .code = PGM_ADDRESSING, 3125 }; 3126 u8 opcode, ilen; 3127 int rc; 3128 3129 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); 3130 trace_kvm_s390_sie_fault(vcpu); 3131 3132 /* 3133 * We want to inject an addressing exception, which is defined as a 3134 * suppressing or terminating exception. However, since we came here 3135 * by a DAT access exception, the PSW still points to the faulting 3136 * instruction since DAT exceptions are nullifying. So we've got 3137 * to look up the current opcode to get the length of the instruction 3138 * to be able to forward the PSW. 3139 */ 3140 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1); 3141 ilen = insn_length(opcode); 3142 if (rc < 0) { 3143 return rc; 3144 } else if (rc) { 3145 /* Instruction-Fetching Exceptions - we can't detect the ilen. 3146 * Forward by arbitrary ilc, injection will take care of 3147 * nullification if necessary. 
3148 */ 3149 pgm_info = vcpu->arch.pgm; 3150 ilen = 4; 3151 } 3152 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID; 3153 kvm_s390_forward_psw(vcpu, ilen); 3154 return kvm_s390_inject_prog_irq(vcpu, &pgm_info); 3155 } 3156 3157 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) 3158 { 3159 struct mcck_volatile_info *mcck_info; 3160 struct sie_page *sie_page; 3161 3162 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", 3163 vcpu->arch.sie_block->icptcode); 3164 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); 3165 3166 if (guestdbg_enabled(vcpu)) 3167 kvm_s390_restore_guest_per_regs(vcpu); 3168 3169 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14; 3170 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15; 3171 3172 if (exit_reason == -EINTR) { 3173 VCPU_EVENT(vcpu, 3, "%s", "machine check"); 3174 sie_page = container_of(vcpu->arch.sie_block, 3175 struct sie_page, sie_block); 3176 mcck_info = &sie_page->mcck_info; 3177 kvm_s390_reinject_machine_check(vcpu, mcck_info); 3178 return 0; 3179 } 3180 3181 if (vcpu->arch.sie_block->icptcode > 0) { 3182 int rc = kvm_handle_sie_intercept(vcpu); 3183 3184 if (rc != -EOPNOTSUPP) 3185 return rc; 3186 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC; 3187 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; 3188 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; 3189 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; 3190 return -EREMOTE; 3191 } else if (exit_reason != -EFAULT) { 3192 vcpu->stat.exit_null++; 3193 return 0; 3194 } else if (kvm_is_ucontrol(vcpu->kvm)) { 3195 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; 3196 vcpu->run->s390_ucontrol.trans_exc_code = 3197 current->thread.gmap_addr; 3198 vcpu->run->s390_ucontrol.pgm_code = 0x10; 3199 return -EREMOTE; 3200 } else if (current->thread.gmap_pfault) { 3201 trace_kvm_s390_major_guest_pfault(vcpu); 3202 current->thread.gmap_pfault = 0; 3203 if (kvm_arch_setup_async_pf(vcpu)) 3204 return 0; 3205 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1); 3206 } 3207 return vcpu_post_run_fault_in_sie(vcpu); 3208 } 3209 3210 static int __vcpu_run(struct kvm_vcpu *vcpu) 3211 { 3212 int rc, exit_reason; 3213 3214 /* 3215 * We try to hold kvm->srcu during most of vcpu_run (except when run- 3216 * ning the guest), so that memslots (and other stuff) are protected 3217 */ 3218 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 3219 3220 do { 3221 rc = vcpu_pre_run(vcpu); 3222 if (rc) 3223 break; 3224 3225 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 3226 /* 3227 * As PF_VCPU will be used in fault handler, between 3228 * guest_enter and guest_exit should be no uaccess. 
3229 */ 3230 local_irq_disable(); 3231 guest_enter_irqoff(); 3232 __disable_cpu_timer_accounting(vcpu); 3233 local_irq_enable(); 3234 exit_reason = sie64a(vcpu->arch.sie_block, 3235 vcpu->run->s.regs.gprs); 3236 local_irq_disable(); 3237 __enable_cpu_timer_accounting(vcpu); 3238 guest_exit_irqoff(); 3239 local_irq_enable(); 3240 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 3241 3242 rc = vcpu_post_run(vcpu, exit_reason); 3243 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc); 3244 3245 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 3246 return rc; 3247 } 3248 3249 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3250 { 3251 struct runtime_instr_cb *riccb; 3252 struct gs_cb *gscb; 3253 3254 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb; 3255 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb; 3256 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; 3257 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; 3258 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) 3259 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); 3260 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) { 3261 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128); 3262 /* some control register changes require a tlb flush */ 3263 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 3264 } 3265 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) { 3266 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm); 3267 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc; 3268 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr; 3269 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp; 3270 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea; 3271 } 3272 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) { 3273 vcpu->arch.pfault_token = kvm_run->s.regs.pft; 3274 vcpu->arch.pfault_select = kvm_run->s.regs.pfs; 3275 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc; 3276 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 3277 kvm_clear_async_pf_completion_queue(vcpu); 3278 } 3279 /* 3280 * If userspace sets the riccb (e.g. after migration) to a valid state, 3281 * we should enable RI here instead of doing the lazy enablement. 3282 */ 3283 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) && 3284 test_kvm_facility(vcpu->kvm, 64) && 3285 riccb->v && 3286 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) { 3287 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)"); 3288 vcpu->arch.sie_block->ecb3 |= ECB3_RI; 3289 } 3290 /* 3291 * If userspace sets the gscb (e.g. after migration) to non-zero, 3292 * we should enable GS here instead of doing the lazy enablement. 
3293 */ 3294 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) && 3295 test_kvm_facility(vcpu->kvm, 133) && 3296 gscb->gssm && 3297 !vcpu->arch.gs_enabled) { 3298 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)"); 3299 vcpu->arch.sie_block->ecb |= ECB_GS; 3300 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; 3301 vcpu->arch.gs_enabled = 1; 3302 } 3303 save_access_regs(vcpu->arch.host_acrs); 3304 restore_access_regs(vcpu->run->s.regs.acrs); 3305 /* save host (userspace) fprs/vrs */ 3306 save_fpu_regs(); 3307 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc; 3308 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs; 3309 if (MACHINE_HAS_VX) 3310 current->thread.fpu.regs = vcpu->run->s.regs.vrs; 3311 else 3312 current->thread.fpu.regs = vcpu->run->s.regs.fprs; 3313 current->thread.fpu.fpc = vcpu->run->s.regs.fpc; 3314 if (test_fp_ctl(current->thread.fpu.fpc)) 3315 /* User space provided an invalid FPC, let's clear it */ 3316 current->thread.fpu.fpc = 0; 3317 if (MACHINE_HAS_GS) { 3318 preempt_disable(); 3319 __ctl_set_bit(2, 4); 3320 if (current->thread.gs_cb) { 3321 vcpu->arch.host_gscb = current->thread.gs_cb; 3322 save_gs_cb(vcpu->arch.host_gscb); 3323 } 3324 if (vcpu->arch.gs_enabled) { 3325 current->thread.gs_cb = (struct gs_cb *) 3326 &vcpu->run->s.regs.gscb; 3327 restore_gs_cb(current->thread.gs_cb); 3328 } 3329 preempt_enable(); 3330 } 3331 3332 kvm_run->kvm_dirty_regs = 0; 3333 } 3334 3335 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3336 { 3337 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask; 3338 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; 3339 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu); 3340 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128); 3341 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu); 3342 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc; 3343 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr; 3344 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp; 3345 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea; 3346 kvm_run->s.regs.pft = vcpu->arch.pfault_token; 3347 kvm_run->s.regs.pfs = vcpu->arch.pfault_select; 3348 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare; 3349 save_access_regs(vcpu->run->s.regs.acrs); 3350 restore_access_regs(vcpu->arch.host_acrs); 3351 /* Save guest register state */ 3352 save_fpu_regs(); 3353 vcpu->run->s.regs.fpc = current->thread.fpu.fpc; 3354 /* Restore will be done lazily at return */ 3355 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc; 3356 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs; 3357 if (MACHINE_HAS_GS) { 3358 __ctl_set_bit(2, 4); 3359 if (vcpu->arch.gs_enabled) 3360 save_gs_cb(current->thread.gs_cb); 3361 preempt_disable(); 3362 current->thread.gs_cb = vcpu->arch.host_gscb; 3363 restore_gs_cb(vcpu->arch.host_gscb); 3364 preempt_enable(); 3365 if (!vcpu->arch.host_gscb) 3366 __ctl_clear_bit(2, 4); 3367 vcpu->arch.host_gscb = NULL; 3368 } 3369 3370 } 3371 3372 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3373 { 3374 int rc; 3375 3376 if (kvm_run->immediate_exit) 3377 return -EINTR; 3378 3379 if (guestdbg_exit_pending(vcpu)) { 3380 kvm_s390_prepare_debug_exit(vcpu); 3381 return 0; 3382 } 3383 3384 kvm_sigset_activate(vcpu); 3385 3386 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) { 3387 kvm_s390_vcpu_start(vcpu); 3388 } else if (is_vcpu_stopped(vcpu)) { 3389 pr_err_ratelimited("can't run stopped vcpu %d\n", 3390 vcpu->vcpu_id); 3391 return -EINVAL; 3392 } 3393 3394 sync_regs(vcpu, kvm_run); 3395 enable_cpu_timer_accounting(vcpu); 
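	/*
	 * From here on __vcpu_run() loops over vcpu_pre_run(), sie64a() and
	 * vcpu_post_run() until a signal, a guest-debug exit or an error
	 * stops it; -EREMOTE means kvm_run has been prepared and userspace
	 * must handle the exit before issuing KVM_RUN again.
	 *
	 * Illustrative userspace sketch (not kernel code): driving this ioctl
	 * and reacting to an intercept KVM could not handle itself.  vcpu_fd,
	 * mmap_size and handle_intercept() are placeholders; mmap_size would
	 * come from KVM_GET_VCPU_MMAP_SIZE.
	 *
	 *	struct kvm_run *run = mmap(NULL, mmap_size,
	 *				   PROT_READ | PROT_WRITE, MAP_SHARED,
	 *				   vcpu_fd, 0);
	 *	for (;;) {
	 *		if (ioctl(vcpu_fd, KVM_RUN, 0) < 0 && errno != EINTR)
	 *			break;
	 *		switch (run->exit_reason) {
	 *		case KVM_EXIT_S390_SIEIC:
	 *			handle_intercept(run->s390_sieic.icptcode,
	 *					 run->s390_sieic.ipa,
	 *					 run->s390_sieic.ipb);
	 *			break;
	 *		case KVM_EXIT_INTR:
	 *			break;
	 *		}
	 *	}
	 */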
3396 3397 might_fault(); 3398 rc = __vcpu_run(vcpu); 3399 3400 if (signal_pending(current) && !rc) { 3401 kvm_run->exit_reason = KVM_EXIT_INTR; 3402 rc = -EINTR; 3403 } 3404 3405 if (guestdbg_exit_pending(vcpu) && !rc) { 3406 kvm_s390_prepare_debug_exit(vcpu); 3407 rc = 0; 3408 } 3409 3410 if (rc == -EREMOTE) { 3411 /* userspace support is needed, kvm_run has been prepared */ 3412 rc = 0; 3413 } 3414 3415 disable_cpu_timer_accounting(vcpu); 3416 store_regs(vcpu, kvm_run); 3417 3418 kvm_sigset_deactivate(vcpu); 3419 3420 vcpu->stat.exit_userspace++; 3421 return rc; 3422 } 3423 3424 /* 3425 * store status at address 3426 * we use have two special cases: 3427 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit 3428 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix 3429 */ 3430 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa) 3431 { 3432 unsigned char archmode = 1; 3433 freg_t fprs[NUM_FPRS]; 3434 unsigned int px; 3435 u64 clkcomp, cputm; 3436 int rc; 3437 3438 px = kvm_s390_get_prefix(vcpu); 3439 if (gpa == KVM_S390_STORE_STATUS_NOADDR) { 3440 if (write_guest_abs(vcpu, 163, &archmode, 1)) 3441 return -EFAULT; 3442 gpa = 0; 3443 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) { 3444 if (write_guest_real(vcpu, 163, &archmode, 1)) 3445 return -EFAULT; 3446 gpa = px; 3447 } else 3448 gpa -= __LC_FPREGS_SAVE_AREA; 3449 3450 /* manually convert vector registers if necessary */ 3451 if (MACHINE_HAS_VX) { 3452 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs); 3453 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, 3454 fprs, 128); 3455 } else { 3456 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, 3457 vcpu->run->s.regs.fprs, 128); 3458 } 3459 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA, 3460 vcpu->run->s.regs.gprs, 128); 3461 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA, 3462 &vcpu->arch.sie_block->gpsw, 16); 3463 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA, 3464 &px, 4); 3465 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA, 3466 &vcpu->run->s.regs.fpc, 4); 3467 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA, 3468 &vcpu->arch.sie_block->todpr, 4); 3469 cputm = kvm_s390_get_cpu_timer(vcpu); 3470 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA, 3471 &cputm, 8); 3472 clkcomp = vcpu->arch.sie_block->ckc >> 8; 3473 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA, 3474 &clkcomp, 8); 3475 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA, 3476 &vcpu->run->s.regs.acrs, 64); 3477 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA, 3478 &vcpu->arch.sie_block->gcr, 128); 3479 return rc ? -EFAULT : 0; 3480 } 3481 3482 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) 3483 { 3484 /* 3485 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy 3486 * switch in the run ioctl. 
Let's update our copies before we save 3487 * it into the save area 3488 */ 3489 save_fpu_regs(); 3490 vcpu->run->s.regs.fpc = current->thread.fpu.fpc; 3491 save_access_regs(vcpu->run->s.regs.acrs); 3492 3493 return kvm_s390_store_status_unloaded(vcpu, addr); 3494 } 3495 3496 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu) 3497 { 3498 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu); 3499 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu); 3500 } 3501 3502 static void __disable_ibs_on_all_vcpus(struct kvm *kvm) 3503 { 3504 unsigned int i; 3505 struct kvm_vcpu *vcpu; 3506 3507 kvm_for_each_vcpu(i, vcpu, kvm) { 3508 __disable_ibs_on_vcpu(vcpu); 3509 } 3510 } 3511 3512 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu) 3513 { 3514 if (!sclp.has_ibs) 3515 return; 3516 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu); 3517 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu); 3518 } 3519 3520 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu) 3521 { 3522 int i, online_vcpus, started_vcpus = 0; 3523 3524 if (!is_vcpu_stopped(vcpu)) 3525 return; 3526 3527 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1); 3528 /* Only one cpu at a time may enter/leave the STOPPED state. */ 3529 spin_lock(&vcpu->kvm->arch.start_stop_lock); 3530 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); 3531 3532 for (i = 0; i < online_vcpus; i++) { 3533 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) 3534 started_vcpus++; 3535 } 3536 3537 if (started_vcpus == 0) { 3538 /* we're the only active VCPU -> speed it up */ 3539 __enable_ibs_on_vcpu(vcpu); 3540 } else if (started_vcpus == 1) { 3541 /* 3542 * As we are starting a second VCPU, we have to disable 3543 * the IBS facility on all VCPUs to remove potentially 3544 * oustanding ENABLE requests. 3545 */ 3546 __disable_ibs_on_all_vcpus(vcpu->kvm); 3547 } 3548 3549 atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); 3550 /* 3551 * Another VCPU might have used IBS while we were offline. 3552 * Let's play safe and flush the VCPU at startup. 3553 */ 3554 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 3555 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 3556 return; 3557 } 3558 3559 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu) 3560 { 3561 int i, online_vcpus, started_vcpus = 0; 3562 struct kvm_vcpu *started_vcpu = NULL; 3563 3564 if (is_vcpu_stopped(vcpu)) 3565 return; 3566 3567 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0); 3568 /* Only one cpu at a time may enter/leave the STOPPED state. */ 3569 spin_lock(&vcpu->kvm->arch.start_stop_lock); 3570 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); 3571 3572 /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */ 3573 kvm_s390_clear_stop_irq(vcpu); 3574 3575 atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); 3576 __disable_ibs_on_vcpu(vcpu); 3577 3578 for (i = 0; i < online_vcpus; i++) { 3579 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) { 3580 started_vcpus++; 3581 started_vcpu = vcpu->kvm->vcpus[i]; 3582 } 3583 } 3584 3585 if (started_vcpus == 1) { 3586 /* 3587 * As we only have one VCPU left, we want to enable the 3588 * IBS facility for that VCPU to speed it up. 
3589 */ 3590 __enable_ibs_on_vcpu(started_vcpu); 3591 } 3592 3593 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 3594 return; 3595 } 3596 3597 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, 3598 struct kvm_enable_cap *cap) 3599 { 3600 int r; 3601 3602 if (cap->flags) 3603 return -EINVAL; 3604 3605 switch (cap->cap) { 3606 case KVM_CAP_S390_CSS_SUPPORT: 3607 if (!vcpu->kvm->arch.css_support) { 3608 vcpu->kvm->arch.css_support = 1; 3609 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support"); 3610 trace_kvm_s390_enable_css(vcpu->kvm); 3611 } 3612 r = 0; 3613 break; 3614 default: 3615 r = -EINVAL; 3616 break; 3617 } 3618 return r; 3619 } 3620 3621 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, 3622 struct kvm_s390_mem_op *mop) 3623 { 3624 void __user *uaddr = (void __user *)mop->buf; 3625 void *tmpbuf = NULL; 3626 int r, srcu_idx; 3627 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION 3628 | KVM_S390_MEMOP_F_CHECK_ONLY; 3629 3630 if (mop->flags & ~supported_flags) 3631 return -EINVAL; 3632 3633 if (mop->size > MEM_OP_MAX_SIZE) 3634 return -E2BIG; 3635 3636 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) { 3637 tmpbuf = vmalloc(mop->size); 3638 if (!tmpbuf) 3639 return -ENOMEM; 3640 } 3641 3642 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 3643 3644 switch (mop->op) { 3645 case KVM_S390_MEMOP_LOGICAL_READ: 3646 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { 3647 r = check_gva_range(vcpu, mop->gaddr, mop->ar, 3648 mop->size, GACC_FETCH); 3649 break; 3650 } 3651 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size); 3652 if (r == 0) { 3653 if (copy_to_user(uaddr, tmpbuf, mop->size)) 3654 r = -EFAULT; 3655 } 3656 break; 3657 case KVM_S390_MEMOP_LOGICAL_WRITE: 3658 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { 3659 r = check_gva_range(vcpu, mop->gaddr, mop->ar, 3660 mop->size, GACC_STORE); 3661 break; 3662 } 3663 if (copy_from_user(tmpbuf, uaddr, mop->size)) { 3664 r = -EFAULT; 3665 break; 3666 } 3667 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size); 3668 break; 3669 default: 3670 r = -EINVAL; 3671 } 3672 3673 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); 3674 3675 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0) 3676 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); 3677 3678 vfree(tmpbuf); 3679 return r; 3680 } 3681 3682 long kvm_arch_vcpu_ioctl(struct file *filp, 3683 unsigned int ioctl, unsigned long arg) 3684 { 3685 struct kvm_vcpu *vcpu = filp->private_data; 3686 void __user *argp = (void __user *)arg; 3687 int idx; 3688 long r; 3689 3690 switch (ioctl) { 3691 case KVM_S390_IRQ: { 3692 struct kvm_s390_irq s390irq; 3693 3694 r = -EFAULT; 3695 if (copy_from_user(&s390irq, argp, sizeof(s390irq))) 3696 break; 3697 r = kvm_s390_inject_vcpu(vcpu, &s390irq); 3698 break; 3699 } 3700 case KVM_S390_INTERRUPT: { 3701 struct kvm_s390_interrupt s390int; 3702 struct kvm_s390_irq s390irq; 3703 3704 r = -EFAULT; 3705 if (copy_from_user(&s390int, argp, sizeof(s390int))) 3706 break; 3707 if (s390int_to_s390irq(&s390int, &s390irq)) 3708 return -EINVAL; 3709 r = kvm_s390_inject_vcpu(vcpu, &s390irq); 3710 break; 3711 } 3712 case KVM_S390_STORE_STATUS: 3713 idx = srcu_read_lock(&vcpu->kvm->srcu); 3714 r = kvm_s390_vcpu_store_status(vcpu, arg); 3715 srcu_read_unlock(&vcpu->kvm->srcu, idx); 3716 break; 3717 case KVM_S390_SET_INITIAL_PSW: { 3718 psw_t psw; 3719 3720 r = -EFAULT; 3721 if (copy_from_user(&psw, argp, sizeof(psw))) 3722 break; 3723 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw); 3724 break; 3725 } 3726 case 
KVM_S390_INITIAL_RESET: 3727 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu); 3728 break; 3729 case KVM_SET_ONE_REG: 3730 case KVM_GET_ONE_REG: { 3731 struct kvm_one_reg reg; 3732 r = -EFAULT; 3733 if (copy_from_user(®, argp, sizeof(reg))) 3734 break; 3735 if (ioctl == KVM_SET_ONE_REG) 3736 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, ®); 3737 else 3738 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, ®); 3739 break; 3740 } 3741 #ifdef CONFIG_KVM_S390_UCONTROL 3742 case KVM_S390_UCAS_MAP: { 3743 struct kvm_s390_ucas_mapping ucasmap; 3744 3745 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) { 3746 r = -EFAULT; 3747 break; 3748 } 3749 3750 if (!kvm_is_ucontrol(vcpu->kvm)) { 3751 r = -EINVAL; 3752 break; 3753 } 3754 3755 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr, 3756 ucasmap.vcpu_addr, ucasmap.length); 3757 break; 3758 } 3759 case KVM_S390_UCAS_UNMAP: { 3760 struct kvm_s390_ucas_mapping ucasmap; 3761 3762 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) { 3763 r = -EFAULT; 3764 break; 3765 } 3766 3767 if (!kvm_is_ucontrol(vcpu->kvm)) { 3768 r = -EINVAL; 3769 break; 3770 } 3771 3772 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr, 3773 ucasmap.length); 3774 break; 3775 } 3776 #endif 3777 case KVM_S390_VCPU_FAULT: { 3778 r = gmap_fault(vcpu->arch.gmap, arg, 0); 3779 break; 3780 } 3781 case KVM_ENABLE_CAP: 3782 { 3783 struct kvm_enable_cap cap; 3784 r = -EFAULT; 3785 if (copy_from_user(&cap, argp, sizeof(cap))) 3786 break; 3787 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); 3788 break; 3789 } 3790 case KVM_S390_MEM_OP: { 3791 struct kvm_s390_mem_op mem_op; 3792 3793 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0) 3794 r = kvm_s390_guest_mem_op(vcpu, &mem_op); 3795 else 3796 r = -EFAULT; 3797 break; 3798 } 3799 case KVM_S390_SET_IRQ_STATE: { 3800 struct kvm_s390_irq_state irq_state; 3801 3802 r = -EFAULT; 3803 if (copy_from_user(&irq_state, argp, sizeof(irq_state))) 3804 break; 3805 if (irq_state.len > VCPU_IRQS_MAX_BUF || 3806 irq_state.len == 0 || 3807 irq_state.len % sizeof(struct kvm_s390_irq) > 0) { 3808 r = -EINVAL; 3809 break; 3810 } 3811 r = kvm_s390_set_irq_state(vcpu, 3812 (void __user *) irq_state.buf, 3813 irq_state.len); 3814 break; 3815 } 3816 case KVM_S390_GET_IRQ_STATE: { 3817 struct kvm_s390_irq_state irq_state; 3818 3819 r = -EFAULT; 3820 if (copy_from_user(&irq_state, argp, sizeof(irq_state))) 3821 break; 3822 if (irq_state.len == 0) { 3823 r = -EINVAL; 3824 break; 3825 } 3826 r = kvm_s390_get_irq_state(vcpu, 3827 (__u8 __user *) irq_state.buf, 3828 irq_state.len); 3829 break; 3830 } 3831 default: 3832 r = -ENOTTY; 3833 } 3834 return r; 3835 } 3836 3837 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) 3838 { 3839 #ifdef CONFIG_KVM_S390_UCONTROL 3840 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET) 3841 && (kvm_is_ucontrol(vcpu->kvm))) { 3842 vmf->page = virt_to_page(vcpu->arch.sie_block); 3843 get_page(vmf->page); 3844 return 0; 3845 } 3846 #endif 3847 return VM_FAULT_SIGBUS; 3848 } 3849 3850 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, 3851 unsigned long npages) 3852 { 3853 return 0; 3854 } 3855 3856 /* Section: memory related */ 3857 int kvm_arch_prepare_memory_region(struct kvm *kvm, 3858 struct kvm_memory_slot *memslot, 3859 const struct kvm_userspace_memory_region *mem, 3860 enum kvm_mr_change change) 3861 { 3862 /* A few sanity checks. We can have memory slots which have to be 3863 located/ended at a segment boundary (1MB). The memory in userland is 3864 ok to be fragmented into various different vmas. 
It is okay to mmap() 3865 and munmap() stuff in this slot after doing this call at any time */ 3866 3867 if (mem->userspace_addr & 0xffffful) 3868 return -EINVAL; 3869 3870 if (mem->memory_size & 0xffffful) 3871 return -EINVAL; 3872 3873 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit) 3874 return -EINVAL; 3875 3876 return 0; 3877 } 3878 3879 void kvm_arch_commit_memory_region(struct kvm *kvm, 3880 const struct kvm_userspace_memory_region *mem, 3881 const struct kvm_memory_slot *old, 3882 const struct kvm_memory_slot *new, 3883 enum kvm_mr_change change) 3884 { 3885 int rc; 3886 3887 /* If the basics of the memslot do not change, we do not want 3888 * to update the gmap. Every update causes several unnecessary 3889 * segment translation exceptions. This is usually handled just 3890 * fine by the normal fault handler + gmap, but it will also 3891 * cause faults on the prefix page of running guest CPUs. 3892 */ 3893 if (old->userspace_addr == mem->userspace_addr && 3894 old->base_gfn * PAGE_SIZE == mem->guest_phys_addr && 3895 old->npages * PAGE_SIZE == mem->memory_size) 3896 return; 3897 3898 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr, 3899 mem->guest_phys_addr, mem->memory_size); 3900 if (rc) 3901 pr_warn("failed to commit memory region\n"); 3902 return; 3903 } 3904 3905 static inline unsigned long nonhyp_mask(int i) 3906 { 3907 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30; 3908 3909 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4); 3910 } 3911 3912 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) 3913 { 3914 vcpu->valid_wakeup = false; 3915 } 3916 3917 static int __init kvm_s390_init(void) 3918 { 3919 int i; 3920 3921 if (!sclp.has_sief2) { 3922 pr_info("SIE not available\n"); 3923 return -ENODEV; 3924 } 3925 3926 for (i = 0; i < 16; i++) 3927 kvm_s390_fac_list_mask[i] |= 3928 S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i); 3929 3930 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); 3931 } 3932 3933 static void __exit kvm_s390_exit(void) 3934 { 3935 kvm_exit(); 3936 } 3937 3938 module_init(kvm_s390_init); 3939 module_exit(kvm_s390_exit); 3940 3941 /* 3942 * Enable autoloading of the kvm module. 3943 * Note that we add the module alias here instead of virt/kvm/kvm_main.c 3944 * since x86 takes a different approach. 3945 */ 3946 #include <linux/miscdevice.h> 3947 MODULE_ALIAS_MISCDEV(KVM_MINOR); 3948 MODULE_ALIAS("devname:kvm"); 3949
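/*
 * Illustrative example (not part of the kernel source): how userspace can
 * drive the KVM_S390_MEM_OP interface implemented by kvm_s390_guest_mem_op()
 * above to read guest memory through the logical-address path.  The helper
 * name, vcpu_fd, guest_addr and len are placeholders; the structure and the
 * ioctl number come from <linux/kvm.h>.
 *
 *	static int read_guest_buf(int vcpu_fd, __u64 guest_addr,
 *				  void *buf, __u32 len)
 *	{
 *		struct kvm_s390_mem_op op = {
 *			.gaddr	= guest_addr,
 *			.size	= len,
 *			.op	= KVM_S390_MEMOP_LOGICAL_READ,
 *			.buf	= (__u64)(unsigned long)buf,
 *			.ar	= 0,
 *		};
 *
 *		return ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 *	}
 *
 * Setting KVM_S390_MEMOP_F_CHECK_ONLY in .flags performs only the access
 * check without copying data, and KVM_S390_MEMOP_F_INJECT_EXCEPTION makes
 * KVM deliver the resulting program interruption to the guest in addition
 * to returning its code.
 */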