// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2018
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define KMSG_COMPONENT "kvm-s390"
#undef pr_fmt
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU

struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_pei", VCPU_STAT(exit_pei) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_epsw", VCPU_STAT(instruction_epsw) },
	{ "instruction_gs", VCPU_STAT(instruction_gs) },
	{ "instruction_io_other", VCPU_STAT(instruction_io_other) },
	{ "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
	{ "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_ptff", VCPU_STAT(instruction_ptff) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_sck", VCPU_STAT(instruction_sck) },
	{ "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_iske", VCPU_STAT(instruction_iske) },
	{ "instruction_ri", VCPU_STAT(instruction_ri) },
	{ "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
	{ "instruction_sske", VCPU_STAT(instruction_sske) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tb", VCPU_STAT(instruction_tb) },
	{ "instruction_tpi", VCPU_STAT(instruction_tpi) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_tsch", VCPU_STAT(instruction_tsch) },
	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
	{ "instruction_sie", VCPU_STAT(instruction_sie) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "instruction_diag_10", VCPU_STAT(diagnose_10) },
	{ "instruction_diag_44", VCPU_STAT(diagnose_44) },
	{ "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
	{ "instruction_diag_258", VCPU_STAT(diagnose_258) },
	{ "instruction_diag_308", VCPU_STAT(diagnose_308) },
	{ "instruction_diag_500", VCPU_STAT(diagnose_500) },
	{ "instruction_diag_other", VCPU_STAT(diagnose_other) },
	{ NULL }
};

struct kvm_s390_tod_clock_ext {
	__u8 epoch_idx;
	__u64 tod;
	__u8 reserved[7];
} __packed;

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");
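
/*
 * The facility mask below is an upper bound on what a guest can ever
 * see: kvm_arch_init_vm() intersects the host's STFLE facility list
 * with FACILITIES_KVM, so a facility is only offered to guests if both
 * the hardware reports it and KVM knows how to virtualize it.
 */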
/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };

unsigned long kvm_s390_fac_list_mask_size(void)
{
	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
	return ARRAY_SIZE(kvm_s390_fac_list_mask);
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm->arch.epoch -= *delta;
		kvm_for_each_vcpu(i, vcpu, kvm) {
			vcpu->arch.sie_block->epoch -= *delta;
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				vcpu->arch.vsie_block->epoch -= *delta;
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

static inline int plo_test_bit(unsigned char nr)
{
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc;

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc == 0;
}
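
/*
 * kvm_s390_cpu_feat_init() below probes all 256 possible PERFORM
 * LOCKED OPERATION function codes.  The result is stored MSB first,
 * matching the facility-bit convention: function code i goes into byte
 * i / 8 under mask 0x80 >> (i % 8), so code 0 sets bit 0x80 of byte 0
 * and code 9 sets bit 0x40 of byte 1.
 */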
static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}
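
/*
 * The s390 debug feature area registered below uses 32 pages per area,
 * a single area, and a record size of 7 * sizeof(long) bytes, which
 * leaves room for the sprintf view's format pointer plus a handful of
 * arguments as emitted by the VM_EVENT/VCPU_EVENT macros.
 */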
int kvm_arch_init(void *opaque)
{
	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		debug_unregister(kvm_s390_dbf);
		return -ENOMEM;
	}

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}

void kvm_arch_exit(void)
{
	debug_unregister(kvm_s390_dbf);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
	case KVM_CAP_S390_AIS_MIGRATION:
		r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	default:
		r = 0;
	}
	return r;
}

static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				    struct kvm_memory_slot *memslot)
{
	gfn_t cur_gfn, last_gfn;
	unsigned long address;
	struct gmap *gmap = kvm->arch.gmap;

	/* Loop over all guest pages */
	last_gfn = memslot->base_gfn + memslot->npages;
	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
		address = gfn_to_hva_memslot(memslot, cur_gfn);

		if (test_and_clear_guest_dirty(gmap->mm, address))
			mark_page_dirty(kvm, cur_gfn);
		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);
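
/*
 * Dirty logging: kvm_s390_sync_dirty_log() above folds the per-page
 * change-tracking state of the guest mapping into the memslot's dirty
 * bitmap; the ioctl below then hands that bitmap to user space and
 * clears it, so each invocation reports pages dirtied since the last.
 */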
/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}

static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}

static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_AIS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else {
			set_kvm_facility(kvm->arch.model.fac_mask, 72);
			set_kvm_facility(kvm->arch.model.fac_list, 72);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_GS:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(133)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 133);
			set_kvm_facility(kvm->arch.model.fac_list, 133);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
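
/*
 * VM attributes are reached via KVM_{GET,SET,HAS}_DEVICE_ATTR on the VM
 * file descriptor.  A minimal user-space sketch for querying the memory
 * limit handled below (error handling omitted):
 *
 *	__u64 limit;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MEM_CTRL,
 *		.attr  = KVM_S390_VM_MEM_LIMIT_SIZE,
 *		.addr  = (__u64)&limit,
 *	};
 *	ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);
 */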
"(not available)" : "(success)"); 599 break; 600 case KVM_CAP_S390_USER_STSI: 601 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI"); 602 kvm->arch.user_stsi = 1; 603 r = 0; 604 break; 605 case KVM_CAP_S390_USER_INSTR0: 606 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0"); 607 kvm->arch.user_instr0 = 1; 608 icpt_operexc_on_all_vcpus(kvm); 609 r = 0; 610 break; 611 default: 612 r = -EINVAL; 613 break; 614 } 615 return r; 616 } 617 618 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) 619 { 620 int ret; 621 622 switch (attr->attr) { 623 case KVM_S390_VM_MEM_LIMIT_SIZE: 624 ret = 0; 625 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes", 626 kvm->arch.mem_limit); 627 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr)) 628 ret = -EFAULT; 629 break; 630 default: 631 ret = -ENXIO; 632 break; 633 } 634 return ret; 635 } 636 637 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) 638 { 639 int ret; 640 unsigned int idx; 641 switch (attr->attr) { 642 case KVM_S390_VM_MEM_ENABLE_CMMA: 643 ret = -ENXIO; 644 if (!sclp.has_cmma) 645 break; 646 647 ret = -EBUSY; 648 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support"); 649 mutex_lock(&kvm->lock); 650 if (!kvm->created_vcpus) { 651 kvm->arch.use_cmma = 1; 652 ret = 0; 653 } 654 mutex_unlock(&kvm->lock); 655 break; 656 case KVM_S390_VM_MEM_CLR_CMMA: 657 ret = -ENXIO; 658 if (!sclp.has_cmma) 659 break; 660 ret = -EINVAL; 661 if (!kvm->arch.use_cmma) 662 break; 663 664 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states"); 665 mutex_lock(&kvm->lock); 666 idx = srcu_read_lock(&kvm->srcu); 667 s390_reset_cmma(kvm->arch.gmap->mm); 668 srcu_read_unlock(&kvm->srcu, idx); 669 mutex_unlock(&kvm->lock); 670 ret = 0; 671 break; 672 case KVM_S390_VM_MEM_LIMIT_SIZE: { 673 unsigned long new_limit; 674 675 if (kvm_is_ucontrol(kvm)) 676 return -EINVAL; 677 678 if (get_user(new_limit, (u64 __user *)attr->addr)) 679 return -EFAULT; 680 681 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT && 682 new_limit > kvm->arch.mem_limit) 683 return -E2BIG; 684 685 if (!new_limit) 686 return -EINVAL; 687 688 /* gmap_create takes last usable address */ 689 if (new_limit != KVM_S390_NO_MEM_LIMIT) 690 new_limit -= 1; 691 692 ret = -EBUSY; 693 mutex_lock(&kvm->lock); 694 if (!kvm->created_vcpus) { 695 /* gmap_create will round the limit up */ 696 struct gmap *new = gmap_create(current->mm, new_limit); 697 698 if (!new) { 699 ret = -ENOMEM; 700 } else { 701 gmap_remove(kvm->arch.gmap); 702 new->private = kvm; 703 kvm->arch.gmap = new; 704 ret = 0; 705 } 706 } 707 mutex_unlock(&kvm->lock); 708 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit); 709 VM_EVENT(kvm, 3, "New guest asce: 0x%pK", 710 (void *) kvm->arch.gmap->asce); 711 break; 712 } 713 default: 714 ret = -ENXIO; 715 break; 716 } 717 return ret; 718 } 719 720 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu); 721 722 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr) 723 { 724 struct kvm_vcpu *vcpu; 725 int i; 726 727 if (!test_kvm_facility(kvm, 76)) 728 return -EINVAL; 729 730 mutex_lock(&kvm->lock); 731 switch (attr->attr) { 732 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW: 733 get_random_bytes( 734 kvm->arch.crypto.crycb->aes_wrapping_key_mask, 735 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); 736 kvm->arch.crypto.aes_kw = 1; 737 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support"); 738 break; 739 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW: 740 get_random_bytes( 741 
static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_vcpu *vcpu;
	int i;

	if (!test_kvm_facility(kvm, 76))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		exit_sie(vcpu);
	}
	mutex_unlock(&kvm->lock);
	return 0;
}

static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
	int cx;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}

/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_s390_migration_state *mgs;
	struct kvm_memory_slot *ms;
	/* should be the only one */
	struct kvm_memslots *slots;
	unsigned long ram_pages;
	int slotnr;

	/* migration mode already enabled */
	if (kvm->arch.migration_state)
		return 0;

	slots = kvm_memslots(kvm);
	if (!slots || !slots->used_slots)
		return -EINVAL;

	mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
	if (!mgs)
		return -ENOMEM;
	kvm->arch.migration_state = mgs;

	if (kvm->arch.use_cmma) {
		/*
		 * Get the last slot. They should be sorted by base_gfn, so the
		 * last slot is also the one at the end of the address space.
		 * We have verified above that at least one slot is present.
		 */
		ms = slots->memslots + slots->used_slots - 1;
		/* round up so we only use full longs */
		ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
		/* allocate enough bytes to store all the bits */
		mgs->pgste_bitmap = vmalloc(ram_pages / 8);
		if (!mgs->pgste_bitmap) {
			kfree(mgs);
			kvm->arch.migration_state = NULL;
			return -ENOMEM;
		}

		mgs->bitmap_size = ram_pages;
		atomic64_set(&mgs->dirty_pages, ram_pages);
		/* mark all the pages in active slots as dirty */
		for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
			ms = slots->memslots + slotnr;
			bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
		}

		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	}
	return 0;
}
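
/*
 * Bitmap sizing above, worked example: a guest whose last memslot ends
 * at 4 GiB spans 0x100000 (1048576) 4 KiB pages; rounded up to a
 * multiple of BITS_PER_LONG this needs 1048576 bits, i.e. a 128 KiB
 * vmalloc'ed bitmap (ram_pages / 8 bytes).
 */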
/*
 * Must be called with kvm->lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	struct kvm_s390_migration_state *mgs;

	/* migration mode already disabled */
	if (!kvm->arch.migration_state)
		return 0;
	mgs = kvm->arch.migration_state;
	kvm->arch.migration_state = NULL;

	if (kvm->arch.use_cmma) {
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
		vfree(mgs->pgste_bitmap);
	}
	kfree(mgs);
	return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int idx, res = -ENXIO;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		idx = srcu_read_lock(&kvm->srcu);
		res = kvm_s390_vm_start_migration(kvm);
		srcu_read_unlock(&kvm->srcu, idx);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->lock);

	return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = (kvm->arch.migration_state != NULL);

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (test_kvm_facility(kvm, 139))
		kvm_s390_set_tod_clock_ext(kvm, &gtod);
	else if (gtod.epoch_idx == 0)
		kvm_s390_set_tod_clock(kvm, gtod.tod);
	else
		return -EINVAL;

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);

	return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
				       struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_s390_tod_clock_ext htod;

	preempt_disable();

	get_tod_clock_ext((char *)&htod);

	gtod->tod = htod.tod + kvm->arch.epoch;
	gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;

	if (gtod->tod < htod.tod)
		gtod->epoch_idx += 1;

	preempt_enable();
}
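
/*
 * Note the wrap check in kvm_s390_get_tod_clock_ext() above: when
 * adding the guest epoch overflows the 64-bit TOD value, the carry has
 * to propagate into the epoch index so that the combined
 * epoch-index/TOD pair of the multiple-epoch facility stays consistent.
 */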
static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));

	if (test_kvm_facility(kvm, 139))
		kvm_s390_get_tod_clock_ext(kvm, &gtod);
	else
		gtod.tod = kvm_s390_get_tod_clock_fast(kvm);

	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);
	return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}
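
/*
 * IBC clamping above: sclp.ibc encodes the lowest supported IBC value
 * in bits 16-27 and the highest unblocked one in bits 0-11.  A
 * requested value is forced into that range, e.g. with a lowest IBC of
 * 0x100 and an unblocked IBC of 0x123, a request for 0x130 is clamped
 * to 0x123 and a request for 0x090 is raised to 0x100.
 */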
static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}
	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	mutex_unlock(&kvm->lock);
	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once supported by kernel + hw, we have to store the subfunctions
	 * in kvm->arch and remember that user space configured them.
	 */
	return -ENXIO;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_list[0],
		 kvm->arch.model.fac_list[1],
		 kvm->arch.model.fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_mask[0],
		 mach->fac_mask[1],
		 mach->fac_mask[2]);
	VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_list[0],
		 mach->fac_list[1],
		 mach->fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}

static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}
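
/*
 * PROCESSOR_FEAT above reports what this VM is currently configured
 * with; MACHINE_FEAT below reports everything the host could offer, so
 * user space can compute a migration-safe intersection.
 */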
static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}

static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once we can actually configure subfunctions (kernel + hw support),
	 * we have to check if they were already set by user space, if so copy
	 * them from kvm->arch.
	 */
	return -ENXIO;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_set_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_get_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
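
/*
 * KVM_HAS_DEVICE_ATTR probing: return 0 if the attribute is supported
 * in this configuration and -ENXIO otherwise, without touching any
 * state, so user space can feature-test before issuing get/set calls.
 */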
static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = sclp.has_cmma ? 0 : -ENXIO;
			break;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
			ret = 0;
			break;
		/* configuring subfunctions is not supported yet */
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_MIGRATION:
		ret = 0;
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_use_skey(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		r = get_guest_storage_key(current->mm, hva, &keys[i]);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);

	if (!r) {
		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
				 sizeof(uint8_t) * args->count);
		if (r)
			r = -EFAULT;
	}

	kvfree(keys);
	return r;
}
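
/*
 * A storage key byte holds the access-control bits, fetch-protection
 * bit and the reference and change bits; the lowest-order bit is
 * reserved, which is why kvm_s390_set_skeys() below rejects any key
 * with 0x01 set.
 */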
static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			break;
		}

		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);
out:
	kvfree(keys);
	return r;
}

/*
 * Base address and length must be sent at the start of each block, therefore
 * it's cheaper to send some clean data, as long as it's less than the size of
 * two longs.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/* for consistency */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)

/*
 * This function searches for the next page with dirty CMMA attributes, and
 * saves the attributes in the buffer up to either the end of the buffer or
 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
 * no trailing clean bytes are saved.
 * In case no dirty bits were found, or if CMMA was not enabled or used, the
 * output buffer will indicate 0 as length.
 */
static int kvm_s390_get_cmma_bits(struct kvm *kvm,
				  struct kvm_s390_cmma_log *args)
{
	struct kvm_s390_migration_state *s = kvm->arch.migration_state;
	unsigned long bufsize, hva, pgstev, i, next, cur;
	int srcu_idx, peek, r = 0, rr;
	u8 *res;

	cur = args->start_gfn;
	i = next = pgstev = 0;

	if (unlikely(!kvm->arch.use_cmma))
		return -ENXIO;
	/* Invalid/unsupported flags were specified */
	if (args->flags & ~KVM_S390_CMMA_PEEK)
		return -EINVAL;
	/* Migration mode query, and we are not doing a migration */
	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
	if (!peek && !s)
		return -EINVAL;
	/* CMMA is disabled or was not used, or the buffer has length zero */
	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
	if (!bufsize || !kvm->mm->context.use_cmma) {
		memset(args, 0, sizeof(*args));
		return 0;
	}

	if (!peek) {
		/* We are not peeking, and there are no dirty pages */
		if (!atomic64_read(&s->dirty_pages)) {
			memset(args, 0, sizeof(*args));
			return 0;
		}
		cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
				    args->start_gfn);
		if (cur >= s->bitmap_size)	/* nothing found, loop back */
			cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
		if (cur >= s->bitmap_size) {	/* again! (very unlikely) */
			memset(args, 0, sizeof(*args));
			return 0;
		}
		next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
	}

	res = vmalloc(bufsize);
	if (!res)
		return -ENOMEM;

	args->start_gfn = cur;

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	while (i < bufsize) {
		hva = gfn_to_hva(kvm, cur);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}
		/* decrement only if we actually flipped the bit to 0 */
		if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
			atomic64_dec(&s->dirty_pages);
		r = get_pgste(kvm->mm, hva, &pgstev);
		if (r < 0)
			pgstev = 0;
		/* save the value */
		res[i++] = (pgstev >> 24) & 0x43;
		/*
		 * if the next bit is too far away, stop.
		 * if we reached the previous "next", find the next one
		 */
		if (!peek) {
			if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
				break;
			if (cur == next)
				next = find_next_bit(s->pgste_bitmap,
						     s->bitmap_size, cur + 1);
			/* reached the end of the bitmap or of the buffer, stop */
			if ((next >= s->bitmap_size) ||
			    (next >= args->start_gfn + bufsize))
				break;
		}
		cur++;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);
	args->count = i;
	args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;

	rr = copy_to_user((void __user *)args->values, res, args->count);
	if (rr)
		r = -EFAULT;

	vfree(res);
	return r;
}

/*
 * This function sets the CMMA attributes for the given pages. If the input
 * buffer has zero length, no action is taken, otherwise the attributes are
 * set and the mm->context.use_cmma flag is set.
 */
static int kvm_s390_set_cmma_bits(struct kvm *kvm,
				  const struct kvm_s390_cmma_log *args)
{
	unsigned long hva, mask, pgstev, i;
	uint8_t *bits;
	int srcu_idx, r = 0;

	mask = args->mask;

	if (!kvm->arch.use_cmma)
		return -ENXIO;
	/* invalid/unsupported flags */
	if (args->flags != 0)
		return -EINVAL;
	/* Enforce sane limit on memory allocation */
	if (args->count > KVM_S390_CMMA_SIZE_MAX)
		return -EINVAL;
	/* Nothing to do */
	if (args->count == 0)
		return 0;

	bits = vmalloc(sizeof(*bits) * args->count);
	if (!bits)
		return -ENOMEM;

	r = copy_from_user(bits, (void __user *)args->values, args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		pgstev = bits[i];
		pgstev = pgstev << 24;
		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
		set_pgste_bits(kvm->mm, hva, mask, pgstev);
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);

	if (!kvm->mm->context.use_cmma) {
		down_write(&kvm->mm->mmap_sem);
		kvm->mm->context.use_cmma = 1;
		up_write(&kvm->mm->mmap_sem);
	}
out:
	vfree(bits);
	return r;
}
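
/*
 * Typical CMMA migration flow (a sketch, not enforced by the kernel):
 * the source enables migration mode via the KVM_S390_VM_MIGRATION
 * attribute group, then calls KVM_S390_GET_CMMA_BITS repeatedly until
 * args->remaining drops to zero, and the destination replays the logs
 * with KVM_S390_SET_CMMA_BITS before resuming the guest.
 */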
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_get_skeys(kvm, &args);
		break;
	}
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	case KVM_S390_GET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		r = kvm_s390_get_cmma_bits(kvm, &args);
		if (!r) {
			r = copy_to_user(argp, &args, sizeof(args));
			if (r)
				r = -EFAULT;
		}
		break;
	}
	case KVM_S390_SET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		r = kvm_s390_set_cmma_bits(kvm, &args);
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}

static int kvm_s390_query_ap_config(u8 *config)
{
	u32 fcn_code = 0x04000000UL;
	u32 cc = 0;

	memset(config, 0, 128);
	asm volatile(
		"lgr 0,%1\n"
		"lgr 2,%2\n"
		".long 0xb2af0000\n"		/* PQAP(QCI) */
		"0: ipm %0\n"
		"srl %0,28\n"
		"1:\n"
		EX_TABLE(0b, 1b)
		: "+r" (cc)
		: "r" (fcn_code), "r" (config)
		: "cc", "0", "2", "memory"
	);

	return cc;
}

static int kvm_s390_apxa_installed(void)
{
	u8 config[128];
	int cc;

	if (test_facility(12)) {
		cc = kvm_s390_query_ap_config(config);

		if (cc)
			pr_err("PQAP(QCI) failed with cc=%d", cc);
		else
			return config[0] & 0x40;
	}

	return 0;
}

static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
	else
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}

static u64 kvm_s390_get_initial_cpuid(void)
{
	struct cpuid cpuid;

	get_cpu_id(&cpuid);
	cpuid.version = 0xff;
	return *((u64 *) &cpuid);
}
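
/*
 * Wrapping key masks are freshly randomized for every VM below, so
 * protected-key material generated in one guest is unusable in any
 * other guest or on the host.
 */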
static void kvm_s390_crypto_init(struct kvm *kvm)
{
	if (!test_kvm_facility(kvm, 76))
		return;

	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
	kvm_s390_set_crycb_format(kvm);

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
}

static void sca_dispose(struct kvm *kvm)
{
	if (kvm->arch.use_esca)
		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
	else
		free_page((unsigned long)(kvm->arch.sca));
	kvm->arch.sca = NULL;
}

int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	gfp_t alloc_flags = GFP_KERNEL;
	int i, rc;
	char debug_name[16];
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	rc = -ENOMEM;

	kvm->arch.use_esca = 0; /* start with basic SCA */
	if (!sclp.has_64bscao)
		alloc_flags |= GFP_DMA;
	rwlock_init(&kvm->arch.sca_lock);
	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
	if (!kvm->arch.sca)
		goto out_err;
	spin_lock(&kvm_lock);
	sca_offset += 16;
	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
		sca_offset = 0;
	kvm->arch.sca = (struct bsca_block *)
			((char *) kvm->arch.sca + sca_offset);
	spin_unlock(&kvm_lock);

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_err;

	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
	kvm->arch.sie_page2 =
	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.sie_page2)
		goto out_err;

	/* Populate the facility mask initially. */
	memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
		if (i < kvm_s390_fac_list_mask_size())
			kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
		else
			kvm->arch.model.fac_mask[i] = 0UL;
	}
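
	/*
	 * The loop above computes the intersection of the host facility
	 * list and the KVM whitelist: a facility bit survives only if the
	 * host reports it via STFLE and FACILITIES_KVM allows passing it
	 * through; bits beyond the mask size are cleared outright.
	 */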
	/* Populate the facility list initially. */
	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
	memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);

	/* we are always in czam mode - even on pre z14 machines */
	set_kvm_facility(kvm->arch.model.fac_mask, 138);
	set_kvm_facility(kvm->arch.model.fac_list, 138);
	/* we emulate STHYI in kvm */
	set_kvm_facility(kvm->arch.model.fac_mask, 74);
	set_kvm_facility(kvm->arch.model.fac_list, 74);
	if (MACHINE_HAS_TLB_GUEST) {
		set_kvm_facility(kvm->arch.model.fac_mask, 147);
		set_kvm_facility(kvm->arch.model.fac_list, 147);
	}

	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
	kvm->arch.model.ibc = sclp.ibc & 0x0fff;

	kvm_s390_crypto_init(kvm);

	mutex_init(&kvm->arch.float_int.ais_lock);
	kvm->arch.float_int.simm = 0;
	kvm->arch.float_int.nimm = 0;
	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "vm created with type %lu", type);

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
	} else {
		if (sclp.hamax == U64_MAX)
			kvm->arch.mem_limit = TASK_SIZE_MAX;
		else
			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
						    sclp.hamax + 1);
		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
		if (!kvm->arch.gmap)
			goto out_err;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.css_support = 0;
	kvm->arch.use_irqchip = 0;
	kvm->arch.epoch = 0;

	spin_lock_init(&kvm->arch.start_stop_lock);
	kvm_s390_vsie_init(kvm);
	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);

	return 0;
out_err:
	free_page((unsigned long)kvm->arch.sie_page2);
	debug_unregister(kvm->arch.dbf);
	sca_dispose(kvm);
	KVM_EVENT(3, "creation of vm failed: %d", rc);
	return rc;
}

bool kvm_arch_has_vcpu_debugfs(void)
{
	return false;
}

int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
	return 0;
}

void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	kvm_s390_clear_local_irqs(vcpu);
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_is_ucontrol(vcpu->kvm))
		sca_del_vcpu(vcpu);

	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_remove(vcpu->arch.gmap);

	if (vcpu->kvm->arch.use_cmma)
		kvm_s390_vcpu_unsetup_cmma(vcpu);
	free_page((unsigned long)(vcpu->arch.sie_block));

	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}

static void kvm_free_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}
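
/*
 * VM teardown order matters: VCPUs are destroyed first since they
 * reference the SCA and the gmap, then the SCA, debug feature and
 * sie_page2 are released, and finally the gmap itself (for non-ucontrol
 * guests) together with any leftover migration state.
 */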
void kvm_arch_destroy_vm(struct kvm *kvm)
{
	kvm_free_vcpus(kvm);
	sca_dispose(kvm);
	debug_unregister(kvm->arch.dbf);
	free_page((unsigned long)kvm->arch.sie_page2);
	if (!kvm_is_ucontrol(kvm))
		gmap_remove(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	kvm_s390_vsie_destroy(kvm);
	if (kvm->arch.migration_state) {
		vfree(kvm->arch.migration_state->pgste_bitmap);
		kfree(kvm->arch.migration_state);
	}
	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}

/* Section: vcpu related */
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
	if (!vcpu->arch.gmap)
		return -ENOMEM;
	vcpu->arch.gmap->private = vcpu->kvm;

	return 0;
}

static void sca_del_vcpu(struct kvm_vcpu *vcpu)
{
	if (!kvm_s390_use_sca_entries())
		return;
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}

static void sca_add_vcpu(struct kvm_vcpu *vcpu)
{
	if (!kvm_s390_use_sca_entries()) {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		/* we still need the basic sca for the ipte control */
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
	}
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}

/* Basic SCA to Extended SCA data copy routines */
static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
{
	d->sda = s->sda;
	d->sigp_ctrl.c = s->sigp_ctrl.c;
	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
}

static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
{
	int i;

	d->ipte_control = s->ipte_control;
	d->mcn[0] = s->mcn;
	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
}
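/*
 * Switching from the basic to the extended SCA must be invisible to the
 * guest: all VCPUs are blocked and kicked out of SIE, the entries are
 * copied under the sca_lock write lock, every SIE block is repointed to
 * the new block, and only then are the VCPUs unblocked again.
 */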
static int sca_switch_to_extended(struct kvm *kvm)
{
	struct bsca_block *old_sca = kvm->arch.sca;
	struct esca_block *new_sca;
	struct kvm_vcpu *vcpu;
	unsigned int vcpu_idx;
	u32 scaol, scaoh;

	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
	if (!new_sca)
		return -ENOMEM;

	scaoh = (u32)((u64)(new_sca) >> 32);
	scaol = (u32)(u64)(new_sca) & ~0x3fU;

	kvm_s390_vcpu_block_all(kvm);
	write_lock(&kvm->arch.sca_lock);

	sca_copy_b_to_e(new_sca, old_sca);

	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
		vcpu->arch.sie_block->scaoh = scaoh;
		vcpu->arch.sie_block->scaol = scaol;
		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
	}
	kvm->arch.sca = new_sca;
	kvm->arch.use_esca = 1;

	write_unlock(&kvm->arch.sca_lock);
	kvm_s390_vcpu_unblock_all(kvm);

	free_page((unsigned long)old_sca);

	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
		 old_sca, kvm->arch.sca);
	return 0;
}

static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
{
	int rc;

	if (!kvm_s390_use_sca_entries()) {
		if (id < KVM_MAX_VCPUS)
			return true;
		return false;
	}
	if (id < KVM_S390_BSCA_CPU_SLOTS)
		return true;
	if (!sclp.has_esca || !sclp.has_64bscao)
		return false;

	mutex_lock(&kvm->lock);
	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
	mutex_unlock(&kvm->lock);

	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
}

int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
				    KVM_SYNC_GPRS |
				    KVM_SYNC_ACRS |
				    KVM_SYNC_CRS |
				    KVM_SYNC_ARCH0 |
				    KVM_SYNC_PFAULT;
	kvm_s390_set_prefix(vcpu, 0);
	if (test_kvm_facility(vcpu->kvm, 64))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
	if (test_kvm_facility(vcpu->kvm, 133))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
	/* fprs can be synchronized via vrs, even if the guest has no vx. With
	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
	 */
	if (MACHINE_HAS_VX)
		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
	else
		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;

	if (kvm_is_ucontrol(vcpu->kvm))
		return __kvm_ucontrol_vcpu_init(vcpu);

	return 0;
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.cputm_start = get_tod_clock_fast();
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	vcpu->arch.cputm_start = 0;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
	vcpu->arch.cputm_enabled = true;
	__start_cpu_timer_accounting(vcpu);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
	__stop_cpu_timer_accounting(vcpu);
	vcpu->arch.cputm_enabled = false;
}
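/*
 * The wrappers below only bracket the __-prefixed helpers with
 * preempt_disable()/preempt_enable(); the helpers themselves must run
 * with preemption off so that neither a TOD sync nor a vcpu_load/put
 * can slip in between the seqcount write sections.
 */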
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}

static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}

/* set the cpu timer - may only be called from the VCPU thread itself */
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	if (vcpu->arch.cputm_enabled)
		vcpu->arch.cputm_start = get_tod_clock_fast();
	vcpu->arch.sie_block->cputm = cputm;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
	preempt_enable();
}

/* update and get the cpu timer - can also be called from other VCPU threads */
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
{
	unsigned int seq;
	__u64 value;

	if (unlikely(!vcpu->arch.cputm_enabled))
		return vcpu->arch.sie_block->cputm;

	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	do {
		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
		/*
		 * If the writer would ever execute a read in the critical
		 * section, e.g. in irq context, we have a deadlock.
		 */
		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
		value = vcpu->arch.sie_block->cputm;
		/* if cputm_start is 0, accounting is being started/stopped */
		if (likely(vcpu->arch.cputm_start))
			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
	preempt_enable();
	return value;
}

void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	gmap_enable(vcpu->arch.enabled_gmap);
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__start_cpu_timer_accounting(vcpu);
	vcpu->cpu = cpu;
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	vcpu->cpu = -1;
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__stop_cpu_timer_accounting(vcpu);
	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
	vcpu->arch.enabled_gmap = gmap_get_enabled();
	gmap_disable(vcpu->arch.enabled_gmap);
}

static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
	/* this equals initial cpu reset in pop, but we don't switch to ESA */
	vcpu->arch.sie_block->gpsw.mask = 0UL;
	vcpu->arch.sie_block->gpsw.addr = 0UL;
	kvm_s390_set_prefix(vcpu, 0);
	kvm_s390_set_cpu_timer(vcpu, 0);
	vcpu->arch.sie_block->ckc = 0UL;
	vcpu->arch.sie_block->todpr = 0;
	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
	vcpu->arch.sie_block->gcr[0] = 0xE0UL;
	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
	/* make sure the new fpc will be lazily loaded */
	save_fpu_regs();
	current->thread.fpu.fpc = 0;
	vcpu->arch.sie_block->gbea = 1;
	vcpu->arch.sie_block->pp = 0;
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
		kvm_s390_vcpu_stop(vcpu);
	kvm_s390_clear_local_irqs(vcpu);
}

void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	mutex_lock(&vcpu->kvm->lock);
	preempt_disable();
	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
	preempt_enable();
	mutex_unlock(&vcpu->kvm->lock);
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
		sca_add_vcpu(vcpu);
	}
	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
	/* make vcpu_load load the right gmap on the first trigger */
	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
{
	if (!test_kvm_facility(vcpu->kvm, 76))
		return;

	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);

	if (vcpu->kvm->arch.crypto.aes_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
	if (vcpu->kvm->arch.crypto.dea_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;

	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
}

void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
	free_page(vcpu->arch.sie_block->cbrlo);
	vcpu->arch.sie_block->cbrlo = 0;
}

int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
{
	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
	if (!vcpu->arch.sie_block->cbrlo)
		return -ENOMEM;

	vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
	return 0;
}

static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;

	vcpu->arch.sie_block->ibc = model->ibc;
	if (test_kvm_facility(vcpu->kvm, 7))
		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
}

int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
	int rc = 0;

	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
						    CPUSTAT_SM |
						    CPUSTAT_STOPPED);

	if (test_kvm_facility(vcpu->kvm, 78))
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
	else if (test_kvm_facility(vcpu->kvm, 8))
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);

	kvm_s390_vcpu_setup_model(vcpu);

	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
	if (MACHINE_HAS_ESOP)
		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
	if (test_kvm_facility(vcpu->kvm, 9))
		vcpu->arch.sie_block->ecb |= ECB_SRSI;
	if (test_kvm_facility(vcpu->kvm, 73))
		vcpu->arch.sie_block->ecb |= ECB_TE;

	if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
	if (test_kvm_facility(vcpu->kvm, 130))
		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
	if (sclp.has_cei)
		vcpu->arch.sie_block->eca |= ECA_CEI;
	if (sclp.has_ib)
		vcpu->arch.sie_block->eca |= ECA_IB;
	if (sclp.has_siif)
		vcpu->arch.sie_block->eca |= ECA_SII;
	if (sclp.has_sigpif)
		vcpu->arch.sie_block->eca |= ECA_SIGPI;
	if (test_kvm_facility(vcpu->kvm, 129)) {
		vcpu->arch.sie_block->eca |= ECA_VX;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
	}
	if (test_kvm_facility(vcpu->kvm, 139))
		vcpu->arch.sie_block->ecd |= ECD_MEF;

	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
					| SDNXC;
	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;

	if (sclp.has_kss)
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
	else
		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

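	/*
	 * CMMA needs a per-vcpu collection buffer; a sketch of the flow,
	 * assuming the VM-level use_cmma attribute was set by userspace:
	 * kvm_s390_vcpu_setup_cmma() above backs cbrlo with a zeroed page
	 * and clears ECB2_PFMFI, and kvm_s390_vcpu_unsetup_cmma() frees
	 * that page again on vcpu destruction.
	 */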
	if (vcpu->kvm->arch.use_cmma) {
		rc = kvm_s390_vcpu_setup_cmma(vcpu);
		if (rc)
			return rc;
	}
	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;

	kvm_s390_vcpu_crypto_setup(vcpu);

	return rc;
}

struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
				      unsigned int id)
{
	struct kvm_vcpu *vcpu;
	struct sie_page *sie_page;
	int rc = -EINVAL;

	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
		goto out;

	rc = -ENOMEM;

	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu)
		goto out;

	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
	if (!sie_page)
		goto out_free_cpu;

	vcpu->arch.sie_block = &sie_page->sie_block;
	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

	/* the real guest size will always be smaller than msl */
	vcpu->arch.sie_block->mso = 0;
	vcpu->arch.sie_block->msl = sclp.hamax;

	vcpu->arch.sie_block->icpua = id;
	spin_lock_init(&vcpu->arch.local_int.lock);
	seqcount_init(&vcpu->arch.cputm_seqcount);

	rc = kvm_vcpu_init(vcpu, kvm, id);
	if (rc)
		goto out_free_sie_block;
	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
		 vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);

	return vcpu;
out_free_sie_block:
	free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(rc);
}

int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_s390_vcpu_has_irq(vcpu, 0);
}

bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
{
	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
}

void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
}
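/*
 * The block/request helpers above work in pairs: kvm_s390_vcpu_block()
 * and kvm_s390_vcpu_request() set PROG_BLOCK_SIE resp. PROG_REQUEST in
 * prog20 and kick the CPU via exit_sie(); the *_unblock() and
 * *_request_handled() counterparts clear the bits once the condition
 * is gone, allowing SIE entry again.
 */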
/*
 * Kick a guest cpu out of SIE and wait until SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 */
void exit_sie(struct kvm_vcpu *vcpu)
{
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}

/* Kick a guest cpu out of SIE to process a request synchronously */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end)
{
	struct kvm *kvm = gmap->private;
	struct kvm_vcpu *vcpu;
	unsigned long prefix;
	int i;

	if (gmap_is_shadow(gmap))
		return;
	if (start >= 1UL << 31)
		/* We are only interested in prefix pages */
		return;
	kvm_for_each_vcpu(i, vcpu, kvm) {
		/* match against both prefix pages */
		prefix = kvm_s390_get_prefix(vcpu);
		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
				   start, end);
			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
		}
	}
}

int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}

static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = put_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = put_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = put_user(kvm_s390_get_cpu_timer(vcpu),
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = put_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = put_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = put_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = put_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = put_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = put_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
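/*
 * Illustrative userspace sketch (not part of this file) for reading one
 * of the registers handled above, e.g. the CPU timer; "vcpu_fd" is an
 * assumed open vcpu file descriptor:
 *
 *	__u64 cputm;
 *	struct kvm_one_reg reg = {
 *		.id   = KVM_REG_S390_CPU_TIMER,
 *		.addr = (__u64)(unsigned long)&cputm,
 *	};
 *	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
 */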
static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;
	__u64 val;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = get_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = get_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = get_user(val, (u64 __user *)reg->addr);
		if (!r)
			kvm_s390_set_cpu_timer(vcpu, val);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = get_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = get_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = get_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = get_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = get_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = get_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}

static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
	return 0;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	if (test_fp_ctl(fpu->fpc))
		return -EINVAL;
	vcpu->run->s.regs.fpc = fpu->fpc;
	if (MACHINE_HAS_VX)
		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
				 (freg_t *) fpu->fprs);
	else
		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	/* make sure we have the latest values */
	save_fpu_regs();
	if (MACHINE_HAS_VX)
		convert_vx_to_fp((freg_t *) fpu->fprs,
				 (__vector128 *) vcpu->run->s.regs.vrs);
	else
		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->run->s.regs.fpc;
	return 0;
}

static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!is_vcpu_stopped(vcpu))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}

#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	int rc = 0;

	vcpu->guest_debug = 0;
	kvm_s390_clear_bp_data(vcpu);

	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
		return -EINVAL;
	if (!sclp.has_gpere)
		return -EINVAL;

	if (dbg->control & KVM_GUESTDBG_ENABLE) {
		vcpu->guest_debug = dbg->control;
		/* enforce guest PER */
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);

		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
			rc = kvm_s390_import_bp_data(vcpu, dbg);
	} else {
		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
		vcpu->arch.guestdbg.last_bp = 0;
	}

	if (rc) {
		vcpu->guest_debug = 0;
		kvm_s390_clear_bp_data(vcpu);
		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
	}

	return rc;
}

int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	/* CHECK_STOP and LOAD are not supported yet */
	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
				       KVM_MP_STATE_OPERATING;
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int rc = 0;

	/* user space knows about this interface - let it control the state */
	vcpu->kvm->arch.user_cpu_state_ctrl = 1;

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_STOPPED:
		kvm_s390_vcpu_stop(vcpu);
		break;
	case KVM_MP_STATE_OPERATING:
		kvm_s390_vcpu_start(vcpu);
		break;
	case KVM_MP_STATE_LOAD:
	case KVM_MP_STATE_CHECK_STOP:
		/* fall through - CHECK_STOP and LOAD are not supported yet */
	default:
		rc = -ENXIO;
	}

	return rc;
}

static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
}

static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	kvm_s390_vcpu_request_handled(vcpu);
	if (!kvm_request_pending(vcpu))
		return 0;
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Let's just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;

		rc = gmap_mprotect_notify(vcpu->arch.gmap,
					  kvm_s390_get_prefix(vcpu),
					  PAGE_SIZE * 2, PROT_WRITE);
		if (rc) {
			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
			return rc;
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		vcpu->arch.sie_block->ihcpu = 0xffff;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
		/*
		 * Disable CMMA virtualization; we will emulate the ESSA
		 * instruction manually, in order to provide additional
		 * functionalities needed for live migration.
		 */
		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
		/*
		 * Re-enable CMMA virtualization if CMMA is available and
		 * was used.
		 */
		if ((vcpu->kvm->arch.use_cmma) &&
		    (vcpu->kvm->mm->context.use_cmma))
			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
		goto retry;
	}

	/* nothing to do, just clear the request */
	kvm_clear_request(KVM_REQ_UNHALT, vcpu);

	return 0;
}

void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
				const struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_vcpu *vcpu;
	struct kvm_s390_tod_clock_ext htod;
	int i;

	mutex_lock(&kvm->lock);
	preempt_disable();

	get_tod_clock_ext((char *)&htod);

	kvm->arch.epoch = gtod->tod - htod.tod;
	kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;

	if (kvm->arch.epoch > gtod->tod)
		kvm->arch.epdx -= 1;

	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(i, vcpu, kvm) {
		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
		vcpu->arch.sie_block->epdx = kvm->arch.epdx;
	}

	kvm_s390_vcpu_unblock_all(kvm);
	preempt_enable();
	mutex_unlock(&kvm->lock);
}

void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
{
	struct kvm_vcpu *vcpu;
	int i;

	mutex_lock(&kvm->lock);
	preempt_disable();
	kvm->arch.epoch = tod - get_tod_clock();
	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(i, vcpu, kvm)
		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
	kvm_s390_vcpu_unblock_all(kvm);
	preempt_enable();
	mutex_unlock(&kvm->lock);
}
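/*
 * Both TOD setters above store only the delta between the requested
 * guest TOD and the current host TOD: guest_tod = host_tod + epoch
 * (mod 2^64). In the extended variant an underflowing subtraction
 * borrows one from the epoch index (epdx), which is what the
 * "epoch > gtod->tod" check handles.
 */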
/**
 * kvm_arch_fault_in_page - fault-in guest page if necessary
 * @vcpu: The corresponding virtual cpu
 * @gpa: Guest physical address
 * @writable: Whether the page should be writable or not
 *
 * Make sure that a guest page has been faulted-in on the host.
 *
 * Return: Zero on success, negative error code otherwise.
 */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
{
	return gmap_fault(vcpu->arch.gmap, gpa,
			  writable ? FAULT_FLAG_WRITE : 0);
}

static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				      unsigned long token)
{
	struct kvm_s390_interrupt inti;
	struct kvm_s390_irq irq;

	if (start_token) {
		irq.u.ext.ext_params2 = token;
		irq.type = KVM_S390_INT_PFAULT_INIT;
		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
	} else {
		inti.type = KVM_S390_INT_PFAULT_DONE;
		inti.parm64 = token;
		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}
}

void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly, but we still
	 * want check_async_completion to clean up.
	 */
	return true;
}

static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
	hva_t hva;
	struct kvm_arch_async_pf arch;
	int rc;

	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
		return 0;
	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
		return 0;
	if (psw_extint_disabled(vcpu))
		return 0;
	if (kvm_s390_vcpu_has_irq(vcpu, 0))
		return 0;
	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
		return 0;
	if (!vcpu->arch.gmap->pfault_enabled)
		return 0;

	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
	hva += current->thread.gmap_addr & ~PAGE_MASK;
	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
		return 0;

	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
	return rc;
}

static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the housekeeping for completed pfaults is
	 * handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];

	if (need_resched())
		schedule();

	if (test_cpu_flag(CIF_MCCK_PENDING))
		s390_handle_mcck();

	if (!kvm_is_ucontrol(vcpu->kvm)) {
		rc = kvm_s390_deliver_pending_interrupts(vcpu);
		if (rc)
			return rc;
	}

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	if (guestdbg_enabled(vcpu)) {
		kvm_s390_backup_guest_per_regs(vcpu);
		kvm_s390_patch_guest_per_regs(vcpu);
	}

	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);

	return 0;
}
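/*
 * Fallback for SIE exits caused by a host-side DAT fault that could not
 * be resolved otherwise: inject an addressing exception into the guest;
 * the comment inside the function explains the PSW forwarding.
 */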
3166 */ 3167 pgm_info = vcpu->arch.pgm; 3168 ilen = 4; 3169 } 3170 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID; 3171 kvm_s390_forward_psw(vcpu, ilen); 3172 return kvm_s390_inject_prog_irq(vcpu, &pgm_info); 3173 } 3174 3175 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) 3176 { 3177 struct mcck_volatile_info *mcck_info; 3178 struct sie_page *sie_page; 3179 3180 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", 3181 vcpu->arch.sie_block->icptcode); 3182 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); 3183 3184 if (guestdbg_enabled(vcpu)) 3185 kvm_s390_restore_guest_per_regs(vcpu); 3186 3187 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14; 3188 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15; 3189 3190 if (exit_reason == -EINTR) { 3191 VCPU_EVENT(vcpu, 3, "%s", "machine check"); 3192 sie_page = container_of(vcpu->arch.sie_block, 3193 struct sie_page, sie_block); 3194 mcck_info = &sie_page->mcck_info; 3195 kvm_s390_reinject_machine_check(vcpu, mcck_info); 3196 return 0; 3197 } 3198 3199 if (vcpu->arch.sie_block->icptcode > 0) { 3200 int rc = kvm_handle_sie_intercept(vcpu); 3201 3202 if (rc != -EOPNOTSUPP) 3203 return rc; 3204 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC; 3205 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; 3206 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; 3207 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; 3208 return -EREMOTE; 3209 } else if (exit_reason != -EFAULT) { 3210 vcpu->stat.exit_null++; 3211 return 0; 3212 } else if (kvm_is_ucontrol(vcpu->kvm)) { 3213 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; 3214 vcpu->run->s390_ucontrol.trans_exc_code = 3215 current->thread.gmap_addr; 3216 vcpu->run->s390_ucontrol.pgm_code = 0x10; 3217 return -EREMOTE; 3218 } else if (current->thread.gmap_pfault) { 3219 trace_kvm_s390_major_guest_pfault(vcpu); 3220 current->thread.gmap_pfault = 0; 3221 if (kvm_arch_setup_async_pf(vcpu)) 3222 return 0; 3223 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1); 3224 } 3225 return vcpu_post_run_fault_in_sie(vcpu); 3226 } 3227 3228 static int __vcpu_run(struct kvm_vcpu *vcpu) 3229 { 3230 int rc, exit_reason; 3231 3232 /* 3233 * We try to hold kvm->srcu during most of vcpu_run (except when run- 3234 * ning the guest), so that memslots (and other stuff) are protected 3235 */ 3236 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 3237 3238 do { 3239 rc = vcpu_pre_run(vcpu); 3240 if (rc) 3241 break; 3242 3243 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 3244 /* 3245 * As PF_VCPU will be used in fault handler, between 3246 * guest_enter and guest_exit should be no uaccess. 
		local_irq_disable();
		guest_enter_irqoff();
		__disable_cpu_timer_accounting(vcpu);
		local_irq_enable();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		local_irq_disable();
		__enable_cpu_timer_accounting(vcpu);
		guest_exit_irqoff();
		local_irq_enable();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}
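/*
 * sync_regs()/store_regs() below shuttle register state between the
 * kvm_run area and the SIE block around every KVM_RUN: whatever
 * userspace marked in kvm_run->kvm_dirty_regs is loaded before entry,
 * and the state advertised in kvm_valid_regs is written back on exit.
 */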
static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	struct runtime_instr_cb *riccb;
	struct gs_cb *gscb;

	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	/*
	 * If userspace sets the riccb (e.g. after migration) to a valid state,
	 * we should enable RI here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
	    test_kvm_facility(vcpu->kvm, 64) &&
	    riccb->v &&
	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
	}
	/*
	 * If userspace sets the gscb (e.g. after migration) to non-zero,
	 * we should enable GS here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
	    test_kvm_facility(vcpu->kvm, 133) &&
	    gscb->gssm &&
	    !vcpu->arch.gs_enabled) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
		vcpu->arch.sie_block->ecb |= ECB_GS;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
		vcpu->arch.gs_enabled = 1;
	}
	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	/* save host (userspace) fprs/vrs */
	save_fpu_regs();
	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
	if (MACHINE_HAS_VX)
		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
	else
		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;
	if (MACHINE_HAS_GS) {
		preempt_disable();
		__ctl_set_bit(2, 4);
		if (current->thread.gs_cb) {
			vcpu->arch.host_gscb = current->thread.gs_cb;
			save_gs_cb(vcpu->arch.host_gscb);
		}
		if (vcpu->arch.gs_enabled) {
			current->thread.gs_cb = (struct gs_cb *)
						&vcpu->run->s.regs.gscb;
			restore_gs_cb(current->thread.gs_cb);
		}
		preempt_enable();
	}

	kvm_run->kvm_dirty_regs = 0;
}

static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
	save_access_regs(vcpu->run->s.regs.acrs);
	restore_access_regs(vcpu->arch.host_acrs);
	/* Save guest register state */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	/* Restore will be done lazily at return */
	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
	if (MACHINE_HAS_GS) {
		__ctl_set_bit(2, 4);
		if (vcpu->arch.gs_enabled)
			save_gs_cb(current->thread.gs_cb);
		preempt_disable();
		current->thread.gs_cb = vcpu->arch.host_gscb;
		restore_gs_cb(vcpu->arch.host_gscb);
		preempt_enable();
		if (!vcpu->arch.host_gscb)
			__ctl_clear_bit(2, 4);
		vcpu->arch.host_gscb = NULL;
	}
}

int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int rc;

	if (kvm_run->immediate_exit)
		return -EINTR;

	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		return 0;
	}

	kvm_sigset_activate(vcpu);

	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		return -EINVAL;
	}

	sync_regs(vcpu, kvm_run);
	enable_cpu_timer_accounting(vcpu);
	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}

	disable_cpu_timer_accounting(vcpu);
	store_regs(vcpu, kvm_run);

	kvm_sigset_deactivate(vcpu);

	vcpu->stat.exit_userspace++;
	return rc;
}

/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp, cputm;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else
		gpa -= __LC_FPREGS_SAVE_AREA;

	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	cputm = kvm_s390_get_cpu_timer(vcpu);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}
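/*
 * The __LC_*_SAVE_AREA offsets used above follow the architected
 * store-status layout in the (absolute or prefixed) lowcore: 128 bytes
 * of FP/VX registers, 128 bytes of GPRs, the 16-byte PSW, and so on;
 * byte 163 carries the archmode indicator stored for the NOADDR and
 * PREFIXED special cases.
 */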
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * switch in the run ioctl. Let's update our copies before we save
	 * them into the save area.
	 */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}

static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	if (!sclp.has_ibs)
		return;
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}

void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}

void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}

static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
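/*
 * Illustrative userspace sketch (not part of this file) for the memory
 * transfer handler below; "vcpu_fd", the guest address and the buffer
 * size are assumptions:
 *
 *	__u8 buf[512];
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = 0x10000,
 *		.size  = sizeof(buf),
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)(unsigned long)buf,
 *		.ar    = 0,
 *	};
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 */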
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r, srcu_idx;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags)
		return -EINVAL;

	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;

	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_FETCH);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_STORE);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}

long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			break;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}

int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}

/* Section: memory related */
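/*
 * Both the userspace address and the size of a memslot must be 1MB
 * aligned (the 0xffffful checks below); for example, a slot with
 * userspace_addr = 16MB and memory_size = 256MB passes, while any
 * sub-megabyte offset or size is rejected with -EINVAL.
 */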
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. Memory slots must start and end on a segment
	   boundary (1MB). The memory in userland may be fragmented into
	   various different vmas, and it is okay to mmap() and munmap()
	   stuff in this slot at any time after doing this call. */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
		return -EINVAL;

	return 0;
}

void kvm_arch_commit_memory_region(struct kvm *kvm,
				   const struct kvm_userspace_memory_region *mem,
				   const struct kvm_memory_slot *old,
				   const struct kvm_memory_slot *new,
				   enum kvm_mr_change change)
{
	int rc;

	/*
	 * If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
			      mem->guest_phys_addr, mem->memory_size);
	if (rc)
		pr_warn("failed to commit memory region\n");
	return;
}

static inline unsigned long nonhyp_mask(int i)
{
	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}

void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
	vcpu->valid_wakeup = false;
}

static int __init kvm_s390_init(void)
{
	int i;

	if (!sclp.has_sief2) {
		pr_info("SIE not available\n");
		return -ENODEV;
	}

	for (i = 0; i < 16; i++)
		kvm_s390_fac_list_mask[i] |=
			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");