1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * hosting IBM Z kernel virtual machines (s390x) 4 * 5 * Copyright IBM Corp. 2008, 2020 6 * 7 * Author(s): Carsten Otte <cotte@de.ibm.com> 8 * Christian Borntraeger <borntraeger@de.ibm.com> 9 * Heiko Carstens <heiko.carstens@de.ibm.com> 10 * Christian Ehrhardt <ehrhardt@de.ibm.com> 11 * Jason J. Herne <jjherne@us.ibm.com> 12 */ 13 14 #define KMSG_COMPONENT "kvm-s390" 15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 16 17 #include <linux/compiler.h> 18 #include <linux/err.h> 19 #include <linux/fs.h> 20 #include <linux/hrtimer.h> 21 #include <linux/init.h> 22 #include <linux/kvm.h> 23 #include <linux/kvm_host.h> 24 #include <linux/mman.h> 25 #include <linux/module.h> 26 #include <linux/moduleparam.h> 27 #include <linux/random.h> 28 #include <linux/slab.h> 29 #include <linux/timer.h> 30 #include <linux/vmalloc.h> 31 #include <linux/bitmap.h> 32 #include <linux/sched/signal.h> 33 #include <linux/string.h> 34 35 #include <asm/asm-offsets.h> 36 #include <asm/lowcore.h> 37 #include <asm/stp.h> 38 #include <asm/pgtable.h> 39 #include <asm/gmap.h> 40 #include <asm/nmi.h> 41 #include <asm/switch_to.h> 42 #include <asm/isc.h> 43 #include <asm/sclp.h> 44 #include <asm/cpacf.h> 45 #include <asm/timex.h> 46 #include <asm/ap.h> 47 #include <asm/uv.h> 48 #include "kvm-s390.h" 49 #include "gaccess.h" 50 51 #define CREATE_TRACE_POINTS 52 #include "trace.h" 53 #include "trace-s390.h" 54 55 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */ 56 #define LOCAL_IRQS 32 57 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \ 58 (KVM_MAX_VCPUS + LOCAL_IRQS)) 59 60 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU 61 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM 62 63 struct kvm_stats_debugfs_item debugfs_entries[] = { 64 { "userspace_handled", VCPU_STAT(exit_userspace) }, 65 { "exit_null", VCPU_STAT(exit_null) }, 66 { "exit_validity", VCPU_STAT(exit_validity) }, 67 { "exit_stop_request", VCPU_STAT(exit_stop_request) }, 68 { "exit_external_request", VCPU_STAT(exit_external_request) }, 69 { "exit_io_request", VCPU_STAT(exit_io_request) }, 70 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) }, 71 { "exit_instruction", VCPU_STAT(exit_instruction) }, 72 { "exit_pei", VCPU_STAT(exit_pei) }, 73 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) }, 74 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) }, 75 { "exit_operation_exception", VCPU_STAT(exit_operation_exception) }, 76 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) }, 77 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) }, 78 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) }, 79 { "halt_no_poll_steal", VCPU_STAT(halt_no_poll_steal) }, 80 { "halt_wakeup", VCPU_STAT(halt_wakeup) }, 81 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) }, 82 { "instruction_lctl", VCPU_STAT(instruction_lctl) }, 83 { "instruction_stctl", VCPU_STAT(instruction_stctl) }, 84 { "instruction_stctg", VCPU_STAT(instruction_stctg) }, 85 { "deliver_ckc", VCPU_STAT(deliver_ckc) }, 86 { "deliver_cputm", VCPU_STAT(deliver_cputm) }, 87 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) }, 88 { "deliver_external_call", VCPU_STAT(deliver_external_call) }, 89 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) }, 90 { "deliver_virtio", VCPU_STAT(deliver_virtio) }, 91 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) }, 92 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) }, 93 { 
"deliver_restart_signal", VCPU_STAT(deliver_restart_signal) }, 94 { "deliver_program", VCPU_STAT(deliver_program) }, 95 { "deliver_io", VCPU_STAT(deliver_io) }, 96 { "deliver_machine_check", VCPU_STAT(deliver_machine_check) }, 97 { "exit_wait_state", VCPU_STAT(exit_wait_state) }, 98 { "inject_ckc", VCPU_STAT(inject_ckc) }, 99 { "inject_cputm", VCPU_STAT(inject_cputm) }, 100 { "inject_external_call", VCPU_STAT(inject_external_call) }, 101 { "inject_float_mchk", VM_STAT(inject_float_mchk) }, 102 { "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) }, 103 { "inject_io", VM_STAT(inject_io) }, 104 { "inject_mchk", VCPU_STAT(inject_mchk) }, 105 { "inject_pfault_done", VM_STAT(inject_pfault_done) }, 106 { "inject_program", VCPU_STAT(inject_program) }, 107 { "inject_restart", VCPU_STAT(inject_restart) }, 108 { "inject_service_signal", VM_STAT(inject_service_signal) }, 109 { "inject_set_prefix", VCPU_STAT(inject_set_prefix) }, 110 { "inject_stop_signal", VCPU_STAT(inject_stop_signal) }, 111 { "inject_pfault_init", VCPU_STAT(inject_pfault_init) }, 112 { "inject_virtio", VM_STAT(inject_virtio) }, 113 { "instruction_epsw", VCPU_STAT(instruction_epsw) }, 114 { "instruction_gs", VCPU_STAT(instruction_gs) }, 115 { "instruction_io_other", VCPU_STAT(instruction_io_other) }, 116 { "instruction_lpsw", VCPU_STAT(instruction_lpsw) }, 117 { "instruction_lpswe", VCPU_STAT(instruction_lpswe) }, 118 { "instruction_pfmf", VCPU_STAT(instruction_pfmf) }, 119 { "instruction_ptff", VCPU_STAT(instruction_ptff) }, 120 { "instruction_stidp", VCPU_STAT(instruction_stidp) }, 121 { "instruction_sck", VCPU_STAT(instruction_sck) }, 122 { "instruction_sckpf", VCPU_STAT(instruction_sckpf) }, 123 { "instruction_spx", VCPU_STAT(instruction_spx) }, 124 { "instruction_stpx", VCPU_STAT(instruction_stpx) }, 125 { "instruction_stap", VCPU_STAT(instruction_stap) }, 126 { "instruction_iske", VCPU_STAT(instruction_iske) }, 127 { "instruction_ri", VCPU_STAT(instruction_ri) }, 128 { "instruction_rrbe", VCPU_STAT(instruction_rrbe) }, 129 { "instruction_sske", VCPU_STAT(instruction_sske) }, 130 { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) }, 131 { "instruction_essa", VCPU_STAT(instruction_essa) }, 132 { "instruction_stsi", VCPU_STAT(instruction_stsi) }, 133 { "instruction_stfl", VCPU_STAT(instruction_stfl) }, 134 { "instruction_tb", VCPU_STAT(instruction_tb) }, 135 { "instruction_tpi", VCPU_STAT(instruction_tpi) }, 136 { "instruction_tprot", VCPU_STAT(instruction_tprot) }, 137 { "instruction_tsch", VCPU_STAT(instruction_tsch) }, 138 { "instruction_sthyi", VCPU_STAT(instruction_sthyi) }, 139 { "instruction_sie", VCPU_STAT(instruction_sie) }, 140 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) }, 141 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) }, 142 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) }, 143 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) }, 144 { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) }, 145 { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) }, 146 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) }, 147 { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) }, 148 { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) }, 149 { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) }, 150 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) }, 151 
{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) }, 152 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) }, 153 { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) }, 154 { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) }, 155 { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) }, 156 { "instruction_diag_10", VCPU_STAT(diagnose_10) }, 157 { "instruction_diag_44", VCPU_STAT(diagnose_44) }, 158 { "instruction_diag_9c", VCPU_STAT(diagnose_9c) }, 159 { "diag_9c_ignored", VCPU_STAT(diagnose_9c_ignored) }, 160 { "instruction_diag_258", VCPU_STAT(diagnose_258) }, 161 { "instruction_diag_308", VCPU_STAT(diagnose_308) }, 162 { "instruction_diag_500", VCPU_STAT(diagnose_500) }, 163 { "instruction_diag_other", VCPU_STAT(diagnose_other) }, 164 { NULL } 165 }; 166 167 struct kvm_s390_tod_clock_ext { 168 __u8 epoch_idx; 169 __u64 tod; 170 __u8 reserved[7]; 171 } __packed; 172 173 /* allow nested virtualization in KVM (if enabled by user space) */ 174 static int nested; 175 module_param(nested, int, S_IRUGO); 176 MODULE_PARM_DESC(nested, "Nested virtualization support"); 177 178 /* allow 1m huge page guest backing, if !nested */ 179 static int hpage; 180 module_param(hpage, int, 0444); 181 MODULE_PARM_DESC(hpage, "1m huge page backing support"); 182 183 /* maximum percentage of steal time for polling. >100 is treated like 100 */ 184 static u8 halt_poll_max_steal = 10; 185 module_param(halt_poll_max_steal, byte, 0644); 186 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling"); 187 188 /* 189 * For now we handle at most 16 double words as this is what the s390 base 190 * kernel handles and stores in the prefix page. If we ever need to go beyond 191 * this, this requires changes to code, but the external uapi can stay. 192 */ 193 #define SIZE_INTERNAL 16 194 195 /* 196 * Base feature mask that defines default mask for facilities. Consists of the 197 * defines in FACILITIES_KVM and the non-hypervisor managed bits. 198 */ 199 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM }; 200 /* 201 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL 202 * and defines the facilities that can be enabled via a cpu model. 
203 */ 204 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL }; 205 206 static unsigned long kvm_s390_fac_size(void) 207 { 208 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64); 209 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64); 210 BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) > 211 sizeof(S390_lowcore.stfle_fac_list)); 212 213 return SIZE_INTERNAL; 214 } 215 216 /* available cpu features supported by kvm */ 217 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS); 218 /* available subfunctions indicated via query / "test bit" */ 219 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc; 220 221 static struct gmap_notifier gmap_notifier; 222 static struct gmap_notifier vsie_gmap_notifier; 223 debug_info_t *kvm_s390_dbf; 224 debug_info_t *kvm_s390_dbf_uv; 225 226 /* Section: not file related */ 227 int kvm_arch_hardware_enable(void) 228 { 229 /* every s390 is virtualization enabled ;-) */ 230 return 0; 231 } 232 233 int kvm_arch_check_processor_compat(void) 234 { 235 return 0; 236 } 237 238 /* forward declarations */ 239 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, 240 unsigned long end); 241 static int sca_switch_to_extended(struct kvm *kvm); 242 243 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta) 244 { 245 u8 delta_idx = 0; 246 247 /* 248 * The TOD jumps by delta, we have to compensate this by adding 249 * -delta to the epoch. 250 */ 251 delta = -delta; 252 253 /* sign-extension - we're adding to signed values below */ 254 if ((s64)delta < 0) 255 delta_idx = -1; 256 257 scb->epoch += delta; 258 if (scb->ecd & ECD_MEF) { 259 scb->epdx += delta_idx; 260 if (scb->epoch < delta) 261 scb->epdx += 1; 262 } 263 } 264 265 /* 266 * This callback is executed during stop_machine(). All CPUs are therefore 267 * temporarily stopped. In order not to change guest behavior, we have to 268 * disable preemption whenever we touch the epoch of kvm and the VCPUs, 269 * so a CPU won't be stopped while calculating with the epoch. 
270 */ 271 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val, 272 void *v) 273 { 274 struct kvm *kvm; 275 struct kvm_vcpu *vcpu; 276 int i; 277 unsigned long long *delta = v; 278 279 list_for_each_entry(kvm, &vm_list, vm_list) { 280 kvm_for_each_vcpu(i, vcpu, kvm) { 281 kvm_clock_sync_scb(vcpu->arch.sie_block, *delta); 282 if (i == 0) { 283 kvm->arch.epoch = vcpu->arch.sie_block->epoch; 284 kvm->arch.epdx = vcpu->arch.sie_block->epdx; 285 } 286 if (vcpu->arch.cputm_enabled) 287 vcpu->arch.cputm_start += *delta; 288 if (vcpu->arch.vsie_block) 289 kvm_clock_sync_scb(vcpu->arch.vsie_block, 290 *delta); 291 } 292 } 293 return NOTIFY_OK; 294 } 295 296 static struct notifier_block kvm_clock_notifier = { 297 .notifier_call = kvm_clock_sync, 298 }; 299 300 int kvm_arch_hardware_setup(void) 301 { 302 gmap_notifier.notifier_call = kvm_gmap_notifier; 303 gmap_register_pte_notifier(&gmap_notifier); 304 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier; 305 gmap_register_pte_notifier(&vsie_gmap_notifier); 306 atomic_notifier_chain_register(&s390_epoch_delta_notifier, 307 &kvm_clock_notifier); 308 return 0; 309 } 310 311 void kvm_arch_hardware_unsetup(void) 312 { 313 gmap_unregister_pte_notifier(&gmap_notifier); 314 gmap_unregister_pte_notifier(&vsie_gmap_notifier); 315 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier, 316 &kvm_clock_notifier); 317 } 318 319 static void allow_cpu_feat(unsigned long nr) 320 { 321 set_bit_inv(nr, kvm_s390_available_cpu_feat); 322 } 323 324 static inline int plo_test_bit(unsigned char nr) 325 { 326 register unsigned long r0 asm("0") = (unsigned long) nr | 0x100; 327 int cc; 328 329 asm volatile( 330 /* Parameter registers are ignored for "test bit" */ 331 " plo 0,0,0,0(0)\n" 332 " ipm %0\n" 333 " srl %0,28\n" 334 : "=d" (cc) 335 : "d" (r0) 336 : "cc"); 337 return cc == 0; 338 } 339 340 static __always_inline void __insn32_query(unsigned int opcode, u8 *query) 341 { 342 register unsigned long r0 asm("0") = 0; /* query function */ 343 register unsigned long r1 asm("1") = (unsigned long) query; 344 345 asm volatile( 346 /* Parameter regs are ignored */ 347 " .insn rrf,%[opc] << 16,2,4,6,0\n" 348 : 349 : "d" (r0), "a" (r1), [opc] "i" (opcode) 350 : "cc", "memory"); 351 } 352 353 #define INSN_SORTL 0xb938 354 #define INSN_DFLTCC 0xb939 355 356 static void kvm_s390_cpu_feat_init(void) 357 { 358 int i; 359 360 for (i = 0; i < 256; ++i) { 361 if (plo_test_bit(i)) 362 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7); 363 } 364 365 if (test_facility(28)) /* TOD-clock steering */ 366 ptff(kvm_s390_available_subfunc.ptff, 367 sizeof(kvm_s390_available_subfunc.ptff), 368 PTFF_QAF); 369 370 if (test_facility(17)) { /* MSA */ 371 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *) 372 kvm_s390_available_subfunc.kmac); 373 __cpacf_query(CPACF_KMC, (cpacf_mask_t *) 374 kvm_s390_available_subfunc.kmc); 375 __cpacf_query(CPACF_KM, (cpacf_mask_t *) 376 kvm_s390_available_subfunc.km); 377 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *) 378 kvm_s390_available_subfunc.kimd); 379 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *) 380 kvm_s390_available_subfunc.klmd); 381 } 382 if (test_facility(76)) /* MSA3 */ 383 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *) 384 kvm_s390_available_subfunc.pckmo); 385 if (test_facility(77)) { /* MSA4 */ 386 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *) 387 kvm_s390_available_subfunc.kmctr); 388 __cpacf_query(CPACF_KMF, (cpacf_mask_t *) 389 kvm_s390_available_subfunc.kmf); 390 __cpacf_query(CPACF_KMO, (cpacf_mask_t *) 391 
kvm_s390_available_subfunc.kmo); 392 __cpacf_query(CPACF_PCC, (cpacf_mask_t *) 393 kvm_s390_available_subfunc.pcc); 394 } 395 if (test_facility(57)) /* MSA5 */ 396 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *) 397 kvm_s390_available_subfunc.ppno); 398 399 if (test_facility(146)) /* MSA8 */ 400 __cpacf_query(CPACF_KMA, (cpacf_mask_t *) 401 kvm_s390_available_subfunc.kma); 402 403 if (test_facility(155)) /* MSA9 */ 404 __cpacf_query(CPACF_KDSA, (cpacf_mask_t *) 405 kvm_s390_available_subfunc.kdsa); 406 407 if (test_facility(150)) /* SORTL */ 408 __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl); 409 410 if (test_facility(151)) /* DFLTCC */ 411 __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc); 412 413 if (MACHINE_HAS_ESOP) 414 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP); 415 /* 416 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow), 417 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing). 418 */ 419 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao || 420 !test_facility(3) || !nested) 421 return; 422 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2); 423 if (sclp.has_64bscao) 424 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO); 425 if (sclp.has_siif) 426 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF); 427 if (sclp.has_gpere) 428 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE); 429 if (sclp.has_gsls) 430 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS); 431 if (sclp.has_ib) 432 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB); 433 if (sclp.has_cei) 434 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI); 435 if (sclp.has_ibs) 436 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS); 437 if (sclp.has_kss) 438 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS); 439 /* 440 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make 441 * all skey handling functions read/set the skey from the PGSTE 442 * instead of the real storage key. 443 * 444 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make 445 * pages being detected as preserved although they are resident. 446 * 447 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will 448 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY. 449 * 450 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and 451 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be 452 * correctly shadowed. We can do that for the PGSTE but not for PTE.I. 453 * 454 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We 455 * cannot easily shadow the SCA because of the ipte lock. 456 */ 457 } 458 459 int kvm_arch_init(void *opaque) 460 { 461 int rc = -ENOMEM; 462 463 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long)); 464 if (!kvm_s390_dbf) 465 return -ENOMEM; 466 467 kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long)); 468 if (!kvm_s390_dbf_uv) 469 goto out; 470 471 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) || 472 debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view)) 473 goto out; 474 475 kvm_s390_cpu_feat_init(); 476 477 /* Register floating interrupt controller interface. 
*/ 478 rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC); 479 if (rc) { 480 pr_err("A FLIC registration call failed with rc=%d\n", rc); 481 goto out; 482 } 483 484 rc = kvm_s390_gib_init(GAL_ISC); 485 if (rc) 486 goto out; 487 488 return 0; 489 490 out: 491 kvm_arch_exit(); 492 return rc; 493 } 494 495 void kvm_arch_exit(void) 496 { 497 kvm_s390_gib_destroy(); 498 debug_unregister(kvm_s390_dbf); 499 debug_unregister(kvm_s390_dbf_uv); 500 } 501 502 /* Section: device related */ 503 long kvm_arch_dev_ioctl(struct file *filp, 504 unsigned int ioctl, unsigned long arg) 505 { 506 if (ioctl == KVM_S390_ENABLE_SIE) 507 return s390_enable_sie(); 508 return -EINVAL; 509 } 510 511 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) 512 { 513 int r; 514 515 switch (ext) { 516 case KVM_CAP_S390_PSW: 517 case KVM_CAP_S390_GMAP: 518 case KVM_CAP_SYNC_MMU: 519 #ifdef CONFIG_KVM_S390_UCONTROL 520 case KVM_CAP_S390_UCONTROL: 521 #endif 522 case KVM_CAP_ASYNC_PF: 523 case KVM_CAP_SYNC_REGS: 524 case KVM_CAP_ONE_REG: 525 case KVM_CAP_ENABLE_CAP: 526 case KVM_CAP_S390_CSS_SUPPORT: 527 case KVM_CAP_IOEVENTFD: 528 case KVM_CAP_DEVICE_CTRL: 529 case KVM_CAP_S390_IRQCHIP: 530 case KVM_CAP_VM_ATTRIBUTES: 531 case KVM_CAP_MP_STATE: 532 case KVM_CAP_IMMEDIATE_EXIT: 533 case KVM_CAP_S390_INJECT_IRQ: 534 case KVM_CAP_S390_USER_SIGP: 535 case KVM_CAP_S390_USER_STSI: 536 case KVM_CAP_S390_SKEYS: 537 case KVM_CAP_S390_IRQ_STATE: 538 case KVM_CAP_S390_USER_INSTR0: 539 case KVM_CAP_S390_CMMA_MIGRATION: 540 case KVM_CAP_S390_AIS: 541 case KVM_CAP_S390_AIS_MIGRATION: 542 case KVM_CAP_S390_VCPU_RESETS: 543 r = 1; 544 break; 545 case KVM_CAP_S390_HPAGE_1M: 546 r = 0; 547 if (hpage && !kvm_is_ucontrol(kvm)) 548 r = 1; 549 break; 550 case KVM_CAP_S390_MEM_OP: 551 r = MEM_OP_MAX_SIZE; 552 break; 553 case KVM_CAP_NR_VCPUS: 554 case KVM_CAP_MAX_VCPUS: 555 case KVM_CAP_MAX_VCPU_ID: 556 r = KVM_S390_BSCA_CPU_SLOTS; 557 if (!kvm_s390_use_sca_entries()) 558 r = KVM_MAX_VCPUS; 559 else if (sclp.has_esca && sclp.has_64bscao) 560 r = KVM_S390_ESCA_CPU_SLOTS; 561 break; 562 case KVM_CAP_S390_COW: 563 r = MACHINE_HAS_ESOP; 564 break; 565 case KVM_CAP_S390_VECTOR_REGISTERS: 566 r = MACHINE_HAS_VX; 567 break; 568 case KVM_CAP_S390_RI: 569 r = test_facility(64); 570 break; 571 case KVM_CAP_S390_GS: 572 r = test_facility(133); 573 break; 574 case KVM_CAP_S390_BPB: 575 r = test_facility(82); 576 break; 577 default: 578 r = 0; 579 } 580 return r; 581 } 582 583 static void kvm_s390_sync_dirty_log(struct kvm *kvm, 584 struct kvm_memory_slot *memslot) 585 { 586 int i; 587 gfn_t cur_gfn, last_gfn; 588 unsigned long gaddr, vmaddr; 589 struct gmap *gmap = kvm->arch.gmap; 590 DECLARE_BITMAP(bitmap, _PAGE_ENTRIES); 591 592 /* Loop over all guest segments */ 593 cur_gfn = memslot->base_gfn; 594 last_gfn = memslot->base_gfn + memslot->npages; 595 for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) { 596 gaddr = gfn_to_gpa(cur_gfn); 597 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn); 598 if (kvm_is_error_hva(vmaddr)) 599 continue; 600 601 bitmap_zero(bitmap, _PAGE_ENTRIES); 602 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr); 603 for (i = 0; i < _PAGE_ENTRIES; i++) { 604 if (test_bit(i, bitmap)) 605 mark_page_dirty(kvm, cur_gfn + i); 606 } 607 608 if (fatal_signal_pending(current)) 609 return; 610 cond_resched(); 611 } 612 } 613 614 /* Section: vm related */ 615 static void sca_del_vcpu(struct kvm_vcpu *vcpu); 616 617 /* 618 * Get (and clear) the dirty memory log for a memory slot. 
619 */ 620 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, 621 struct kvm_dirty_log *log) 622 { 623 int r; 624 unsigned long n; 625 struct kvm_memslots *slots; 626 struct kvm_memory_slot *memslot; 627 int is_dirty = 0; 628 629 if (kvm_is_ucontrol(kvm)) 630 return -EINVAL; 631 632 mutex_lock(&kvm->slots_lock); 633 634 r = -EINVAL; 635 if (log->slot >= KVM_USER_MEM_SLOTS) 636 goto out; 637 638 slots = kvm_memslots(kvm); 639 memslot = id_to_memslot(slots, log->slot); 640 r = -ENOENT; 641 if (!memslot->dirty_bitmap) 642 goto out; 643 644 kvm_s390_sync_dirty_log(kvm, memslot); 645 r = kvm_get_dirty_log(kvm, log, &is_dirty); 646 if (r) 647 goto out; 648 649 /* Clear the dirty log */ 650 if (is_dirty) { 651 n = kvm_dirty_bitmap_bytes(memslot); 652 memset(memslot->dirty_bitmap, 0, n); 653 } 654 r = 0; 655 out: 656 mutex_unlock(&kvm->slots_lock); 657 return r; 658 } 659 660 static void icpt_operexc_on_all_vcpus(struct kvm *kvm) 661 { 662 unsigned int i; 663 struct kvm_vcpu *vcpu; 664 665 kvm_for_each_vcpu(i, vcpu, kvm) { 666 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu); 667 } 668 } 669 670 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) 671 { 672 int r; 673 674 if (cap->flags) 675 return -EINVAL; 676 677 switch (cap->cap) { 678 case KVM_CAP_S390_IRQCHIP: 679 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP"); 680 kvm->arch.use_irqchip = 1; 681 r = 0; 682 break; 683 case KVM_CAP_S390_USER_SIGP: 684 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP"); 685 kvm->arch.user_sigp = 1; 686 r = 0; 687 break; 688 case KVM_CAP_S390_VECTOR_REGISTERS: 689 mutex_lock(&kvm->lock); 690 if (kvm->created_vcpus) { 691 r = -EBUSY; 692 } else if (MACHINE_HAS_VX) { 693 set_kvm_facility(kvm->arch.model.fac_mask, 129); 694 set_kvm_facility(kvm->arch.model.fac_list, 129); 695 if (test_facility(134)) { 696 set_kvm_facility(kvm->arch.model.fac_mask, 134); 697 set_kvm_facility(kvm->arch.model.fac_list, 134); 698 } 699 if (test_facility(135)) { 700 set_kvm_facility(kvm->arch.model.fac_mask, 135); 701 set_kvm_facility(kvm->arch.model.fac_list, 135); 702 } 703 if (test_facility(148)) { 704 set_kvm_facility(kvm->arch.model.fac_mask, 148); 705 set_kvm_facility(kvm->arch.model.fac_list, 148); 706 } 707 if (test_facility(152)) { 708 set_kvm_facility(kvm->arch.model.fac_mask, 152); 709 set_kvm_facility(kvm->arch.model.fac_list, 152); 710 } 711 r = 0; 712 } else 713 r = -EINVAL; 714 mutex_unlock(&kvm->lock); 715 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s", 716 r ? "(not available)" : "(success)"); 717 break; 718 case KVM_CAP_S390_RI: 719 r = -EINVAL; 720 mutex_lock(&kvm->lock); 721 if (kvm->created_vcpus) { 722 r = -EBUSY; 723 } else if (test_facility(64)) { 724 set_kvm_facility(kvm->arch.model.fac_mask, 64); 725 set_kvm_facility(kvm->arch.model.fac_list, 64); 726 r = 0; 727 } 728 mutex_unlock(&kvm->lock); 729 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s", 730 r ? "(not available)" : "(success)"); 731 break; 732 case KVM_CAP_S390_AIS: 733 mutex_lock(&kvm->lock); 734 if (kvm->created_vcpus) { 735 r = -EBUSY; 736 } else { 737 set_kvm_facility(kvm->arch.model.fac_mask, 72); 738 set_kvm_facility(kvm->arch.model.fac_list, 72); 739 r = 0; 740 } 741 mutex_unlock(&kvm->lock); 742 VM_EVENT(kvm, 3, "ENABLE: AIS %s", 743 r ? 
"(not available)" : "(success)"); 744 break; 745 case KVM_CAP_S390_GS: 746 r = -EINVAL; 747 mutex_lock(&kvm->lock); 748 if (kvm->created_vcpus) { 749 r = -EBUSY; 750 } else if (test_facility(133)) { 751 set_kvm_facility(kvm->arch.model.fac_mask, 133); 752 set_kvm_facility(kvm->arch.model.fac_list, 133); 753 r = 0; 754 } 755 mutex_unlock(&kvm->lock); 756 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s", 757 r ? "(not available)" : "(success)"); 758 break; 759 case KVM_CAP_S390_HPAGE_1M: 760 mutex_lock(&kvm->lock); 761 if (kvm->created_vcpus) 762 r = -EBUSY; 763 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm)) 764 r = -EINVAL; 765 else { 766 r = 0; 767 down_write(&kvm->mm->mmap_sem); 768 kvm->mm->context.allow_gmap_hpage_1m = 1; 769 up_write(&kvm->mm->mmap_sem); 770 /* 771 * We might have to create fake 4k page 772 * tables. To avoid that the hardware works on 773 * stale PGSTEs, we emulate these instructions. 774 */ 775 kvm->arch.use_skf = 0; 776 kvm->arch.use_pfmfi = 0; 777 } 778 mutex_unlock(&kvm->lock); 779 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s", 780 r ? "(not available)" : "(success)"); 781 break; 782 case KVM_CAP_S390_USER_STSI: 783 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI"); 784 kvm->arch.user_stsi = 1; 785 r = 0; 786 break; 787 case KVM_CAP_S390_USER_INSTR0: 788 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0"); 789 kvm->arch.user_instr0 = 1; 790 icpt_operexc_on_all_vcpus(kvm); 791 r = 0; 792 break; 793 default: 794 r = -EINVAL; 795 break; 796 } 797 return r; 798 } 799 800 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) 801 { 802 int ret; 803 804 switch (attr->attr) { 805 case KVM_S390_VM_MEM_LIMIT_SIZE: 806 ret = 0; 807 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes", 808 kvm->arch.mem_limit); 809 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr)) 810 ret = -EFAULT; 811 break; 812 default: 813 ret = -ENXIO; 814 break; 815 } 816 return ret; 817 } 818 819 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) 820 { 821 int ret; 822 unsigned int idx; 823 switch (attr->attr) { 824 case KVM_S390_VM_MEM_ENABLE_CMMA: 825 ret = -ENXIO; 826 if (!sclp.has_cmma) 827 break; 828 829 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support"); 830 mutex_lock(&kvm->lock); 831 if (kvm->created_vcpus) 832 ret = -EBUSY; 833 else if (kvm->mm->context.allow_gmap_hpage_1m) 834 ret = -EINVAL; 835 else { 836 kvm->arch.use_cmma = 1; 837 /* Not compatible with cmma. 
*/ 838 kvm->arch.use_pfmfi = 0; 839 ret = 0; 840 } 841 mutex_unlock(&kvm->lock); 842 break; 843 case KVM_S390_VM_MEM_CLR_CMMA: 844 ret = -ENXIO; 845 if (!sclp.has_cmma) 846 break; 847 ret = -EINVAL; 848 if (!kvm->arch.use_cmma) 849 break; 850 851 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states"); 852 mutex_lock(&kvm->lock); 853 idx = srcu_read_lock(&kvm->srcu); 854 s390_reset_cmma(kvm->arch.gmap->mm); 855 srcu_read_unlock(&kvm->srcu, idx); 856 mutex_unlock(&kvm->lock); 857 ret = 0; 858 break; 859 case KVM_S390_VM_MEM_LIMIT_SIZE: { 860 unsigned long new_limit; 861 862 if (kvm_is_ucontrol(kvm)) 863 return -EINVAL; 864 865 if (get_user(new_limit, (u64 __user *)attr->addr)) 866 return -EFAULT; 867 868 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT && 869 new_limit > kvm->arch.mem_limit) 870 return -E2BIG; 871 872 if (!new_limit) 873 return -EINVAL; 874 875 /* gmap_create takes last usable address */ 876 if (new_limit != KVM_S390_NO_MEM_LIMIT) 877 new_limit -= 1; 878 879 ret = -EBUSY; 880 mutex_lock(&kvm->lock); 881 if (!kvm->created_vcpus) { 882 /* gmap_create will round the limit up */ 883 struct gmap *new = gmap_create(current->mm, new_limit); 884 885 if (!new) { 886 ret = -ENOMEM; 887 } else { 888 gmap_remove(kvm->arch.gmap); 889 new->private = kvm; 890 kvm->arch.gmap = new; 891 ret = 0; 892 } 893 } 894 mutex_unlock(&kvm->lock); 895 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit); 896 VM_EVENT(kvm, 3, "New guest asce: 0x%pK", 897 (void *) kvm->arch.gmap->asce); 898 break; 899 } 900 default: 901 ret = -ENXIO; 902 break; 903 } 904 return ret; 905 } 906 907 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu); 908 909 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm) 910 { 911 struct kvm_vcpu *vcpu; 912 int i; 913 914 kvm_s390_vcpu_block_all(kvm); 915 916 kvm_for_each_vcpu(i, vcpu, kvm) { 917 kvm_s390_vcpu_crypto_setup(vcpu); 918 /* recreate the shadow crycb by leaving the VSIE handler */ 919 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu); 920 } 921 922 kvm_s390_vcpu_unblock_all(kvm); 923 } 924 925 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr) 926 { 927 mutex_lock(&kvm->lock); 928 switch (attr->attr) { 929 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW: 930 if (!test_kvm_facility(kvm, 76)) { 931 mutex_unlock(&kvm->lock); 932 return -EINVAL; 933 } 934 get_random_bytes( 935 kvm->arch.crypto.crycb->aes_wrapping_key_mask, 936 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); 937 kvm->arch.crypto.aes_kw = 1; 938 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support"); 939 break; 940 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW: 941 if (!test_kvm_facility(kvm, 76)) { 942 mutex_unlock(&kvm->lock); 943 return -EINVAL; 944 } 945 get_random_bytes( 946 kvm->arch.crypto.crycb->dea_wrapping_key_mask, 947 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); 948 kvm->arch.crypto.dea_kw = 1; 949 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support"); 950 break; 951 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW: 952 if (!test_kvm_facility(kvm, 76)) { 953 mutex_unlock(&kvm->lock); 954 return -EINVAL; 955 } 956 kvm->arch.crypto.aes_kw = 0; 957 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0, 958 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); 959 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support"); 960 break; 961 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW: 962 if (!test_kvm_facility(kvm, 76)) { 963 mutex_unlock(&kvm->lock); 964 return -EINVAL; 965 } 966 kvm->arch.crypto.dea_kw = 0; 967 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0, 968 
sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); 969 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support"); 970 break; 971 case KVM_S390_VM_CRYPTO_ENABLE_APIE: 972 if (!ap_instructions_available()) { 973 mutex_unlock(&kvm->lock); 974 return -EOPNOTSUPP; 975 } 976 kvm->arch.crypto.apie = 1; 977 break; 978 case KVM_S390_VM_CRYPTO_DISABLE_APIE: 979 if (!ap_instructions_available()) { 980 mutex_unlock(&kvm->lock); 981 return -EOPNOTSUPP; 982 } 983 kvm->arch.crypto.apie = 0; 984 break; 985 default: 986 mutex_unlock(&kvm->lock); 987 return -ENXIO; 988 } 989 990 kvm_s390_vcpu_crypto_reset_all(kvm); 991 mutex_unlock(&kvm->lock); 992 return 0; 993 } 994 995 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req) 996 { 997 int cx; 998 struct kvm_vcpu *vcpu; 999 1000 kvm_for_each_vcpu(cx, vcpu, kvm) 1001 kvm_s390_sync_request(req, vcpu); 1002 } 1003 1004 /* 1005 * Must be called with kvm->srcu held to avoid races on memslots, and with 1006 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration. 1007 */ 1008 static int kvm_s390_vm_start_migration(struct kvm *kvm) 1009 { 1010 struct kvm_memory_slot *ms; 1011 struct kvm_memslots *slots; 1012 unsigned long ram_pages = 0; 1013 int slotnr; 1014 1015 /* migration mode already enabled */ 1016 if (kvm->arch.migration_mode) 1017 return 0; 1018 slots = kvm_memslots(kvm); 1019 if (!slots || !slots->used_slots) 1020 return -EINVAL; 1021 1022 if (!kvm->arch.use_cmma) { 1023 kvm->arch.migration_mode = 1; 1024 return 0; 1025 } 1026 /* mark all the pages in active slots as dirty */ 1027 for (slotnr = 0; slotnr < slots->used_slots; slotnr++) { 1028 ms = slots->memslots + slotnr; 1029 if (!ms->dirty_bitmap) 1030 return -EINVAL; 1031 /* 1032 * The second half of the bitmap is only used on x86, 1033 * and would be wasted otherwise, so we put it to good 1034 * use here to keep track of the state of the storage 1035 * attributes. 1036 */ 1037 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms)); 1038 ram_pages += ms->npages; 1039 } 1040 atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages); 1041 kvm->arch.migration_mode = 1; 1042 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION); 1043 return 0; 1044 } 1045 1046 /* 1047 * Must be called with kvm->slots_lock to avoid races with ourselves and 1048 * kvm_s390_vm_start_migration. 
1049 */ 1050 static int kvm_s390_vm_stop_migration(struct kvm *kvm) 1051 { 1052 /* migration mode already disabled */ 1053 if (!kvm->arch.migration_mode) 1054 return 0; 1055 kvm->arch.migration_mode = 0; 1056 if (kvm->arch.use_cmma) 1057 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION); 1058 return 0; 1059 } 1060 1061 static int kvm_s390_vm_set_migration(struct kvm *kvm, 1062 struct kvm_device_attr *attr) 1063 { 1064 int res = -ENXIO; 1065 1066 mutex_lock(&kvm->slots_lock); 1067 switch (attr->attr) { 1068 case KVM_S390_VM_MIGRATION_START: 1069 res = kvm_s390_vm_start_migration(kvm); 1070 break; 1071 case KVM_S390_VM_MIGRATION_STOP: 1072 res = kvm_s390_vm_stop_migration(kvm); 1073 break; 1074 default: 1075 break; 1076 } 1077 mutex_unlock(&kvm->slots_lock); 1078 1079 return res; 1080 } 1081 1082 static int kvm_s390_vm_get_migration(struct kvm *kvm, 1083 struct kvm_device_attr *attr) 1084 { 1085 u64 mig = kvm->arch.migration_mode; 1086 1087 if (attr->attr != KVM_S390_VM_MIGRATION_STATUS) 1088 return -ENXIO; 1089 1090 if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig))) 1091 return -EFAULT; 1092 return 0; 1093 } 1094 1095 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr) 1096 { 1097 struct kvm_s390_vm_tod_clock gtod; 1098 1099 if (copy_from_user(>od, (void __user *)attr->addr, sizeof(gtod))) 1100 return -EFAULT; 1101 1102 if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx) 1103 return -EINVAL; 1104 kvm_s390_set_tod_clock(kvm, >od); 1105 1106 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx", 1107 gtod.epoch_idx, gtod.tod); 1108 1109 return 0; 1110 } 1111 1112 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr) 1113 { 1114 u8 gtod_high; 1115 1116 if (copy_from_user(>od_high, (void __user *)attr->addr, 1117 sizeof(gtod_high))) 1118 return -EFAULT; 1119 1120 if (gtod_high != 0) 1121 return -EINVAL; 1122 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high); 1123 1124 return 0; 1125 } 1126 1127 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr) 1128 { 1129 struct kvm_s390_vm_tod_clock gtod = { 0 }; 1130 1131 if (copy_from_user(>od.tod, (void __user *)attr->addr, 1132 sizeof(gtod.tod))) 1133 return -EFAULT; 1134 1135 kvm_s390_set_tod_clock(kvm, >od); 1136 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod); 1137 return 0; 1138 } 1139 1140 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr) 1141 { 1142 int ret; 1143 1144 if (attr->flags) 1145 return -EINVAL; 1146 1147 switch (attr->attr) { 1148 case KVM_S390_VM_TOD_EXT: 1149 ret = kvm_s390_set_tod_ext(kvm, attr); 1150 break; 1151 case KVM_S390_VM_TOD_HIGH: 1152 ret = kvm_s390_set_tod_high(kvm, attr); 1153 break; 1154 case KVM_S390_VM_TOD_LOW: 1155 ret = kvm_s390_set_tod_low(kvm, attr); 1156 break; 1157 default: 1158 ret = -ENXIO; 1159 break; 1160 } 1161 return ret; 1162 } 1163 1164 static void kvm_s390_get_tod_clock(struct kvm *kvm, 1165 struct kvm_s390_vm_tod_clock *gtod) 1166 { 1167 struct kvm_s390_tod_clock_ext htod; 1168 1169 preempt_disable(); 1170 1171 get_tod_clock_ext((char *)&htod); 1172 1173 gtod->tod = htod.tod + kvm->arch.epoch; 1174 gtod->epoch_idx = 0; 1175 if (test_kvm_facility(kvm, 139)) { 1176 gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx; 1177 if (gtod->tod < htod.tod) 1178 gtod->epoch_idx += 1; 1179 } 1180 1181 preempt_enable(); 1182 } 1183 1184 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr) 1185 { 1186 struct kvm_s390_vm_tod_clock gtod; 1187 1188 
memset(>od, 0, sizeof(gtod)); 1189 kvm_s390_get_tod_clock(kvm, >od); 1190 if (copy_to_user((void __user *)attr->addr, >od, sizeof(gtod))) 1191 return -EFAULT; 1192 1193 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx", 1194 gtod.epoch_idx, gtod.tod); 1195 return 0; 1196 } 1197 1198 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr) 1199 { 1200 u8 gtod_high = 0; 1201 1202 if (copy_to_user((void __user *)attr->addr, >od_high, 1203 sizeof(gtod_high))) 1204 return -EFAULT; 1205 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high); 1206 1207 return 0; 1208 } 1209 1210 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr) 1211 { 1212 u64 gtod; 1213 1214 gtod = kvm_s390_get_tod_clock_fast(kvm); 1215 if (copy_to_user((void __user *)attr->addr, >od, sizeof(gtod))) 1216 return -EFAULT; 1217 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod); 1218 1219 return 0; 1220 } 1221 1222 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr) 1223 { 1224 int ret; 1225 1226 if (attr->flags) 1227 return -EINVAL; 1228 1229 switch (attr->attr) { 1230 case KVM_S390_VM_TOD_EXT: 1231 ret = kvm_s390_get_tod_ext(kvm, attr); 1232 break; 1233 case KVM_S390_VM_TOD_HIGH: 1234 ret = kvm_s390_get_tod_high(kvm, attr); 1235 break; 1236 case KVM_S390_VM_TOD_LOW: 1237 ret = kvm_s390_get_tod_low(kvm, attr); 1238 break; 1239 default: 1240 ret = -ENXIO; 1241 break; 1242 } 1243 return ret; 1244 } 1245 1246 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr) 1247 { 1248 struct kvm_s390_vm_cpu_processor *proc; 1249 u16 lowest_ibc, unblocked_ibc; 1250 int ret = 0; 1251 1252 mutex_lock(&kvm->lock); 1253 if (kvm->created_vcpus) { 1254 ret = -EBUSY; 1255 goto out; 1256 } 1257 proc = kzalloc(sizeof(*proc), GFP_KERNEL); 1258 if (!proc) { 1259 ret = -ENOMEM; 1260 goto out; 1261 } 1262 if (!copy_from_user(proc, (void __user *)attr->addr, 1263 sizeof(*proc))) { 1264 kvm->arch.model.cpuid = proc->cpuid; 1265 lowest_ibc = sclp.ibc >> 16 & 0xfff; 1266 unblocked_ibc = sclp.ibc & 0xfff; 1267 if (lowest_ibc && proc->ibc) { 1268 if (proc->ibc > unblocked_ibc) 1269 kvm->arch.model.ibc = unblocked_ibc; 1270 else if (proc->ibc < lowest_ibc) 1271 kvm->arch.model.ibc = lowest_ibc; 1272 else 1273 kvm->arch.model.ibc = proc->ibc; 1274 } 1275 memcpy(kvm->arch.model.fac_list, proc->fac_list, 1276 S390_ARCH_FAC_LIST_SIZE_BYTE); 1277 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx", 1278 kvm->arch.model.ibc, 1279 kvm->arch.model.cpuid); 1280 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx", 1281 kvm->arch.model.fac_list[0], 1282 kvm->arch.model.fac_list[1], 1283 kvm->arch.model.fac_list[2]); 1284 } else 1285 ret = -EFAULT; 1286 kfree(proc); 1287 out: 1288 mutex_unlock(&kvm->lock); 1289 return ret; 1290 } 1291 1292 static int kvm_s390_set_processor_feat(struct kvm *kvm, 1293 struct kvm_device_attr *attr) 1294 { 1295 struct kvm_s390_vm_cpu_feat data; 1296 1297 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data))) 1298 return -EFAULT; 1299 if (!bitmap_subset((unsigned long *) data.feat, 1300 kvm_s390_available_cpu_feat, 1301 KVM_S390_VM_CPU_FEAT_NR_BITS)) 1302 return -EINVAL; 1303 1304 mutex_lock(&kvm->lock); 1305 if (kvm->created_vcpus) { 1306 mutex_unlock(&kvm->lock); 1307 return -EBUSY; 1308 } 1309 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat, 1310 KVM_S390_VM_CPU_FEAT_NR_BITS); 1311 mutex_unlock(&kvm->lock); 1312 VM_EVENT(kvm, 3, "SET: guest feat: 
0x%16.16llx.0x%16.16llx.0x%16.16llx", 1313 data.feat[0], 1314 data.feat[1], 1315 data.feat[2]); 1316 return 0; 1317 } 1318 1319 static int kvm_s390_set_processor_subfunc(struct kvm *kvm, 1320 struct kvm_device_attr *attr) 1321 { 1322 mutex_lock(&kvm->lock); 1323 if (kvm->created_vcpus) { 1324 mutex_unlock(&kvm->lock); 1325 return -EBUSY; 1326 } 1327 1328 if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr, 1329 sizeof(struct kvm_s390_vm_cpu_subfunc))) { 1330 mutex_unlock(&kvm->lock); 1331 return -EFAULT; 1332 } 1333 mutex_unlock(&kvm->lock); 1334 1335 VM_EVENT(kvm, 3, "SET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1336 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0], 1337 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1], 1338 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2], 1339 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]); 1340 VM_EVENT(kvm, 3, "SET: guest PTFF subfunc 0x%16.16lx.%16.16lx", 1341 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0], 1342 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]); 1343 VM_EVENT(kvm, 3, "SET: guest KMAC subfunc 0x%16.16lx.%16.16lx", 1344 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0], 1345 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]); 1346 VM_EVENT(kvm, 3, "SET: guest KMC subfunc 0x%16.16lx.%16.16lx", 1347 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0], 1348 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]); 1349 VM_EVENT(kvm, 3, "SET: guest KM subfunc 0x%16.16lx.%16.16lx", 1350 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0], 1351 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]); 1352 VM_EVENT(kvm, 3, "SET: guest KIMD subfunc 0x%16.16lx.%16.16lx", 1353 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0], 1354 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]); 1355 VM_EVENT(kvm, 3, "SET: guest KLMD subfunc 0x%16.16lx.%16.16lx", 1356 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0], 1357 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]); 1358 VM_EVENT(kvm, 3, "SET: guest PCKMO subfunc 0x%16.16lx.%16.16lx", 1359 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0], 1360 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]); 1361 VM_EVENT(kvm, 3, "SET: guest KMCTR subfunc 0x%16.16lx.%16.16lx", 1362 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0], 1363 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]); 1364 VM_EVENT(kvm, 3, "SET: guest KMF subfunc 0x%16.16lx.%16.16lx", 1365 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0], 1366 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]); 1367 VM_EVENT(kvm, 3, "SET: guest KMO subfunc 0x%16.16lx.%16.16lx", 1368 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0], 1369 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]); 1370 VM_EVENT(kvm, 3, "SET: guest PCC subfunc 0x%16.16lx.%16.16lx", 1371 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0], 1372 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]); 1373 VM_EVENT(kvm, 3, "SET: guest PPNO subfunc 0x%16.16lx.%16.16lx", 1374 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0], 1375 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]); 1376 VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx", 1377 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0], 1378 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]); 1379 VM_EVENT(kvm, 3, "SET: guest KDSA subfunc 0x%16.16lx.%16.16lx", 1380 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0], 1381 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]); 1382 
VM_EVENT(kvm, 3, "SET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1383 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0], 1384 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1], 1385 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2], 1386 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]); 1387 VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1388 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0], 1389 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1], 1390 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2], 1391 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]); 1392 1393 return 0; 1394 } 1395 1396 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) 1397 { 1398 int ret = -ENXIO; 1399 1400 switch (attr->attr) { 1401 case KVM_S390_VM_CPU_PROCESSOR: 1402 ret = kvm_s390_set_processor(kvm, attr); 1403 break; 1404 case KVM_S390_VM_CPU_PROCESSOR_FEAT: 1405 ret = kvm_s390_set_processor_feat(kvm, attr); 1406 break; 1407 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: 1408 ret = kvm_s390_set_processor_subfunc(kvm, attr); 1409 break; 1410 } 1411 return ret; 1412 } 1413 1414 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr) 1415 { 1416 struct kvm_s390_vm_cpu_processor *proc; 1417 int ret = 0; 1418 1419 proc = kzalloc(sizeof(*proc), GFP_KERNEL); 1420 if (!proc) { 1421 ret = -ENOMEM; 1422 goto out; 1423 } 1424 proc->cpuid = kvm->arch.model.cpuid; 1425 proc->ibc = kvm->arch.model.ibc; 1426 memcpy(&proc->fac_list, kvm->arch.model.fac_list, 1427 S390_ARCH_FAC_LIST_SIZE_BYTE); 1428 VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx", 1429 kvm->arch.model.ibc, 1430 kvm->arch.model.cpuid); 1431 VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx", 1432 kvm->arch.model.fac_list[0], 1433 kvm->arch.model.fac_list[1], 1434 kvm->arch.model.fac_list[2]); 1435 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc))) 1436 ret = -EFAULT; 1437 kfree(proc); 1438 out: 1439 return ret; 1440 } 1441 1442 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr) 1443 { 1444 struct kvm_s390_vm_cpu_machine *mach; 1445 int ret = 0; 1446 1447 mach = kzalloc(sizeof(*mach), GFP_KERNEL); 1448 if (!mach) { 1449 ret = -ENOMEM; 1450 goto out; 1451 } 1452 get_cpu_id((struct cpuid *) &mach->cpuid); 1453 mach->ibc = sclp.ibc; 1454 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask, 1455 S390_ARCH_FAC_LIST_SIZE_BYTE); 1456 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list, 1457 sizeof(S390_lowcore.stfle_fac_list)); 1458 VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx", 1459 kvm->arch.model.ibc, 1460 kvm->arch.model.cpuid); 1461 VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx", 1462 mach->fac_mask[0], 1463 mach->fac_mask[1], 1464 mach->fac_mask[2]); 1465 VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx", 1466 mach->fac_list[0], 1467 mach->fac_list[1], 1468 mach->fac_list[2]); 1469 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach))) 1470 ret = -EFAULT; 1471 kfree(mach); 1472 out: 1473 return ret; 1474 } 1475 1476 static int kvm_s390_get_processor_feat(struct kvm *kvm, 1477 struct kvm_device_attr *attr) 1478 { 1479 struct kvm_s390_vm_cpu_feat data; 1480 1481 bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat, 1482 KVM_S390_VM_CPU_FEAT_NR_BITS); 1483 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data))) 1484 return -EFAULT; 
1485 VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx", 1486 data.feat[0], 1487 data.feat[1], 1488 data.feat[2]); 1489 return 0; 1490 } 1491 1492 static int kvm_s390_get_machine_feat(struct kvm *kvm, 1493 struct kvm_device_attr *attr) 1494 { 1495 struct kvm_s390_vm_cpu_feat data; 1496 1497 bitmap_copy((unsigned long *) data.feat, 1498 kvm_s390_available_cpu_feat, 1499 KVM_S390_VM_CPU_FEAT_NR_BITS); 1500 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data))) 1501 return -EFAULT; 1502 VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx", 1503 data.feat[0], 1504 data.feat[1], 1505 data.feat[2]); 1506 return 0; 1507 } 1508 1509 static int kvm_s390_get_processor_subfunc(struct kvm *kvm, 1510 struct kvm_device_attr *attr) 1511 { 1512 if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs, 1513 sizeof(struct kvm_s390_vm_cpu_subfunc))) 1514 return -EFAULT; 1515 1516 VM_EVENT(kvm, 3, "GET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1517 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0], 1518 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1], 1519 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2], 1520 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]); 1521 VM_EVENT(kvm, 3, "GET: guest PTFF subfunc 0x%16.16lx.%16.16lx", 1522 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0], 1523 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]); 1524 VM_EVENT(kvm, 3, "GET: guest KMAC subfunc 0x%16.16lx.%16.16lx", 1525 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0], 1526 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]); 1527 VM_EVENT(kvm, 3, "GET: guest KMC subfunc 0x%16.16lx.%16.16lx", 1528 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0], 1529 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]); 1530 VM_EVENT(kvm, 3, "GET: guest KM subfunc 0x%16.16lx.%16.16lx", 1531 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0], 1532 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]); 1533 VM_EVENT(kvm, 3, "GET: guest KIMD subfunc 0x%16.16lx.%16.16lx", 1534 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0], 1535 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]); 1536 VM_EVENT(kvm, 3, "GET: guest KLMD subfunc 0x%16.16lx.%16.16lx", 1537 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0], 1538 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]); 1539 VM_EVENT(kvm, 3, "GET: guest PCKMO subfunc 0x%16.16lx.%16.16lx", 1540 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0], 1541 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]); 1542 VM_EVENT(kvm, 3, "GET: guest KMCTR subfunc 0x%16.16lx.%16.16lx", 1543 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0], 1544 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]); 1545 VM_EVENT(kvm, 3, "GET: guest KMF subfunc 0x%16.16lx.%16.16lx", 1546 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0], 1547 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]); 1548 VM_EVENT(kvm, 3, "GET: guest KMO subfunc 0x%16.16lx.%16.16lx", 1549 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0], 1550 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]); 1551 VM_EVENT(kvm, 3, "GET: guest PCC subfunc 0x%16.16lx.%16.16lx", 1552 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0], 1553 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]); 1554 VM_EVENT(kvm, 3, "GET: guest PPNO subfunc 0x%16.16lx.%16.16lx", 1555 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0], 1556 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]); 1557 VM_EVENT(kvm, 3, "GET: 
guest KMA subfunc 0x%16.16lx.%16.16lx", 1558 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0], 1559 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]); 1560 VM_EVENT(kvm, 3, "GET: guest KDSA subfunc 0x%16.16lx.%16.16lx", 1561 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0], 1562 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]); 1563 VM_EVENT(kvm, 3, "GET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1564 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0], 1565 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1], 1566 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2], 1567 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]); 1568 VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1569 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0], 1570 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1], 1571 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2], 1572 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]); 1573 1574 return 0; 1575 } 1576 1577 static int kvm_s390_get_machine_subfunc(struct kvm *kvm, 1578 struct kvm_device_attr *attr) 1579 { 1580 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc, 1581 sizeof(struct kvm_s390_vm_cpu_subfunc))) 1582 return -EFAULT; 1583 1584 VM_EVENT(kvm, 3, "GET: host PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1585 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0], 1586 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1], 1587 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2], 1588 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]); 1589 VM_EVENT(kvm, 3, "GET: host PTFF subfunc 0x%16.16lx.%16.16lx", 1590 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0], 1591 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]); 1592 VM_EVENT(kvm, 3, "GET: host KMAC subfunc 0x%16.16lx.%16.16lx", 1593 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0], 1594 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]); 1595 VM_EVENT(kvm, 3, "GET: host KMC subfunc 0x%16.16lx.%16.16lx", 1596 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0], 1597 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]); 1598 VM_EVENT(kvm, 3, "GET: host KM subfunc 0x%16.16lx.%16.16lx", 1599 ((unsigned long *) &kvm_s390_available_subfunc.km)[0], 1600 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]); 1601 VM_EVENT(kvm, 3, "GET: host KIMD subfunc 0x%16.16lx.%16.16lx", 1602 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0], 1603 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]); 1604 VM_EVENT(kvm, 3, "GET: host KLMD subfunc 0x%16.16lx.%16.16lx", 1605 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0], 1606 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]); 1607 VM_EVENT(kvm, 3, "GET: host PCKMO subfunc 0x%16.16lx.%16.16lx", 1608 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0], 1609 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]); 1610 VM_EVENT(kvm, 3, "GET: host KMCTR subfunc 0x%16.16lx.%16.16lx", 1611 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0], 1612 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]); 1613 VM_EVENT(kvm, 3, "GET: host KMF subfunc 0x%16.16lx.%16.16lx", 1614 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0], 1615 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]); 1616 VM_EVENT(kvm, 3, "GET: host KMO subfunc 0x%16.16lx.%16.16lx", 1617 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0], 1618 ((unsigned long *) 
&kvm_s390_available_subfunc.kmo)[1]); 1619 VM_EVENT(kvm, 3, "GET: host PCC subfunc 0x%16.16lx.%16.16lx", 1620 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0], 1621 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]); 1622 VM_EVENT(kvm, 3, "GET: host PPNO subfunc 0x%16.16lx.%16.16lx", 1623 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0], 1624 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]); 1625 VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx", 1626 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0], 1627 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]); 1628 VM_EVENT(kvm, 3, "GET: host KDSA subfunc 0x%16.16lx.%16.16lx", 1629 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0], 1630 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]); 1631 VM_EVENT(kvm, 3, "GET: host SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1632 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0], 1633 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1], 1634 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2], 1635 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]); 1636 VM_EVENT(kvm, 3, "GET: host DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1637 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0], 1638 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1], 1639 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2], 1640 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]); 1641 1642 return 0; 1643 } 1644 1645 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) 1646 { 1647 int ret = -ENXIO; 1648 1649 switch (attr->attr) { 1650 case KVM_S390_VM_CPU_PROCESSOR: 1651 ret = kvm_s390_get_processor(kvm, attr); 1652 break; 1653 case KVM_S390_VM_CPU_MACHINE: 1654 ret = kvm_s390_get_machine(kvm, attr); 1655 break; 1656 case KVM_S390_VM_CPU_PROCESSOR_FEAT: 1657 ret = kvm_s390_get_processor_feat(kvm, attr); 1658 break; 1659 case KVM_S390_VM_CPU_MACHINE_FEAT: 1660 ret = kvm_s390_get_machine_feat(kvm, attr); 1661 break; 1662 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: 1663 ret = kvm_s390_get_processor_subfunc(kvm, attr); 1664 break; 1665 case KVM_S390_VM_CPU_MACHINE_SUBFUNC: 1666 ret = kvm_s390_get_machine_subfunc(kvm, attr); 1667 break; 1668 } 1669 return ret; 1670 } 1671 1672 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr) 1673 { 1674 int ret; 1675 1676 switch (attr->group) { 1677 case KVM_S390_VM_MEM_CTRL: 1678 ret = kvm_s390_set_mem_control(kvm, attr); 1679 break; 1680 case KVM_S390_VM_TOD: 1681 ret = kvm_s390_set_tod(kvm, attr); 1682 break; 1683 case KVM_S390_VM_CPU_MODEL: 1684 ret = kvm_s390_set_cpu_model(kvm, attr); 1685 break; 1686 case KVM_S390_VM_CRYPTO: 1687 ret = kvm_s390_vm_set_crypto(kvm, attr); 1688 break; 1689 case KVM_S390_VM_MIGRATION: 1690 ret = kvm_s390_vm_set_migration(kvm, attr); 1691 break; 1692 default: 1693 ret = -ENXIO; 1694 break; 1695 } 1696 1697 return ret; 1698 } 1699 1700 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr) 1701 { 1702 int ret; 1703 1704 switch (attr->group) { 1705 case KVM_S390_VM_MEM_CTRL: 1706 ret = kvm_s390_get_mem_control(kvm, attr); 1707 break; 1708 case KVM_S390_VM_TOD: 1709 ret = kvm_s390_get_tod(kvm, attr); 1710 break; 1711 case KVM_S390_VM_CPU_MODEL: 1712 ret = kvm_s390_get_cpu_model(kvm, attr); 1713 break; 1714 case KVM_S390_VM_MIGRATION: 1715 ret = kvm_s390_vm_get_migration(kvm, attr); 1716 break; 1717 default: 1718 ret = -ENXIO; 1719 break; 1720 } 1721 1722 return ret; 
1723 } 1724 1725 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) 1726 { 1727 int ret; 1728 1729 switch (attr->group) { 1730 case KVM_S390_VM_MEM_CTRL: 1731 switch (attr->attr) { 1732 case KVM_S390_VM_MEM_ENABLE_CMMA: 1733 case KVM_S390_VM_MEM_CLR_CMMA: 1734 ret = sclp.has_cmma ? 0 : -ENXIO; 1735 break; 1736 case KVM_S390_VM_MEM_LIMIT_SIZE: 1737 ret = 0; 1738 break; 1739 default: 1740 ret = -ENXIO; 1741 break; 1742 } 1743 break; 1744 case KVM_S390_VM_TOD: 1745 switch (attr->attr) { 1746 case KVM_S390_VM_TOD_LOW: 1747 case KVM_S390_VM_TOD_HIGH: 1748 ret = 0; 1749 break; 1750 default: 1751 ret = -ENXIO; 1752 break; 1753 } 1754 break; 1755 case KVM_S390_VM_CPU_MODEL: 1756 switch (attr->attr) { 1757 case KVM_S390_VM_CPU_PROCESSOR: 1758 case KVM_S390_VM_CPU_MACHINE: 1759 case KVM_S390_VM_CPU_PROCESSOR_FEAT: 1760 case KVM_S390_VM_CPU_MACHINE_FEAT: 1761 case KVM_S390_VM_CPU_MACHINE_SUBFUNC: 1762 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: 1763 ret = 0; 1764 break; 1765 default: 1766 ret = -ENXIO; 1767 break; 1768 } 1769 break; 1770 case KVM_S390_VM_CRYPTO: 1771 switch (attr->attr) { 1772 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW: 1773 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW: 1774 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW: 1775 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW: 1776 ret = 0; 1777 break; 1778 case KVM_S390_VM_CRYPTO_ENABLE_APIE: 1779 case KVM_S390_VM_CRYPTO_DISABLE_APIE: 1780 ret = ap_instructions_available() ? 0 : -ENXIO; 1781 break; 1782 default: 1783 ret = -ENXIO; 1784 break; 1785 } 1786 break; 1787 case KVM_S390_VM_MIGRATION: 1788 ret = 0; 1789 break; 1790 default: 1791 ret = -ENXIO; 1792 break; 1793 } 1794 1795 return ret; 1796 } 1797 1798 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) 1799 { 1800 uint8_t *keys; 1801 uint64_t hva; 1802 int srcu_idx, i, r = 0; 1803 1804 if (args->flags != 0) 1805 return -EINVAL; 1806 1807 /* Is this guest using storage keys? 
 */
	if (!mm_uses_skeys(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		r = get_guest_storage_key(current->mm, hva, &keys[i]);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);

	if (!r) {
		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
				 sizeof(uint8_t) * args->count);
		if (r)
			r = -EFAULT;
	}

	kvfree(keys);
	return r;
}

static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;
	bool unlocked;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	i = 0;
	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	while (i < args->count) {
		unlocked = false;
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			break;
		}

		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
		if (r) {
			r = fixup_user_fault(current, current->mm, hva,
					     FAULT_FLAG_WRITE, &unlocked);
			if (r)
				break;
		}
		if (!r)
			i++;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);
out:
	kvfree(keys);
	return r;
}

/*
 * Base address and length must be sent at the start of each block, therefore
 * it's cheaper to send some clean data, as long as it's less than the size of
 * two longs.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/* for consistency */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)

/*
 * Similar to gfn_to_memslot, but returns the index of a memslot also when the
 * address falls in a hole. In that case the index of one of the memslots
 * bordering the hole is returned.
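 * The binary search below mirrors search_memslots() and assumes that the
 * memslots array is kept sorted by base_gfn in descending order.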
1923 */ 1924 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn) 1925 { 1926 int start = 0, end = slots->used_slots; 1927 int slot = atomic_read(&slots->lru_slot); 1928 struct kvm_memory_slot *memslots = slots->memslots; 1929 1930 if (gfn >= memslots[slot].base_gfn && 1931 gfn < memslots[slot].base_gfn + memslots[slot].npages) 1932 return slot; 1933 1934 while (start < end) { 1935 slot = start + (end - start) / 2; 1936 1937 if (gfn >= memslots[slot].base_gfn) 1938 end = slot; 1939 else 1940 start = slot + 1; 1941 } 1942 1943 if (gfn >= memslots[start].base_gfn && 1944 gfn < memslots[start].base_gfn + memslots[start].npages) { 1945 atomic_set(&slots->lru_slot, start); 1946 } 1947 1948 return start; 1949 } 1950 1951 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args, 1952 u8 *res, unsigned long bufsize) 1953 { 1954 unsigned long pgstev, hva, cur_gfn = args->start_gfn; 1955 1956 args->count = 0; 1957 while (args->count < bufsize) { 1958 hva = gfn_to_hva(kvm, cur_gfn); 1959 /* 1960 * We return an error if the first value was invalid, but we 1961 * return successfully if at least one value was copied. 1962 */ 1963 if (kvm_is_error_hva(hva)) 1964 return args->count ? 0 : -EFAULT; 1965 if (get_pgste(kvm->mm, hva, &pgstev) < 0) 1966 pgstev = 0; 1967 res[args->count++] = (pgstev >> 24) & 0x43; 1968 cur_gfn++; 1969 } 1970 1971 return 0; 1972 } 1973 1974 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots, 1975 unsigned long cur_gfn) 1976 { 1977 int slotidx = gfn_to_memslot_approx(slots, cur_gfn); 1978 struct kvm_memory_slot *ms = slots->memslots + slotidx; 1979 unsigned long ofs = cur_gfn - ms->base_gfn; 1980 1981 if (ms->base_gfn + ms->npages <= cur_gfn) { 1982 slotidx--; 1983 /* If we are above the highest slot, wrap around */ 1984 if (slotidx < 0) 1985 slotidx = slots->used_slots - 1; 1986 1987 ms = slots->memslots + slotidx; 1988 ofs = 0; 1989 } 1990 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs); 1991 while ((slotidx > 0) && (ofs >= ms->npages)) { 1992 slotidx--; 1993 ms = slots->memslots + slotidx; 1994 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0); 1995 } 1996 return ms->base_gfn + ofs; 1997 } 1998 1999 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args, 2000 u8 *res, unsigned long bufsize) 2001 { 2002 unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev; 2003 struct kvm_memslots *slots = kvm_memslots(kvm); 2004 struct kvm_memory_slot *ms; 2005 2006 cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn); 2007 ms = gfn_to_memslot(kvm, cur_gfn); 2008 args->count = 0; 2009 args->start_gfn = cur_gfn; 2010 if (!ms) 2011 return 0; 2012 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1); 2013 mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages; 2014 2015 while (args->count < bufsize) { 2016 hva = gfn_to_hva(kvm, cur_gfn); 2017 if (kvm_is_error_hva(hva)) 2018 return 0; 2019 /* Decrement only if we actually flipped the bit to 0 */ 2020 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms))) 2021 atomic64_dec(&kvm->arch.cmma_dirty_pages); 2022 if (get_pgste(kvm->mm, hva, &pgstev) < 0) 2023 pgstev = 0; 2024 /* Save the value */ 2025 res[args->count++] = (pgstev >> 24) & 0x43; 2026 /* If the next bit is too far away, stop. 
*/ 2027 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE) 2028 return 0; 2029 /* If we reached the previous "next", find the next one */ 2030 if (cur_gfn == next_gfn) 2031 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1); 2032 /* Reached the end of memory or of the buffer, stop */ 2033 if ((next_gfn >= mem_end) || 2034 (next_gfn - args->start_gfn >= bufsize)) 2035 return 0; 2036 cur_gfn++; 2037 /* Reached the end of the current memslot, take the next one. */ 2038 if (cur_gfn - ms->base_gfn >= ms->npages) { 2039 ms = gfn_to_memslot(kvm, cur_gfn); 2040 if (!ms) 2041 return 0; 2042 } 2043 } 2044 return 0; 2045 } 2046 2047 /* 2048 * This function searches for the next page with dirty CMMA attributes, and 2049 * saves the attributes in the buffer up to either the end of the buffer or 2050 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found; 2051 * no trailing clean bytes are saved. 2052 * In case no dirty bits were found, or if CMMA was not enabled or used, the 2053 * output buffer will indicate 0 as length. 2054 */ 2055 static int kvm_s390_get_cmma_bits(struct kvm *kvm, 2056 struct kvm_s390_cmma_log *args) 2057 { 2058 unsigned long bufsize; 2059 int srcu_idx, peek, ret; 2060 u8 *values; 2061 2062 if (!kvm->arch.use_cmma) 2063 return -ENXIO; 2064 /* Invalid/unsupported flags were specified */ 2065 if (args->flags & ~KVM_S390_CMMA_PEEK) 2066 return -EINVAL; 2067 /* Migration mode query, and we are not doing a migration */ 2068 peek = !!(args->flags & KVM_S390_CMMA_PEEK); 2069 if (!peek && !kvm->arch.migration_mode) 2070 return -EINVAL; 2071 /* CMMA is disabled or was not used, or the buffer has length zero */ 2072 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX); 2073 if (!bufsize || !kvm->mm->context.uses_cmm) { 2074 memset(args, 0, sizeof(*args)); 2075 return 0; 2076 } 2077 /* We are not peeking, and there are no dirty pages */ 2078 if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) { 2079 memset(args, 0, sizeof(*args)); 2080 return 0; 2081 } 2082 2083 values = vmalloc(bufsize); 2084 if (!values) 2085 return -ENOMEM; 2086 2087 down_read(&kvm->mm->mmap_sem); 2088 srcu_idx = srcu_read_lock(&kvm->srcu); 2089 if (peek) 2090 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize); 2091 else 2092 ret = kvm_s390_get_cmma(kvm, args, values, bufsize); 2093 srcu_read_unlock(&kvm->srcu, srcu_idx); 2094 up_read(&kvm->mm->mmap_sem); 2095 2096 if (kvm->arch.migration_mode) 2097 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages); 2098 else 2099 args->remaining = 0; 2100 2101 if (copy_to_user((void __user *)args->values, values, args->count)) 2102 ret = -EFAULT; 2103 2104 vfree(values); 2105 return ret; 2106 } 2107 2108 /* 2109 * This function sets the CMMA attributes for the given pages. If the input 2110 * buffer has zero length, no action is taken, otherwise the attributes are 2111 * set and the mm->context.uses_cmm flag is set. 
2112 */ 2113 static int kvm_s390_set_cmma_bits(struct kvm *kvm, 2114 const struct kvm_s390_cmma_log *args) 2115 { 2116 unsigned long hva, mask, pgstev, i; 2117 uint8_t *bits; 2118 int srcu_idx, r = 0; 2119 2120 mask = args->mask; 2121 2122 if (!kvm->arch.use_cmma) 2123 return -ENXIO; 2124 /* invalid/unsupported flags */ 2125 if (args->flags != 0) 2126 return -EINVAL; 2127 /* Enforce sane limit on memory allocation */ 2128 if (args->count > KVM_S390_CMMA_SIZE_MAX) 2129 return -EINVAL; 2130 /* Nothing to do */ 2131 if (args->count == 0) 2132 return 0; 2133 2134 bits = vmalloc(array_size(sizeof(*bits), args->count)); 2135 if (!bits) 2136 return -ENOMEM; 2137 2138 r = copy_from_user(bits, (void __user *)args->values, args->count); 2139 if (r) { 2140 r = -EFAULT; 2141 goto out; 2142 } 2143 2144 down_read(&kvm->mm->mmap_sem); 2145 srcu_idx = srcu_read_lock(&kvm->srcu); 2146 for (i = 0; i < args->count; i++) { 2147 hva = gfn_to_hva(kvm, args->start_gfn + i); 2148 if (kvm_is_error_hva(hva)) { 2149 r = -EFAULT; 2150 break; 2151 } 2152 2153 pgstev = bits[i]; 2154 pgstev = pgstev << 24; 2155 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT; 2156 set_pgste_bits(kvm->mm, hva, mask, pgstev); 2157 } 2158 srcu_read_unlock(&kvm->srcu, srcu_idx); 2159 up_read(&kvm->mm->mmap_sem); 2160 2161 if (!kvm->mm->context.uses_cmm) { 2162 down_write(&kvm->mm->mmap_sem); 2163 kvm->mm->context.uses_cmm = 1; 2164 up_write(&kvm->mm->mmap_sem); 2165 } 2166 out: 2167 vfree(bits); 2168 return r; 2169 } 2170 2171 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp) 2172 { 2173 struct kvm_vcpu *vcpu; 2174 u16 rc, rrc; 2175 int ret = 0; 2176 int i; 2177 2178 /* 2179 * We ignore failures and try to destroy as many CPUs as possible. 2180 * At the same time we must not free the assigned resources when 2181 * this fails, as the ultravisor has still access to that memory. 2182 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak 2183 * behind. 2184 * We want to return the first failure rc and rrc, though. 2185 */ 2186 kvm_for_each_vcpu(i, vcpu, kvm) { 2187 mutex_lock(&vcpu->mutex); 2188 if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) { 2189 *rcp = rc; 2190 *rrcp = rrc; 2191 ret = -EIO; 2192 } 2193 mutex_unlock(&vcpu->mutex); 2194 } 2195 return ret; 2196 } 2197 2198 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc) 2199 { 2200 int i, r = 0; 2201 u16 dummy; 2202 2203 struct kvm_vcpu *vcpu; 2204 2205 kvm_for_each_vcpu(i, vcpu, kvm) { 2206 mutex_lock(&vcpu->mutex); 2207 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc); 2208 mutex_unlock(&vcpu->mutex); 2209 if (r) 2210 break; 2211 } 2212 if (r) 2213 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy); 2214 return r; 2215 } 2216 2217 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) 2218 { 2219 int r = 0; 2220 u16 dummy; 2221 void __user *argp = (void __user *)cmd->data; 2222 2223 switch (cmd->cmd) { 2224 case KVM_PV_ENABLE: { 2225 r = -EINVAL; 2226 if (kvm_s390_pv_is_protected(kvm)) 2227 break; 2228 2229 /* 2230 * FMT 4 SIE needs esca. 
		 * As we never switch back to bsca from esca, we need no
		 * cleanup in the error cases below.
		 */
		r = sca_switch_to_extended(kvm);
		if (r)
			break;

		down_write(&current->mm->mmap_sem);
		r = gmap_mark_unmergeable();
		up_write(&current->mm->mmap_sem);
		if (r)
			break;

		r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
		if (r)
			break;

		r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
		if (r)
			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);

		/* we need to block service interrupts from now on */
		set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
		break;
	}
	case KVM_PV_DISABLE: {
		r = -EINVAL;
		if (!kvm_s390_pv_is_protected(kvm))
			break;

		r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
		/*
		 * If a CPU could not be destroyed, destroy VM will also fail.
		 * There is no point in trying to destroy it. Instead return
		 * the rc and rrc from the first CPU that failed destroying.
		 */
		if (r)
			break;
		r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);

		/* no need to block service interrupts any more */
		clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
		break;
	}
	case KVM_PV_SET_SEC_PARMS: {
		struct kvm_s390_pv_sec_parm parms = {};
		void *hdr;

		r = -EINVAL;
		if (!kvm_s390_pv_is_protected(kvm))
			break;

		r = -EFAULT;
		if (copy_from_user(&parms, argp, sizeof(parms)))
			break;

		/* Currently restricted to 8KB */
		r = -EINVAL;
		if (parms.length > PAGE_SIZE * 2)
			break;

		r = -ENOMEM;
		hdr = vmalloc(parms.length);
		if (!hdr)
			break;

		r = -EFAULT;
		if (!copy_from_user(hdr, (void __user *)parms.origin,
				    parms.length))
			r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
						      &cmd->rc, &cmd->rrc);

		vfree(hdr);
		break;
	}
	case KVM_PV_UNPACK: {
		struct kvm_s390_pv_unp unp = {};

		r = -EINVAL;
		if (!kvm_s390_pv_is_protected(kvm))
			break;

		r = -EFAULT;
		if (copy_from_user(&unp, argp, sizeof(unp)))
			break;

		r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
				       &cmd->rc, &cmd->rrc);
		break;
	}
	case KVM_PV_VERIFY: {
		r = -EINVAL;
		if (!kvm_s390_pv_is_protected(kvm))
			break;

		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
				  UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
		KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
			     cmd->rrc);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}

long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing.
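			 * Real adapter routes are registered later by user
			 * space via KVM_SET_GSI_ROUTING; an empty table is
			 * enough here.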
*/ 2361 memset(&routing, 0, sizeof(routing)); 2362 r = kvm_set_irq_routing(kvm, &routing, 0, 0); 2363 } 2364 break; 2365 } 2366 case KVM_SET_DEVICE_ATTR: { 2367 r = -EFAULT; 2368 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 2369 break; 2370 r = kvm_s390_vm_set_attr(kvm, &attr); 2371 break; 2372 } 2373 case KVM_GET_DEVICE_ATTR: { 2374 r = -EFAULT; 2375 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 2376 break; 2377 r = kvm_s390_vm_get_attr(kvm, &attr); 2378 break; 2379 } 2380 case KVM_HAS_DEVICE_ATTR: { 2381 r = -EFAULT; 2382 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 2383 break; 2384 r = kvm_s390_vm_has_attr(kvm, &attr); 2385 break; 2386 } 2387 case KVM_S390_GET_SKEYS: { 2388 struct kvm_s390_skeys args; 2389 2390 r = -EFAULT; 2391 if (copy_from_user(&args, argp, 2392 sizeof(struct kvm_s390_skeys))) 2393 break; 2394 r = kvm_s390_get_skeys(kvm, &args); 2395 break; 2396 } 2397 case KVM_S390_SET_SKEYS: { 2398 struct kvm_s390_skeys args; 2399 2400 r = -EFAULT; 2401 if (copy_from_user(&args, argp, 2402 sizeof(struct kvm_s390_skeys))) 2403 break; 2404 r = kvm_s390_set_skeys(kvm, &args); 2405 break; 2406 } 2407 case KVM_S390_GET_CMMA_BITS: { 2408 struct kvm_s390_cmma_log args; 2409 2410 r = -EFAULT; 2411 if (copy_from_user(&args, argp, sizeof(args))) 2412 break; 2413 mutex_lock(&kvm->slots_lock); 2414 r = kvm_s390_get_cmma_bits(kvm, &args); 2415 mutex_unlock(&kvm->slots_lock); 2416 if (!r) { 2417 r = copy_to_user(argp, &args, sizeof(args)); 2418 if (r) 2419 r = -EFAULT; 2420 } 2421 break; 2422 } 2423 case KVM_S390_SET_CMMA_BITS: { 2424 struct kvm_s390_cmma_log args; 2425 2426 r = -EFAULT; 2427 if (copy_from_user(&args, argp, sizeof(args))) 2428 break; 2429 mutex_lock(&kvm->slots_lock); 2430 r = kvm_s390_set_cmma_bits(kvm, &args); 2431 mutex_unlock(&kvm->slots_lock); 2432 break; 2433 } 2434 case KVM_S390_PV_COMMAND: { 2435 struct kvm_pv_cmd args; 2436 2437 r = 0; 2438 if (!is_prot_virt_host()) { 2439 r = -EINVAL; 2440 break; 2441 } 2442 if (copy_from_user(&args, argp, sizeof(args))) { 2443 r = -EFAULT; 2444 break; 2445 } 2446 if (args.flags) { 2447 r = -EINVAL; 2448 break; 2449 } 2450 mutex_lock(&kvm->lock); 2451 r = kvm_s390_handle_pv(kvm, &args); 2452 mutex_unlock(&kvm->lock); 2453 if (copy_to_user(argp, &args, sizeof(args))) { 2454 r = -EFAULT; 2455 break; 2456 } 2457 break; 2458 } 2459 default: 2460 r = -ENOTTY; 2461 } 2462 2463 return r; 2464 } 2465 2466 static int kvm_s390_apxa_installed(void) 2467 { 2468 struct ap_config_info info; 2469 2470 if (ap_instructions_available()) { 2471 if (ap_qci(&info) == 0) 2472 return info.apxa; 2473 } 2474 2475 return 0; 2476 } 2477 2478 /* 2479 * The format of the crypto control block (CRYCB) is specified in the 3 low 2480 * order bits of the CRYCB designation (CRYCBD) field as follows: 2481 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the 2482 * AP extended addressing (APXA) facility are installed. 2483 * Format 1: The APXA facility is not installed but the MSAX3 facility is. 
2484 * Format 2: Both the APXA and MSAX3 facilities are installed 2485 */ 2486 static void kvm_s390_set_crycb_format(struct kvm *kvm) 2487 { 2488 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb; 2489 2490 /* Clear the CRYCB format bits - i.e., set format 0 by default */ 2491 kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK); 2492 2493 /* Check whether MSAX3 is installed */ 2494 if (!test_kvm_facility(kvm, 76)) 2495 return; 2496 2497 if (kvm_s390_apxa_installed()) 2498 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2; 2499 else 2500 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1; 2501 } 2502 2503 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm, 2504 unsigned long *aqm, unsigned long *adm) 2505 { 2506 struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb; 2507 2508 mutex_lock(&kvm->lock); 2509 kvm_s390_vcpu_block_all(kvm); 2510 2511 switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) { 2512 case CRYCB_FORMAT2: /* APCB1 use 256 bits */ 2513 memcpy(crycb->apcb1.apm, apm, 32); 2514 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx", 2515 apm[0], apm[1], apm[2], apm[3]); 2516 memcpy(crycb->apcb1.aqm, aqm, 32); 2517 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx", 2518 aqm[0], aqm[1], aqm[2], aqm[3]); 2519 memcpy(crycb->apcb1.adm, adm, 32); 2520 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx", 2521 adm[0], adm[1], adm[2], adm[3]); 2522 break; 2523 case CRYCB_FORMAT1: 2524 case CRYCB_FORMAT0: /* Fall through both use APCB0 */ 2525 memcpy(crycb->apcb0.apm, apm, 8); 2526 memcpy(crycb->apcb0.aqm, aqm, 2); 2527 memcpy(crycb->apcb0.adm, adm, 2); 2528 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x", 2529 apm[0], *((unsigned short *)aqm), 2530 *((unsigned short *)adm)); 2531 break; 2532 default: /* Can not happen */ 2533 break; 2534 } 2535 2536 /* recreate the shadow crycb for each vcpu */ 2537 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART); 2538 kvm_s390_vcpu_unblock_all(kvm); 2539 mutex_unlock(&kvm->lock); 2540 } 2541 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks); 2542 2543 void kvm_arch_crypto_clear_masks(struct kvm *kvm) 2544 { 2545 mutex_lock(&kvm->lock); 2546 kvm_s390_vcpu_block_all(kvm); 2547 2548 memset(&kvm->arch.crypto.crycb->apcb0, 0, 2549 sizeof(kvm->arch.crypto.crycb->apcb0)); 2550 memset(&kvm->arch.crypto.crycb->apcb1, 0, 2551 sizeof(kvm->arch.crypto.crycb->apcb1)); 2552 2553 VM_EVENT(kvm, 3, "%s", "CLR CRYCB:"); 2554 /* recreate the shadow crycb for each vcpu */ 2555 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART); 2556 kvm_s390_vcpu_unblock_all(kvm); 2557 mutex_unlock(&kvm->lock); 2558 } 2559 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks); 2560 2561 static u64 kvm_s390_get_initial_cpuid(void) 2562 { 2563 struct cpuid cpuid; 2564 2565 get_cpu_id(&cpuid); 2566 cpuid.version = 0xff; 2567 return *((u64 *) &cpuid); 2568 } 2569 2570 static void kvm_s390_crypto_init(struct kvm *kvm) 2571 { 2572 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb; 2573 kvm_s390_set_crycb_format(kvm); 2574 2575 if (!test_kvm_facility(kvm, 76)) 2576 return; 2577 2578 /* Enable AES/DEA protected key functions by default */ 2579 kvm->arch.crypto.aes_kw = 1; 2580 kvm->arch.crypto.dea_kw = 1; 2581 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 2582 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); 2583 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 2584 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); 2585 } 2586 2587 static void sca_dispose(struct kvm *kvm) 2588 { 2589 
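	/* Free whichever SCA variant (basic or extended) this VM allocated */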
if (kvm->arch.use_esca) 2590 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block)); 2591 else 2592 free_page((unsigned long)(kvm->arch.sca)); 2593 kvm->arch.sca = NULL; 2594 } 2595 2596 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) 2597 { 2598 gfp_t alloc_flags = GFP_KERNEL; 2599 int i, rc; 2600 char debug_name[16]; 2601 static unsigned long sca_offset; 2602 2603 rc = -EINVAL; 2604 #ifdef CONFIG_KVM_S390_UCONTROL 2605 if (type & ~KVM_VM_S390_UCONTROL) 2606 goto out_err; 2607 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN))) 2608 goto out_err; 2609 #else 2610 if (type) 2611 goto out_err; 2612 #endif 2613 2614 rc = s390_enable_sie(); 2615 if (rc) 2616 goto out_err; 2617 2618 rc = -ENOMEM; 2619 2620 if (!sclp.has_64bscao) 2621 alloc_flags |= GFP_DMA; 2622 rwlock_init(&kvm->arch.sca_lock); 2623 /* start with basic SCA */ 2624 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags); 2625 if (!kvm->arch.sca) 2626 goto out_err; 2627 mutex_lock(&kvm_lock); 2628 sca_offset += 16; 2629 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE) 2630 sca_offset = 0; 2631 kvm->arch.sca = (struct bsca_block *) 2632 ((char *) kvm->arch.sca + sca_offset); 2633 mutex_unlock(&kvm_lock); 2634 2635 sprintf(debug_name, "kvm-%u", current->pid); 2636 2637 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long)); 2638 if (!kvm->arch.dbf) 2639 goto out_err; 2640 2641 BUILD_BUG_ON(sizeof(struct sie_page2) != 4096); 2642 kvm->arch.sie_page2 = 2643 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA); 2644 if (!kvm->arch.sie_page2) 2645 goto out_err; 2646 2647 kvm->arch.sie_page2->kvm = kvm; 2648 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list; 2649 2650 for (i = 0; i < kvm_s390_fac_size(); i++) { 2651 kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] & 2652 (kvm_s390_fac_base[i] | 2653 kvm_s390_fac_ext[i]); 2654 kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] & 2655 kvm_s390_fac_base[i]; 2656 } 2657 kvm->arch.model.subfuncs = kvm_s390_available_subfunc; 2658 2659 /* we are always in czam mode - even on pre z14 machines */ 2660 set_kvm_facility(kvm->arch.model.fac_mask, 138); 2661 set_kvm_facility(kvm->arch.model.fac_list, 138); 2662 /* we emulate STHYI in kvm */ 2663 set_kvm_facility(kvm->arch.model.fac_mask, 74); 2664 set_kvm_facility(kvm->arch.model.fac_list, 74); 2665 if (MACHINE_HAS_TLB_GUEST) { 2666 set_kvm_facility(kvm->arch.model.fac_mask, 147); 2667 set_kvm_facility(kvm->arch.model.fac_list, 147); 2668 } 2669 2670 if (css_general_characteristics.aiv && test_facility(65)) 2671 set_kvm_facility(kvm->arch.model.fac_mask, 65); 2672 2673 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid(); 2674 kvm->arch.model.ibc = sclp.ibc & 0x0fff; 2675 2676 kvm_s390_crypto_init(kvm); 2677 2678 mutex_init(&kvm->arch.float_int.ais_lock); 2679 spin_lock_init(&kvm->arch.float_int.lock); 2680 for (i = 0; i < FIRQ_LIST_COUNT; i++) 2681 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]); 2682 init_waitqueue_head(&kvm->arch.ipte_wq); 2683 mutex_init(&kvm->arch.ipte_mutex); 2684 2685 debug_register_view(kvm->arch.dbf, &debug_sprintf_view); 2686 VM_EVENT(kvm, 3, "vm created with type %lu", type); 2687 2688 if (type & KVM_VM_S390_UCONTROL) { 2689 kvm->arch.gmap = NULL; 2690 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT; 2691 } else { 2692 if (sclp.hamax == U64_MAX) 2693 kvm->arch.mem_limit = TASK_SIZE_MAX; 2694 else 2695 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX, 2696 sclp.hamax + 1); 2697 kvm->arch.gmap = gmap_create(current->mm, 
kvm->arch.mem_limit - 1); 2698 if (!kvm->arch.gmap) 2699 goto out_err; 2700 kvm->arch.gmap->private = kvm; 2701 kvm->arch.gmap->pfault_enabled = 0; 2702 } 2703 2704 kvm->arch.use_pfmfi = sclp.has_pfmfi; 2705 kvm->arch.use_skf = sclp.has_skey; 2706 spin_lock_init(&kvm->arch.start_stop_lock); 2707 kvm_s390_vsie_init(kvm); 2708 kvm_s390_gisa_init(kvm); 2709 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid); 2710 2711 return 0; 2712 out_err: 2713 free_page((unsigned long)kvm->arch.sie_page2); 2714 debug_unregister(kvm->arch.dbf); 2715 sca_dispose(kvm); 2716 KVM_EVENT(3, "creation of vm failed: %d", rc); 2717 return rc; 2718 } 2719 2720 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) 2721 { 2722 u16 rc, rrc; 2723 2724 VCPU_EVENT(vcpu, 3, "%s", "free cpu"); 2725 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id); 2726 kvm_s390_clear_local_irqs(vcpu); 2727 kvm_clear_async_pf_completion_queue(vcpu); 2728 if (!kvm_is_ucontrol(vcpu->kvm)) 2729 sca_del_vcpu(vcpu); 2730 2731 if (kvm_is_ucontrol(vcpu->kvm)) 2732 gmap_remove(vcpu->arch.gmap); 2733 2734 if (vcpu->kvm->arch.use_cmma) 2735 kvm_s390_vcpu_unsetup_cmma(vcpu); 2736 /* We can not hold the vcpu mutex here, we are already dying */ 2737 if (kvm_s390_pv_cpu_get_handle(vcpu)) 2738 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc); 2739 free_page((unsigned long)(vcpu->arch.sie_block)); 2740 } 2741 2742 static void kvm_free_vcpus(struct kvm *kvm) 2743 { 2744 unsigned int i; 2745 struct kvm_vcpu *vcpu; 2746 2747 kvm_for_each_vcpu(i, vcpu, kvm) 2748 kvm_vcpu_destroy(vcpu); 2749 2750 mutex_lock(&kvm->lock); 2751 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) 2752 kvm->vcpus[i] = NULL; 2753 2754 atomic_set(&kvm->online_vcpus, 0); 2755 mutex_unlock(&kvm->lock); 2756 } 2757 2758 void kvm_arch_destroy_vm(struct kvm *kvm) 2759 { 2760 u16 rc, rrc; 2761 2762 kvm_free_vcpus(kvm); 2763 sca_dispose(kvm); 2764 kvm_s390_gisa_destroy(kvm); 2765 /* 2766 * We are already at the end of life and kvm->lock is not taken. 2767 * This is ok as the file descriptor is closed by now and nobody 2768 * can mess with the pv state. To avoid lockdep_assert_held from 2769 * complaining we do not use kvm_s390_pv_is_protected. 
2770 */ 2771 if (kvm_s390_pv_get_handle(kvm)) 2772 kvm_s390_pv_deinit_vm(kvm, &rc, &rrc); 2773 debug_unregister(kvm->arch.dbf); 2774 free_page((unsigned long)kvm->arch.sie_page2); 2775 if (!kvm_is_ucontrol(kvm)) 2776 gmap_remove(kvm->arch.gmap); 2777 kvm_s390_destroy_adapters(kvm); 2778 kvm_s390_clear_float_irqs(kvm); 2779 kvm_s390_vsie_destroy(kvm); 2780 KVM_EVENT(3, "vm 0x%pK destroyed", kvm); 2781 } 2782 2783 /* Section: vcpu related */ 2784 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu) 2785 { 2786 vcpu->arch.gmap = gmap_create(current->mm, -1UL); 2787 if (!vcpu->arch.gmap) 2788 return -ENOMEM; 2789 vcpu->arch.gmap->private = vcpu->kvm; 2790 2791 return 0; 2792 } 2793 2794 static void sca_del_vcpu(struct kvm_vcpu *vcpu) 2795 { 2796 if (!kvm_s390_use_sca_entries()) 2797 return; 2798 read_lock(&vcpu->kvm->arch.sca_lock); 2799 if (vcpu->kvm->arch.use_esca) { 2800 struct esca_block *sca = vcpu->kvm->arch.sca; 2801 2802 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); 2803 sca->cpu[vcpu->vcpu_id].sda = 0; 2804 } else { 2805 struct bsca_block *sca = vcpu->kvm->arch.sca; 2806 2807 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); 2808 sca->cpu[vcpu->vcpu_id].sda = 0; 2809 } 2810 read_unlock(&vcpu->kvm->arch.sca_lock); 2811 } 2812 2813 static void sca_add_vcpu(struct kvm_vcpu *vcpu) 2814 { 2815 if (!kvm_s390_use_sca_entries()) { 2816 struct bsca_block *sca = vcpu->kvm->arch.sca; 2817 2818 /* we still need the basic sca for the ipte control */ 2819 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2820 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; 2821 return; 2822 } 2823 read_lock(&vcpu->kvm->arch.sca_lock); 2824 if (vcpu->kvm->arch.use_esca) { 2825 struct esca_block *sca = vcpu->kvm->arch.sca; 2826 2827 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; 2828 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2829 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU; 2830 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; 2831 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); 2832 } else { 2833 struct bsca_block *sca = vcpu->kvm->arch.sca; 2834 2835 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; 2836 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2837 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; 2838 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); 2839 } 2840 read_unlock(&vcpu->kvm->arch.sca_lock); 2841 } 2842 2843 /* Basic SCA to Extended SCA data copy routines */ 2844 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s) 2845 { 2846 d->sda = s->sda; 2847 d->sigp_ctrl.c = s->sigp_ctrl.c; 2848 d->sigp_ctrl.scn = s->sigp_ctrl.scn; 2849 } 2850 2851 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s) 2852 { 2853 int i; 2854 2855 d->ipte_control = s->ipte_control; 2856 d->mcn[0] = s->mcn; 2857 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++) 2858 sca_copy_entry(&d->cpu[i], &s->cpu[i]); 2859 } 2860 2861 static int sca_switch_to_extended(struct kvm *kvm) 2862 { 2863 struct bsca_block *old_sca = kvm->arch.sca; 2864 struct esca_block *new_sca; 2865 struct kvm_vcpu *vcpu; 2866 unsigned int vcpu_idx; 2867 u32 scaol, scaoh; 2868 2869 if (kvm->arch.use_esca) 2870 return 0; 2871 2872 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO); 2873 if (!new_sca) 2874 return -ENOMEM; 2875 2876 scaoh = (u32)((u64)(new_sca) >> 32); 2877 scaol = (u32)(u64)(new_sca) & ~0x3fU; 2878 2879 kvm_s390_vcpu_block_all(kvm); 2880 write_lock(&kvm->arch.sca_lock); 2881 2882 
sca_copy_b_to_e(new_sca, old_sca); 2883 2884 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) { 2885 vcpu->arch.sie_block->scaoh = scaoh; 2886 vcpu->arch.sie_block->scaol = scaol; 2887 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; 2888 } 2889 kvm->arch.sca = new_sca; 2890 kvm->arch.use_esca = 1; 2891 2892 write_unlock(&kvm->arch.sca_lock); 2893 kvm_s390_vcpu_unblock_all(kvm); 2894 2895 free_page((unsigned long)old_sca); 2896 2897 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)", 2898 old_sca, kvm->arch.sca); 2899 return 0; 2900 } 2901 2902 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id) 2903 { 2904 int rc; 2905 2906 if (!kvm_s390_use_sca_entries()) { 2907 if (id < KVM_MAX_VCPUS) 2908 return true; 2909 return false; 2910 } 2911 if (id < KVM_S390_BSCA_CPU_SLOTS) 2912 return true; 2913 if (!sclp.has_esca || !sclp.has_64bscao) 2914 return false; 2915 2916 mutex_lock(&kvm->lock); 2917 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm); 2918 mutex_unlock(&kvm->lock); 2919 2920 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS; 2921 } 2922 2923 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 2924 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2925 { 2926 WARN_ON_ONCE(vcpu->arch.cputm_start != 0); 2927 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 2928 vcpu->arch.cputm_start = get_tod_clock_fast(); 2929 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 2930 } 2931 2932 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 2933 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2934 { 2935 WARN_ON_ONCE(vcpu->arch.cputm_start == 0); 2936 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 2937 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start; 2938 vcpu->arch.cputm_start = 0; 2939 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 2940 } 2941 2942 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 2943 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2944 { 2945 WARN_ON_ONCE(vcpu->arch.cputm_enabled); 2946 vcpu->arch.cputm_enabled = true; 2947 __start_cpu_timer_accounting(vcpu); 2948 } 2949 2950 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 2951 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2952 { 2953 WARN_ON_ONCE(!vcpu->arch.cputm_enabled); 2954 __stop_cpu_timer_accounting(vcpu); 2955 vcpu->arch.cputm_enabled = false; 2956 } 2957 2958 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2959 { 2960 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 2961 __enable_cpu_timer_accounting(vcpu); 2962 preempt_enable(); 2963 } 2964 2965 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2966 { 2967 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 2968 __disable_cpu_timer_accounting(vcpu); 2969 preempt_enable(); 2970 } 2971 2972 /* set the cpu timer - may only be called from the VCPU thread itself */ 2973 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm) 2974 { 2975 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 2976 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 2977 if (vcpu->arch.cputm_enabled) 2978 vcpu->arch.cputm_start = get_tod_clock_fast(); 2979 vcpu->arch.sie_block->cputm = cputm; 2980 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 2981 preempt_enable(); 2982 } 2983 2984 /* update and get the cpu timer - can also be called from other VCPU threads */ 2985 __u64 kvm_s390_get_cpu_timer(struct 
kvm_vcpu *vcpu) 2986 { 2987 unsigned int seq; 2988 __u64 value; 2989 2990 if (unlikely(!vcpu->arch.cputm_enabled)) 2991 return vcpu->arch.sie_block->cputm; 2992 2993 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 2994 do { 2995 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount); 2996 /* 2997 * If the writer would ever execute a read in the critical 2998 * section, e.g. in irq context, we have a deadlock. 2999 */ 3000 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu); 3001 value = vcpu->arch.sie_block->cputm; 3002 /* if cputm_start is 0, accounting is being started/stopped */ 3003 if (likely(vcpu->arch.cputm_start)) 3004 value -= get_tod_clock_fast() - vcpu->arch.cputm_start; 3005 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1)); 3006 preempt_enable(); 3007 return value; 3008 } 3009 3010 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 3011 { 3012 3013 gmap_enable(vcpu->arch.enabled_gmap); 3014 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING); 3015 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) 3016 __start_cpu_timer_accounting(vcpu); 3017 vcpu->cpu = cpu; 3018 } 3019 3020 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 3021 { 3022 vcpu->cpu = -1; 3023 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) 3024 __stop_cpu_timer_accounting(vcpu); 3025 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING); 3026 vcpu->arch.enabled_gmap = gmap_get_enabled(); 3027 gmap_disable(vcpu->arch.enabled_gmap); 3028 3029 } 3030 3031 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) 3032 { 3033 mutex_lock(&vcpu->kvm->lock); 3034 preempt_disable(); 3035 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch; 3036 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx; 3037 preempt_enable(); 3038 mutex_unlock(&vcpu->kvm->lock); 3039 if (!kvm_is_ucontrol(vcpu->kvm)) { 3040 vcpu->arch.gmap = vcpu->kvm->arch.gmap; 3041 sca_add_vcpu(vcpu); 3042 } 3043 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0) 3044 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC; 3045 /* make vcpu_load load the right gmap on the first trigger */ 3046 vcpu->arch.enabled_gmap = vcpu->arch.gmap; 3047 } 3048 3049 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr) 3050 { 3051 if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) && 3052 test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo)) 3053 return true; 3054 return false; 3055 } 3056 3057 static bool kvm_has_pckmo_ecc(struct kvm *kvm) 3058 { 3059 /* At least one ECC subfunction must be present */ 3060 return kvm_has_pckmo_subfunc(kvm, 32) || 3061 kvm_has_pckmo_subfunc(kvm, 33) || 3062 kvm_has_pckmo_subfunc(kvm, 34) || 3063 kvm_has_pckmo_subfunc(kvm, 40) || 3064 kvm_has_pckmo_subfunc(kvm, 41); 3065 3066 } 3067 3068 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu) 3069 { 3070 /* 3071 * If the AP instructions are not being interpreted and the MSAX3 3072 * facility is not configured for the guest, there is nothing to set up. 
3073 */ 3074 if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76)) 3075 return; 3076 3077 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd; 3078 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA); 3079 vcpu->arch.sie_block->eca &= ~ECA_APIE; 3080 vcpu->arch.sie_block->ecd &= ~ECD_ECC; 3081 3082 if (vcpu->kvm->arch.crypto.apie) 3083 vcpu->arch.sie_block->eca |= ECA_APIE; 3084 3085 /* Set up protected key support */ 3086 if (vcpu->kvm->arch.crypto.aes_kw) { 3087 vcpu->arch.sie_block->ecb3 |= ECB3_AES; 3088 /* ecc is also wrapped with AES key */ 3089 if (kvm_has_pckmo_ecc(vcpu->kvm)) 3090 vcpu->arch.sie_block->ecd |= ECD_ECC; 3091 } 3092 3093 if (vcpu->kvm->arch.crypto.dea_kw) 3094 vcpu->arch.sie_block->ecb3 |= ECB3_DEA; 3095 } 3096 3097 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu) 3098 { 3099 free_page(vcpu->arch.sie_block->cbrlo); 3100 vcpu->arch.sie_block->cbrlo = 0; 3101 } 3102 3103 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu) 3104 { 3105 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL); 3106 if (!vcpu->arch.sie_block->cbrlo) 3107 return -ENOMEM; 3108 return 0; 3109 } 3110 3111 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu) 3112 { 3113 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model; 3114 3115 vcpu->arch.sie_block->ibc = model->ibc; 3116 if (test_kvm_facility(vcpu->kvm, 7)) 3117 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list; 3118 } 3119 3120 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu) 3121 { 3122 int rc = 0; 3123 u16 uvrc, uvrrc; 3124 3125 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | 3126 CPUSTAT_SM | 3127 CPUSTAT_STOPPED); 3128 3129 if (test_kvm_facility(vcpu->kvm, 78)) 3130 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2); 3131 else if (test_kvm_facility(vcpu->kvm, 8)) 3132 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED); 3133 3134 kvm_s390_vcpu_setup_model(vcpu); 3135 3136 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */ 3137 if (MACHINE_HAS_ESOP) 3138 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT; 3139 if (test_kvm_facility(vcpu->kvm, 9)) 3140 vcpu->arch.sie_block->ecb |= ECB_SRSI; 3141 if (test_kvm_facility(vcpu->kvm, 73)) 3142 vcpu->arch.sie_block->ecb |= ECB_TE; 3143 3144 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi) 3145 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI; 3146 if (test_kvm_facility(vcpu->kvm, 130)) 3147 vcpu->arch.sie_block->ecb2 |= ECB2_IEP; 3148 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI; 3149 if (sclp.has_cei) 3150 vcpu->arch.sie_block->eca |= ECA_CEI; 3151 if (sclp.has_ib) 3152 vcpu->arch.sie_block->eca |= ECA_IB; 3153 if (sclp.has_siif) 3154 vcpu->arch.sie_block->eca |= ECA_SII; 3155 if (sclp.has_sigpif) 3156 vcpu->arch.sie_block->eca |= ECA_SIGPI; 3157 if (test_kvm_facility(vcpu->kvm, 129)) { 3158 vcpu->arch.sie_block->eca |= ECA_VX; 3159 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; 3160 } 3161 if (test_kvm_facility(vcpu->kvm, 139)) 3162 vcpu->arch.sie_block->ecd |= ECD_MEF; 3163 if (test_kvm_facility(vcpu->kvm, 156)) 3164 vcpu->arch.sie_block->ecd |= ECD_ETOKENF; 3165 if (vcpu->arch.sie_block->gd) { 3166 vcpu->arch.sie_block->eca |= ECA_AIV; 3167 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u", 3168 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id); 3169 } 3170 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx) 3171 | SDNXC; 3172 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb; 3173 3174 if (sclp.has_kss) 3175 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS); 3176 else 3177 
vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; 3178 3179 if (vcpu->kvm->arch.use_cmma) { 3180 rc = kvm_s390_vcpu_setup_cmma(vcpu); 3181 if (rc) 3182 return rc; 3183 } 3184 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 3185 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; 3186 3187 vcpu->arch.sie_block->hpid = HPID_KVM; 3188 3189 kvm_s390_vcpu_crypto_setup(vcpu); 3190 3191 mutex_lock(&vcpu->kvm->lock); 3192 if (kvm_s390_pv_is_protected(vcpu->kvm)) { 3193 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc); 3194 if (rc) 3195 kvm_s390_vcpu_unsetup_cmma(vcpu); 3196 } 3197 mutex_unlock(&vcpu->kvm->lock); 3198 3199 return rc; 3200 } 3201 3202 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) 3203 { 3204 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id)) 3205 return -EINVAL; 3206 return 0; 3207 } 3208 3209 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) 3210 { 3211 struct sie_page *sie_page; 3212 int rc; 3213 3214 BUILD_BUG_ON(sizeof(struct sie_page) != 4096); 3215 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL); 3216 if (!sie_page) 3217 return -ENOMEM; 3218 3219 vcpu->arch.sie_block = &sie_page->sie_block; 3220 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb; 3221 3222 /* the real guest size will always be smaller than msl */ 3223 vcpu->arch.sie_block->mso = 0; 3224 vcpu->arch.sie_block->msl = sclp.hamax; 3225 3226 vcpu->arch.sie_block->icpua = vcpu->vcpu_id; 3227 spin_lock_init(&vcpu->arch.local_int.lock); 3228 vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin; 3229 if (vcpu->arch.sie_block->gd && sclp.has_gisaf) 3230 vcpu->arch.sie_block->gd |= GISA_FORMAT1; 3231 seqcount_init(&vcpu->arch.cputm_seqcount); 3232 3233 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 3234 kvm_clear_async_pf_completion_queue(vcpu); 3235 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX | 3236 KVM_SYNC_GPRS | 3237 KVM_SYNC_ACRS | 3238 KVM_SYNC_CRS | 3239 KVM_SYNC_ARCH0 | 3240 KVM_SYNC_PFAULT; 3241 kvm_s390_set_prefix(vcpu, 0); 3242 if (test_kvm_facility(vcpu->kvm, 64)) 3243 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; 3244 if (test_kvm_facility(vcpu->kvm, 82)) 3245 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC; 3246 if (test_kvm_facility(vcpu->kvm, 133)) 3247 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB; 3248 if (test_kvm_facility(vcpu->kvm, 156)) 3249 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN; 3250 /* fprs can be synchronized via vrs, even if the guest has no vx. With 3251 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format. 
3252 */ 3253 if (MACHINE_HAS_VX) 3254 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS; 3255 else 3256 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS; 3257 3258 if (kvm_is_ucontrol(vcpu->kvm)) { 3259 rc = __kvm_ucontrol_vcpu_init(vcpu); 3260 if (rc) 3261 goto out_free_sie_block; 3262 } 3263 3264 VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", 3265 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block); 3266 trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block); 3267 3268 rc = kvm_s390_vcpu_setup(vcpu); 3269 if (rc) 3270 goto out_ucontrol_uninit; 3271 return 0; 3272 3273 out_ucontrol_uninit: 3274 if (kvm_is_ucontrol(vcpu->kvm)) 3275 gmap_remove(vcpu->arch.gmap); 3276 out_free_sie_block: 3277 free_page((unsigned long)(vcpu->arch.sie_block)); 3278 return rc; 3279 } 3280 3281 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 3282 { 3283 return kvm_s390_vcpu_has_irq(vcpu, 0); 3284 } 3285 3286 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) 3287 { 3288 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE); 3289 } 3290 3291 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu) 3292 { 3293 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); 3294 exit_sie(vcpu); 3295 } 3296 3297 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu) 3298 { 3299 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); 3300 } 3301 3302 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu) 3303 { 3304 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20); 3305 exit_sie(vcpu); 3306 } 3307 3308 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu) 3309 { 3310 return atomic_read(&vcpu->arch.sie_block->prog20) & 3311 (PROG_BLOCK_SIE | PROG_REQUEST); 3312 } 3313 3314 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu) 3315 { 3316 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20); 3317 } 3318 3319 /* 3320 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running. 3321 * If the CPU is not running (e.g. waiting as idle) the function will 3322 * return immediately. 
*/ 3323 void exit_sie(struct kvm_vcpu *vcpu) 3324 { 3325 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT); 3326 kvm_s390_vsie_kick(vcpu); 3327 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE) 3328 cpu_relax(); 3329 } 3330 3331 /* Kick a guest cpu out of SIE to process a request synchronously */ 3332 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu) 3333 { 3334 kvm_make_request(req, vcpu); 3335 kvm_s390_vcpu_request(vcpu); 3336 } 3337 3338 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, 3339 unsigned long end) 3340 { 3341 struct kvm *kvm = gmap->private; 3342 struct kvm_vcpu *vcpu; 3343 unsigned long prefix; 3344 int i; 3345 3346 if (gmap_is_shadow(gmap)) 3347 return; 3348 if (start >= 1UL << 31) 3349 /* We are only interested in prefix pages */ 3350 return; 3351 kvm_for_each_vcpu(i, vcpu, kvm) { 3352 /* match against both prefix pages */ 3353 prefix = kvm_s390_get_prefix(vcpu); 3354 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) { 3355 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx", 3356 start, end); 3357 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu); 3358 } 3359 } 3360 } 3361 3362 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) 3363 { 3364 /* do not poll with more than halt_poll_max_steal percent of steal time */ 3365 if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >= 3366 halt_poll_max_steal) { 3367 vcpu->stat.halt_no_poll_steal++; 3368 return true; 3369 } 3370 return false; 3371 } 3372 3373 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) 3374 { 3375 /* kvm common code refers to this, but never calls it */ 3376 BUG(); 3377 return 0; 3378 } 3379 3380 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, 3381 struct kvm_one_reg *reg) 3382 { 3383 int r = -EINVAL; 3384 3385 switch (reg->id) { 3386 case KVM_REG_S390_TODPR: 3387 r = put_user(vcpu->arch.sie_block->todpr, 3388 (u32 __user *)reg->addr); 3389 break; 3390 case KVM_REG_S390_EPOCHDIFF: 3391 r = put_user(vcpu->arch.sie_block->epoch, 3392 (u64 __user *)reg->addr); 3393 break; 3394 case KVM_REG_S390_CPU_TIMER: 3395 r = put_user(kvm_s390_get_cpu_timer(vcpu), 3396 (u64 __user *)reg->addr); 3397 break; 3398 case KVM_REG_S390_CLOCK_COMP: 3399 r = put_user(vcpu->arch.sie_block->ckc, 3400 (u64 __user *)reg->addr); 3401 break; 3402 case KVM_REG_S390_PFTOKEN: 3403 r = put_user(vcpu->arch.pfault_token, 3404 (u64 __user *)reg->addr); 3405 break; 3406 case KVM_REG_S390_PFCOMPARE: 3407 r = put_user(vcpu->arch.pfault_compare, 3408 (u64 __user *)reg->addr); 3409 break; 3410 case KVM_REG_S390_PFSELECT: 3411 r = put_user(vcpu->arch.pfault_select, 3412 (u64 __user *)reg->addr); 3413 break; 3414 case KVM_REG_S390_PP: 3415 r = put_user(vcpu->arch.sie_block->pp, 3416 (u64 __user *)reg->addr); 3417 break; 3418 case KVM_REG_S390_GBEA: 3419 r = put_user(vcpu->arch.sie_block->gbea, 3420 (u64 __user *)reg->addr); 3421 break; 3422 default: 3423 break; 3424 } 3425 3426 return r; 3427 } 3428 3429 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, 3430 struct kvm_one_reg *reg) 3431 { 3432 int r = -EINVAL; 3433 __u64 val; 3434 3435 switch (reg->id) { 3436 case KVM_REG_S390_TODPR: 3437 r = get_user(vcpu->arch.sie_block->todpr, 3438 (u32 __user *)reg->addr); 3439 break; 3440 case KVM_REG_S390_EPOCHDIFF: 3441 r = get_user(vcpu->arch.sie_block->epoch, 3442 (u64 __user *)reg->addr); 3443 break; 3444 case KVM_REG_S390_CPU_TIMER: 3445 r = get_user(val, (u64 __user *)reg->addr); 3446 if (!r) 3447 kvm_s390_set_cpu_timer(vcpu, val); 3448 break; 3449 case KVM_REG_S390_CLOCK_COMP: 3450 r = 
get_user(vcpu->arch.sie_block->ckc, 3451 (u64 __user *)reg->addr); 3452 break; 3453 case KVM_REG_S390_PFTOKEN: 3454 r = get_user(vcpu->arch.pfault_token, 3455 (u64 __user *)reg->addr); 3456 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 3457 kvm_clear_async_pf_completion_queue(vcpu); 3458 break; 3459 case KVM_REG_S390_PFCOMPARE: 3460 r = get_user(vcpu->arch.pfault_compare, 3461 (u64 __user *)reg->addr); 3462 break; 3463 case KVM_REG_S390_PFSELECT: 3464 r = get_user(vcpu->arch.pfault_select, 3465 (u64 __user *)reg->addr); 3466 break; 3467 case KVM_REG_S390_PP: 3468 r = get_user(vcpu->arch.sie_block->pp, 3469 (u64 __user *)reg->addr); 3470 break; 3471 case KVM_REG_S390_GBEA: 3472 r = get_user(vcpu->arch.sie_block->gbea, 3473 (u64 __user *)reg->addr); 3474 break; 3475 default: 3476 break; 3477 } 3478 3479 return r; 3480 } 3481 3482 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu) 3483 { 3484 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI; 3485 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 3486 memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb)); 3487 3488 kvm_clear_async_pf_completion_queue(vcpu); 3489 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) 3490 kvm_s390_vcpu_stop(vcpu); 3491 kvm_s390_clear_local_irqs(vcpu); 3492 } 3493 3494 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) 3495 { 3496 /* Initial reset is a superset of the normal reset */ 3497 kvm_arch_vcpu_ioctl_normal_reset(vcpu); 3498 3499 /* this equals initial cpu reset in pop, but we don't switch to ESA */ 3500 vcpu->arch.sie_block->gpsw.mask = 0; 3501 vcpu->arch.sie_block->gpsw.addr = 0; 3502 kvm_s390_set_prefix(vcpu, 0); 3503 kvm_s390_set_cpu_timer(vcpu, 0); 3504 vcpu->arch.sie_block->ckc = 0; 3505 memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr)); 3506 vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK; 3507 vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK; 3508 vcpu->run->s.regs.fpc = 0; 3509 /* 3510 * Do not reset these registers in the protected case, as some of 3511 * them are overlayed and they are not accessible in this case 3512 * anyway. 
3513 */ 3514 if (!kvm_s390_pv_cpu_is_protected(vcpu)) { 3515 vcpu->arch.sie_block->gbea = 1; 3516 vcpu->arch.sie_block->pp = 0; 3517 vcpu->arch.sie_block->fpf &= ~FPF_BPBC; 3518 vcpu->arch.sie_block->todpr = 0; 3519 } 3520 } 3521 3522 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu) 3523 { 3524 struct kvm_sync_regs *regs = &vcpu->run->s.regs; 3525 3526 /* Clear reset is a superset of the initial reset */ 3527 kvm_arch_vcpu_ioctl_initial_reset(vcpu); 3528 3529 memset(®s->gprs, 0, sizeof(regs->gprs)); 3530 memset(®s->vrs, 0, sizeof(regs->vrs)); 3531 memset(®s->acrs, 0, sizeof(regs->acrs)); 3532 memset(®s->gscb, 0, sizeof(regs->gscb)); 3533 3534 regs->etoken = 0; 3535 regs->etoken_extension = 0; 3536 } 3537 3538 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 3539 { 3540 vcpu_load(vcpu); 3541 memcpy(&vcpu->run->s.regs.gprs, ®s->gprs, sizeof(regs->gprs)); 3542 vcpu_put(vcpu); 3543 return 0; 3544 } 3545 3546 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 3547 { 3548 vcpu_load(vcpu); 3549 memcpy(®s->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs)); 3550 vcpu_put(vcpu); 3551 return 0; 3552 } 3553 3554 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 3555 struct kvm_sregs *sregs) 3556 { 3557 vcpu_load(vcpu); 3558 3559 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs)); 3560 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs)); 3561 3562 vcpu_put(vcpu); 3563 return 0; 3564 } 3565 3566 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, 3567 struct kvm_sregs *sregs) 3568 { 3569 vcpu_load(vcpu); 3570 3571 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs)); 3572 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs)); 3573 3574 vcpu_put(vcpu); 3575 return 0; 3576 } 3577 3578 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 3579 { 3580 int ret = 0; 3581 3582 vcpu_load(vcpu); 3583 3584 if (test_fp_ctl(fpu->fpc)) { 3585 ret = -EINVAL; 3586 goto out; 3587 } 3588 vcpu->run->s.regs.fpc = fpu->fpc; 3589 if (MACHINE_HAS_VX) 3590 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs, 3591 (freg_t *) fpu->fprs); 3592 else 3593 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs)); 3594 3595 out: 3596 vcpu_put(vcpu); 3597 return ret; 3598 } 3599 3600 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 3601 { 3602 vcpu_load(vcpu); 3603 3604 /* make sure we have the latest values */ 3605 save_fpu_regs(); 3606 if (MACHINE_HAS_VX) 3607 convert_vx_to_fp((freg_t *) fpu->fprs, 3608 (__vector128 *) vcpu->run->s.regs.vrs); 3609 else 3610 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs)); 3611 fpu->fpc = vcpu->run->s.regs.fpc; 3612 3613 vcpu_put(vcpu); 3614 return 0; 3615 } 3616 3617 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw) 3618 { 3619 int rc = 0; 3620 3621 if (!is_vcpu_stopped(vcpu)) 3622 rc = -EBUSY; 3623 else { 3624 vcpu->run->psw_mask = psw.mask; 3625 vcpu->run->psw_addr = psw.addr; 3626 } 3627 return rc; 3628 } 3629 3630 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, 3631 struct kvm_translation *tr) 3632 { 3633 return -EINVAL; /* not implemented yet */ 3634 } 3635 3636 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \ 3637 KVM_GUESTDBG_USE_HW_BP | \ 3638 KVM_GUESTDBG_ENABLE) 3639 3640 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, 3641 struct kvm_guest_debug *dbg) 3642 { 3643 int rc = 0; 3644 3645 vcpu_load(vcpu); 3646 
3647 vcpu->guest_debug = 0; 3648 kvm_s390_clear_bp_data(vcpu); 3649 3650 if (dbg->control & ~VALID_GUESTDBG_FLAGS) { 3651 rc = -EINVAL; 3652 goto out; 3653 } 3654 if (!sclp.has_gpere) { 3655 rc = -EINVAL; 3656 goto out; 3657 } 3658 3659 if (dbg->control & KVM_GUESTDBG_ENABLE) { 3660 vcpu->guest_debug = dbg->control; 3661 /* enforce guest PER */ 3662 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P); 3663 3664 if (dbg->control & KVM_GUESTDBG_USE_HW_BP) 3665 rc = kvm_s390_import_bp_data(vcpu, dbg); 3666 } else { 3667 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P); 3668 vcpu->arch.guestdbg.last_bp = 0; 3669 } 3670 3671 if (rc) { 3672 vcpu->guest_debug = 0; 3673 kvm_s390_clear_bp_data(vcpu); 3674 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P); 3675 } 3676 3677 out: 3678 vcpu_put(vcpu); 3679 return rc; 3680 } 3681 3682 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, 3683 struct kvm_mp_state *mp_state) 3684 { 3685 int ret; 3686 3687 vcpu_load(vcpu); 3688 3689 /* CHECK_STOP and LOAD are not supported yet */ 3690 ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED : 3691 KVM_MP_STATE_OPERATING; 3692 3693 vcpu_put(vcpu); 3694 return ret; 3695 } 3696 3697 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, 3698 struct kvm_mp_state *mp_state) 3699 { 3700 int rc = 0; 3701 3702 vcpu_load(vcpu); 3703 3704 /* user space knows about this interface - let it control the state */ 3705 vcpu->kvm->arch.user_cpu_state_ctrl = 1; 3706 3707 switch (mp_state->mp_state) { 3708 case KVM_MP_STATE_STOPPED: 3709 kvm_s390_vcpu_stop(vcpu); 3710 break; 3711 case KVM_MP_STATE_OPERATING: 3712 kvm_s390_vcpu_start(vcpu); 3713 break; 3714 case KVM_MP_STATE_LOAD: 3715 case KVM_MP_STATE_CHECK_STOP: 3716 /* fall through - CHECK_STOP and LOAD are not supported yet */ 3717 default: 3718 rc = -ENXIO; 3719 } 3720 3721 vcpu_put(vcpu); 3722 return rc; 3723 } 3724 3725 static bool ibs_enabled(struct kvm_vcpu *vcpu) 3726 { 3727 return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS); 3728 } 3729 3730 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) 3731 { 3732 retry: 3733 kvm_s390_vcpu_request_handled(vcpu); 3734 if (!kvm_request_pending(vcpu)) 3735 return 0; 3736 /* 3737 * We use MMU_RELOAD just to re-arm the ipte notifier for the 3738 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock. 3739 * This ensures that the ipte instruction for this request has 3740 * already finished. We might race against a second unmapper that 3741 * wants to set the blocking bit. Lets just retry the request loop. 
3742 */ 3743 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) { 3744 int rc; 3745 rc = gmap_mprotect_notify(vcpu->arch.gmap, 3746 kvm_s390_get_prefix(vcpu), 3747 PAGE_SIZE * 2, PROT_WRITE); 3748 if (rc) { 3749 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); 3750 return rc; 3751 } 3752 goto retry; 3753 } 3754 3755 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) { 3756 vcpu->arch.sie_block->ihcpu = 0xffff; 3757 goto retry; 3758 } 3759 3760 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) { 3761 if (!ibs_enabled(vcpu)) { 3762 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1); 3763 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS); 3764 } 3765 goto retry; 3766 } 3767 3768 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) { 3769 if (ibs_enabled(vcpu)) { 3770 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0); 3771 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS); 3772 } 3773 goto retry; 3774 } 3775 3776 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) { 3777 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC; 3778 goto retry; 3779 } 3780 3781 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) { 3782 /* 3783 * Disable CMM virtualization; we will emulate the ESSA 3784 * instruction manually, in order to provide additional 3785 * functionalities needed for live migration. 3786 */ 3787 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA; 3788 goto retry; 3789 } 3790 3791 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) { 3792 /* 3793 * Re-enable CMM virtualization if CMMA is available and 3794 * CMM has been used. 3795 */ 3796 if ((vcpu->kvm->arch.use_cmma) && 3797 (vcpu->kvm->mm->context.uses_cmm)) 3798 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA; 3799 goto retry; 3800 } 3801 3802 /* nothing to do, just clear the request */ 3803 kvm_clear_request(KVM_REQ_UNHALT, vcpu); 3804 /* we left the vsie handler, nothing to do, just clear the request */ 3805 kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu); 3806 3807 return 0; 3808 } 3809 3810 void kvm_s390_set_tod_clock(struct kvm *kvm, 3811 const struct kvm_s390_vm_tod_clock *gtod) 3812 { 3813 struct kvm_vcpu *vcpu; 3814 struct kvm_s390_tod_clock_ext htod; 3815 int i; 3816 3817 mutex_lock(&kvm->lock); 3818 preempt_disable(); 3819 3820 get_tod_clock_ext((char *)&htod); 3821 3822 kvm->arch.epoch = gtod->tod - htod.tod; 3823 kvm->arch.epdx = 0; 3824 if (test_kvm_facility(kvm, 139)) { 3825 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx; 3826 if (kvm->arch.epoch > gtod->tod) 3827 kvm->arch.epdx -= 1; 3828 } 3829 3830 kvm_s390_vcpu_block_all(kvm); 3831 kvm_for_each_vcpu(i, vcpu, kvm) { 3832 vcpu->arch.sie_block->epoch = kvm->arch.epoch; 3833 vcpu->arch.sie_block->epdx = kvm->arch.epdx; 3834 } 3835 3836 kvm_s390_vcpu_unblock_all(kvm); 3837 preempt_enable(); 3838 mutex_unlock(&kvm->lock); 3839 } 3840 3841 /** 3842 * kvm_arch_fault_in_page - fault-in guest page if necessary 3843 * @vcpu: The corresponding virtual cpu 3844 * @gpa: Guest physical address 3845 * @writable: Whether the page should be writable or not 3846 * 3847 * Make sure that a guest page has been faulted-in on the host. 3848 * 3849 * Return: Zero on success, negative error code otherwise. 3850 */ 3851 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable) 3852 { 3853 return gmap_fault(vcpu->arch.gmap, gpa, 3854 writable ? 
FAULT_FLAG_WRITE : 0); 3855 } 3856 3857 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token, 3858 unsigned long token) 3859 { 3860 struct kvm_s390_interrupt inti; 3861 struct kvm_s390_irq irq; 3862 3863 if (start_token) { 3864 irq.u.ext.ext_params2 = token; 3865 irq.type = KVM_S390_INT_PFAULT_INIT; 3866 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq)); 3867 } else { 3868 inti.type = KVM_S390_INT_PFAULT_DONE; 3869 inti.parm64 = token; 3870 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti)); 3871 } 3872 } 3873 3874 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, 3875 struct kvm_async_pf *work) 3876 { 3877 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token); 3878 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token); 3879 } 3880 3881 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, 3882 struct kvm_async_pf *work) 3883 { 3884 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token); 3885 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token); 3886 } 3887 3888 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, 3889 struct kvm_async_pf *work) 3890 { 3891 /* s390 will always inject the page directly */ 3892 } 3893 3894 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu) 3895 { 3896 /* 3897 * s390 will always inject the page directly, 3898 * but we still want check_async_completion to cleanup 3899 */ 3900 return true; 3901 } 3902 3903 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu) 3904 { 3905 hva_t hva; 3906 struct kvm_arch_async_pf arch; 3907 int rc; 3908 3909 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 3910 return 0; 3911 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) != 3912 vcpu->arch.pfault_compare) 3913 return 0; 3914 if (psw_extint_disabled(vcpu)) 3915 return 0; 3916 if (kvm_s390_vcpu_has_irq(vcpu, 0)) 3917 return 0; 3918 if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK)) 3919 return 0; 3920 if (!vcpu->arch.gmap->pfault_enabled) 3921 return 0; 3922 3923 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr)); 3924 hva += current->thread.gmap_addr & ~PAGE_MASK; 3925 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8)) 3926 return 0; 3927 3928 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch); 3929 return rc; 3930 } 3931 3932 static int vcpu_pre_run(struct kvm_vcpu *vcpu) 3933 { 3934 int rc, cpuflags; 3935 3936 /* 3937 * On s390 notifications for arriving pages will be delivered directly 3938 * to the guest but the house keeping for completed pfaults is 3939 * handled outside the worker. 
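 * kvm_check_async_pf_completion() below performs that housekeeping before each guest entry.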
3940 */ 3941 kvm_check_async_pf_completion(vcpu); 3942 3943 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14]; 3944 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15]; 3945 3946 if (need_resched()) 3947 schedule(); 3948 3949 if (test_cpu_flag(CIF_MCCK_PENDING)) 3950 s390_handle_mcck(); 3951 3952 if (!kvm_is_ucontrol(vcpu->kvm)) { 3953 rc = kvm_s390_deliver_pending_interrupts(vcpu); 3954 if (rc) 3955 return rc; 3956 } 3957 3958 rc = kvm_s390_handle_requests(vcpu); 3959 if (rc) 3960 return rc; 3961 3962 if (guestdbg_enabled(vcpu)) { 3963 kvm_s390_backup_guest_per_regs(vcpu); 3964 kvm_s390_patch_guest_per_regs(vcpu); 3965 } 3966 3967 clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask); 3968 3969 vcpu->arch.sie_block->icptcode = 0; 3970 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags); 3971 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags); 3972 trace_kvm_s390_sie_enter(vcpu, cpuflags); 3973 3974 return 0; 3975 } 3976 3977 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) 3978 { 3979 struct kvm_s390_pgm_info pgm_info = { 3980 .code = PGM_ADDRESSING, 3981 }; 3982 u8 opcode, ilen; 3983 int rc; 3984 3985 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); 3986 trace_kvm_s390_sie_fault(vcpu); 3987 3988 /* 3989 * We want to inject an addressing exception, which is defined as a 3990 * suppressing or terminating exception. However, since we came here 3991 * by a DAT access exception, the PSW still points to the faulting 3992 * instruction since DAT exceptions are nullifying. So we've got 3993 * to look up the current opcode to get the length of the instruction 3994 * to be able to forward the PSW. 3995 */ 3996 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1); 3997 ilen = insn_length(opcode); 3998 if (rc < 0) { 3999 return rc; 4000 } else if (rc) { 4001 /* Instruction-Fetching Exceptions - we can't detect the ilen. 4002 * Forward by arbitrary ilc, injection will take care of 4003 * nullification if necessary. 
4004 */ 4005 pgm_info = vcpu->arch.pgm; 4006 ilen = 4; 4007 } 4008 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID; 4009 kvm_s390_forward_psw(vcpu, ilen); 4010 return kvm_s390_inject_prog_irq(vcpu, &pgm_info); 4011 } 4012 4013 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) 4014 { 4015 struct mcck_volatile_info *mcck_info; 4016 struct sie_page *sie_page; 4017 4018 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", 4019 vcpu->arch.sie_block->icptcode); 4020 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); 4021 4022 if (guestdbg_enabled(vcpu)) 4023 kvm_s390_restore_guest_per_regs(vcpu); 4024 4025 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14; 4026 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15; 4027 4028 if (exit_reason == -EINTR) { 4029 VCPU_EVENT(vcpu, 3, "%s", "machine check"); 4030 sie_page = container_of(vcpu->arch.sie_block, 4031 struct sie_page, sie_block); 4032 mcck_info = &sie_page->mcck_info; 4033 kvm_s390_reinject_machine_check(vcpu, mcck_info); 4034 return 0; 4035 } 4036 4037 if (vcpu->arch.sie_block->icptcode > 0) { 4038 int rc = kvm_handle_sie_intercept(vcpu); 4039 4040 if (rc != -EOPNOTSUPP) 4041 return rc; 4042 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC; 4043 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; 4044 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; 4045 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; 4046 return -EREMOTE; 4047 } else if (exit_reason != -EFAULT) { 4048 vcpu->stat.exit_null++; 4049 return 0; 4050 } else if (kvm_is_ucontrol(vcpu->kvm)) { 4051 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; 4052 vcpu->run->s390_ucontrol.trans_exc_code = 4053 current->thread.gmap_addr; 4054 vcpu->run->s390_ucontrol.pgm_code = 0x10; 4055 return -EREMOTE; 4056 } else if (current->thread.gmap_pfault) { 4057 trace_kvm_s390_major_guest_pfault(vcpu); 4058 current->thread.gmap_pfault = 0; 4059 if (kvm_arch_setup_async_pf(vcpu)) 4060 return 0; 4061 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1); 4062 } 4063 return vcpu_post_run_fault_in_sie(vcpu); 4064 } 4065 4066 static int __vcpu_run(struct kvm_vcpu *vcpu) 4067 { 4068 int rc, exit_reason; 4069 struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block; 4070 4071 /* 4072 * We try to hold kvm->srcu during most of vcpu_run (except when run- 4073 * ning the guest), so that memslots (and other stuff) are protected 4074 */ 4075 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 4076 4077 do { 4078 rc = vcpu_pre_run(vcpu); 4079 if (rc) 4080 break; 4081 4082 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 4083 /* 4084 * As PF_VCPU will be used in fault handler, between 4085 * guest_enter and guest_exit should be no uaccess. 
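 * For protected guests, the guest gprs are additionally copied to and from the SIE page (pv_grregs) around the sie64a() call.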
4086 */ 4087 local_irq_disable(); 4088 guest_enter_irqoff(); 4089 __disable_cpu_timer_accounting(vcpu); 4090 local_irq_enable(); 4091 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4092 memcpy(sie_page->pv_grregs, 4093 vcpu->run->s.regs.gprs, 4094 sizeof(sie_page->pv_grregs)); 4095 } 4096 exit_reason = sie64a(vcpu->arch.sie_block, 4097 vcpu->run->s.regs.gprs); 4098 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4099 memcpy(vcpu->run->s.regs.gprs, 4100 sie_page->pv_grregs, 4101 sizeof(sie_page->pv_grregs)); 4102 } 4103 local_irq_disable(); 4104 __enable_cpu_timer_accounting(vcpu); 4105 guest_exit_irqoff(); 4106 local_irq_enable(); 4107 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 4108 4109 rc = vcpu_post_run(vcpu, exit_reason); 4110 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc); 4111 4112 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 4113 return rc; 4114 } 4115 4116 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 4117 { 4118 struct runtime_instr_cb *riccb; 4119 struct gs_cb *gscb; 4120 4121 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb; 4122 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb; 4123 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; 4124 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; 4125 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) 4126 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); 4127 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) { 4128 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128); 4129 /* some control register changes require a tlb flush */ 4130 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 4131 } 4132 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) { 4133 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm); 4134 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc; 4135 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr; 4136 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp; 4137 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea; 4138 } 4139 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) { 4140 vcpu->arch.pfault_token = kvm_run->s.regs.pft; 4141 vcpu->arch.pfault_select = kvm_run->s.regs.pfs; 4142 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc; 4143 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 4144 kvm_clear_async_pf_completion_queue(vcpu); 4145 } 4146 /* 4147 * If userspace sets the riccb (e.g. after migration) to a valid state, 4148 * we should enable RI here instead of doing the lazy enablement. 4149 */ 4150 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) && 4151 test_kvm_facility(vcpu->kvm, 64) && 4152 riccb->v && 4153 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) { 4154 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)"); 4155 vcpu->arch.sie_block->ecb3 |= ECB3_RI; 4156 } 4157 /* 4158 * If userspace sets the gscb (e.g. after migration) to non-zero, 4159 * we should enable GS here instead of doing the lazy enablement. 4160 */ 4161 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) && 4162 test_kvm_facility(vcpu->kvm, 133) && 4163 gscb->gssm && 4164 !vcpu->arch.gs_enabled) { 4165 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)"); 4166 vcpu->arch.sie_block->ecb |= ECB_GS; 4167 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; 4168 vcpu->arch.gs_enabled = 1; 4169 } 4170 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) && 4171 test_kvm_facility(vcpu->kvm, 82)) { 4172 vcpu->arch.sie_block->fpf &= ~FPF_BPBC; 4173 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? 
FPF_BPBC : 0; 4174 } 4175 save_access_regs(vcpu->arch.host_acrs); 4176 restore_access_regs(vcpu->run->s.regs.acrs); 4177 /* save host (userspace) fprs/vrs */ 4178 save_fpu_regs(); 4179 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc; 4180 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs; 4181 if (MACHINE_HAS_VX) 4182 current->thread.fpu.regs = vcpu->run->s.regs.vrs; 4183 else 4184 current->thread.fpu.regs = vcpu->run->s.regs.fprs; 4185 current->thread.fpu.fpc = vcpu->run->s.regs.fpc; 4186 if (test_fp_ctl(current->thread.fpu.fpc)) 4187 /* User space provided an invalid FPC, let's clear it */ 4188 current->thread.fpu.fpc = 0; 4189 if (MACHINE_HAS_GS) { 4190 preempt_disable(); 4191 __ctl_set_bit(2, 4); 4192 if (current->thread.gs_cb) { 4193 vcpu->arch.host_gscb = current->thread.gs_cb; 4194 save_gs_cb(vcpu->arch.host_gscb); 4195 } 4196 if (vcpu->arch.gs_enabled) { 4197 current->thread.gs_cb = (struct gs_cb *) 4198 &vcpu->run->s.regs.gscb; 4199 restore_gs_cb(current->thread.gs_cb); 4200 } 4201 preempt_enable(); 4202 } 4203 /* SIE will load etoken directly from SDNX and therefore kvm_run */ 4204 4205 kvm_run->kvm_dirty_regs = 0; 4206 } 4207 4208 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 4209 { 4210 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask; 4211 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; 4212 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu); 4213 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128); 4214 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu); 4215 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc; 4216 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr; 4217 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp; 4218 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea; 4219 kvm_run->s.regs.pft = vcpu->arch.pfault_token; 4220 kvm_run->s.regs.pfs = vcpu->arch.pfault_select; 4221 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare; 4222 kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC; 4223 save_access_regs(vcpu->run->s.regs.acrs); 4224 restore_access_regs(vcpu->arch.host_acrs); 4225 /* Save guest register state */ 4226 save_fpu_regs(); 4227 vcpu->run->s.regs.fpc = current->thread.fpu.fpc; 4228 /* Restore will be done lazily at return */ 4229 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc; 4230 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs; 4231 if (MACHINE_HAS_GS) { 4232 __ctl_set_bit(2, 4); 4233 if (vcpu->arch.gs_enabled) 4234 save_gs_cb(current->thread.gs_cb); 4235 preempt_disable(); 4236 current->thread.gs_cb = vcpu->arch.host_gscb; 4237 restore_gs_cb(vcpu->arch.host_gscb); 4238 preempt_enable(); 4239 if (!vcpu->arch.host_gscb) 4240 __ctl_clear_bit(2, 4); 4241 vcpu->arch.host_gscb = NULL; 4242 } 4243 /* SIE will save etoken directly into SDNX and therefore kvm_run */ 4244 } 4245 4246 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 4247 { 4248 int rc; 4249 4250 if (kvm_run->immediate_exit) 4251 return -EINTR; 4252 4253 if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS || 4254 kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS) 4255 return -EINVAL; 4256 4257 vcpu_load(vcpu); 4258 4259 if (guestdbg_exit_pending(vcpu)) { 4260 kvm_s390_prepare_debug_exit(vcpu); 4261 rc = 0; 4262 goto out; 4263 } 4264 4265 kvm_sigset_activate(vcpu); 4266 4267 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) { 4268 kvm_s390_vcpu_start(vcpu); 4269 } else if (is_vcpu_stopped(vcpu)) { 4270 pr_err_ratelimited("can't run stopped vcpu %d\n", 4271 vcpu->vcpu_id); 4272 rc = 
-EINVAL; 4273 goto out; 4274 } 4275 4276 sync_regs(vcpu, kvm_run); 4277 enable_cpu_timer_accounting(vcpu); 4278 4279 might_fault(); 4280 rc = __vcpu_run(vcpu); 4281 4282 if (signal_pending(current) && !rc) { 4283 kvm_run->exit_reason = KVM_EXIT_INTR; 4284 rc = -EINTR; 4285 } 4286 4287 if (guestdbg_exit_pending(vcpu) && !rc) { 4288 kvm_s390_prepare_debug_exit(vcpu); 4289 rc = 0; 4290 } 4291 4292 if (rc == -EREMOTE) { 4293 /* userspace support is needed, kvm_run has been prepared */ 4294 rc = 0; 4295 } 4296 4297 disable_cpu_timer_accounting(vcpu); 4298 store_regs(vcpu, kvm_run); 4299 4300 kvm_sigset_deactivate(vcpu); 4301 4302 vcpu->stat.exit_userspace++; 4303 out: 4304 vcpu_put(vcpu); 4305 return rc; 4306 } 4307 4308 /* 4309 * store status at address 4310 * we have two special cases: 4311 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit 4312 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix 4313 */ 4314 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa) 4315 { 4316 unsigned char archmode = 1; 4317 freg_t fprs[NUM_FPRS]; 4318 unsigned int px; 4319 u64 clkcomp, cputm; 4320 int rc; 4321 4322 px = kvm_s390_get_prefix(vcpu); 4323 if (gpa == KVM_S390_STORE_STATUS_NOADDR) { 4324 if (write_guest_abs(vcpu, 163, &archmode, 1)) 4325 return -EFAULT; 4326 gpa = 0; 4327 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) { 4328 if (write_guest_real(vcpu, 163, &archmode, 1)) 4329 return -EFAULT; 4330 gpa = px; 4331 } else 4332 gpa -= __LC_FPREGS_SAVE_AREA; 4333 4334 /* manually convert vector registers if necessary */ 4335 if (MACHINE_HAS_VX) { 4336 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs); 4337 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, 4338 fprs, 128); 4339 } else { 4340 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, 4341 vcpu->run->s.regs.fprs, 128); 4342 } 4343 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA, 4344 vcpu->run->s.regs.gprs, 128); 4345 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA, 4346 &vcpu->arch.sie_block->gpsw, 16); 4347 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA, 4348 &px, 4); 4349 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA, 4350 &vcpu->run->s.regs.fpc, 4); 4351 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA, 4352 &vcpu->arch.sie_block->todpr, 4); 4353 cputm = kvm_s390_get_cpu_timer(vcpu); 4354 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA, 4355 &cputm, 8); 4356 clkcomp = vcpu->arch.sie_block->ckc >> 8; 4357 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA, 4358 &clkcomp, 8); 4359 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA, 4360 &vcpu->run->s.regs.acrs, 64); 4361 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA, 4362 &vcpu->arch.sie_block->gcr, 128); 4363 return rc ? -EFAULT : 0; 4364 } 4365 4366 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) 4367 { 4368 /* 4369 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy 4370 * switch in the run ioctl. Let's update our copies before we save 4371 * them into the save area 4372 */ 4373 save_fpu_regs(); 4374 vcpu->run->s.regs.fpc = current->thread.fpu.fpc; 4375 save_access_regs(vcpu->run->s.regs.acrs); 4376 4377 return kvm_s390_store_status_unloaded(vcpu, addr); 4378 } 4379 4380 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu) 4381 { 4382 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu); 4383 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu); 4384 } 4385 4386 static void __disable_ibs_on_all_vcpus(struct kvm *kvm) 4387 { 4388 unsigned int i; 4389 struct kvm_vcpu *vcpu; 4390 4391 kvm_for_each_vcpu(i, vcpu, kvm) { 4392 __disable_ibs_on_vcpu(vcpu); 4393 } 4394 } 4395 4396 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu) 4397 { 4398 if (!sclp.has_ibs) 4399 return; 4400 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu); 4401 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu); 4402 } 4403 4404 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu) 4405 { 4406 int i, online_vcpus, started_vcpus = 0; 4407 4408 if (!is_vcpu_stopped(vcpu)) 4409 return; 4410 4411 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1); 4412 /* Only one cpu at a time may enter/leave the STOPPED state. */ 4413 spin_lock(&vcpu->kvm->arch.start_stop_lock); 4414 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); 4415 4416 for (i = 0; i < online_vcpus; i++) { 4417 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) 4418 started_vcpus++; 4419 } 4420 4421 if (started_vcpus == 0) { 4422 /* we're the only active VCPU -> speed it up */ 4423 __enable_ibs_on_vcpu(vcpu); 4424 } else if (started_vcpus == 1) { 4425 /* 4426 * As we are starting a second VCPU, we have to disable 4427 * the IBS facility on all VCPUs to remove potentially 4428 * outstanding ENABLE requests. 4429 */ 4430 __disable_ibs_on_all_vcpus(vcpu->kvm); 4431 } 4432 4433 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED); 4434 /* 4435 * Another VCPU might have used IBS while we were offline. 4436 * Let's play safe and flush the VCPU at startup. 4437 */ 4438 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 4439 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 4440 return; 4441 } 4442 4443 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu) 4444 { 4445 int i, online_vcpus, started_vcpus = 0; 4446 struct kvm_vcpu *started_vcpu = NULL; 4447 4448 if (is_vcpu_stopped(vcpu)) 4449 return; 4450 4451 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0); 4452 /* Only one cpu at a time may enter/leave the STOPPED state. */ 4453 spin_lock(&vcpu->kvm->arch.start_stop_lock); 4454 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); 4455 4456 /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */ 4457 kvm_s390_clear_stop_irq(vcpu); 4458 4459 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED); 4460 __disable_ibs_on_vcpu(vcpu); 4461 4462 for (i = 0; i < online_vcpus; i++) { 4463 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) { 4464 started_vcpus++; 4465 started_vcpu = vcpu->kvm->vcpus[i]; 4466 } 4467 } 4468 4469 if (started_vcpus == 1) { 4470 /* 4471 * As we only have one VCPU left, we want to enable the 4472 * IBS facility for that VCPU to speed it up.
4473 */ 4474 __enable_ibs_on_vcpu(started_vcpu); 4475 } 4476 4477 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 4478 return; 4479 } 4480 4481 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, 4482 struct kvm_enable_cap *cap) 4483 { 4484 int r; 4485 4486 if (cap->flags) 4487 return -EINVAL; 4488 4489 switch (cap->cap) { 4490 case KVM_CAP_S390_CSS_SUPPORT: 4491 if (!vcpu->kvm->arch.css_support) { 4492 vcpu->kvm->arch.css_support = 1; 4493 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support"); 4494 trace_kvm_s390_enable_css(vcpu->kvm); 4495 } 4496 r = 0; 4497 break; 4498 default: 4499 r = -EINVAL; 4500 break; 4501 } 4502 return r; 4503 } 4504 4505 static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu, 4506 struct kvm_s390_mem_op *mop) 4507 { 4508 void __user *uaddr = (void __user *)mop->buf; 4509 int r = 0; 4510 4511 if (mop->flags || !mop->size) 4512 return -EINVAL; 4513 if (mop->size + mop->sida_offset < mop->size) 4514 return -EINVAL; 4515 if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block)) 4516 return -E2BIG; 4517 4518 switch (mop->op) { 4519 case KVM_S390_MEMOP_SIDA_READ: 4520 if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) + 4521 mop->sida_offset), mop->size)) 4522 r = -EFAULT; 4523 4524 break; 4525 case KVM_S390_MEMOP_SIDA_WRITE: 4526 if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) + 4527 mop->sida_offset), uaddr, mop->size)) 4528 r = -EFAULT; 4529 break; 4530 } 4531 return r; 4532 } 4533 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, 4534 struct kvm_s390_mem_op *mop) 4535 { 4536 void __user *uaddr = (void __user *)mop->buf; 4537 void *tmpbuf = NULL; 4538 int r = 0; 4539 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION 4540 | KVM_S390_MEMOP_F_CHECK_ONLY; 4541 4542 if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size) 4543 return -EINVAL; 4544 4545 if (mop->size > MEM_OP_MAX_SIZE) 4546 return -E2BIG; 4547 4548 if (kvm_s390_pv_cpu_is_protected(vcpu)) 4549 return -EINVAL; 4550 4551 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) { 4552 tmpbuf = vmalloc(mop->size); 4553 if (!tmpbuf) 4554 return -ENOMEM; 4555 } 4556 4557 switch (mop->op) { 4558 case KVM_S390_MEMOP_LOGICAL_READ: 4559 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { 4560 r = check_gva_range(vcpu, mop->gaddr, mop->ar, 4561 mop->size, GACC_FETCH); 4562 break; 4563 } 4564 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size); 4565 if (r == 0) { 4566 if (copy_to_user(uaddr, tmpbuf, mop->size)) 4567 r = -EFAULT; 4568 } 4569 break; 4570 case KVM_S390_MEMOP_LOGICAL_WRITE: 4571 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { 4572 r = check_gva_range(vcpu, mop->gaddr, mop->ar, 4573 mop->size, GACC_STORE); 4574 break; 4575 } 4576 if (copy_from_user(tmpbuf, uaddr, mop->size)) { 4577 r = -EFAULT; 4578 break; 4579 } 4580 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size); 4581 break; 4582 } 4583 4584 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0) 4585 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); 4586 4587 vfree(tmpbuf); 4588 return r; 4589 } 4590 4591 static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu, 4592 struct kvm_s390_mem_op *mop) 4593 { 4594 int r, srcu_idx; 4595 4596 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 4597 4598 switch (mop->op) { 4599 case KVM_S390_MEMOP_LOGICAL_READ: 4600 case KVM_S390_MEMOP_LOGICAL_WRITE: 4601 r = kvm_s390_guest_mem_op(vcpu, mop); 4602 break; 4603 case KVM_S390_MEMOP_SIDA_READ: 4604 case KVM_S390_MEMOP_SIDA_WRITE: 4605 /* we are locked against sida 
going away by the vcpu->mutex */ 4606 r = kvm_s390_guest_sida_op(vcpu, mop); 4607 break; 4608 default: 4609 r = -EINVAL; 4610 } 4611 4612 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); 4613 return r; 4614 } 4615 4616 long kvm_arch_vcpu_async_ioctl(struct file *filp, 4617 unsigned int ioctl, unsigned long arg) 4618 { 4619 struct kvm_vcpu *vcpu = filp->private_data; 4620 void __user *argp = (void __user *)arg; 4621 4622 switch (ioctl) { 4623 case KVM_S390_IRQ: { 4624 struct kvm_s390_irq s390irq; 4625 4626 if (copy_from_user(&s390irq, argp, sizeof(s390irq))) 4627 return -EFAULT; 4628 return kvm_s390_inject_vcpu(vcpu, &s390irq); 4629 } 4630 case KVM_S390_INTERRUPT: { 4631 struct kvm_s390_interrupt s390int; 4632 struct kvm_s390_irq s390irq = {}; 4633 4634 if (copy_from_user(&s390int, argp, sizeof(s390int))) 4635 return -EFAULT; 4636 if (s390int_to_s390irq(&s390int, &s390irq)) 4637 return -EINVAL; 4638 return kvm_s390_inject_vcpu(vcpu, &s390irq); 4639 } 4640 } 4641 return -ENOIOCTLCMD; 4642 } 4643 4644 long kvm_arch_vcpu_ioctl(struct file *filp, 4645 unsigned int ioctl, unsigned long arg) 4646 { 4647 struct kvm_vcpu *vcpu = filp->private_data; 4648 void __user *argp = (void __user *)arg; 4649 int idx; 4650 long r; 4651 4652 vcpu_load(vcpu); 4653 4654 switch (ioctl) { 4655 case KVM_S390_STORE_STATUS: 4656 idx = srcu_read_lock(&vcpu->kvm->srcu); 4657 r = kvm_s390_store_status_unloaded(vcpu, arg); 4658 srcu_read_unlock(&vcpu->kvm->srcu, idx); 4659 break; 4660 case KVM_S390_SET_INITIAL_PSW: { 4661 psw_t psw; 4662 4663 r = -EFAULT; 4664 if (copy_from_user(&psw, argp, sizeof(psw))) 4665 break; 4666 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw); 4667 break; 4668 } 4669 case KVM_S390_CLEAR_RESET: 4670 r = 0; 4671 kvm_arch_vcpu_ioctl_clear_reset(vcpu); 4672 break; 4673 case KVM_S390_INITIAL_RESET: 4674 r = 0; 4675 kvm_arch_vcpu_ioctl_initial_reset(vcpu); 4676 break; 4677 case KVM_S390_NORMAL_RESET: 4678 r = 0; 4679 kvm_arch_vcpu_ioctl_normal_reset(vcpu); 4680 break; 4681 case KVM_SET_ONE_REG: 4682 case KVM_GET_ONE_REG: { 4683 struct kvm_one_reg reg; 4684 r = -EINVAL; 4685 if (kvm_s390_pv_cpu_is_protected(vcpu)) 4686 break; 4687 r = -EFAULT; 4688 if (copy_from_user(&reg, argp, sizeof(reg))) 4689 break; 4690 if (ioctl == KVM_SET_ONE_REG) 4691 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg); 4692 else 4693 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg); 4694 break; 4695 } 4696 #ifdef CONFIG_KVM_S390_UCONTROL 4697 case KVM_S390_UCAS_MAP: { 4698 struct kvm_s390_ucas_mapping ucasmap; 4699 4700 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) { 4701 r = -EFAULT; 4702 break; 4703 } 4704 4705 if (!kvm_is_ucontrol(vcpu->kvm)) { 4706 r = -EINVAL; 4707 break; 4708 } 4709 4710 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr, 4711 ucasmap.vcpu_addr, ucasmap.length); 4712 break; 4713 } 4714 case KVM_S390_UCAS_UNMAP: { 4715 struct kvm_s390_ucas_mapping ucasmap; 4716 4717 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) { 4718 r = -EFAULT; 4719 break; 4720 } 4721 4722 if (!kvm_is_ucontrol(vcpu->kvm)) { 4723 r = -EINVAL; 4724 break; 4725 } 4726 4727 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr, 4728 ucasmap.length); 4729 break; 4730 } 4731 #endif 4732 case KVM_S390_VCPU_FAULT: { 4733 r = gmap_fault(vcpu->arch.gmap, arg, 0); 4734 break; 4735 } 4736 case KVM_ENABLE_CAP: 4737 { 4738 struct kvm_enable_cap cap; 4739 r = -EFAULT; 4740 if (copy_from_user(&cap, argp, sizeof(cap))) 4741 break; 4742 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); 4743 break; 4744 } 4745 case KVM_S390_MEM_OP: { 4746 struct
kvm_s390_mem_op mem_op; 4747 4748 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0) 4749 r = kvm_s390_guest_memsida_op(vcpu, &mem_op); 4750 else 4751 r = -EFAULT; 4752 break; 4753 } 4754 case KVM_S390_SET_IRQ_STATE: { 4755 struct kvm_s390_irq_state irq_state; 4756 4757 r = -EFAULT; 4758 if (copy_from_user(&irq_state, argp, sizeof(irq_state))) 4759 break; 4760 if (irq_state.len > VCPU_IRQS_MAX_BUF || 4761 irq_state.len == 0 || 4762 irq_state.len % sizeof(struct kvm_s390_irq) > 0) { 4763 r = -EINVAL; 4764 break; 4765 } 4766 /* do not use irq_state.flags, it will break old QEMUs */ 4767 r = kvm_s390_set_irq_state(vcpu, 4768 (void __user *) irq_state.buf, 4769 irq_state.len); 4770 break; 4771 } 4772 case KVM_S390_GET_IRQ_STATE: { 4773 struct kvm_s390_irq_state irq_state; 4774 4775 r = -EFAULT; 4776 if (copy_from_user(&irq_state, argp, sizeof(irq_state))) 4777 break; 4778 if (irq_state.len == 0) { 4779 r = -EINVAL; 4780 break; 4781 } 4782 /* do not use irq_state.flags, it will break old QEMUs */ 4783 r = kvm_s390_get_irq_state(vcpu, 4784 (__u8 __user *) irq_state.buf, 4785 irq_state.len); 4786 break; 4787 } 4788 default: 4789 r = -ENOTTY; 4790 } 4791 4792 vcpu_put(vcpu); 4793 return r; 4794 } 4795 4796 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) 4797 { 4798 #ifdef CONFIG_KVM_S390_UCONTROL 4799 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET) 4800 && (kvm_is_ucontrol(vcpu->kvm))) { 4801 vmf->page = virt_to_page(vcpu->arch.sie_block); 4802 get_page(vmf->page); 4803 return 0; 4804 } 4805 #endif 4806 return VM_FAULT_SIGBUS; 4807 } 4808 4809 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, 4810 unsigned long npages) 4811 { 4812 return 0; 4813 } 4814 4815 /* Section: memory related */ 4816 int kvm_arch_prepare_memory_region(struct kvm *kvm, 4817 struct kvm_memory_slot *memslot, 4818 const struct kvm_userspace_memory_region *mem, 4819 enum kvm_mr_change change) 4820 { 4821 /* A few sanity checks. We can have memory slots which have to be 4822 located/ended at a segment boundary (1MB). The memory in userland is 4823 ok to be fragmented into various different vmas. 
It is okay to mmap() 4824 and munmap() stuff in this slot after doing this call at any time */ 4825 4826 if (mem->userspace_addr & 0xffffful) 4827 return -EINVAL; 4828 4829 if (mem->memory_size & 0xffffful) 4830 return -EINVAL; 4831 4832 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit) 4833 return -EINVAL; 4834 4835 /* When we are protected, we should not change the memory slots */ 4836 if (kvm_s390_pv_get_handle(kvm)) 4837 return -EINVAL; 4838 return 0; 4839 } 4840 4841 void kvm_arch_commit_memory_region(struct kvm *kvm, 4842 const struct kvm_userspace_memory_region *mem, 4843 const struct kvm_memory_slot *old, 4844 const struct kvm_memory_slot *new, 4845 enum kvm_mr_change change) 4846 { 4847 int rc = 0; 4848 4849 switch (change) { 4850 case KVM_MR_DELETE: 4851 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE, 4852 old->npages * PAGE_SIZE); 4853 break; 4854 case KVM_MR_MOVE: 4855 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE, 4856 old->npages * PAGE_SIZE); 4857 if (rc) 4858 break; 4859 /* FALLTHROUGH */ 4860 case KVM_MR_CREATE: 4861 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr, 4862 mem->guest_phys_addr, mem->memory_size); 4863 break; 4864 case KVM_MR_FLAGS_ONLY: 4865 break; 4866 default: 4867 WARN(1, "Unknown KVM MR CHANGE: %d\n", change); 4868 } 4869 if (rc) 4870 pr_warn("failed to commit memory region\n"); 4871 return; 4872 } 4873 4874 static inline unsigned long nonhyp_mask(int i) 4875 { 4876 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30; 4877 4878 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4); 4879 } 4880 4881 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) 4882 { 4883 vcpu->valid_wakeup = false; 4884 } 4885 4886 static int __init kvm_s390_init(void) 4887 { 4888 int i; 4889 4890 if (!sclp.has_sief2) { 4891 pr_info("SIE is not available\n"); 4892 return -ENODEV; 4893 } 4894 4895 if (nested && hpage) { 4896 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n"); 4897 return -EINVAL; 4898 } 4899 4900 for (i = 0; i < 16; i++) 4901 kvm_s390_fac_base[i] |= 4902 S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i); 4903 4904 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); 4905 } 4906 4907 static void __exit kvm_s390_exit(void) 4908 { 4909 kvm_exit(); 4910 } 4911 4912 module_init(kvm_s390_init); 4913 module_exit(kvm_s390_exit); 4914 4915 /* 4916 * Enable autoloading of the kvm module. 4917 * Note that we add the module alias here instead of virt/kvm/kvm_main.c 4918 * since x86 takes a different approach. 4919 */ 4920 #include <linux/miscdevice.h> 4921 MODULE_ALIAS_MISCDEV(KVM_MINOR); 4922 MODULE_ALIAS("devname:kvm"); 4923