// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2020
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>
#include <linux/pgtable.h>
#include <linux/mmu_notifier.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include <asm/ap.h>
#include <asm/uv.h>
#include <asm/fpu/api.h>
#include "kvm-s390.h"
#include "gaccess.h"
#include "pci.h"

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
	KVM_GENERIC_VM_STATS(),
	STATS_DESC_COUNTER(VM, inject_io),
	STATS_DESC_COUNTER(VM, inject_float_mchk),
	STATS_DESC_COUNTER(VM, inject_pfault_done),
	STATS_DESC_COUNTER(VM, inject_service_signal),
	STATS_DESC_COUNTER(VM, inject_virtio),
	STATS_DESC_COUNTER(VM, aen_forward)
};

const struct kvm_stats_header kvm_vm_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vm_stats_desc),
};
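
/*
 * Note on the stats header layout used above and below: the binary stats
 * file exposed via KVM_GET_STATS_FD starts with the header itself, followed
 * by the id string (KVM_STATS_NAME_SIZE bytes), the descriptor array and
 * finally the data block.  Hence id_offset is sizeof(header), desc_offset
 * adds the name size on top of it, and data_offset additionally adds the
 * size of the descriptor array.
 */
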
const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
	KVM_GENERIC_VCPU_STATS(),
	STATS_DESC_COUNTER(VCPU, exit_userspace),
	STATS_DESC_COUNTER(VCPU, exit_null),
	STATS_DESC_COUNTER(VCPU, exit_external_request),
	STATS_DESC_COUNTER(VCPU, exit_io_request),
	STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
	STATS_DESC_COUNTER(VCPU, exit_stop_request),
	STATS_DESC_COUNTER(VCPU, exit_validity),
	STATS_DESC_COUNTER(VCPU, exit_instruction),
	STATS_DESC_COUNTER(VCPU, exit_pei),
	STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
	STATS_DESC_COUNTER(VCPU, instruction_lctl),
	STATS_DESC_COUNTER(VCPU, instruction_lctlg),
	STATS_DESC_COUNTER(VCPU, instruction_stctl),
	STATS_DESC_COUNTER(VCPU, instruction_stctg),
	STATS_DESC_COUNTER(VCPU, exit_program_interruption),
	STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
	STATS_DESC_COUNTER(VCPU, exit_operation_exception),
	STATS_DESC_COUNTER(VCPU, deliver_ckc),
	STATS_DESC_COUNTER(VCPU, deliver_cputm),
	STATS_DESC_COUNTER(VCPU, deliver_external_call),
	STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
	STATS_DESC_COUNTER(VCPU, deliver_service_signal),
	STATS_DESC_COUNTER(VCPU, deliver_virtio),
	STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
	STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
	STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
	STATS_DESC_COUNTER(VCPU, deliver_program),
	STATS_DESC_COUNTER(VCPU, deliver_io),
	STATS_DESC_COUNTER(VCPU, deliver_machine_check),
	STATS_DESC_COUNTER(VCPU, exit_wait_state),
	STATS_DESC_COUNTER(VCPU, inject_ckc),
	STATS_DESC_COUNTER(VCPU, inject_cputm),
	STATS_DESC_COUNTER(VCPU, inject_external_call),
	STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
	STATS_DESC_COUNTER(VCPU, inject_mchk),
	STATS_DESC_COUNTER(VCPU, inject_pfault_init),
	STATS_DESC_COUNTER(VCPU, inject_program),
	STATS_DESC_COUNTER(VCPU, inject_restart),
	STATS_DESC_COUNTER(VCPU, inject_set_prefix),
	STATS_DESC_COUNTER(VCPU, inject_stop_signal),
	STATS_DESC_COUNTER(VCPU, instruction_epsw),
	STATS_DESC_COUNTER(VCPU, instruction_gs),
	STATS_DESC_COUNTER(VCPU, instruction_io_other),
	STATS_DESC_COUNTER(VCPU, instruction_lpsw),
	STATS_DESC_COUNTER(VCPU, instruction_lpswe),
	STATS_DESC_COUNTER(VCPU, instruction_pfmf),
	STATS_DESC_COUNTER(VCPU, instruction_ptff),
	STATS_DESC_COUNTER(VCPU, instruction_sck),
	STATS_DESC_COUNTER(VCPU, instruction_sckpf),
	STATS_DESC_COUNTER(VCPU, instruction_stidp),
	STATS_DESC_COUNTER(VCPU, instruction_spx),
	STATS_DESC_COUNTER(VCPU, instruction_stpx),
	STATS_DESC_COUNTER(VCPU, instruction_stap),
	STATS_DESC_COUNTER(VCPU, instruction_iske),
	STATS_DESC_COUNTER(VCPU, instruction_ri),
	STATS_DESC_COUNTER(VCPU, instruction_rrbe),
	STATS_DESC_COUNTER(VCPU, instruction_sske),
	STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
	STATS_DESC_COUNTER(VCPU, instruction_stsi),
	STATS_DESC_COUNTER(VCPU, instruction_stfl),
	STATS_DESC_COUNTER(VCPU, instruction_tb),
	STATS_DESC_COUNTER(VCPU, instruction_tpi),
	STATS_DESC_COUNTER(VCPU, instruction_tprot),
	STATS_DESC_COUNTER(VCPU, instruction_tsch),
	STATS_DESC_COUNTER(VCPU, instruction_sie),
	STATS_DESC_COUNTER(VCPU, instruction_essa),
	STATS_DESC_COUNTER(VCPU, instruction_sthyi),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
	STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
	STATS_DESC_COUNTER(VCPU, diag_9c_forward),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
	STATS_DESC_COUNTER(VCPU, pfault_sync)
};

const struct kvm_stats_header kvm_vcpu_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vcpu_stats_desc),
};

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* allow 1m huge page guest backing, if !nested */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");

/* maximum percentage of steal time for polling.  >100 is treated like 100 */
static u8 halt_poll_max_steal = 10;
module_param(halt_poll_max_steal, byte, 0644);
MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");

/* if set to true, the GISA will be initialized and used if available */
static bool use_gisa = true;
module_param(use_gisa, bool, 0644);
MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");

/* maximum diag9c forwarding per second */
unsigned int diag9c_forwarding_hz;
module_param(diag9c_forwarding_hz, uint, 0644);
MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");

/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, this requires changes to code, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };

static unsigned long kvm_s390_fac_size(void)
{
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
		     sizeof(stfle_fac_list));

	return SIZE_INTERNAL;
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
debug_info_t *kvm_s390_dbf_uv;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

int kvm_arch_check_processor_compat(void *opaque)
{
	return 0;
}

/* forward declarations */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
static int sca_switch_to_extended(struct kvm *kvm);

static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
	u8 delta_idx = 0;

	/*
	 * The TOD jumps by delta, we have to compensate this by adding
	 * -delta to the epoch.
	 */
	delta = -delta;

	/* sign-extension - we're adding to signed values below */
	if ((s64)delta < 0)
		delta_idx = -1;

	scb->epoch += delta;
	if (scb->ecd & ECD_MEF) {
		scb->epdx += delta_idx;
		if (scb->epoch < delta)
			scb->epdx += 1;
	}
}
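
/*
 * A short worked example for the carry handling above: with the
 * multiple-epoch facility, epdx extends epoch at the top, so adding the
 * signed 64-bit delta needs a sign extension plus a carry.  For delta == -1
 * (a TOD jump of +1): delta_idx becomes -1; if the old epoch was 0, the
 * epoch wraps around and no carry is added, so epdx drops by one (a borrow);
 * for any other epoch value the unsigned addition carries and -1 + 1 leaves
 * epdx unchanged.
 */
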
/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	unsigned long i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm_for_each_vcpu(i, vcpu, kvm) {
			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
			if (i == 0) {
				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
			}
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				kvm_clock_sync_scb(vcpu->arch.vsie_block,
						   *delta);
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void *opaque)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

static inline int plo_test_bit(unsigned char nr)
{
	unsigned long function = (unsigned long)nr | 0x100;
	int cc;

	asm volatile(
		"	lgr	0,%[function]\n"
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: [function] "d" (function)
		: "cc", "0");
	return cc == 0;
}

static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
{
	asm volatile(
		"	lghi	0,0\n"
		"	lgr	1,%[query]\n"
		/* Parameter registers are ignored */
		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
		:
		: [query] "d" ((unsigned long)query), [opc] "i" (opcode)
		: "cc", "memory", "0", "1");
}

#define INSN_SORTL 0xb938
#define INSN_DFLTCC 0xb939
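
/*
 * Note on the two query helpers above: plo_test_bit() issues PERFORM LOCKED
 * OPERATION with the "test bit" (0x100) set on top of the function code and
 * reports availability of that subfunction via condition code 0, while
 * __insn32_query() executes the given instruction with GR0 = 0 (query
 * function) and GR1 pointing to the buffer that receives the query block.
 */
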
static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);

	if (test_facility(155)) /* MSA9 */
		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kdsa);

	if (test_facility(150)) /* SORTL */
		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);

	if (test_facility(151)) /* DFLTCC */
		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}

int kvm_arch_init(void *opaque)
{
	int rc = -ENOMEM;

	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf_uv)
		goto out;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
		goto out;

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
	if (rc) {
		pr_err("A FLIC registration call failed with rc=%d\n", rc);
		goto out;
	}

	if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
		rc = kvm_s390_pci_init();
		if (rc) {
			pr_err("Unable to allocate AIFT for PCI\n");
			goto out;
		}
	}

	rc = kvm_s390_gib_init(GAL_ISC);
	if (rc)
		goto out;

	return 0;

out:
	kvm_arch_exit();
	return rc;
}

void kvm_arch_exit(void)
{
	kvm_s390_gib_destroy();
	if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
		kvm_s390_pci_exit();
	debug_unregister(kvm_s390_dbf);
	debug_unregister(kvm_s390_dbf_uv);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
	case KVM_CAP_S390_AIS_MIGRATION:
	case KVM_CAP_S390_VCPU_RESETS:
	case KVM_CAP_SET_GUEST_DEBUG:
	case KVM_CAP_S390_DIAG318:
	case KVM_CAP_S390_MEM_OP_EXTENSION:
		r = 1;
		break;
	case KVM_CAP_SET_GUEST_DEBUG2:
		r = KVM_GUESTDBG_VALID_MASK;
		break;
	case KVM_CAP_S390_HPAGE_1M:
		r = 0;
		if (hpage && !kvm_is_ucontrol(kvm))
			r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
	case KVM_CAP_MAX_VCPU_ID:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		if (ext == KVM_CAP_NR_VCPUS)
			r = min_t(unsigned int, num_online_cpus(), r);
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	case KVM_CAP_S390_BPB:
		r = test_facility(82);
		break;
	case KVM_CAP_S390_PROTECTED:
		r = is_prot_virt_host();
		break;
	case KVM_CAP_S390_PROTECTED_DUMP: {
		u64 pv_cmds_dump[] = {
			BIT_UVC_CMD_DUMP_INIT,
			BIT_UVC_CMD_DUMP_CONFIG_STOR_STATE,
			BIT_UVC_CMD_DUMP_CPU,
			BIT_UVC_CMD_DUMP_COMPLETE,
		};
		int i;

		r = is_prot_virt_host();

		for (i = 0; i < ARRAY_SIZE(pv_cmds_dump); i++) {
			if (!test_bit_inv(pv_cmds_dump[i],
					  (unsigned long *)&uv_info.inst_calls_list)) {
				r = 0;
				break;
			}
		}
		break;
	}
	case KVM_CAP_S390_ZPCI_OP:
		r = kvm_s390_pci_interp_allowed();
		break;
	case KVM_CAP_S390_CPU_TOPOLOGY:
		r = test_facility(11);
		break;
	default:
		r = 0;
	}
	return r;
}
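
/*
 * Illustrative only, a minimal userspace sketch (not part of this file) of
 * probing one of the extensions handled above; "vm_fd" is assumed to be an
 * open KVM VM file descriptor:
 *
 *	int max = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *
 * A positive return value is the maximum transfer size (in bytes) accepted
 * by KVM_S390_MEM_OP.
 */
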
void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
	int i;
	gfn_t cur_gfn, last_gfn;
	unsigned long gaddr, vmaddr;
	struct gmap *gmap = kvm->arch.gmap;
	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);

	/* Loop over all guest segments */
	cur_gfn = memslot->base_gfn;
	last_gfn = memslot->base_gfn + memslot->npages;
	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
		gaddr = gfn_to_gpa(cur_gfn);
		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
		if (kvm_is_error_hva(vmaddr))
			continue;

		bitmap_zero(bitmap, _PAGE_ENTRIES);
		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
		for (i = 0; i < _PAGE_ENTRIES; i++) {
			if (test_bit(i, bitmap))
				mark_page_dirty(kvm, cur_gfn + i);
		}

		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memory_slot *memslot;
	int is_dirty;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}

static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned long i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}

int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
			if (test_facility(148)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 148);
				set_kvm_facility(kvm->arch.model.fac_list, 148);
			}
			if (test_facility(152)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 152);
				set_kvm_facility(kvm->arch.model.fac_list, 152);
			}
			if (test_facility(192)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 192);
				set_kvm_facility(kvm->arch.model.fac_list, 192);
			}
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
"(not available)" : "(success)"); 783 break; 784 case KVM_CAP_S390_RI: 785 r = -EINVAL; 786 mutex_lock(&kvm->lock); 787 if (kvm->created_vcpus) { 788 r = -EBUSY; 789 } else if (test_facility(64)) { 790 set_kvm_facility(kvm->arch.model.fac_mask, 64); 791 set_kvm_facility(kvm->arch.model.fac_list, 64); 792 r = 0; 793 } 794 mutex_unlock(&kvm->lock); 795 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s", 796 r ? "(not available)" : "(success)"); 797 break; 798 case KVM_CAP_S390_AIS: 799 mutex_lock(&kvm->lock); 800 if (kvm->created_vcpus) { 801 r = -EBUSY; 802 } else { 803 set_kvm_facility(kvm->arch.model.fac_mask, 72); 804 set_kvm_facility(kvm->arch.model.fac_list, 72); 805 r = 0; 806 } 807 mutex_unlock(&kvm->lock); 808 VM_EVENT(kvm, 3, "ENABLE: AIS %s", 809 r ? "(not available)" : "(success)"); 810 break; 811 case KVM_CAP_S390_GS: 812 r = -EINVAL; 813 mutex_lock(&kvm->lock); 814 if (kvm->created_vcpus) { 815 r = -EBUSY; 816 } else if (test_facility(133)) { 817 set_kvm_facility(kvm->arch.model.fac_mask, 133); 818 set_kvm_facility(kvm->arch.model.fac_list, 133); 819 r = 0; 820 } 821 mutex_unlock(&kvm->lock); 822 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s", 823 r ? "(not available)" : "(success)"); 824 break; 825 case KVM_CAP_S390_HPAGE_1M: 826 mutex_lock(&kvm->lock); 827 if (kvm->created_vcpus) 828 r = -EBUSY; 829 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm)) 830 r = -EINVAL; 831 else { 832 r = 0; 833 mmap_write_lock(kvm->mm); 834 kvm->mm->context.allow_gmap_hpage_1m = 1; 835 mmap_write_unlock(kvm->mm); 836 /* 837 * We might have to create fake 4k page 838 * tables. To avoid that the hardware works on 839 * stale PGSTEs, we emulate these instructions. 840 */ 841 kvm->arch.use_skf = 0; 842 kvm->arch.use_pfmfi = 0; 843 } 844 mutex_unlock(&kvm->lock); 845 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s", 846 r ? "(not available)" : "(success)"); 847 break; 848 case KVM_CAP_S390_USER_STSI: 849 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI"); 850 kvm->arch.user_stsi = 1; 851 r = 0; 852 break; 853 case KVM_CAP_S390_USER_INSTR0: 854 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0"); 855 kvm->arch.user_instr0 = 1; 856 icpt_operexc_on_all_vcpus(kvm); 857 r = 0; 858 break; 859 case KVM_CAP_S390_CPU_TOPOLOGY: 860 r = -EINVAL; 861 mutex_lock(&kvm->lock); 862 if (kvm->created_vcpus) { 863 r = -EBUSY; 864 } else if (test_facility(11)) { 865 set_kvm_facility(kvm->arch.model.fac_mask, 11); 866 set_kvm_facility(kvm->arch.model.fac_list, 11); 867 r = 0; 868 } 869 mutex_unlock(&kvm->lock); 870 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_CPU_TOPOLOGY %s", 871 r ? 
"(not available)" : "(success)"); 872 break; 873 default: 874 r = -EINVAL; 875 break; 876 } 877 return r; 878 } 879 880 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) 881 { 882 int ret; 883 884 switch (attr->attr) { 885 case KVM_S390_VM_MEM_LIMIT_SIZE: 886 ret = 0; 887 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes", 888 kvm->arch.mem_limit); 889 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr)) 890 ret = -EFAULT; 891 break; 892 default: 893 ret = -ENXIO; 894 break; 895 } 896 return ret; 897 } 898 899 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) 900 { 901 int ret; 902 unsigned int idx; 903 switch (attr->attr) { 904 case KVM_S390_VM_MEM_ENABLE_CMMA: 905 ret = -ENXIO; 906 if (!sclp.has_cmma) 907 break; 908 909 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support"); 910 mutex_lock(&kvm->lock); 911 if (kvm->created_vcpus) 912 ret = -EBUSY; 913 else if (kvm->mm->context.allow_gmap_hpage_1m) 914 ret = -EINVAL; 915 else { 916 kvm->arch.use_cmma = 1; 917 /* Not compatible with cmma. */ 918 kvm->arch.use_pfmfi = 0; 919 ret = 0; 920 } 921 mutex_unlock(&kvm->lock); 922 break; 923 case KVM_S390_VM_MEM_CLR_CMMA: 924 ret = -ENXIO; 925 if (!sclp.has_cmma) 926 break; 927 ret = -EINVAL; 928 if (!kvm->arch.use_cmma) 929 break; 930 931 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states"); 932 mutex_lock(&kvm->lock); 933 idx = srcu_read_lock(&kvm->srcu); 934 s390_reset_cmma(kvm->arch.gmap->mm); 935 srcu_read_unlock(&kvm->srcu, idx); 936 mutex_unlock(&kvm->lock); 937 ret = 0; 938 break; 939 case KVM_S390_VM_MEM_LIMIT_SIZE: { 940 unsigned long new_limit; 941 942 if (kvm_is_ucontrol(kvm)) 943 return -EINVAL; 944 945 if (get_user(new_limit, (u64 __user *)attr->addr)) 946 return -EFAULT; 947 948 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT && 949 new_limit > kvm->arch.mem_limit) 950 return -E2BIG; 951 952 if (!new_limit) 953 return -EINVAL; 954 955 /* gmap_create takes last usable address */ 956 if (new_limit != KVM_S390_NO_MEM_LIMIT) 957 new_limit -= 1; 958 959 ret = -EBUSY; 960 mutex_lock(&kvm->lock); 961 if (!kvm->created_vcpus) { 962 /* gmap_create will round the limit up */ 963 struct gmap *new = gmap_create(current->mm, new_limit); 964 965 if (!new) { 966 ret = -ENOMEM; 967 } else { 968 gmap_remove(kvm->arch.gmap); 969 new->private = kvm; 970 kvm->arch.gmap = new; 971 ret = 0; 972 } 973 } 974 mutex_unlock(&kvm->lock); 975 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit); 976 VM_EVENT(kvm, 3, "New guest asce: 0x%pK", 977 (void *) kvm->arch.gmap->asce); 978 break; 979 } 980 default: 981 ret = -ENXIO; 982 break; 983 } 984 return ret; 985 } 986 987 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu); 988 989 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm) 990 { 991 struct kvm_vcpu *vcpu; 992 unsigned long i; 993 994 kvm_s390_vcpu_block_all(kvm); 995 996 kvm_for_each_vcpu(i, vcpu, kvm) { 997 kvm_s390_vcpu_crypto_setup(vcpu); 998 /* recreate the shadow crycb by leaving the VSIE handler */ 999 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu); 1000 } 1001 1002 kvm_s390_vcpu_unblock_all(kvm); 1003 } 1004 1005 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr) 1006 { 1007 mutex_lock(&kvm->lock); 1008 switch (attr->attr) { 1009 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW: 1010 if (!test_kvm_facility(kvm, 76)) { 1011 mutex_unlock(&kvm->lock); 1012 return -EINVAL; 1013 } 1014 get_random_bytes( 1015 kvm->arch.crypto.crycb->aes_wrapping_key_mask, 1016 
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 1;
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 0;
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_s390_vcpu_crypto_reset_all(kvm);
	mutex_unlock(&kvm->lock);
	return 0;
}

static void kvm_s390_vcpu_pci_setup(struct kvm_vcpu *vcpu)
{
	/* Only set the ECB bits after guest requests zPCI interpretation */
	if (!vcpu->kvm->arch.use_zpci_interp)
		return;

	vcpu->arch.sie_block->ecb2 |= ECB2_ZPCI_LSI;
	vcpu->arch.sie_block->ecb3 |= ECB3_AISII + ECB3_AISI;
}

void kvm_s390_vcpu_pci_enable_interp(struct kvm *kvm)
{
	struct kvm_vcpu *vcpu;
	unsigned long i;

	lockdep_assert_held(&kvm->lock);

	if (!kvm_s390_pci_interp_allowed())
		return;

	/*
	 * If host is configured for PCI and the necessary facilities are
	 * available, turn on interpretation for the life of this guest
	 */
	kvm->arch.use_zpci_interp = 1;

	kvm_s390_vcpu_block_all(kvm);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_pci_setup(vcpu);
		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
	}

	kvm_s390_vcpu_unblock_all(kvm);
}

static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
	unsigned long cx;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}
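
/*
 * Note on the quiesce pattern used by kvm_s390_vcpu_crypto_reset_all() and
 * kvm_s390_vcpu_pci_enable_interp() above: kvm_s390_sync_request() marks the
 * request and kicks the vCPU out of SIE, so the new VM-wide state is picked
 * up before the next guest entry, while block_all/unblock_all keep the vCPUs
 * from re-entering SIE while the update is still in progress.
 */
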
/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_memory_slot *ms;
	struct kvm_memslots *slots;
	unsigned long ram_pages = 0;
	int bkt;

	/* migration mode already enabled */
	if (kvm->arch.migration_mode)
		return 0;
	slots = kvm_memslots(kvm);
	if (!slots || kvm_memslots_empty(slots))
		return -EINVAL;

	if (!kvm->arch.use_cmma) {
		kvm->arch.migration_mode = 1;
		return 0;
	}
	/* mark all the pages in active slots as dirty */
	kvm_for_each_memslot(ms, bkt, slots) {
		if (!ms->dirty_bitmap)
			return -EINVAL;
		/*
		 * The second half of the bitmap is only used on x86,
		 * and would be wasted otherwise, so we put it to good
		 * use here to keep track of the state of the storage
		 * attributes.
		 */
		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
		ram_pages += ms->npages;
	}
	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
	kvm->arch.migration_mode = 1;
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	return 0;
}

/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	/* migration mode already disabled */
	if (!kvm->arch.migration_mode)
		return 0;
	kvm->arch.migration_mode = 0;
	if (kvm->arch.use_cmma)
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
	return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int res = -ENXIO;

	mutex_lock(&kvm->slots_lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		res = kvm_s390_vm_start_migration(kvm);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->slots_lock);

	return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = kvm->arch.migration_mode;

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}

static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);

static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
		return -EINVAL;
	__kvm_s390_set_tod_clock(kvm, &gtod);

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);

	return 0;
}
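
/*
 * Illustrative only, a minimal userspace sketch (not part of this file) of
 * driving the TOD attribute handled above via the VM device-attribute
 * interface; "vm_fd" and "tod_value" are assumptions of the example:
 *
 *	struct kvm_s390_vm_tod_clock gtod = { .epoch_idx = 0, .tod = tod_value };
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_TOD,
 *		.attr  = KVM_S390_VM_TOD_EXT,
 *		.addr  = (__u64)(unsigned long)&gtod,
 *	};
 *
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */
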
static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod = { 0 };

	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
			   sizeof(gtod.tod)))
		return -EFAULT;

	__kvm_s390_set_tod_clock(kvm, &gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	mutex_lock(&kvm->lock);
	/*
	 * For protected guests, the TOD is managed by the ultravisor, so trying
	 * to change it will never bring the expected results.
	 */
	if (kvm_s390_pv_is_protected(kvm)) {
		ret = -EOPNOTSUPP;
		goto out_unlock;
	}

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

out_unlock:
	mutex_unlock(&kvm->lock);
	return ret;
}

static void kvm_s390_get_tod_clock(struct kvm *kvm,
				   struct kvm_s390_vm_tod_clock *gtod)
{
	union tod_clock clk;

	preempt_disable();

	store_tod_clock_ext(&clk);

	gtod->tod = clk.tod + kvm->arch.epoch;
	gtod->epoch_idx = 0;
	if (test_kvm_facility(kvm, 139)) {
		gtod->epoch_idx = clk.ei + kvm->arch.epdx;
		if (gtod->tod < clk.tod)
			gtod->epoch_idx += 1;
	}

	preempt_enable();
}

static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));
	kvm_s390_get_tod_clock(kvm, &gtod);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);
	return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
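
/*
 * Note for the IBC handling below: sclp.ibc carries the lowest supported
 * IBC level in its upper half (sclp.ibc >> 16) and the highest unblocked
 * level in its low 12 bits; the userspace-supplied proc->ibc is clamped
 * into that range, and kvm->arch.model.ibc is left untouched if either the
 * host's lowest level or the requested value is zero.
 */
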
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}
	bitmap_from_arr64(kvm->arch.cpu_feat, data.feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
	mutex_unlock(&kvm->lock);
	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}

	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
		mutex_unlock(&kvm->lock);
		return -EFAULT;
	}
	mutex_unlock(&kvm->lock);

	VM_EVENT(kvm, 3, "SET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
	VM_EVENT(kvm, 3, "SET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
	VM_EVENT(kvm, 3, "SET: guest KM subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
	VM_EVENT(kvm, 3, "SET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
	VM_EVENT(kvm, 3, "SET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
	VM_EVENT(kvm, 3, "SET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
	VM_EVENT(kvm, 3, "SET: guest PCC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
	VM_EVENT(kvm, 3, "SET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
	VM_EVENT(kvm, 3, "SET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
	VM_EVENT(kvm, 3, "SET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);

	return 0;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_list[0],
		 kvm->arch.model.fac_list[1],
		 kvm->arch.model.fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
	       sizeof(stfle_fac_list));
	VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_mask[0],
		 mach->fac_mask[1],
		 mach->fac_mask[2]);
	VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_list[0],
		 mach->fac_list[1],
		 mach->fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}

static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_to_arr64(data.feat, kvm->arch.cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_to_arr64(data.feat, kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}

static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
			 sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "GET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
	VM_EVENT(kvm, 3, "GET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
	VM_EVENT(kvm, 3, "GET: guest KM subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
	VM_EVENT(kvm, 3, "GET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
	VM_EVENT(kvm, 3, "GET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
	VM_EVENT(kvm, 3, "GET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
	VM_EVENT(kvm, 3, "GET: guest PCC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
	VM_EVENT(kvm, 3, "GET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
	VM_EVENT(kvm, 3, "GET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
	VM_EVENT(kvm, 3, "GET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);

	return 0;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
			 sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "GET: host PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
	VM_EVENT(kvm, 3, "GET: host PTFF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
	VM_EVENT(kvm, 3, "GET: host KMAC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
	VM_EVENT(kvm, 3, "GET: host KMC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
	VM_EVENT(kvm, 3, "GET: host KM subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
	VM_EVENT(kvm, 3, "GET: host KIMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
	VM_EVENT(kvm, 3, "GET: host KLMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
	VM_EVENT(kvm, 3, "GET: host PCKMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
	VM_EVENT(kvm, 3, "GET: host KMCTR subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
	VM_EVENT(kvm, 3, "GET: host KMF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
	VM_EVENT(kvm, 3, "GET: host KMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
	VM_EVENT(kvm, 3, "GET: host PCC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
	VM_EVENT(kvm, 3, "GET: host PPNO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
	VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
	VM_EVENT(kvm, 3, "GET: host KDSA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
	VM_EVENT(kvm, 3, "GET: host SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
	VM_EVENT(kvm, 3, "GET: host DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);

	return 0;
}

static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
1784 break; 1785 case KVM_S390_VM_CPU_MACHINE_FEAT: 1786 ret = kvm_s390_get_machine_feat(kvm, attr); 1787 break; 1788 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: 1789 ret = kvm_s390_get_processor_subfunc(kvm, attr); 1790 break; 1791 case KVM_S390_VM_CPU_MACHINE_SUBFUNC: 1792 ret = kvm_s390_get_machine_subfunc(kvm, attr); 1793 break; 1794 } 1795 return ret; 1796 } 1797 1798 /** 1799 * kvm_s390_update_topology_change_report - update CPU topology change report 1800 * @kvm: guest KVM description 1801 * @val: set or clear the MTCR bit 1802 * 1803 * Updates the Multiprocessor Topology-Change-Report bit to signal 1804 * the guest with a topology change. 1805 * This is only relevant if the topology facility is present. 1806 * 1807 * The SCA version, bsca or esca, doesn't matter as offset is the same. 1808 */ 1809 static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val) 1810 { 1811 union sca_utility new, old; 1812 struct bsca_block *sca; 1813 1814 read_lock(&kvm->arch.sca_lock); 1815 sca = kvm->arch.sca; 1816 do { 1817 old = READ_ONCE(sca->utility); 1818 new = old; 1819 new.mtcr = val; 1820 } while (cmpxchg(&sca->utility.val, old.val, new.val) != old.val); 1821 read_unlock(&kvm->arch.sca_lock); 1822 } 1823 1824 static int kvm_s390_set_topo_change_indication(struct kvm *kvm, 1825 struct kvm_device_attr *attr) 1826 { 1827 if (!test_kvm_facility(kvm, 11)) 1828 return -ENXIO; 1829 1830 kvm_s390_update_topology_change_report(kvm, !!attr->attr); 1831 return 0; 1832 } 1833 1834 static int kvm_s390_get_topo_change_indication(struct kvm *kvm, 1835 struct kvm_device_attr *attr) 1836 { 1837 u8 topo; 1838 1839 if (!test_kvm_facility(kvm, 11)) 1840 return -ENXIO; 1841 1842 read_lock(&kvm->arch.sca_lock); 1843 topo = ((struct bsca_block *)kvm->arch.sca)->utility.mtcr; 1844 read_unlock(&kvm->arch.sca_lock); 1845 1846 return put_user(topo, (u8 __user *)attr->addr); 1847 } 1848 1849 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr) 1850 { 1851 int ret; 1852 1853 switch (attr->group) { 1854 case KVM_S390_VM_MEM_CTRL: 1855 ret = kvm_s390_set_mem_control(kvm, attr); 1856 break; 1857 case KVM_S390_VM_TOD: 1858 ret = kvm_s390_set_tod(kvm, attr); 1859 break; 1860 case KVM_S390_VM_CPU_MODEL: 1861 ret = kvm_s390_set_cpu_model(kvm, attr); 1862 break; 1863 case KVM_S390_VM_CRYPTO: 1864 ret = kvm_s390_vm_set_crypto(kvm, attr); 1865 break; 1866 case KVM_S390_VM_MIGRATION: 1867 ret = kvm_s390_vm_set_migration(kvm, attr); 1868 break; 1869 case KVM_S390_VM_CPU_TOPOLOGY: 1870 ret = kvm_s390_set_topo_change_indication(kvm, attr); 1871 break; 1872 default: 1873 ret = -ENXIO; 1874 break; 1875 } 1876 1877 return ret; 1878 } 1879 1880 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr) 1881 { 1882 int ret; 1883 1884 switch (attr->group) { 1885 case KVM_S390_VM_MEM_CTRL: 1886 ret = kvm_s390_get_mem_control(kvm, attr); 1887 break; 1888 case KVM_S390_VM_TOD: 1889 ret = kvm_s390_get_tod(kvm, attr); 1890 break; 1891 case KVM_S390_VM_CPU_MODEL: 1892 ret = kvm_s390_get_cpu_model(kvm, attr); 1893 break; 1894 case KVM_S390_VM_MIGRATION: 1895 ret = kvm_s390_vm_get_migration(kvm, attr); 1896 break; 1897 case KVM_S390_VM_CPU_TOPOLOGY: 1898 ret = kvm_s390_get_topo_change_indication(kvm, attr); 1899 break; 1900 default: 1901 ret = -ENXIO; 1902 break; 1903 } 1904 1905 return ret; 1906 } 1907 1908 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) 1909 { 1910 int ret; 1911 1912 switch (attr->group) { 1913 case KVM_S390_VM_MEM_CTRL: 1914 switch 
(attr->attr) { 1915 case KVM_S390_VM_MEM_ENABLE_CMMA: 1916 case KVM_S390_VM_MEM_CLR_CMMA: 1917 ret = sclp.has_cmma ? 0 : -ENXIO; 1918 break; 1919 case KVM_S390_VM_MEM_LIMIT_SIZE: 1920 ret = 0; 1921 break; 1922 default: 1923 ret = -ENXIO; 1924 break; 1925 } 1926 break; 1927 case KVM_S390_VM_TOD: 1928 switch (attr->attr) { 1929 case KVM_S390_VM_TOD_LOW: 1930 case KVM_S390_VM_TOD_HIGH: 1931 ret = 0; 1932 break; 1933 default: 1934 ret = -ENXIO; 1935 break; 1936 } 1937 break; 1938 case KVM_S390_VM_CPU_MODEL: 1939 switch (attr->attr) { 1940 case KVM_S390_VM_CPU_PROCESSOR: 1941 case KVM_S390_VM_CPU_MACHINE: 1942 case KVM_S390_VM_CPU_PROCESSOR_FEAT: 1943 case KVM_S390_VM_CPU_MACHINE_FEAT: 1944 case KVM_S390_VM_CPU_MACHINE_SUBFUNC: 1945 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: 1946 ret = 0; 1947 break; 1948 default: 1949 ret = -ENXIO; 1950 break; 1951 } 1952 break; 1953 case KVM_S390_VM_CRYPTO: 1954 switch (attr->attr) { 1955 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW: 1956 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW: 1957 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW: 1958 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW: 1959 ret = 0; 1960 break; 1961 case KVM_S390_VM_CRYPTO_ENABLE_APIE: 1962 case KVM_S390_VM_CRYPTO_DISABLE_APIE: 1963 ret = ap_instructions_available() ? 0 : -ENXIO; 1964 break; 1965 default: 1966 ret = -ENXIO; 1967 break; 1968 } 1969 break; 1970 case KVM_S390_VM_MIGRATION: 1971 ret = 0; 1972 break; 1973 case KVM_S390_VM_CPU_TOPOLOGY: 1974 ret = test_kvm_facility(kvm, 11) ? 0 : -ENXIO; 1975 break; 1976 default: 1977 ret = -ENXIO; 1978 break; 1979 } 1980 1981 return ret; 1982 } 1983 1984 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) 1985 { 1986 uint8_t *keys; 1987 uint64_t hva; 1988 int srcu_idx, i, r = 0; 1989 1990 if (args->flags != 0) 1991 return -EINVAL; 1992 1993 /* Is this guest using storage keys? 
*/ 1994 if (!mm_uses_skeys(current->mm)) 1995 return KVM_S390_GET_SKEYS_NONE; 1996 1997 /* Enforce sane limit on memory allocation */ 1998 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX) 1999 return -EINVAL; 2000 2001 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT); 2002 if (!keys) 2003 return -ENOMEM; 2004 2005 mmap_read_lock(current->mm); 2006 srcu_idx = srcu_read_lock(&kvm->srcu); 2007 for (i = 0; i < args->count; i++) { 2008 hva = gfn_to_hva(kvm, args->start_gfn + i); 2009 if (kvm_is_error_hva(hva)) { 2010 r = -EFAULT; 2011 break; 2012 } 2013 2014 r = get_guest_storage_key(current->mm, hva, &keys[i]); 2015 if (r) 2016 break; 2017 } 2018 srcu_read_unlock(&kvm->srcu, srcu_idx); 2019 mmap_read_unlock(current->mm); 2020 2021 if (!r) { 2022 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys, 2023 sizeof(uint8_t) * args->count); 2024 if (r) 2025 r = -EFAULT; 2026 } 2027 2028 kvfree(keys); 2029 return r; 2030 } 2031 2032 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) 2033 { 2034 uint8_t *keys; 2035 uint64_t hva; 2036 int srcu_idx, i, r = 0; 2037 bool unlocked; 2038 2039 if (args->flags != 0) 2040 return -EINVAL; 2041 2042 /* Enforce sane limit on memory allocation */ 2043 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX) 2044 return -EINVAL; 2045 2046 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT); 2047 if (!keys) 2048 return -ENOMEM; 2049 2050 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr, 2051 sizeof(uint8_t) * args->count); 2052 if (r) { 2053 r = -EFAULT; 2054 goto out; 2055 } 2056 2057 /* Enable storage key handling for the guest */ 2058 r = s390_enable_skey(); 2059 if (r) 2060 goto out; 2061 2062 i = 0; 2063 mmap_read_lock(current->mm); 2064 srcu_idx = srcu_read_lock(&kvm->srcu); 2065 while (i < args->count) { 2066 unlocked = false; 2067 hva = gfn_to_hva(kvm, args->start_gfn + i); 2068 if (kvm_is_error_hva(hva)) { 2069 r = -EFAULT; 2070 break; 2071 } 2072 2073 /* Lowest order bit is reserved */ 2074 if (keys[i] & 0x01) { 2075 r = -EINVAL; 2076 break; 2077 } 2078 2079 r = set_guest_storage_key(current->mm, hva, keys[i], 0); 2080 if (r) { 2081 r = fixup_user_fault(current->mm, hva, 2082 FAULT_FLAG_WRITE, &unlocked); 2083 if (r) 2084 break; 2085 } 2086 if (!r) 2087 i++; 2088 } 2089 srcu_read_unlock(&kvm->srcu, srcu_idx); 2090 mmap_read_unlock(current->mm); 2091 out: 2092 kvfree(keys); 2093 return r; 2094 } 2095 2096 /* 2097 * Base address and length must be sent at the start of each block, therefore 2098 * it's cheaper to send some clean data, as long as it's less than the size of 2099 * two longs. 2100 */ 2101 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *)) 2102 /* for consistency */ 2103 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX) 2104 2105 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args, 2106 u8 *res, unsigned long bufsize) 2107 { 2108 unsigned long pgstev, hva, cur_gfn = args->start_gfn; 2109 2110 args->count = 0; 2111 while (args->count < bufsize) { 2112 hva = gfn_to_hva(kvm, cur_gfn); 2113 /* 2114 * We return an error if the first value was invalid, but we 2115 * return successfully if at least one value was copied. 2116 */ 2117 if (kvm_is_error_hva(hva)) 2118 return args->count ? 
0 : -EFAULT; 2119 if (get_pgste(kvm->mm, hva, &pgstev) < 0) 2120 pgstev = 0; 2121 res[args->count++] = (pgstev >> 24) & 0x43; 2122 cur_gfn++; 2123 } 2124 2125 return 0; 2126 } 2127 2128 static struct kvm_memory_slot *gfn_to_memslot_approx(struct kvm_memslots *slots, 2129 gfn_t gfn) 2130 { 2131 return ____gfn_to_memslot(slots, gfn, true); 2132 } 2133 2134 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots, 2135 unsigned long cur_gfn) 2136 { 2137 struct kvm_memory_slot *ms = gfn_to_memslot_approx(slots, cur_gfn); 2138 unsigned long ofs = cur_gfn - ms->base_gfn; 2139 struct rb_node *mnode = &ms->gfn_node[slots->node_idx]; 2140 2141 if (ms->base_gfn + ms->npages <= cur_gfn) { 2142 mnode = rb_next(mnode); 2143 /* If we are above the highest slot, wrap around */ 2144 if (!mnode) 2145 mnode = rb_first(&slots->gfn_tree); 2146 2147 ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]); 2148 ofs = 0; 2149 } 2150 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs); 2151 while (ofs >= ms->npages && (mnode = rb_next(mnode))) { 2152 ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]); 2153 ofs = find_first_bit(kvm_second_dirty_bitmap(ms), ms->npages); 2154 } 2155 return ms->base_gfn + ofs; 2156 } 2157 2158 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args, 2159 u8 *res, unsigned long bufsize) 2160 { 2161 unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev; 2162 struct kvm_memslots *slots = kvm_memslots(kvm); 2163 struct kvm_memory_slot *ms; 2164 2165 if (unlikely(kvm_memslots_empty(slots))) 2166 return 0; 2167 2168 cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn); 2169 ms = gfn_to_memslot(kvm, cur_gfn); 2170 args->count = 0; 2171 args->start_gfn = cur_gfn; 2172 if (!ms) 2173 return 0; 2174 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1); 2175 mem_end = kvm_s390_get_gfn_end(slots); 2176 2177 while (args->count < bufsize) { 2178 hva = gfn_to_hva(kvm, cur_gfn); 2179 if (kvm_is_error_hva(hva)) 2180 return 0; 2181 /* Decrement only if we actually flipped the bit to 0 */ 2182 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms))) 2183 atomic64_dec(&kvm->arch.cmma_dirty_pages); 2184 if (get_pgste(kvm->mm, hva, &pgstev) < 0) 2185 pgstev = 0; 2186 /* Save the value */ 2187 res[args->count++] = (pgstev >> 24) & 0x43; 2188 /* If the next bit is too far away, stop. */ 2189 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE) 2190 return 0; 2191 /* If we reached the previous "next", find the next one */ 2192 if (cur_gfn == next_gfn) 2193 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1); 2194 /* Reached the end of memory or of the buffer, stop */ 2195 if ((next_gfn >= mem_end) || 2196 (next_gfn - args->start_gfn >= bufsize)) 2197 return 0; 2198 cur_gfn++; 2199 /* Reached the end of the current memslot, take the next one. */ 2200 if (cur_gfn - ms->base_gfn >= ms->npages) { 2201 ms = gfn_to_memslot(kvm, cur_gfn); 2202 if (!ms) 2203 return 0; 2204 } 2205 } 2206 return 0; 2207 } 2208 2209 /* 2210 * This function searches for the next page with dirty CMMA attributes, and 2211 * saves the attributes in the buffer up to either the end of the buffer or 2212 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found; 2213 * no trailing clean bytes are saved. 2214 * In case no dirty bits were found, or if CMMA was not enabled or used, the 2215 * output buffer will indicate 0 as length. 
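 *
 * Illustrative sketch, not part of the original source: user space reaches
 * this path via the KVM_S390_GET_CMMA_BITS VM ioctl, roughly as follows
 * (vm_fd, buf and buf_len are placeholders):
 *
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count = buf_len,
 *		.flags = KVM_S390_CMMA_PEEK,
 *		.values = (__u64)(unsigned long)buf,
 *	};
 *
 *	ret = ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log);
 *
 * On success, log.count holds the number of values written to buf,
 * log.start_gfn the guest frame number the first value refers to, and
 * log.remaining the number of dirty values still pending (when not peeking).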
2216 */ 2217 static int kvm_s390_get_cmma_bits(struct kvm *kvm, 2218 struct kvm_s390_cmma_log *args) 2219 { 2220 unsigned long bufsize; 2221 int srcu_idx, peek, ret; 2222 u8 *values; 2223 2224 if (!kvm->arch.use_cmma) 2225 return -ENXIO; 2226 /* Invalid/unsupported flags were specified */ 2227 if (args->flags & ~KVM_S390_CMMA_PEEK) 2228 return -EINVAL; 2229 /* Migration mode query, and we are not doing a migration */ 2230 peek = !!(args->flags & KVM_S390_CMMA_PEEK); 2231 if (!peek && !kvm->arch.migration_mode) 2232 return -EINVAL; 2233 /* CMMA is disabled or was not used, or the buffer has length zero */ 2234 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX); 2235 if (!bufsize || !kvm->mm->context.uses_cmm) { 2236 memset(args, 0, sizeof(*args)); 2237 return 0; 2238 } 2239 /* We are not peeking, and there are no dirty pages */ 2240 if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) { 2241 memset(args, 0, sizeof(*args)); 2242 return 0; 2243 } 2244 2245 values = vmalloc(bufsize); 2246 if (!values) 2247 return -ENOMEM; 2248 2249 mmap_read_lock(kvm->mm); 2250 srcu_idx = srcu_read_lock(&kvm->srcu); 2251 if (peek) 2252 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize); 2253 else 2254 ret = kvm_s390_get_cmma(kvm, args, values, bufsize); 2255 srcu_read_unlock(&kvm->srcu, srcu_idx); 2256 mmap_read_unlock(kvm->mm); 2257 2258 if (kvm->arch.migration_mode) 2259 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages); 2260 else 2261 args->remaining = 0; 2262 2263 if (copy_to_user((void __user *)args->values, values, args->count)) 2264 ret = -EFAULT; 2265 2266 vfree(values); 2267 return ret; 2268 } 2269 2270 /* 2271 * This function sets the CMMA attributes for the given pages. If the input 2272 * buffer has zero length, no action is taken, otherwise the attributes are 2273 * set and the mm->context.uses_cmm flag is set. 2274 */ 2275 static int kvm_s390_set_cmma_bits(struct kvm *kvm, 2276 const struct kvm_s390_cmma_log *args) 2277 { 2278 unsigned long hva, mask, pgstev, i; 2279 uint8_t *bits; 2280 int srcu_idx, r = 0; 2281 2282 mask = args->mask; 2283 2284 if (!kvm->arch.use_cmma) 2285 return -ENXIO; 2286 /* invalid/unsupported flags */ 2287 if (args->flags != 0) 2288 return -EINVAL; 2289 /* Enforce sane limit on memory allocation */ 2290 if (args->count > KVM_S390_CMMA_SIZE_MAX) 2291 return -EINVAL; 2292 /* Nothing to do */ 2293 if (args->count == 0) 2294 return 0; 2295 2296 bits = vmalloc(array_size(sizeof(*bits), args->count)); 2297 if (!bits) 2298 return -ENOMEM; 2299 2300 r = copy_from_user(bits, (void __user *)args->values, args->count); 2301 if (r) { 2302 r = -EFAULT; 2303 goto out; 2304 } 2305 2306 mmap_read_lock(kvm->mm); 2307 srcu_idx = srcu_read_lock(&kvm->srcu); 2308 for (i = 0; i < args->count; i++) { 2309 hva = gfn_to_hva(kvm, args->start_gfn + i); 2310 if (kvm_is_error_hva(hva)) { 2311 r = -EFAULT; 2312 break; 2313 } 2314 2315 pgstev = bits[i]; 2316 pgstev = pgstev << 24; 2317 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT; 2318 set_pgste_bits(kvm->mm, hva, mask, pgstev); 2319 } 2320 srcu_read_unlock(&kvm->srcu, srcu_idx); 2321 mmap_read_unlock(kvm->mm); 2322 2323 if (!kvm->mm->context.uses_cmm) { 2324 mmap_write_lock(kvm->mm); 2325 kvm->mm->context.uses_cmm = 1; 2326 mmap_write_unlock(kvm->mm); 2327 } 2328 out: 2329 vfree(bits); 2330 return r; 2331 } 2332 2333 /** 2334 * kvm_s390_cpus_from_pv - Convert all protected vCPUs in a protected VM to 2335 * non protected. 
2336 * @kvm: the VM whose protected vCPUs are to be converted
2337 * @rc: return value for the RC field of the UVC (in case of error)
2338 * @rrc: return value for the RRC field of the UVC (in case of error)
2339 *
2340 * Does not stop in case of error; it tries to convert as many
2341 * CPUs as possible. In case of error, the RC and RRC of the last error are
2342 * returned.
2343 *
2344 * Return: 0 in case of success, otherwise -EIO
2345 */
2346 int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2347 {
2348 struct kvm_vcpu *vcpu;
2349 unsigned long i;
2350 u16 _rc, _rrc;
2351 int ret = 0;
2352
2353 /*
2354 * We ignore failures and try to destroy as many CPUs as possible.
2355 * At the same time we must not free the assigned resources when
2356 * this fails, as the ultravisor still has access to that memory.
2357 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2358 * behind.
2359 * We want to return the first failure rc and rrc, though.
2360 */
2361 kvm_for_each_vcpu(i, vcpu, kvm) {
2362 mutex_lock(&vcpu->mutex);
2363 if (kvm_s390_pv_destroy_cpu(vcpu, &_rc, &_rrc) && !ret) {
2364 *rc = _rc;
2365 *rrc = _rrc;
2366 ret = -EIO;
2367 }
2368 mutex_unlock(&vcpu->mutex);
2369 }
2370 /* Ensure that we re-enable gisa if the non-PV guest used it but the PV guest did not. */
2371 if (use_gisa)
2372 kvm_s390_gisa_enable(kvm);
2373 return ret;
2374 }
2375
2376 /**
2377 * kvm_s390_cpus_to_pv - Convert all non-protected vCPUs in a protected VM
2378 * to protected.
2379 * @kvm: the VM whose non-protected vCPUs are to be converted
2380 * @rc: return value for the RC field of the UVC (in case of error)
2381 * @rrc: return value for the RRC field of the UVC (in case of error)
2382 *
2383 * Tries to undo the conversion in case of error.
2384 *
2385 * Return: 0 in case of success, otherwise -EIO
2386 */
2387 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2388 {
2389 unsigned long i;
2390 int r = 0;
2391 u16 dummy;
2392
2393 struct kvm_vcpu *vcpu;
2394
2395 /* Disable the GISA if the ultravisor does not support AIV. */
2396 if (!test_bit_inv(BIT_UV_FEAT_AIV, &uv_info.uv_feature_indications))
2397 kvm_s390_gisa_disable(kvm);
2398
2399 kvm_for_each_vcpu(i, vcpu, kvm) {
2400 mutex_lock(&vcpu->mutex);
2401 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2402 mutex_unlock(&vcpu->mutex);
2403 if (r)
2404 break;
2405 }
2406 if (r)
2407 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2408 return r;
2409 }
2410
2411 /*
2412 * Here we provide user space with a direct interface to query UV
2413 * related data like UV maxima and available features as well as
2414 * feature-specific data.
2415 *
2416 * To facilitate future extension of the data structures we'll try to
2417 * write data up to the maximum requested length.
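 *
 * Illustrative sketch, not part of the original source: user space issues
 * such a query through KVM_S390_PV_COMMAND with cmd KVM_PV_INFO, roughly
 * (vm_fd is a placeholder):
 *
 *	struct kvm_s390_pv_info info = {
 *		.header.id = KVM_PV_INFO_VM,
 *		.header.len_max = sizeof(info),
 *	};
 *	struct kvm_pv_cmd cmd = {
 *		.cmd = KVM_PV_INFO,
 *		.data = (__u64)(unsigned long)&info,
 *	};
 *
 *	ret = ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd);
 *
 * On success, info.header.len_written tells how much of the (possibly
 * extended) structure was actually filled in.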
2418 */ 2419 static ssize_t kvm_s390_handle_pv_info(struct kvm_s390_pv_info *info) 2420 { 2421 ssize_t len_min; 2422 2423 switch (info->header.id) { 2424 case KVM_PV_INFO_VM: { 2425 len_min = sizeof(info->header) + sizeof(info->vm); 2426 2427 if (info->header.len_max < len_min) 2428 return -EINVAL; 2429 2430 memcpy(info->vm.inst_calls_list, 2431 uv_info.inst_calls_list, 2432 sizeof(uv_info.inst_calls_list)); 2433 2434 /* It's max cpuid not max cpus, so it's off by one */ 2435 info->vm.max_cpus = uv_info.max_guest_cpu_id + 1; 2436 info->vm.max_guests = uv_info.max_num_sec_conf; 2437 info->vm.max_guest_addr = uv_info.max_sec_stor_addr; 2438 info->vm.feature_indication = uv_info.uv_feature_indications; 2439 2440 return len_min; 2441 } 2442 case KVM_PV_INFO_DUMP: { 2443 len_min = sizeof(info->header) + sizeof(info->dump); 2444 2445 if (info->header.len_max < len_min) 2446 return -EINVAL; 2447 2448 info->dump.dump_cpu_buffer_len = uv_info.guest_cpu_stor_len; 2449 info->dump.dump_config_mem_buffer_per_1m = uv_info.conf_dump_storage_state_len; 2450 info->dump.dump_config_finalize_len = uv_info.conf_dump_finalize_len; 2451 return len_min; 2452 } 2453 default: 2454 return -EINVAL; 2455 } 2456 } 2457 2458 static int kvm_s390_pv_dmp(struct kvm *kvm, struct kvm_pv_cmd *cmd, 2459 struct kvm_s390_pv_dmp dmp) 2460 { 2461 int r = -EINVAL; 2462 void __user *result_buff = (void __user *)dmp.buff_addr; 2463 2464 switch (dmp.subcmd) { 2465 case KVM_PV_DUMP_INIT: { 2466 if (kvm->arch.pv.dumping) 2467 break; 2468 2469 /* 2470 * Block SIE entry as concurrent dump UVCs could lead 2471 * to validities. 2472 */ 2473 kvm_s390_vcpu_block_all(kvm); 2474 2475 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), 2476 UVC_CMD_DUMP_INIT, &cmd->rc, &cmd->rrc); 2477 KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP INIT: rc %x rrc %x", 2478 cmd->rc, cmd->rrc); 2479 if (!r) { 2480 kvm->arch.pv.dumping = true; 2481 } else { 2482 kvm_s390_vcpu_unblock_all(kvm); 2483 r = -EINVAL; 2484 } 2485 break; 2486 } 2487 case KVM_PV_DUMP_CONFIG_STOR_STATE: { 2488 if (!kvm->arch.pv.dumping) 2489 break; 2490 2491 /* 2492 * gaddr is an output parameter since we might stop 2493 * early. As dmp will be copied back in our caller, we 2494 * don't need to do it ourselves. 2495 */ 2496 r = kvm_s390_pv_dump_stor_state(kvm, result_buff, &dmp.gaddr, dmp.buff_len, 2497 &cmd->rc, &cmd->rrc); 2498 break; 2499 } 2500 case KVM_PV_DUMP_COMPLETE: { 2501 if (!kvm->arch.pv.dumping) 2502 break; 2503 2504 r = -EINVAL; 2505 if (dmp.buff_len < uv_info.conf_dump_finalize_len) 2506 break; 2507 2508 r = kvm_s390_pv_dump_complete(kvm, result_buff, 2509 &cmd->rc, &cmd->rrc); 2510 break; 2511 } 2512 default: 2513 r = -ENOTTY; 2514 break; 2515 } 2516 2517 return r; 2518 } 2519 2520 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) 2521 { 2522 int r = 0; 2523 u16 dummy; 2524 void __user *argp = (void __user *)cmd->data; 2525 2526 switch (cmd->cmd) { 2527 case KVM_PV_ENABLE: { 2528 r = -EINVAL; 2529 if (kvm_s390_pv_is_protected(kvm)) 2530 break; 2531 2532 /* 2533 * FMT 4 SIE needs esca. 
As we never switch back to bsca from 2534 * esca, we need no cleanup in the error cases below 2535 */ 2536 r = sca_switch_to_extended(kvm); 2537 if (r) 2538 break; 2539 2540 mmap_write_lock(current->mm); 2541 r = gmap_mark_unmergeable(); 2542 mmap_write_unlock(current->mm); 2543 if (r) 2544 break; 2545 2546 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc); 2547 if (r) 2548 break; 2549 2550 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc); 2551 if (r) 2552 kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy); 2553 2554 /* we need to block service interrupts from now on */ 2555 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs); 2556 break; 2557 } 2558 case KVM_PV_DISABLE: { 2559 r = -EINVAL; 2560 if (!kvm_s390_pv_is_protected(kvm)) 2561 break; 2562 2563 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc); 2564 /* 2565 * If a CPU could not be destroyed, destroy VM will also fail. 2566 * There is no point in trying to destroy it. Instead return 2567 * the rc and rrc from the first CPU that failed destroying. 2568 */ 2569 if (r) 2570 break; 2571 r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc); 2572 2573 /* no need to block service interrupts any more */ 2574 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs); 2575 break; 2576 } 2577 case KVM_PV_SET_SEC_PARMS: { 2578 struct kvm_s390_pv_sec_parm parms = {}; 2579 void *hdr; 2580 2581 r = -EINVAL; 2582 if (!kvm_s390_pv_is_protected(kvm)) 2583 break; 2584 2585 r = -EFAULT; 2586 if (copy_from_user(&parms, argp, sizeof(parms))) 2587 break; 2588 2589 /* Currently restricted to 8KB */ 2590 r = -EINVAL; 2591 if (parms.length > PAGE_SIZE * 2) 2592 break; 2593 2594 r = -ENOMEM; 2595 hdr = vmalloc(parms.length); 2596 if (!hdr) 2597 break; 2598 2599 r = -EFAULT; 2600 if (!copy_from_user(hdr, (void __user *)parms.origin, 2601 parms.length)) 2602 r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length, 2603 &cmd->rc, &cmd->rrc); 2604 2605 vfree(hdr); 2606 break; 2607 } 2608 case KVM_PV_UNPACK: { 2609 struct kvm_s390_pv_unp unp = {}; 2610 2611 r = -EINVAL; 2612 if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm)) 2613 break; 2614 2615 r = -EFAULT; 2616 if (copy_from_user(&unp, argp, sizeof(unp))) 2617 break; 2618 2619 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak, 2620 &cmd->rc, &cmd->rrc); 2621 break; 2622 } 2623 case KVM_PV_VERIFY: { 2624 r = -EINVAL; 2625 if (!kvm_s390_pv_is_protected(kvm)) 2626 break; 2627 2628 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), 2629 UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc); 2630 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc, 2631 cmd->rrc); 2632 break; 2633 } 2634 case KVM_PV_PREP_RESET: { 2635 r = -EINVAL; 2636 if (!kvm_s390_pv_is_protected(kvm)) 2637 break; 2638 2639 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), 2640 UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc); 2641 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x", 2642 cmd->rc, cmd->rrc); 2643 break; 2644 } 2645 case KVM_PV_UNSHARE_ALL: { 2646 r = -EINVAL; 2647 if (!kvm_s390_pv_is_protected(kvm)) 2648 break; 2649 2650 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), 2651 UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc); 2652 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x", 2653 cmd->rc, cmd->rrc); 2654 break; 2655 } 2656 case KVM_PV_INFO: { 2657 struct kvm_s390_pv_info info = {}; 2658 ssize_t data_len; 2659 2660 /* 2661 * No need to check the VM protection here. 2662 * 2663 * Maybe user space wants to query some of the data 2664 * when the VM is still unprotected. 
If we see the 2665 * need to fence a new data command we can still 2666 * return an error in the info handler. 2667 */ 2668 2669 r = -EFAULT; 2670 if (copy_from_user(&info, argp, sizeof(info.header))) 2671 break; 2672 2673 r = -EINVAL; 2674 if (info.header.len_max < sizeof(info.header)) 2675 break; 2676 2677 data_len = kvm_s390_handle_pv_info(&info); 2678 if (data_len < 0) { 2679 r = data_len; 2680 break; 2681 } 2682 /* 2683 * If a data command struct is extended (multiple 2684 * times) this can be used to determine how much of it 2685 * is valid. 2686 */ 2687 info.header.len_written = data_len; 2688 2689 r = -EFAULT; 2690 if (copy_to_user(argp, &info, data_len)) 2691 break; 2692 2693 r = 0; 2694 break; 2695 } 2696 case KVM_PV_DUMP: { 2697 struct kvm_s390_pv_dmp dmp; 2698 2699 r = -EINVAL; 2700 if (!kvm_s390_pv_is_protected(kvm)) 2701 break; 2702 2703 r = -EFAULT; 2704 if (copy_from_user(&dmp, argp, sizeof(dmp))) 2705 break; 2706 2707 r = kvm_s390_pv_dmp(kvm, cmd, dmp); 2708 if (r) 2709 break; 2710 2711 if (copy_to_user(argp, &dmp, sizeof(dmp))) { 2712 r = -EFAULT; 2713 break; 2714 } 2715 2716 break; 2717 } 2718 default: 2719 r = -ENOTTY; 2720 } 2721 return r; 2722 } 2723 2724 static bool access_key_invalid(u8 access_key) 2725 { 2726 return access_key > 0xf; 2727 } 2728 2729 static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop) 2730 { 2731 void __user *uaddr = (void __user *)mop->buf; 2732 u64 supported_flags; 2733 void *tmpbuf = NULL; 2734 int r, srcu_idx; 2735 2736 supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION 2737 | KVM_S390_MEMOP_F_CHECK_ONLY; 2738 if (mop->flags & ~supported_flags || !mop->size) 2739 return -EINVAL; 2740 if (mop->size > MEM_OP_MAX_SIZE) 2741 return -E2BIG; 2742 /* 2743 * This is technically a heuristic only, if the kvm->lock is not 2744 * taken, it is not guaranteed that the vm is/remains non-protected. 2745 * This is ok from a kernel perspective, wrongdoing is detected 2746 * on the access, -EFAULT is returned and the vm may crash the 2747 * next time it accesses the memory in question. 2748 * There is no sane usecase to do switching and a memop on two 2749 * different CPUs at the same time. 
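 *
 * Illustrative sketch, not part of the original source: an absolute read
 * through the VM ioctl handled below looks roughly like this (vm_fd, buf,
 * len and guest_addr are placeholders):
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = guest_addr,
 *		.size = len,
 *		.op = KVM_S390_MEMOP_ABSOLUTE_READ,
 *		.buf = (__u64)(unsigned long)buf,
 *		.flags = 0,
 *	};
 *
 *	ret = ioctl(vm_fd, KVM_S390_MEM_OP, &op);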
2750 */ 2751 if (kvm_s390_pv_get_handle(kvm)) 2752 return -EINVAL; 2753 if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) { 2754 if (access_key_invalid(mop->key)) 2755 return -EINVAL; 2756 } else { 2757 mop->key = 0; 2758 } 2759 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) { 2760 tmpbuf = vmalloc(mop->size); 2761 if (!tmpbuf) 2762 return -ENOMEM; 2763 } 2764 2765 srcu_idx = srcu_read_lock(&kvm->srcu); 2766 2767 if (kvm_is_error_gpa(kvm, mop->gaddr)) { 2768 r = PGM_ADDRESSING; 2769 goto out_unlock; 2770 } 2771 2772 switch (mop->op) { 2773 case KVM_S390_MEMOP_ABSOLUTE_READ: { 2774 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { 2775 r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_FETCH, mop->key); 2776 } else { 2777 r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf, 2778 mop->size, GACC_FETCH, mop->key); 2779 if (r == 0) { 2780 if (copy_to_user(uaddr, tmpbuf, mop->size)) 2781 r = -EFAULT; 2782 } 2783 } 2784 break; 2785 } 2786 case KVM_S390_MEMOP_ABSOLUTE_WRITE: { 2787 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { 2788 r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_STORE, mop->key); 2789 } else { 2790 if (copy_from_user(tmpbuf, uaddr, mop->size)) { 2791 r = -EFAULT; 2792 break; 2793 } 2794 r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf, 2795 mop->size, GACC_STORE, mop->key); 2796 } 2797 break; 2798 } 2799 default: 2800 r = -EINVAL; 2801 } 2802 2803 out_unlock: 2804 srcu_read_unlock(&kvm->srcu, srcu_idx); 2805 2806 vfree(tmpbuf); 2807 return r; 2808 } 2809 2810 long kvm_arch_vm_ioctl(struct file *filp, 2811 unsigned int ioctl, unsigned long arg) 2812 { 2813 struct kvm *kvm = filp->private_data; 2814 void __user *argp = (void __user *)arg; 2815 struct kvm_device_attr attr; 2816 int r; 2817 2818 switch (ioctl) { 2819 case KVM_S390_INTERRUPT: { 2820 struct kvm_s390_interrupt s390int; 2821 2822 r = -EFAULT; 2823 if (copy_from_user(&s390int, argp, sizeof(s390int))) 2824 break; 2825 r = kvm_s390_inject_vm(kvm, &s390int); 2826 break; 2827 } 2828 case KVM_CREATE_IRQCHIP: { 2829 struct kvm_irq_routing_entry routing; 2830 2831 r = -EINVAL; 2832 if (kvm->arch.use_irqchip) { 2833 /* Set up dummy routing. 
*/ 2834 memset(&routing, 0, sizeof(routing)); 2835 r = kvm_set_irq_routing(kvm, &routing, 0, 0); 2836 } 2837 break; 2838 } 2839 case KVM_SET_DEVICE_ATTR: { 2840 r = -EFAULT; 2841 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 2842 break; 2843 r = kvm_s390_vm_set_attr(kvm, &attr); 2844 break; 2845 } 2846 case KVM_GET_DEVICE_ATTR: { 2847 r = -EFAULT; 2848 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 2849 break; 2850 r = kvm_s390_vm_get_attr(kvm, &attr); 2851 break; 2852 } 2853 case KVM_HAS_DEVICE_ATTR: { 2854 r = -EFAULT; 2855 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 2856 break; 2857 r = kvm_s390_vm_has_attr(kvm, &attr); 2858 break; 2859 } 2860 case KVM_S390_GET_SKEYS: { 2861 struct kvm_s390_skeys args; 2862 2863 r = -EFAULT; 2864 if (copy_from_user(&args, argp, 2865 sizeof(struct kvm_s390_skeys))) 2866 break; 2867 r = kvm_s390_get_skeys(kvm, &args); 2868 break; 2869 } 2870 case KVM_S390_SET_SKEYS: { 2871 struct kvm_s390_skeys args; 2872 2873 r = -EFAULT; 2874 if (copy_from_user(&args, argp, 2875 sizeof(struct kvm_s390_skeys))) 2876 break; 2877 r = kvm_s390_set_skeys(kvm, &args); 2878 break; 2879 } 2880 case KVM_S390_GET_CMMA_BITS: { 2881 struct kvm_s390_cmma_log args; 2882 2883 r = -EFAULT; 2884 if (copy_from_user(&args, argp, sizeof(args))) 2885 break; 2886 mutex_lock(&kvm->slots_lock); 2887 r = kvm_s390_get_cmma_bits(kvm, &args); 2888 mutex_unlock(&kvm->slots_lock); 2889 if (!r) { 2890 r = copy_to_user(argp, &args, sizeof(args)); 2891 if (r) 2892 r = -EFAULT; 2893 } 2894 break; 2895 } 2896 case KVM_S390_SET_CMMA_BITS: { 2897 struct kvm_s390_cmma_log args; 2898 2899 r = -EFAULT; 2900 if (copy_from_user(&args, argp, sizeof(args))) 2901 break; 2902 mutex_lock(&kvm->slots_lock); 2903 r = kvm_s390_set_cmma_bits(kvm, &args); 2904 mutex_unlock(&kvm->slots_lock); 2905 break; 2906 } 2907 case KVM_S390_PV_COMMAND: { 2908 struct kvm_pv_cmd args; 2909 2910 /* protvirt means user cpu state */ 2911 kvm_s390_set_user_cpu_state_ctrl(kvm); 2912 r = 0; 2913 if (!is_prot_virt_host()) { 2914 r = -EINVAL; 2915 break; 2916 } 2917 if (copy_from_user(&args, argp, sizeof(args))) { 2918 r = -EFAULT; 2919 break; 2920 } 2921 if (args.flags) { 2922 r = -EINVAL; 2923 break; 2924 } 2925 mutex_lock(&kvm->lock); 2926 r = kvm_s390_handle_pv(kvm, &args); 2927 mutex_unlock(&kvm->lock); 2928 if (copy_to_user(argp, &args, sizeof(args))) { 2929 r = -EFAULT; 2930 break; 2931 } 2932 break; 2933 } 2934 case KVM_S390_MEM_OP: { 2935 struct kvm_s390_mem_op mem_op; 2936 2937 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0) 2938 r = kvm_s390_vm_mem_op(kvm, &mem_op); 2939 else 2940 r = -EFAULT; 2941 break; 2942 } 2943 case KVM_S390_ZPCI_OP: { 2944 struct kvm_s390_zpci_op args; 2945 2946 r = -EINVAL; 2947 if (!IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) 2948 break; 2949 if (copy_from_user(&args, argp, sizeof(args))) { 2950 r = -EFAULT; 2951 break; 2952 } 2953 r = kvm_s390_pci_zpci_op(kvm, &args); 2954 break; 2955 } 2956 default: 2957 r = -ENOTTY; 2958 } 2959 2960 return r; 2961 } 2962 2963 static int kvm_s390_apxa_installed(void) 2964 { 2965 struct ap_config_info info; 2966 2967 if (ap_instructions_available()) { 2968 if (ap_qci(&info) == 0) 2969 return info.apxa; 2970 } 2971 2972 return 0; 2973 } 2974 2975 /* 2976 * The format of the crypto control block (CRYCB) is specified in the 3 low 2977 * order bits of the CRYCB designation (CRYCBD) field as follows: 2978 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the 2979 * AP extended addressing (APXA) facility are 
installed. 2980 * Format 1: The APXA facility is not installed but the MSAX3 facility is. 2981 * Format 2: Both the APXA and MSAX3 facilities are installed 2982 */ 2983 static void kvm_s390_set_crycb_format(struct kvm *kvm) 2984 { 2985 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb; 2986 2987 /* Clear the CRYCB format bits - i.e., set format 0 by default */ 2988 kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK); 2989 2990 /* Check whether MSAX3 is installed */ 2991 if (!test_kvm_facility(kvm, 76)) 2992 return; 2993 2994 if (kvm_s390_apxa_installed()) 2995 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2; 2996 else 2997 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1; 2998 } 2999 3000 /* 3001 * kvm_arch_crypto_set_masks 3002 * 3003 * @kvm: pointer to the target guest's KVM struct containing the crypto masks 3004 * to be set. 3005 * @apm: the mask identifying the accessible AP adapters 3006 * @aqm: the mask identifying the accessible AP domains 3007 * @adm: the mask identifying the accessible AP control domains 3008 * 3009 * Set the masks that identify the adapters, domains and control domains to 3010 * which the KVM guest is granted access. 3011 * 3012 * Note: The kvm->lock mutex must be locked by the caller before invoking this 3013 * function. 3014 */ 3015 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm, 3016 unsigned long *aqm, unsigned long *adm) 3017 { 3018 struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb; 3019 3020 kvm_s390_vcpu_block_all(kvm); 3021 3022 switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) { 3023 case CRYCB_FORMAT2: /* APCB1 use 256 bits */ 3024 memcpy(crycb->apcb1.apm, apm, 32); 3025 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx", 3026 apm[0], apm[1], apm[2], apm[3]); 3027 memcpy(crycb->apcb1.aqm, aqm, 32); 3028 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx", 3029 aqm[0], aqm[1], aqm[2], aqm[3]); 3030 memcpy(crycb->apcb1.adm, adm, 32); 3031 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx", 3032 adm[0], adm[1], adm[2], adm[3]); 3033 break; 3034 case CRYCB_FORMAT1: 3035 case CRYCB_FORMAT0: /* Fall through both use APCB0 */ 3036 memcpy(crycb->apcb0.apm, apm, 8); 3037 memcpy(crycb->apcb0.aqm, aqm, 2); 3038 memcpy(crycb->apcb0.adm, adm, 2); 3039 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x", 3040 apm[0], *((unsigned short *)aqm), 3041 *((unsigned short *)adm)); 3042 break; 3043 default: /* Can not happen */ 3044 break; 3045 } 3046 3047 /* recreate the shadow crycb for each vcpu */ 3048 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART); 3049 kvm_s390_vcpu_unblock_all(kvm); 3050 } 3051 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks); 3052 3053 /* 3054 * kvm_arch_crypto_clear_masks 3055 * 3056 * @kvm: pointer to the target guest's KVM struct containing the crypto masks 3057 * to be cleared. 3058 * 3059 * Clear the masks that identify the adapters, domains and control domains to 3060 * which the KVM guest is granted access. 3061 * 3062 * Note: The kvm->lock mutex must be locked by the caller before invoking this 3063 * function. 
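 *
 * Illustrative caller pattern, not part of the original source:
 *
 *	mutex_lock(&kvm->lock);
 *	kvm_arch_crypto_clear_masks(kvm);
 *	mutex_unlock(&kvm->lock);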
3064 */ 3065 void kvm_arch_crypto_clear_masks(struct kvm *kvm) 3066 { 3067 kvm_s390_vcpu_block_all(kvm); 3068 3069 memset(&kvm->arch.crypto.crycb->apcb0, 0, 3070 sizeof(kvm->arch.crypto.crycb->apcb0)); 3071 memset(&kvm->arch.crypto.crycb->apcb1, 0, 3072 sizeof(kvm->arch.crypto.crycb->apcb1)); 3073 3074 VM_EVENT(kvm, 3, "%s", "CLR CRYCB:"); 3075 /* recreate the shadow crycb for each vcpu */ 3076 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART); 3077 kvm_s390_vcpu_unblock_all(kvm); 3078 } 3079 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks); 3080 3081 static u64 kvm_s390_get_initial_cpuid(void) 3082 { 3083 struct cpuid cpuid; 3084 3085 get_cpu_id(&cpuid); 3086 cpuid.version = 0xff; 3087 return *((u64 *) &cpuid); 3088 } 3089 3090 static void kvm_s390_crypto_init(struct kvm *kvm) 3091 { 3092 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb; 3093 kvm_s390_set_crycb_format(kvm); 3094 init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem); 3095 3096 if (!test_kvm_facility(kvm, 76)) 3097 return; 3098 3099 /* Enable AES/DEA protected key functions by default */ 3100 kvm->arch.crypto.aes_kw = 1; 3101 kvm->arch.crypto.dea_kw = 1; 3102 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 3103 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); 3104 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 3105 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); 3106 } 3107 3108 static void sca_dispose(struct kvm *kvm) 3109 { 3110 if (kvm->arch.use_esca) 3111 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block)); 3112 else 3113 free_page((unsigned long)(kvm->arch.sca)); 3114 kvm->arch.sca = NULL; 3115 } 3116 3117 void kvm_arch_free_vm(struct kvm *kvm) 3118 { 3119 if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) 3120 kvm_s390_pci_clear_list(kvm); 3121 3122 __kvm_arch_free_vm(kvm); 3123 } 3124 3125 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) 3126 { 3127 gfp_t alloc_flags = GFP_KERNEL_ACCOUNT; 3128 int i, rc; 3129 char debug_name[16]; 3130 static unsigned long sca_offset; 3131 3132 rc = -EINVAL; 3133 #ifdef CONFIG_KVM_S390_UCONTROL 3134 if (type & ~KVM_VM_S390_UCONTROL) 3135 goto out_err; 3136 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN))) 3137 goto out_err; 3138 #else 3139 if (type) 3140 goto out_err; 3141 #endif 3142 3143 rc = s390_enable_sie(); 3144 if (rc) 3145 goto out_err; 3146 3147 rc = -ENOMEM; 3148 3149 if (!sclp.has_64bscao) 3150 alloc_flags |= GFP_DMA; 3151 rwlock_init(&kvm->arch.sca_lock); 3152 /* start with basic SCA */ 3153 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags); 3154 if (!kvm->arch.sca) 3155 goto out_err; 3156 mutex_lock(&kvm_lock); 3157 sca_offset += 16; 3158 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE) 3159 sca_offset = 0; 3160 kvm->arch.sca = (struct bsca_block *) 3161 ((char *) kvm->arch.sca + sca_offset); 3162 mutex_unlock(&kvm_lock); 3163 3164 sprintf(debug_name, "kvm-%u", current->pid); 3165 3166 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long)); 3167 if (!kvm->arch.dbf) 3168 goto out_err; 3169 3170 BUILD_BUG_ON(sizeof(struct sie_page2) != 4096); 3171 kvm->arch.sie_page2 = 3172 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA); 3173 if (!kvm->arch.sie_page2) 3174 goto out_err; 3175 3176 kvm->arch.sie_page2->kvm = kvm; 3177 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list; 3178 3179 for (i = 0; i < kvm_s390_fac_size(); i++) { 3180 kvm->arch.model.fac_mask[i] = stfle_fac_list[i] & 3181 (kvm_s390_fac_base[i] | 3182 kvm_s390_fac_ext[i]); 3183 
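		/*
		 * fac_mask above bounds the facilities that can ever be
		 * offered to this guest (base plus extended facilities
		 * supported by the host), while fac_list below is the set
		 * initially presented to the guest (base facilities only);
		 * user space may refine fac_list via the CPU model
		 * attributes.
		 */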
kvm->arch.model.fac_list[i] = stfle_fac_list[i] & 3184 kvm_s390_fac_base[i]; 3185 } 3186 kvm->arch.model.subfuncs = kvm_s390_available_subfunc; 3187 3188 /* we are always in czam mode - even on pre z14 machines */ 3189 set_kvm_facility(kvm->arch.model.fac_mask, 138); 3190 set_kvm_facility(kvm->arch.model.fac_list, 138); 3191 /* we emulate STHYI in kvm */ 3192 set_kvm_facility(kvm->arch.model.fac_mask, 74); 3193 set_kvm_facility(kvm->arch.model.fac_list, 74); 3194 if (MACHINE_HAS_TLB_GUEST) { 3195 set_kvm_facility(kvm->arch.model.fac_mask, 147); 3196 set_kvm_facility(kvm->arch.model.fac_list, 147); 3197 } 3198 3199 if (css_general_characteristics.aiv && test_facility(65)) 3200 set_kvm_facility(kvm->arch.model.fac_mask, 65); 3201 3202 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid(); 3203 kvm->arch.model.ibc = sclp.ibc & 0x0fff; 3204 3205 kvm_s390_crypto_init(kvm); 3206 3207 if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) { 3208 mutex_lock(&kvm->lock); 3209 kvm_s390_pci_init_list(kvm); 3210 kvm_s390_vcpu_pci_enable_interp(kvm); 3211 mutex_unlock(&kvm->lock); 3212 } 3213 3214 mutex_init(&kvm->arch.float_int.ais_lock); 3215 spin_lock_init(&kvm->arch.float_int.lock); 3216 for (i = 0; i < FIRQ_LIST_COUNT; i++) 3217 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]); 3218 init_waitqueue_head(&kvm->arch.ipte_wq); 3219 mutex_init(&kvm->arch.ipte_mutex); 3220 3221 debug_register_view(kvm->arch.dbf, &debug_sprintf_view); 3222 VM_EVENT(kvm, 3, "vm created with type %lu", type); 3223 3224 if (type & KVM_VM_S390_UCONTROL) { 3225 kvm->arch.gmap = NULL; 3226 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT; 3227 } else { 3228 if (sclp.hamax == U64_MAX) 3229 kvm->arch.mem_limit = TASK_SIZE_MAX; 3230 else 3231 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX, 3232 sclp.hamax + 1); 3233 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1); 3234 if (!kvm->arch.gmap) 3235 goto out_err; 3236 kvm->arch.gmap->private = kvm; 3237 kvm->arch.gmap->pfault_enabled = 0; 3238 } 3239 3240 kvm->arch.use_pfmfi = sclp.has_pfmfi; 3241 kvm->arch.use_skf = sclp.has_skey; 3242 spin_lock_init(&kvm->arch.start_stop_lock); 3243 kvm_s390_vsie_init(kvm); 3244 if (use_gisa) 3245 kvm_s390_gisa_init(kvm); 3246 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid); 3247 3248 return 0; 3249 out_err: 3250 free_page((unsigned long)kvm->arch.sie_page2); 3251 debug_unregister(kvm->arch.dbf); 3252 sca_dispose(kvm); 3253 KVM_EVENT(3, "creation of vm failed: %d", rc); 3254 return rc; 3255 } 3256 3257 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) 3258 { 3259 u16 rc, rrc; 3260 3261 VCPU_EVENT(vcpu, 3, "%s", "free cpu"); 3262 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id); 3263 kvm_s390_clear_local_irqs(vcpu); 3264 kvm_clear_async_pf_completion_queue(vcpu); 3265 if (!kvm_is_ucontrol(vcpu->kvm)) 3266 sca_del_vcpu(vcpu); 3267 kvm_s390_update_topology_change_report(vcpu->kvm, 1); 3268 3269 if (kvm_is_ucontrol(vcpu->kvm)) 3270 gmap_remove(vcpu->arch.gmap); 3271 3272 if (vcpu->kvm->arch.use_cmma) 3273 kvm_s390_vcpu_unsetup_cmma(vcpu); 3274 /* We can not hold the vcpu mutex here, we are already dying */ 3275 if (kvm_s390_pv_cpu_get_handle(vcpu)) 3276 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc); 3277 free_page((unsigned long)(vcpu->arch.sie_block)); 3278 } 3279 3280 void kvm_arch_destroy_vm(struct kvm *kvm) 3281 { 3282 u16 rc, rrc; 3283 3284 kvm_destroy_vcpus(kvm); 3285 sca_dispose(kvm); 3286 kvm_s390_gisa_destroy(kvm); 3287 /* 3288 * We are already at the end of life and kvm->lock is not taken. 
3289 * This is ok as the file descriptor is closed by now and nobody 3290 * can mess with the pv state. To avoid lockdep_assert_held from 3291 * complaining we do not use kvm_s390_pv_is_protected. 3292 */ 3293 if (kvm_s390_pv_get_handle(kvm)) 3294 kvm_s390_pv_deinit_vm(kvm, &rc, &rrc); 3295 /* 3296 * Remove the mmu notifier only when the whole KVM VM is torn down, 3297 * and only if one was registered to begin with. If the VM is 3298 * currently not protected, but has been previously been protected, 3299 * then it's possible that the notifier is still registered. 3300 */ 3301 if (kvm->arch.pv.mmu_notifier.ops) 3302 mmu_notifier_unregister(&kvm->arch.pv.mmu_notifier, kvm->mm); 3303 3304 debug_unregister(kvm->arch.dbf); 3305 free_page((unsigned long)kvm->arch.sie_page2); 3306 if (!kvm_is_ucontrol(kvm)) 3307 gmap_remove(kvm->arch.gmap); 3308 kvm_s390_destroy_adapters(kvm); 3309 kvm_s390_clear_float_irqs(kvm); 3310 kvm_s390_vsie_destroy(kvm); 3311 KVM_EVENT(3, "vm 0x%pK destroyed", kvm); 3312 } 3313 3314 /* Section: vcpu related */ 3315 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu) 3316 { 3317 vcpu->arch.gmap = gmap_create(current->mm, -1UL); 3318 if (!vcpu->arch.gmap) 3319 return -ENOMEM; 3320 vcpu->arch.gmap->private = vcpu->kvm; 3321 3322 return 0; 3323 } 3324 3325 static void sca_del_vcpu(struct kvm_vcpu *vcpu) 3326 { 3327 if (!kvm_s390_use_sca_entries()) 3328 return; 3329 read_lock(&vcpu->kvm->arch.sca_lock); 3330 if (vcpu->kvm->arch.use_esca) { 3331 struct esca_block *sca = vcpu->kvm->arch.sca; 3332 3333 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); 3334 sca->cpu[vcpu->vcpu_id].sda = 0; 3335 } else { 3336 struct bsca_block *sca = vcpu->kvm->arch.sca; 3337 3338 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); 3339 sca->cpu[vcpu->vcpu_id].sda = 0; 3340 } 3341 read_unlock(&vcpu->kvm->arch.sca_lock); 3342 } 3343 3344 static void sca_add_vcpu(struct kvm_vcpu *vcpu) 3345 { 3346 if (!kvm_s390_use_sca_entries()) { 3347 struct bsca_block *sca = vcpu->kvm->arch.sca; 3348 3349 /* we still need the basic sca for the ipte control */ 3350 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 3351 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; 3352 return; 3353 } 3354 read_lock(&vcpu->kvm->arch.sca_lock); 3355 if (vcpu->kvm->arch.use_esca) { 3356 struct esca_block *sca = vcpu->kvm->arch.sca; 3357 3358 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; 3359 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 3360 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU; 3361 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; 3362 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); 3363 } else { 3364 struct bsca_block *sca = vcpu->kvm->arch.sca; 3365 3366 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; 3367 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 3368 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; 3369 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); 3370 } 3371 read_unlock(&vcpu->kvm->arch.sca_lock); 3372 } 3373 3374 /* Basic SCA to Extended SCA data copy routines */ 3375 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s) 3376 { 3377 d->sda = s->sda; 3378 d->sigp_ctrl.c = s->sigp_ctrl.c; 3379 d->sigp_ctrl.scn = s->sigp_ctrl.scn; 3380 } 3381 3382 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s) 3383 { 3384 int i; 3385 3386 d->ipte_control = s->ipte_control; 3387 d->mcn[0] = s->mcn; 3388 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++) 3389 sca_copy_entry(&d->cpu[i], 
&s->cpu[i]); 3390 } 3391 3392 static int sca_switch_to_extended(struct kvm *kvm) 3393 { 3394 struct bsca_block *old_sca = kvm->arch.sca; 3395 struct esca_block *new_sca; 3396 struct kvm_vcpu *vcpu; 3397 unsigned long vcpu_idx; 3398 u32 scaol, scaoh; 3399 3400 if (kvm->arch.use_esca) 3401 return 0; 3402 3403 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO); 3404 if (!new_sca) 3405 return -ENOMEM; 3406 3407 scaoh = (u32)((u64)(new_sca) >> 32); 3408 scaol = (u32)(u64)(new_sca) & ~0x3fU; 3409 3410 kvm_s390_vcpu_block_all(kvm); 3411 write_lock(&kvm->arch.sca_lock); 3412 3413 sca_copy_b_to_e(new_sca, old_sca); 3414 3415 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) { 3416 vcpu->arch.sie_block->scaoh = scaoh; 3417 vcpu->arch.sie_block->scaol = scaol; 3418 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; 3419 } 3420 kvm->arch.sca = new_sca; 3421 kvm->arch.use_esca = 1; 3422 3423 write_unlock(&kvm->arch.sca_lock); 3424 kvm_s390_vcpu_unblock_all(kvm); 3425 3426 free_page((unsigned long)old_sca); 3427 3428 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)", 3429 old_sca, kvm->arch.sca); 3430 return 0; 3431 } 3432 3433 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id) 3434 { 3435 int rc; 3436 3437 if (!kvm_s390_use_sca_entries()) { 3438 if (id < KVM_MAX_VCPUS) 3439 return true; 3440 return false; 3441 } 3442 if (id < KVM_S390_BSCA_CPU_SLOTS) 3443 return true; 3444 if (!sclp.has_esca || !sclp.has_64bscao) 3445 return false; 3446 3447 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm); 3448 3449 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS; 3450 } 3451 3452 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 3453 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3454 { 3455 WARN_ON_ONCE(vcpu->arch.cputm_start != 0); 3456 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 3457 vcpu->arch.cputm_start = get_tod_clock_fast(); 3458 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 3459 } 3460 3461 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 3462 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3463 { 3464 WARN_ON_ONCE(vcpu->arch.cputm_start == 0); 3465 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 3466 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start; 3467 vcpu->arch.cputm_start = 0; 3468 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 3469 } 3470 3471 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 3472 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3473 { 3474 WARN_ON_ONCE(vcpu->arch.cputm_enabled); 3475 vcpu->arch.cputm_enabled = true; 3476 __start_cpu_timer_accounting(vcpu); 3477 } 3478 3479 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 3480 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3481 { 3482 WARN_ON_ONCE(!vcpu->arch.cputm_enabled); 3483 __stop_cpu_timer_accounting(vcpu); 3484 vcpu->arch.cputm_enabled = false; 3485 } 3486 3487 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3488 { 3489 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3490 __enable_cpu_timer_accounting(vcpu); 3491 preempt_enable(); 3492 } 3493 3494 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3495 { 3496 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3497 __disable_cpu_timer_accounting(vcpu); 3498 preempt_enable(); 3499 } 3500 3501 /* set the cpu timer - may only be called from the VCPU thread itself */ 3502 
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm) 3503 { 3504 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3505 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 3506 if (vcpu->arch.cputm_enabled) 3507 vcpu->arch.cputm_start = get_tod_clock_fast(); 3508 vcpu->arch.sie_block->cputm = cputm; 3509 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 3510 preempt_enable(); 3511 } 3512 3513 /* update and get the cpu timer - can also be called from other VCPU threads */ 3514 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu) 3515 { 3516 unsigned int seq; 3517 __u64 value; 3518 3519 if (unlikely(!vcpu->arch.cputm_enabled)) 3520 return vcpu->arch.sie_block->cputm; 3521 3522 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3523 do { 3524 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount); 3525 /* 3526 * If the writer would ever execute a read in the critical 3527 * section, e.g. in irq context, we have a deadlock. 3528 */ 3529 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu); 3530 value = vcpu->arch.sie_block->cputm; 3531 /* if cputm_start is 0, accounting is being started/stopped */ 3532 if (likely(vcpu->arch.cputm_start)) 3533 value -= get_tod_clock_fast() - vcpu->arch.cputm_start; 3534 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1)); 3535 preempt_enable(); 3536 return value; 3537 } 3538 3539 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 3540 { 3541 3542 gmap_enable(vcpu->arch.enabled_gmap); 3543 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING); 3544 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) 3545 __start_cpu_timer_accounting(vcpu); 3546 vcpu->cpu = cpu; 3547 } 3548 3549 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 3550 { 3551 vcpu->cpu = -1; 3552 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) 3553 __stop_cpu_timer_accounting(vcpu); 3554 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING); 3555 vcpu->arch.enabled_gmap = gmap_get_enabled(); 3556 gmap_disable(vcpu->arch.enabled_gmap); 3557 3558 } 3559 3560 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) 3561 { 3562 mutex_lock(&vcpu->kvm->lock); 3563 preempt_disable(); 3564 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch; 3565 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx; 3566 preempt_enable(); 3567 mutex_unlock(&vcpu->kvm->lock); 3568 if (!kvm_is_ucontrol(vcpu->kvm)) { 3569 vcpu->arch.gmap = vcpu->kvm->arch.gmap; 3570 sca_add_vcpu(vcpu); 3571 } 3572 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0) 3573 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC; 3574 /* make vcpu_load load the right gmap on the first trigger */ 3575 vcpu->arch.enabled_gmap = vcpu->arch.gmap; 3576 } 3577 3578 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr) 3579 { 3580 if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) && 3581 test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo)) 3582 return true; 3583 return false; 3584 } 3585 3586 static bool kvm_has_pckmo_ecc(struct kvm *kvm) 3587 { 3588 /* At least one ECC subfunction must be present */ 3589 return kvm_has_pckmo_subfunc(kvm, 32) || 3590 kvm_has_pckmo_subfunc(kvm, 33) || 3591 kvm_has_pckmo_subfunc(kvm, 34) || 3592 kvm_has_pckmo_subfunc(kvm, 40) || 3593 kvm_has_pckmo_subfunc(kvm, 41); 3594 3595 } 3596 3597 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu) 3598 { 3599 /* 3600 * If the AP instructions are not being interpreted and the MSAX3 3601 * facility is not configured for the guest, there is nothing to set up. 
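 *
 * Otherwise the code below mirrors the VM-wide crypto configuration into
 * the SIE block: ECA_APIE for interpreted AP instructions, ECB3_AES and
 * ECB3_DEA for protected key wrapping, and ECD_ECC when AES wrapping is on
 * and an ECC PCKMO subfunction is available.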
3602 */ 3603 if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76)) 3604 return; 3605 3606 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd; 3607 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA); 3608 vcpu->arch.sie_block->eca &= ~ECA_APIE; 3609 vcpu->arch.sie_block->ecd &= ~ECD_ECC; 3610 3611 if (vcpu->kvm->arch.crypto.apie) 3612 vcpu->arch.sie_block->eca |= ECA_APIE; 3613 3614 /* Set up protected key support */ 3615 if (vcpu->kvm->arch.crypto.aes_kw) { 3616 vcpu->arch.sie_block->ecb3 |= ECB3_AES; 3617 /* ecc is also wrapped with AES key */ 3618 if (kvm_has_pckmo_ecc(vcpu->kvm)) 3619 vcpu->arch.sie_block->ecd |= ECD_ECC; 3620 } 3621 3622 if (vcpu->kvm->arch.crypto.dea_kw) 3623 vcpu->arch.sie_block->ecb3 |= ECB3_DEA; 3624 } 3625 3626 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu) 3627 { 3628 free_page(vcpu->arch.sie_block->cbrlo); 3629 vcpu->arch.sie_block->cbrlo = 0; 3630 } 3631 3632 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu) 3633 { 3634 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT); 3635 if (!vcpu->arch.sie_block->cbrlo) 3636 return -ENOMEM; 3637 return 0; 3638 } 3639 3640 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu) 3641 { 3642 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model; 3643 3644 vcpu->arch.sie_block->ibc = model->ibc; 3645 if (test_kvm_facility(vcpu->kvm, 7)) 3646 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list; 3647 } 3648 3649 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu) 3650 { 3651 int rc = 0; 3652 u16 uvrc, uvrrc; 3653 3654 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | 3655 CPUSTAT_SM | 3656 CPUSTAT_STOPPED); 3657 3658 if (test_kvm_facility(vcpu->kvm, 78)) 3659 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2); 3660 else if (test_kvm_facility(vcpu->kvm, 8)) 3661 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED); 3662 3663 kvm_s390_vcpu_setup_model(vcpu); 3664 3665 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */ 3666 if (MACHINE_HAS_ESOP) 3667 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT; 3668 if (test_kvm_facility(vcpu->kvm, 9)) 3669 vcpu->arch.sie_block->ecb |= ECB_SRSI; 3670 if (test_kvm_facility(vcpu->kvm, 11)) 3671 vcpu->arch.sie_block->ecb |= ECB_PTF; 3672 if (test_kvm_facility(vcpu->kvm, 73)) 3673 vcpu->arch.sie_block->ecb |= ECB_TE; 3674 if (!kvm_is_ucontrol(vcpu->kvm)) 3675 vcpu->arch.sie_block->ecb |= ECB_SPECI; 3676 3677 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi) 3678 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI; 3679 if (test_kvm_facility(vcpu->kvm, 130)) 3680 vcpu->arch.sie_block->ecb2 |= ECB2_IEP; 3681 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI; 3682 if (sclp.has_cei) 3683 vcpu->arch.sie_block->eca |= ECA_CEI; 3684 if (sclp.has_ib) 3685 vcpu->arch.sie_block->eca |= ECA_IB; 3686 if (sclp.has_siif) 3687 vcpu->arch.sie_block->eca |= ECA_SII; 3688 if (sclp.has_sigpif) 3689 vcpu->arch.sie_block->eca |= ECA_SIGPI; 3690 if (test_kvm_facility(vcpu->kvm, 129)) { 3691 vcpu->arch.sie_block->eca |= ECA_VX; 3692 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; 3693 } 3694 if (test_kvm_facility(vcpu->kvm, 139)) 3695 vcpu->arch.sie_block->ecd |= ECD_MEF; 3696 if (test_kvm_facility(vcpu->kvm, 156)) 3697 vcpu->arch.sie_block->ecd |= ECD_ETOKENF; 3698 if (vcpu->arch.sie_block->gd) { 3699 vcpu->arch.sie_block->eca |= ECA_AIV; 3700 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u", 3701 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id); 3702 } 3703 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx) 3704 | 
SDNXC; 3705 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb; 3706 3707 if (sclp.has_kss) 3708 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS); 3709 else 3710 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; 3711 3712 if (vcpu->kvm->arch.use_cmma) { 3713 rc = kvm_s390_vcpu_setup_cmma(vcpu); 3714 if (rc) 3715 return rc; 3716 } 3717 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 3718 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; 3719 3720 vcpu->arch.sie_block->hpid = HPID_KVM; 3721 3722 kvm_s390_vcpu_crypto_setup(vcpu); 3723 3724 kvm_s390_vcpu_pci_setup(vcpu); 3725 3726 mutex_lock(&vcpu->kvm->lock); 3727 if (kvm_s390_pv_is_protected(vcpu->kvm)) { 3728 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc); 3729 if (rc) 3730 kvm_s390_vcpu_unsetup_cmma(vcpu); 3731 } 3732 mutex_unlock(&vcpu->kvm->lock); 3733 3734 return rc; 3735 } 3736 3737 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) 3738 { 3739 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id)) 3740 return -EINVAL; 3741 return 0; 3742 } 3743 3744 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) 3745 { 3746 struct sie_page *sie_page; 3747 int rc; 3748 3749 BUILD_BUG_ON(sizeof(struct sie_page) != 4096); 3750 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT); 3751 if (!sie_page) 3752 return -ENOMEM; 3753 3754 vcpu->arch.sie_block = &sie_page->sie_block; 3755 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb; 3756 3757 /* the real guest size will always be smaller than msl */ 3758 vcpu->arch.sie_block->mso = 0; 3759 vcpu->arch.sie_block->msl = sclp.hamax; 3760 3761 vcpu->arch.sie_block->icpua = vcpu->vcpu_id; 3762 spin_lock_init(&vcpu->arch.local_int.lock); 3763 vcpu->arch.sie_block->gd = kvm_s390_get_gisa_desc(vcpu->kvm); 3764 seqcount_init(&vcpu->arch.cputm_seqcount); 3765 3766 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 3767 kvm_clear_async_pf_completion_queue(vcpu); 3768 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX | 3769 KVM_SYNC_GPRS | 3770 KVM_SYNC_ACRS | 3771 KVM_SYNC_CRS | 3772 KVM_SYNC_ARCH0 | 3773 KVM_SYNC_PFAULT | 3774 KVM_SYNC_DIAG318; 3775 kvm_s390_set_prefix(vcpu, 0); 3776 if (test_kvm_facility(vcpu->kvm, 64)) 3777 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; 3778 if (test_kvm_facility(vcpu->kvm, 82)) 3779 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC; 3780 if (test_kvm_facility(vcpu->kvm, 133)) 3781 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB; 3782 if (test_kvm_facility(vcpu->kvm, 156)) 3783 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN; 3784 /* fprs can be synchronized via vrs, even if the guest has no vx. With 3785 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format. 
3786 */ 3787 if (MACHINE_HAS_VX) 3788 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS; 3789 else 3790 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS; 3791 3792 if (kvm_is_ucontrol(vcpu->kvm)) { 3793 rc = __kvm_ucontrol_vcpu_init(vcpu); 3794 if (rc) 3795 goto out_free_sie_block; 3796 } 3797 3798 VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", 3799 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block); 3800 trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block); 3801 3802 rc = kvm_s390_vcpu_setup(vcpu); 3803 if (rc) 3804 goto out_ucontrol_uninit; 3805 3806 kvm_s390_update_topology_change_report(vcpu->kvm, 1); 3807 return 0; 3808 3809 out_ucontrol_uninit: 3810 if (kvm_is_ucontrol(vcpu->kvm)) 3811 gmap_remove(vcpu->arch.gmap); 3812 out_free_sie_block: 3813 free_page((unsigned long)(vcpu->arch.sie_block)); 3814 return rc; 3815 } 3816 3817 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 3818 { 3819 clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask); 3820 return kvm_s390_vcpu_has_irq(vcpu, 0); 3821 } 3822 3823 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) 3824 { 3825 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE); 3826 } 3827 3828 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu) 3829 { 3830 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); 3831 exit_sie(vcpu); 3832 } 3833 3834 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu) 3835 { 3836 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); 3837 } 3838 3839 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu) 3840 { 3841 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20); 3842 exit_sie(vcpu); 3843 } 3844 3845 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu) 3846 { 3847 return atomic_read(&vcpu->arch.sie_block->prog20) & 3848 (PROG_BLOCK_SIE | PROG_REQUEST); 3849 } 3850 3851 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu) 3852 { 3853 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20); 3854 } 3855 3856 /* 3857 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running. 3858 * If the CPU is not running (e.g. waiting as idle) the function will 3859 * return immediately. 
*/ 3860 void exit_sie(struct kvm_vcpu *vcpu) 3861 { 3862 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT); 3863 kvm_s390_vsie_kick(vcpu); 3864 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE) 3865 cpu_relax(); 3866 } 3867 3868 /* Kick a guest cpu out of SIE to process a request synchronously */ 3869 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu) 3870 { 3871 __kvm_make_request(req, vcpu); 3872 kvm_s390_vcpu_request(vcpu); 3873 } 3874 3875 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, 3876 unsigned long end) 3877 { 3878 struct kvm *kvm = gmap->private; 3879 struct kvm_vcpu *vcpu; 3880 unsigned long prefix; 3881 unsigned long i; 3882 3883 if (gmap_is_shadow(gmap)) 3884 return; 3885 if (start >= 1UL << 31) 3886 /* We are only interested in prefix pages */ 3887 return; 3888 kvm_for_each_vcpu(i, vcpu, kvm) { 3889 /* match against both prefix pages */ 3890 prefix = kvm_s390_get_prefix(vcpu); 3891 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) { 3892 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx", 3893 start, end); 3894 kvm_s390_sync_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu); 3895 } 3896 } 3897 } 3898 3899 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) 3900 { 3901 /* do not poll with more than halt_poll_max_steal percent of steal time */ 3902 if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >= 3903 READ_ONCE(halt_poll_max_steal)) { 3904 vcpu->stat.halt_no_poll_steal++; 3905 return true; 3906 } 3907 return false; 3908 } 3909 3910 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) 3911 { 3912 /* kvm common code refers to this, but never calls it */ 3913 BUG(); 3914 return 0; 3915 } 3916 3917 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, 3918 struct kvm_one_reg *reg) 3919 { 3920 int r = -EINVAL; 3921 3922 switch (reg->id) { 3923 case KVM_REG_S390_TODPR: 3924 r = put_user(vcpu->arch.sie_block->todpr, 3925 (u32 __user *)reg->addr); 3926 break; 3927 case KVM_REG_S390_EPOCHDIFF: 3928 r = put_user(vcpu->arch.sie_block->epoch, 3929 (u64 __user *)reg->addr); 3930 break; 3931 case KVM_REG_S390_CPU_TIMER: 3932 r = put_user(kvm_s390_get_cpu_timer(vcpu), 3933 (u64 __user *)reg->addr); 3934 break; 3935 case KVM_REG_S390_CLOCK_COMP: 3936 r = put_user(vcpu->arch.sie_block->ckc, 3937 (u64 __user *)reg->addr); 3938 break; 3939 case KVM_REG_S390_PFTOKEN: 3940 r = put_user(vcpu->arch.pfault_token, 3941 (u64 __user *)reg->addr); 3942 break; 3943 case KVM_REG_S390_PFCOMPARE: 3944 r = put_user(vcpu->arch.pfault_compare, 3945 (u64 __user *)reg->addr); 3946 break; 3947 case KVM_REG_S390_PFSELECT: 3948 r = put_user(vcpu->arch.pfault_select, 3949 (u64 __user *)reg->addr); 3950 break; 3951 case KVM_REG_S390_PP: 3952 r = put_user(vcpu->arch.sie_block->pp, 3953 (u64 __user *)reg->addr); 3954 break; 3955 case KVM_REG_S390_GBEA: 3956 r = put_user(vcpu->arch.sie_block->gbea, 3957 (u64 __user *)reg->addr); 3958 break; 3959 default: 3960 break; 3961 } 3962 3963 return r; 3964 } 3965 3966 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, 3967 struct kvm_one_reg *reg) 3968 { 3969 int r = -EINVAL; 3970 __u64 val; 3971 3972 switch (reg->id) { 3973 case KVM_REG_S390_TODPR: 3974 r = get_user(vcpu->arch.sie_block->todpr, 3975 (u32 __user *)reg->addr); 3976 break; 3977 case KVM_REG_S390_EPOCHDIFF: 3978 r = get_user(vcpu->arch.sie_block->epoch, 3979 (u64 __user *)reg->addr); 3980 break; 3981 case KVM_REG_S390_CPU_TIMER: 3982 r = get_user(val, (u64 __user *)reg->addr); 3983 if (!r) 3984 kvm_s390_set_cpu_timer(vcpu, val); 3985 break; 3986 case 
KVM_REG_S390_CLOCK_COMP: 3987 r = get_user(vcpu->arch.sie_block->ckc, 3988 (u64 __user *)reg->addr); 3989 break; 3990 case KVM_REG_S390_PFTOKEN: 3991 r = get_user(vcpu->arch.pfault_token, 3992 (u64 __user *)reg->addr); 3993 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 3994 kvm_clear_async_pf_completion_queue(vcpu); 3995 break; 3996 case KVM_REG_S390_PFCOMPARE: 3997 r = get_user(vcpu->arch.pfault_compare, 3998 (u64 __user *)reg->addr); 3999 break; 4000 case KVM_REG_S390_PFSELECT: 4001 r = get_user(vcpu->arch.pfault_select, 4002 (u64 __user *)reg->addr); 4003 break; 4004 case KVM_REG_S390_PP: 4005 r = get_user(vcpu->arch.sie_block->pp, 4006 (u64 __user *)reg->addr); 4007 break; 4008 case KVM_REG_S390_GBEA: 4009 r = get_user(vcpu->arch.sie_block->gbea, 4010 (u64 __user *)reg->addr); 4011 break; 4012 default: 4013 break; 4014 } 4015 4016 return r; 4017 } 4018 4019 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu) 4020 { 4021 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI; 4022 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 4023 memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb)); 4024 4025 kvm_clear_async_pf_completion_queue(vcpu); 4026 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) 4027 kvm_s390_vcpu_stop(vcpu); 4028 kvm_s390_clear_local_irqs(vcpu); 4029 } 4030 4031 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) 4032 { 4033 /* Initial reset is a superset of the normal reset */ 4034 kvm_arch_vcpu_ioctl_normal_reset(vcpu); 4035 4036 /* 4037 * This equals initial cpu reset in pop, but we don't switch to ESA. 4038 * We do not only reset the internal data, but also ... 4039 */ 4040 vcpu->arch.sie_block->gpsw.mask = 0; 4041 vcpu->arch.sie_block->gpsw.addr = 0; 4042 kvm_s390_set_prefix(vcpu, 0); 4043 kvm_s390_set_cpu_timer(vcpu, 0); 4044 vcpu->arch.sie_block->ckc = 0; 4045 memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr)); 4046 vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK; 4047 vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK; 4048 4049 /* ... the data in sync regs */ 4050 memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs)); 4051 vcpu->run->s.regs.ckc = 0; 4052 vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK; 4053 vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK; 4054 vcpu->run->psw_addr = 0; 4055 vcpu->run->psw_mask = 0; 4056 vcpu->run->s.regs.todpr = 0; 4057 vcpu->run->s.regs.cputm = 0; 4058 vcpu->run->s.regs.ckc = 0; 4059 vcpu->run->s.regs.pp = 0; 4060 vcpu->run->s.regs.gbea = 1; 4061 vcpu->run->s.regs.fpc = 0; 4062 /* 4063 * Do not reset these registers in the protected case, as some of 4064 * them are overlayed and they are not accessible in this case 4065 * anyway. 
 */
	if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
		vcpu->arch.sie_block->gbea = 1;
		vcpu->arch.sie_block->pp = 0;
		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
		vcpu->arch.sie_block->todpr = 0;
	}
}

static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
{
	struct kvm_sync_regs *regs = &vcpu->run->s.regs;

	/* Clear reset is a superset of the initial reset */
	kvm_arch_vcpu_ioctl_initial_reset(vcpu);

	memset(&regs->gprs, 0, sizeof(regs->gprs));
	memset(&regs->vrs, 0, sizeof(regs->vrs));
	memset(&regs->acrs, 0, sizeof(regs->acrs));
	memset(&regs->gscb, 0, sizeof(regs->gscb));

	regs->etoken = 0;
	regs->etoken_extension = 0;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	vcpu_load(vcpu);
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	vcpu_load(vcpu);
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	vcpu_load(vcpu);

	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));

	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	vcpu_load(vcpu);

	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));

	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	int ret = 0;

	vcpu_load(vcpu);

	if (test_fp_ctl(fpu->fpc)) {
		ret = -EINVAL;
		goto out;
	}
	vcpu->run->s.regs.fpc = fpu->fpc;
	if (MACHINE_HAS_VX)
		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
				 (freg_t *) fpu->fprs);
	else
		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));

out:
	vcpu_put(vcpu);
	return ret;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	vcpu_load(vcpu);

	/* make sure we have the latest values */
	save_fpu_regs();
	if (MACHINE_HAS_VX)
		convert_vx_to_fp((freg_t *) fpu->fprs,
				 (__vector128 *) vcpu->run->s.regs.vrs);
	else
		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->run->s.regs.fpc;

	vcpu_put(vcpu);
	return 0;
}

static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!is_vcpu_stopped(vcpu))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}

#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	int rc = 0;

	vcpu_load(vcpu);

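	/* Start from a clean slate: drop any previously configured debug setup */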
4200 vcpu->guest_debug = 0; 4201 kvm_s390_clear_bp_data(vcpu); 4202 4203 if (dbg->control & ~VALID_GUESTDBG_FLAGS) { 4204 rc = -EINVAL; 4205 goto out; 4206 } 4207 if (!sclp.has_gpere) { 4208 rc = -EINVAL; 4209 goto out; 4210 } 4211 4212 if (dbg->control & KVM_GUESTDBG_ENABLE) { 4213 vcpu->guest_debug = dbg->control; 4214 /* enforce guest PER */ 4215 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P); 4216 4217 if (dbg->control & KVM_GUESTDBG_USE_HW_BP) 4218 rc = kvm_s390_import_bp_data(vcpu, dbg); 4219 } else { 4220 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P); 4221 vcpu->arch.guestdbg.last_bp = 0; 4222 } 4223 4224 if (rc) { 4225 vcpu->guest_debug = 0; 4226 kvm_s390_clear_bp_data(vcpu); 4227 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P); 4228 } 4229 4230 out: 4231 vcpu_put(vcpu); 4232 return rc; 4233 } 4234 4235 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, 4236 struct kvm_mp_state *mp_state) 4237 { 4238 int ret; 4239 4240 vcpu_load(vcpu); 4241 4242 /* CHECK_STOP and LOAD are not supported yet */ 4243 ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED : 4244 KVM_MP_STATE_OPERATING; 4245 4246 vcpu_put(vcpu); 4247 return ret; 4248 } 4249 4250 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, 4251 struct kvm_mp_state *mp_state) 4252 { 4253 int rc = 0; 4254 4255 vcpu_load(vcpu); 4256 4257 /* user space knows about this interface - let it control the state */ 4258 kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm); 4259 4260 switch (mp_state->mp_state) { 4261 case KVM_MP_STATE_STOPPED: 4262 rc = kvm_s390_vcpu_stop(vcpu); 4263 break; 4264 case KVM_MP_STATE_OPERATING: 4265 rc = kvm_s390_vcpu_start(vcpu); 4266 break; 4267 case KVM_MP_STATE_LOAD: 4268 if (!kvm_s390_pv_cpu_is_protected(vcpu)) { 4269 rc = -ENXIO; 4270 break; 4271 } 4272 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD); 4273 break; 4274 case KVM_MP_STATE_CHECK_STOP: 4275 fallthrough; /* CHECK_STOP and LOAD are not supported yet */ 4276 default: 4277 rc = -ENXIO; 4278 } 4279 4280 vcpu_put(vcpu); 4281 return rc; 4282 } 4283 4284 static bool ibs_enabled(struct kvm_vcpu *vcpu) 4285 { 4286 return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS); 4287 } 4288 4289 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) 4290 { 4291 retry: 4292 kvm_s390_vcpu_request_handled(vcpu); 4293 if (!kvm_request_pending(vcpu)) 4294 return 0; 4295 /* 4296 * If the guest prefix changed, re-arm the ipte notifier for the 4297 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock. 4298 * This ensures that the ipte instruction for this request has 4299 * already finished. We might race against a second unmapper that 4300 * wants to set the blocking bit. Lets just retry the request loop. 
4301 */ 4302 if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) { 4303 int rc; 4304 rc = gmap_mprotect_notify(vcpu->arch.gmap, 4305 kvm_s390_get_prefix(vcpu), 4306 PAGE_SIZE * 2, PROT_WRITE); 4307 if (rc) { 4308 kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu); 4309 return rc; 4310 } 4311 goto retry; 4312 } 4313 4314 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) { 4315 vcpu->arch.sie_block->ihcpu = 0xffff; 4316 goto retry; 4317 } 4318 4319 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) { 4320 if (!ibs_enabled(vcpu)) { 4321 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1); 4322 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS); 4323 } 4324 goto retry; 4325 } 4326 4327 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) { 4328 if (ibs_enabled(vcpu)) { 4329 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0); 4330 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS); 4331 } 4332 goto retry; 4333 } 4334 4335 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) { 4336 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC; 4337 goto retry; 4338 } 4339 4340 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) { 4341 /* 4342 * Disable CMM virtualization; we will emulate the ESSA 4343 * instruction manually, in order to provide additional 4344 * functionalities needed for live migration. 4345 */ 4346 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA; 4347 goto retry; 4348 } 4349 4350 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) { 4351 /* 4352 * Re-enable CMM virtualization if CMMA is available and 4353 * CMM has been used. 4354 */ 4355 if ((vcpu->kvm->arch.use_cmma) && 4356 (vcpu->kvm->mm->context.uses_cmm)) 4357 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA; 4358 goto retry; 4359 } 4360 4361 /* we left the vsie handler, nothing to do, just clear the request */ 4362 kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu); 4363 4364 return 0; 4365 } 4366 4367 static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) 4368 { 4369 struct kvm_vcpu *vcpu; 4370 union tod_clock clk; 4371 unsigned long i; 4372 4373 preempt_disable(); 4374 4375 store_tod_clock_ext(&clk); 4376 4377 kvm->arch.epoch = gtod->tod - clk.tod; 4378 kvm->arch.epdx = 0; 4379 if (test_kvm_facility(kvm, 139)) { 4380 kvm->arch.epdx = gtod->epoch_idx - clk.ei; 4381 if (kvm->arch.epoch > gtod->tod) 4382 kvm->arch.epdx -= 1; 4383 } 4384 4385 kvm_s390_vcpu_block_all(kvm); 4386 kvm_for_each_vcpu(i, vcpu, kvm) { 4387 vcpu->arch.sie_block->epoch = kvm->arch.epoch; 4388 vcpu->arch.sie_block->epdx = kvm->arch.epdx; 4389 } 4390 4391 kvm_s390_vcpu_unblock_all(kvm); 4392 preempt_enable(); 4393 } 4394 4395 int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) 4396 { 4397 if (!mutex_trylock(&kvm->lock)) 4398 return 0; 4399 __kvm_s390_set_tod_clock(kvm, gtod); 4400 mutex_unlock(&kvm->lock); 4401 return 1; 4402 } 4403 4404 /** 4405 * kvm_arch_fault_in_page - fault-in guest page if necessary 4406 * @vcpu: The corresponding virtual cpu 4407 * @gpa: Guest physical address 4408 * @writable: Whether the page should be writable or not 4409 * 4410 * Make sure that a guest page has been faulted-in on the host. 4411 * 4412 * Return: Zero on success, negative error code otherwise. 4413 */ 4414 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable) 4415 { 4416 return gmap_fault(vcpu->arch.gmap, gpa, 4417 writable ? 
FAULT_FLAG_WRITE : 0); 4418 } 4419 4420 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token, 4421 unsigned long token) 4422 { 4423 struct kvm_s390_interrupt inti; 4424 struct kvm_s390_irq irq; 4425 4426 if (start_token) { 4427 irq.u.ext.ext_params2 = token; 4428 irq.type = KVM_S390_INT_PFAULT_INIT; 4429 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq)); 4430 } else { 4431 inti.type = KVM_S390_INT_PFAULT_DONE; 4432 inti.parm64 = token; 4433 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti)); 4434 } 4435 } 4436 4437 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, 4438 struct kvm_async_pf *work) 4439 { 4440 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token); 4441 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token); 4442 4443 return true; 4444 } 4445 4446 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, 4447 struct kvm_async_pf *work) 4448 { 4449 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token); 4450 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token); 4451 } 4452 4453 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, 4454 struct kvm_async_pf *work) 4455 { 4456 /* s390 will always inject the page directly */ 4457 } 4458 4459 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu) 4460 { 4461 /* 4462 * s390 will always inject the page directly, 4463 * but we still want check_async_completion to cleanup 4464 */ 4465 return true; 4466 } 4467 4468 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu) 4469 { 4470 hva_t hva; 4471 struct kvm_arch_async_pf arch; 4472 4473 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 4474 return false; 4475 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) != 4476 vcpu->arch.pfault_compare) 4477 return false; 4478 if (psw_extint_disabled(vcpu)) 4479 return false; 4480 if (kvm_s390_vcpu_has_irq(vcpu, 0)) 4481 return false; 4482 if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK)) 4483 return false; 4484 if (!vcpu->arch.gmap->pfault_enabled) 4485 return false; 4486 4487 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr)); 4488 hva += current->thread.gmap_addr & ~PAGE_MASK; 4489 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8)) 4490 return false; 4491 4492 return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch); 4493 } 4494 4495 static int vcpu_pre_run(struct kvm_vcpu *vcpu) 4496 { 4497 int rc, cpuflags; 4498 4499 /* 4500 * On s390 notifications for arriving pages will be delivered directly 4501 * to the guest but the house keeping for completed pfaults is 4502 * handled outside the worker. 
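	 * kvm_check_async_pf_completion() below takes care of that housekeeping.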
4503 */ 4504 kvm_check_async_pf_completion(vcpu); 4505 4506 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14]; 4507 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15]; 4508 4509 if (need_resched()) 4510 schedule(); 4511 4512 if (!kvm_is_ucontrol(vcpu->kvm)) { 4513 rc = kvm_s390_deliver_pending_interrupts(vcpu); 4514 if (rc) 4515 return rc; 4516 } 4517 4518 rc = kvm_s390_handle_requests(vcpu); 4519 if (rc) 4520 return rc; 4521 4522 if (guestdbg_enabled(vcpu)) { 4523 kvm_s390_backup_guest_per_regs(vcpu); 4524 kvm_s390_patch_guest_per_regs(vcpu); 4525 } 4526 4527 clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask); 4528 4529 vcpu->arch.sie_block->icptcode = 0; 4530 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags); 4531 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags); 4532 trace_kvm_s390_sie_enter(vcpu, cpuflags); 4533 4534 return 0; 4535 } 4536 4537 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) 4538 { 4539 struct kvm_s390_pgm_info pgm_info = { 4540 .code = PGM_ADDRESSING, 4541 }; 4542 u8 opcode, ilen; 4543 int rc; 4544 4545 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); 4546 trace_kvm_s390_sie_fault(vcpu); 4547 4548 /* 4549 * We want to inject an addressing exception, which is defined as a 4550 * suppressing or terminating exception. However, since we came here 4551 * by a DAT access exception, the PSW still points to the faulting 4552 * instruction since DAT exceptions are nullifying. So we've got 4553 * to look up the current opcode to get the length of the instruction 4554 * to be able to forward the PSW. 4555 */ 4556 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1); 4557 ilen = insn_length(opcode); 4558 if (rc < 0) { 4559 return rc; 4560 } else if (rc) { 4561 /* Instruction-Fetching Exceptions - we can't detect the ilen. 4562 * Forward by arbitrary ilc, injection will take care of 4563 * nullification if necessary. 
4564 */ 4565 pgm_info = vcpu->arch.pgm; 4566 ilen = 4; 4567 } 4568 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID; 4569 kvm_s390_forward_psw(vcpu, ilen); 4570 return kvm_s390_inject_prog_irq(vcpu, &pgm_info); 4571 } 4572 4573 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) 4574 { 4575 struct mcck_volatile_info *mcck_info; 4576 struct sie_page *sie_page; 4577 4578 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", 4579 vcpu->arch.sie_block->icptcode); 4580 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); 4581 4582 if (guestdbg_enabled(vcpu)) 4583 kvm_s390_restore_guest_per_regs(vcpu); 4584 4585 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14; 4586 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15; 4587 4588 if (exit_reason == -EINTR) { 4589 VCPU_EVENT(vcpu, 3, "%s", "machine check"); 4590 sie_page = container_of(vcpu->arch.sie_block, 4591 struct sie_page, sie_block); 4592 mcck_info = &sie_page->mcck_info; 4593 kvm_s390_reinject_machine_check(vcpu, mcck_info); 4594 return 0; 4595 } 4596 4597 if (vcpu->arch.sie_block->icptcode > 0) { 4598 int rc = kvm_handle_sie_intercept(vcpu); 4599 4600 if (rc != -EOPNOTSUPP) 4601 return rc; 4602 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC; 4603 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; 4604 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; 4605 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; 4606 return -EREMOTE; 4607 } else if (exit_reason != -EFAULT) { 4608 vcpu->stat.exit_null++; 4609 return 0; 4610 } else if (kvm_is_ucontrol(vcpu->kvm)) { 4611 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; 4612 vcpu->run->s390_ucontrol.trans_exc_code = 4613 current->thread.gmap_addr; 4614 vcpu->run->s390_ucontrol.pgm_code = 0x10; 4615 return -EREMOTE; 4616 } else if (current->thread.gmap_pfault) { 4617 trace_kvm_s390_major_guest_pfault(vcpu); 4618 current->thread.gmap_pfault = 0; 4619 if (kvm_arch_setup_async_pf(vcpu)) 4620 return 0; 4621 vcpu->stat.pfault_sync++; 4622 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1); 4623 } 4624 return vcpu_post_run_fault_in_sie(vcpu); 4625 } 4626 4627 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK) 4628 static int __vcpu_run(struct kvm_vcpu *vcpu) 4629 { 4630 int rc, exit_reason; 4631 struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block; 4632 4633 /* 4634 * We try to hold kvm->srcu during most of vcpu_run (except when run- 4635 * ning the guest), so that memslots (and other stuff) are protected 4636 */ 4637 kvm_vcpu_srcu_read_lock(vcpu); 4638 4639 do { 4640 rc = vcpu_pre_run(vcpu); 4641 if (rc) 4642 break; 4643 4644 kvm_vcpu_srcu_read_unlock(vcpu); 4645 /* 4646 * As PF_VCPU will be used in fault handler, between 4647 * guest_enter and guest_exit should be no uaccess. 4648 */ 4649 local_irq_disable(); 4650 guest_enter_irqoff(); 4651 __disable_cpu_timer_accounting(vcpu); 4652 local_irq_enable(); 4653 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4654 memcpy(sie_page->pv_grregs, 4655 vcpu->run->s.regs.gprs, 4656 sizeof(sie_page->pv_grregs)); 4657 } 4658 if (test_cpu_flag(CIF_FPU)) 4659 load_fpu_regs(); 4660 exit_reason = sie64a(vcpu->arch.sie_block, 4661 vcpu->run->s.regs.gprs); 4662 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4663 memcpy(vcpu->run->s.regs.gprs, 4664 sie_page->pv_grregs, 4665 sizeof(sie_page->pv_grregs)); 4666 /* 4667 * We're not allowed to inject interrupts on intercepts 4668 * that leave the guest state in an "in-between" state 4669 * where the next SIE entry will do a continuation. 
4670 * Fence interrupts in our "internal" PSW. 4671 */ 4672 if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR || 4673 vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) { 4674 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK; 4675 } 4676 } 4677 local_irq_disable(); 4678 __enable_cpu_timer_accounting(vcpu); 4679 guest_exit_irqoff(); 4680 local_irq_enable(); 4681 kvm_vcpu_srcu_read_lock(vcpu); 4682 4683 rc = vcpu_post_run(vcpu, exit_reason); 4684 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc); 4685 4686 kvm_vcpu_srcu_read_unlock(vcpu); 4687 return rc; 4688 } 4689 4690 static void sync_regs_fmt2(struct kvm_vcpu *vcpu) 4691 { 4692 struct kvm_run *kvm_run = vcpu->run; 4693 struct runtime_instr_cb *riccb; 4694 struct gs_cb *gscb; 4695 4696 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb; 4697 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb; 4698 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; 4699 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; 4700 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) { 4701 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr; 4702 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp; 4703 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea; 4704 } 4705 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) { 4706 vcpu->arch.pfault_token = kvm_run->s.regs.pft; 4707 vcpu->arch.pfault_select = kvm_run->s.regs.pfs; 4708 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc; 4709 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 4710 kvm_clear_async_pf_completion_queue(vcpu); 4711 } 4712 if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) { 4713 vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318; 4714 vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc; 4715 VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc); 4716 } 4717 /* 4718 * If userspace sets the riccb (e.g. after migration) to a valid state, 4719 * we should enable RI here instead of doing the lazy enablement. 4720 */ 4721 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) && 4722 test_kvm_facility(vcpu->kvm, 64) && 4723 riccb->v && 4724 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) { 4725 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)"); 4726 vcpu->arch.sie_block->ecb3 |= ECB3_RI; 4727 } 4728 /* 4729 * If userspace sets the gscb (e.g. after migration) to non-zero, 4730 * we should enable GS here instead of doing the lazy enablement. 4731 */ 4732 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) && 4733 test_kvm_facility(vcpu->kvm, 133) && 4734 gscb->gssm && 4735 !vcpu->arch.gs_enabled) { 4736 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)"); 4737 vcpu->arch.sie_block->ecb |= ECB_GS; 4738 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; 4739 vcpu->arch.gs_enabled = 1; 4740 } 4741 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) && 4742 test_kvm_facility(vcpu->kvm, 82)) { 4743 vcpu->arch.sie_block->fpf &= ~FPF_BPBC; 4744 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? 
FPF_BPBC : 0; 4745 } 4746 if (MACHINE_HAS_GS) { 4747 preempt_disable(); 4748 __ctl_set_bit(2, 4); 4749 if (current->thread.gs_cb) { 4750 vcpu->arch.host_gscb = current->thread.gs_cb; 4751 save_gs_cb(vcpu->arch.host_gscb); 4752 } 4753 if (vcpu->arch.gs_enabled) { 4754 current->thread.gs_cb = (struct gs_cb *) 4755 &vcpu->run->s.regs.gscb; 4756 restore_gs_cb(current->thread.gs_cb); 4757 } 4758 preempt_enable(); 4759 } 4760 /* SIE will load etoken directly from SDNX and therefore kvm_run */ 4761 } 4762 4763 static void sync_regs(struct kvm_vcpu *vcpu) 4764 { 4765 struct kvm_run *kvm_run = vcpu->run; 4766 4767 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) 4768 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); 4769 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) { 4770 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128); 4771 /* some control register changes require a tlb flush */ 4772 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 4773 } 4774 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) { 4775 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm); 4776 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc; 4777 } 4778 save_access_regs(vcpu->arch.host_acrs); 4779 restore_access_regs(vcpu->run->s.regs.acrs); 4780 /* save host (userspace) fprs/vrs */ 4781 save_fpu_regs(); 4782 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc; 4783 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs; 4784 if (MACHINE_HAS_VX) 4785 current->thread.fpu.regs = vcpu->run->s.regs.vrs; 4786 else 4787 current->thread.fpu.regs = vcpu->run->s.regs.fprs; 4788 current->thread.fpu.fpc = vcpu->run->s.regs.fpc; 4789 if (test_fp_ctl(current->thread.fpu.fpc)) 4790 /* User space provided an invalid FPC, let's clear it */ 4791 current->thread.fpu.fpc = 0; 4792 4793 /* Sync fmt2 only data */ 4794 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) { 4795 sync_regs_fmt2(vcpu); 4796 } else { 4797 /* 4798 * In several places we have to modify our internal view to 4799 * not do things that are disallowed by the ultravisor. For 4800 * example we must not inject interrupts after specific exits 4801 * (e.g. 112 prefix page not secure). We do this by turning 4802 * off the machine check, external and I/O interrupt bits 4803 * of our PSW copy. To avoid getting validity intercepts, we 4804 * do only accept the condition code from userspace. 
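		 * Below, only the CC bits of the PSW mask provided by userspace are merged in.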
4805 */ 4806 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC; 4807 vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask & 4808 PSW_MASK_CC; 4809 } 4810 4811 kvm_run->kvm_dirty_regs = 0; 4812 } 4813 4814 static void store_regs_fmt2(struct kvm_vcpu *vcpu) 4815 { 4816 struct kvm_run *kvm_run = vcpu->run; 4817 4818 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr; 4819 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp; 4820 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea; 4821 kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC; 4822 kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val; 4823 if (MACHINE_HAS_GS) { 4824 preempt_disable(); 4825 __ctl_set_bit(2, 4); 4826 if (vcpu->arch.gs_enabled) 4827 save_gs_cb(current->thread.gs_cb); 4828 current->thread.gs_cb = vcpu->arch.host_gscb; 4829 restore_gs_cb(vcpu->arch.host_gscb); 4830 if (!vcpu->arch.host_gscb) 4831 __ctl_clear_bit(2, 4); 4832 vcpu->arch.host_gscb = NULL; 4833 preempt_enable(); 4834 } 4835 /* SIE will save etoken directly into SDNX and therefore kvm_run */ 4836 } 4837 4838 static void store_regs(struct kvm_vcpu *vcpu) 4839 { 4840 struct kvm_run *kvm_run = vcpu->run; 4841 4842 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask; 4843 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; 4844 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu); 4845 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128); 4846 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu); 4847 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc; 4848 kvm_run->s.regs.pft = vcpu->arch.pfault_token; 4849 kvm_run->s.regs.pfs = vcpu->arch.pfault_select; 4850 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare; 4851 save_access_regs(vcpu->run->s.regs.acrs); 4852 restore_access_regs(vcpu->arch.host_acrs); 4853 /* Save guest register state */ 4854 save_fpu_regs(); 4855 vcpu->run->s.regs.fpc = current->thread.fpu.fpc; 4856 /* Restore will be done lazily at return */ 4857 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc; 4858 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs; 4859 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) 4860 store_regs_fmt2(vcpu); 4861 } 4862 4863 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) 4864 { 4865 struct kvm_run *kvm_run = vcpu->run; 4866 int rc; 4867 4868 /* 4869 * Running a VM while dumping always has the potential to 4870 * produce inconsistent dump data. But for PV vcpus a SIE 4871 * entry while dumping could also lead to a fatal validity 4872 * intercept which we absolutely want to avoid. 
4873 */ 4874 if (vcpu->kvm->arch.pv.dumping) 4875 return -EINVAL; 4876 4877 if (kvm_run->immediate_exit) 4878 return -EINTR; 4879 4880 if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS || 4881 kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS) 4882 return -EINVAL; 4883 4884 vcpu_load(vcpu); 4885 4886 if (guestdbg_exit_pending(vcpu)) { 4887 kvm_s390_prepare_debug_exit(vcpu); 4888 rc = 0; 4889 goto out; 4890 } 4891 4892 kvm_sigset_activate(vcpu); 4893 4894 /* 4895 * no need to check the return value of vcpu_start as it can only have 4896 * an error for protvirt, but protvirt means user cpu state 4897 */ 4898 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) { 4899 kvm_s390_vcpu_start(vcpu); 4900 } else if (is_vcpu_stopped(vcpu)) { 4901 pr_err_ratelimited("can't run stopped vcpu %d\n", 4902 vcpu->vcpu_id); 4903 rc = -EINVAL; 4904 goto out; 4905 } 4906 4907 sync_regs(vcpu); 4908 enable_cpu_timer_accounting(vcpu); 4909 4910 might_fault(); 4911 rc = __vcpu_run(vcpu); 4912 4913 if (signal_pending(current) && !rc) { 4914 kvm_run->exit_reason = KVM_EXIT_INTR; 4915 rc = -EINTR; 4916 } 4917 4918 if (guestdbg_exit_pending(vcpu) && !rc) { 4919 kvm_s390_prepare_debug_exit(vcpu); 4920 rc = 0; 4921 } 4922 4923 if (rc == -EREMOTE) { 4924 /* userspace support is needed, kvm_run has been prepared */ 4925 rc = 0; 4926 } 4927 4928 disable_cpu_timer_accounting(vcpu); 4929 store_regs(vcpu); 4930 4931 kvm_sigset_deactivate(vcpu); 4932 4933 vcpu->stat.exit_userspace++; 4934 out: 4935 vcpu_put(vcpu); 4936 return rc; 4937 } 4938 4939 /* 4940 * store status at address 4941 * we use have two special cases: 4942 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit 4943 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix 4944 */ 4945 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa) 4946 { 4947 unsigned char archmode = 1; 4948 freg_t fprs[NUM_FPRS]; 4949 unsigned int px; 4950 u64 clkcomp, cputm; 4951 int rc; 4952 4953 px = kvm_s390_get_prefix(vcpu); 4954 if (gpa == KVM_S390_STORE_STATUS_NOADDR) { 4955 if (write_guest_abs(vcpu, 163, &archmode, 1)) 4956 return -EFAULT; 4957 gpa = 0; 4958 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) { 4959 if (write_guest_real(vcpu, 163, &archmode, 1)) 4960 return -EFAULT; 4961 gpa = px; 4962 } else 4963 gpa -= __LC_FPREGS_SAVE_AREA; 4964 4965 /* manually convert vector registers if necessary */ 4966 if (MACHINE_HAS_VX) { 4967 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs); 4968 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, 4969 fprs, 128); 4970 } else { 4971 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, 4972 vcpu->run->s.regs.fprs, 128); 4973 } 4974 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA, 4975 vcpu->run->s.regs.gprs, 128); 4976 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA, 4977 &vcpu->arch.sie_block->gpsw, 16); 4978 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA, 4979 &px, 4); 4980 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA, 4981 &vcpu->run->s.regs.fpc, 4); 4982 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA, 4983 &vcpu->arch.sie_block->todpr, 4); 4984 cputm = kvm_s390_get_cpu_timer(vcpu); 4985 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA, 4986 &cputm, 8); 4987 clkcomp = vcpu->arch.sie_block->ckc >> 8; 4988 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA, 4989 &clkcomp, 8); 4990 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA, 4991 &vcpu->run->s.regs.acrs, 64); 4992 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA, 4993 
&vcpu->arch.sie_block->gcr, 128); 4994 return rc ? -EFAULT : 0; 4995 } 4996 4997 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) 4998 { 4999 /* 5000 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy 5001 * switch in the run ioctl. Let's update our copies before we save 5002 * it into the save area 5003 */ 5004 save_fpu_regs(); 5005 vcpu->run->s.regs.fpc = current->thread.fpu.fpc; 5006 save_access_regs(vcpu->run->s.regs.acrs); 5007 5008 return kvm_s390_store_status_unloaded(vcpu, addr); 5009 } 5010 5011 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu) 5012 { 5013 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu); 5014 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu); 5015 } 5016 5017 static void __disable_ibs_on_all_vcpus(struct kvm *kvm) 5018 { 5019 unsigned long i; 5020 struct kvm_vcpu *vcpu; 5021 5022 kvm_for_each_vcpu(i, vcpu, kvm) { 5023 __disable_ibs_on_vcpu(vcpu); 5024 } 5025 } 5026 5027 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu) 5028 { 5029 if (!sclp.has_ibs) 5030 return; 5031 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu); 5032 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu); 5033 } 5034 5035 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu) 5036 { 5037 int i, online_vcpus, r = 0, started_vcpus = 0; 5038 5039 if (!is_vcpu_stopped(vcpu)) 5040 return 0; 5041 5042 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1); 5043 /* Only one cpu at a time may enter/leave the STOPPED state. */ 5044 spin_lock(&vcpu->kvm->arch.start_stop_lock); 5045 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); 5046 5047 /* Let's tell the UV that we want to change into the operating state */ 5048 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 5049 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR); 5050 if (r) { 5051 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 5052 return r; 5053 } 5054 } 5055 5056 for (i = 0; i < online_vcpus; i++) { 5057 if (!is_vcpu_stopped(kvm_get_vcpu(vcpu->kvm, i))) 5058 started_vcpus++; 5059 } 5060 5061 if (started_vcpus == 0) { 5062 /* we're the only active VCPU -> speed it up */ 5063 __enable_ibs_on_vcpu(vcpu); 5064 } else if (started_vcpus == 1) { 5065 /* 5066 * As we are starting a second VCPU, we have to disable 5067 * the IBS facility on all VCPUs to remove potentially 5068 * outstanding ENABLE requests. 5069 */ 5070 __disable_ibs_on_all_vcpus(vcpu->kvm); 5071 } 5072 5073 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED); 5074 /* 5075 * The real PSW might have changed due to a RESTART interpreted by the 5076 * ultravisor. We block all interrupts and let the next sie exit 5077 * refresh our view. 5078 */ 5079 if (kvm_s390_pv_cpu_is_protected(vcpu)) 5080 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK; 5081 /* 5082 * Another VCPU might have used IBS while we were offline. 5083 * Let's play safe and flush the VCPU at startup. 5084 */ 5085 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 5086 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 5087 return 0; 5088 } 5089 5090 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu) 5091 { 5092 int i, online_vcpus, r = 0, started_vcpus = 0; 5093 struct kvm_vcpu *started_vcpu = NULL; 5094 5095 if (is_vcpu_stopped(vcpu)) 5096 return 0; 5097 5098 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0); 5099 /* Only one cpu at a time may enter/leave the STOPPED state. 
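	 * It also keeps the started_vcpus accounting and the IBS handling below consistent.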
*/ 5100 spin_lock(&vcpu->kvm->arch.start_stop_lock); 5101 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); 5102 5103 /* Let's tell the UV that we want to change into the stopped state */ 5104 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 5105 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP); 5106 if (r) { 5107 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 5108 return r; 5109 } 5110 } 5111 5112 /* 5113 * Set the VCPU to STOPPED and THEN clear the interrupt flag, 5114 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders 5115 * have been fully processed. This will ensure that the VCPU 5116 * is kept BUSY if another VCPU is inquiring with SIGP SENSE. 5117 */ 5118 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED); 5119 kvm_s390_clear_stop_irq(vcpu); 5120 5121 __disable_ibs_on_vcpu(vcpu); 5122 5123 for (i = 0; i < online_vcpus; i++) { 5124 struct kvm_vcpu *tmp = kvm_get_vcpu(vcpu->kvm, i); 5125 5126 if (!is_vcpu_stopped(tmp)) { 5127 started_vcpus++; 5128 started_vcpu = tmp; 5129 } 5130 } 5131 5132 if (started_vcpus == 1) { 5133 /* 5134 * As we only have one VCPU left, we want to enable the 5135 * IBS facility for that VCPU to speed it up. 5136 */ 5137 __enable_ibs_on_vcpu(started_vcpu); 5138 } 5139 5140 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 5141 return 0; 5142 } 5143 5144 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, 5145 struct kvm_enable_cap *cap) 5146 { 5147 int r; 5148 5149 if (cap->flags) 5150 return -EINVAL; 5151 5152 switch (cap->cap) { 5153 case KVM_CAP_S390_CSS_SUPPORT: 5154 if (!vcpu->kvm->arch.css_support) { 5155 vcpu->kvm->arch.css_support = 1; 5156 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support"); 5157 trace_kvm_s390_enable_css(vcpu->kvm); 5158 } 5159 r = 0; 5160 break; 5161 default: 5162 r = -EINVAL; 5163 break; 5164 } 5165 return r; 5166 } 5167 5168 static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu, 5169 struct kvm_s390_mem_op *mop) 5170 { 5171 void __user *uaddr = (void __user *)mop->buf; 5172 int r = 0; 5173 5174 if (mop->flags || !mop->size) 5175 return -EINVAL; 5176 if (mop->size + mop->sida_offset < mop->size) 5177 return -EINVAL; 5178 if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block)) 5179 return -E2BIG; 5180 if (!kvm_s390_pv_cpu_is_protected(vcpu)) 5181 return -EINVAL; 5182 5183 switch (mop->op) { 5184 case KVM_S390_MEMOP_SIDA_READ: 5185 if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) + 5186 mop->sida_offset), mop->size)) 5187 r = -EFAULT; 5188 5189 break; 5190 case KVM_S390_MEMOP_SIDA_WRITE: 5191 if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) + 5192 mop->sida_offset), uaddr, mop->size)) 5193 r = -EFAULT; 5194 break; 5195 } 5196 return r; 5197 } 5198 5199 static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu, 5200 struct kvm_s390_mem_op *mop) 5201 { 5202 void __user *uaddr = (void __user *)mop->buf; 5203 void *tmpbuf = NULL; 5204 int r = 0; 5205 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION 5206 | KVM_S390_MEMOP_F_CHECK_ONLY 5207 | KVM_S390_MEMOP_F_SKEY_PROTECTION; 5208 5209 if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size) 5210 return -EINVAL; 5211 if (mop->size > MEM_OP_MAX_SIZE) 5212 return -E2BIG; 5213 if (kvm_s390_pv_cpu_is_protected(vcpu)) 5214 return -EINVAL; 5215 if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) { 5216 if (access_key_invalid(mop->key)) 5217 return -EINVAL; 5218 } else { 5219 mop->key = 0; 5220 } 5221 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) { 5222 tmpbuf = vmalloc(mop->size); 5223 if (!tmpbuf) 5224 return 
-ENOMEM; 5225 } 5226 5227 switch (mop->op) { 5228 case KVM_S390_MEMOP_LOGICAL_READ: 5229 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { 5230 r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, 5231 GACC_FETCH, mop->key); 5232 break; 5233 } 5234 r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf, 5235 mop->size, mop->key); 5236 if (r == 0) { 5237 if (copy_to_user(uaddr, tmpbuf, mop->size)) 5238 r = -EFAULT; 5239 } 5240 break; 5241 case KVM_S390_MEMOP_LOGICAL_WRITE: 5242 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { 5243 r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, 5244 GACC_STORE, mop->key); 5245 break; 5246 } 5247 if (copy_from_user(tmpbuf, uaddr, mop->size)) { 5248 r = -EFAULT; 5249 break; 5250 } 5251 r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf, 5252 mop->size, mop->key); 5253 break; 5254 } 5255 5256 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0) 5257 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); 5258 5259 vfree(tmpbuf); 5260 return r; 5261 } 5262 5263 static long kvm_s390_vcpu_memsida_op(struct kvm_vcpu *vcpu, 5264 struct kvm_s390_mem_op *mop) 5265 { 5266 int r, srcu_idx; 5267 5268 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 5269 5270 switch (mop->op) { 5271 case KVM_S390_MEMOP_LOGICAL_READ: 5272 case KVM_S390_MEMOP_LOGICAL_WRITE: 5273 r = kvm_s390_vcpu_mem_op(vcpu, mop); 5274 break; 5275 case KVM_S390_MEMOP_SIDA_READ: 5276 case KVM_S390_MEMOP_SIDA_WRITE: 5277 /* we are locked against sida going away by the vcpu->mutex */ 5278 r = kvm_s390_vcpu_sida_op(vcpu, mop); 5279 break; 5280 default: 5281 r = -EINVAL; 5282 } 5283 5284 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); 5285 return r; 5286 } 5287 5288 long kvm_arch_vcpu_async_ioctl(struct file *filp, 5289 unsigned int ioctl, unsigned long arg) 5290 { 5291 struct kvm_vcpu *vcpu = filp->private_data; 5292 void __user *argp = (void __user *)arg; 5293 5294 switch (ioctl) { 5295 case KVM_S390_IRQ: { 5296 struct kvm_s390_irq s390irq; 5297 5298 if (copy_from_user(&s390irq, argp, sizeof(s390irq))) 5299 return -EFAULT; 5300 return kvm_s390_inject_vcpu(vcpu, &s390irq); 5301 } 5302 case KVM_S390_INTERRUPT: { 5303 struct kvm_s390_interrupt s390int; 5304 struct kvm_s390_irq s390irq = {}; 5305 5306 if (copy_from_user(&s390int, argp, sizeof(s390int))) 5307 return -EFAULT; 5308 if (s390int_to_s390irq(&s390int, &s390irq)) 5309 return -EINVAL; 5310 return kvm_s390_inject_vcpu(vcpu, &s390irq); 5311 } 5312 } 5313 return -ENOIOCTLCMD; 5314 } 5315 5316 static int kvm_s390_handle_pv_vcpu_dump(struct kvm_vcpu *vcpu, 5317 struct kvm_pv_cmd *cmd) 5318 { 5319 struct kvm_s390_pv_dmp dmp; 5320 void *data; 5321 int ret; 5322 5323 /* Dump initialization is a prerequisite */ 5324 if (!vcpu->kvm->arch.pv.dumping) 5325 return -EINVAL; 5326 5327 if (copy_from_user(&dmp, (__u8 __user *)cmd->data, sizeof(dmp))) 5328 return -EFAULT; 5329 5330 /* We only handle this subcmd right now */ 5331 if (dmp.subcmd != KVM_PV_DUMP_CPU) 5332 return -EINVAL; 5333 5334 /* CPU dump length is the same as create cpu storage donation. 
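	 * (uv_info.guest_cpu_stor_len), so any other buffer length is rejected.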
 */
	if (dmp.buff_len != uv_info.guest_cpu_stor_len)
		return -EINVAL;

	data = kvzalloc(uv_info.guest_cpu_stor_len, GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	ret = kvm_s390_pv_dump_cpu(vcpu, data, &cmd->rc, &cmd->rrc);

	VCPU_EVENT(vcpu, 3, "PROTVIRT DUMP CPU %d rc %x rrc %x",
		   vcpu->vcpu_id, cmd->rc, cmd->rrc);

	if (ret)
		ret = -EINVAL;

	/* On success copy over the dump data */
	if (!ret && copy_to_user((__u8 __user *)dmp.buff_addr, data, uv_info.guest_cpu_stor_len))
		ret = -EFAULT;

	kvfree(data);
	return ret;
}

long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;
	u16 rc, rrc;

	vcpu_load(vcpu);

	switch (ioctl) {
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_store_status_unloaded(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_CLEAR_RESET:
		r = 0;
		kvm_arch_vcpu_ioctl_clear_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_S390_INITIAL_RESET:
		r = 0;
		kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET_INITIAL,
					  &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_S390_NORMAL_RESET:
		r = 0;
		kvm_arch_vcpu_ioctl_normal_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET, &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;
		r = -EINVAL;
		if (kvm_s390_pv_cpu_is_protected(vcpu))
			break;
		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case
KVM_S390_VCPU_FAULT: { 5467 r = gmap_fault(vcpu->arch.gmap, arg, 0); 5468 break; 5469 } 5470 case KVM_ENABLE_CAP: 5471 { 5472 struct kvm_enable_cap cap; 5473 r = -EFAULT; 5474 if (copy_from_user(&cap, argp, sizeof(cap))) 5475 break; 5476 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); 5477 break; 5478 } 5479 case KVM_S390_MEM_OP: { 5480 struct kvm_s390_mem_op mem_op; 5481 5482 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0) 5483 r = kvm_s390_vcpu_memsida_op(vcpu, &mem_op); 5484 else 5485 r = -EFAULT; 5486 break; 5487 } 5488 case KVM_S390_SET_IRQ_STATE: { 5489 struct kvm_s390_irq_state irq_state; 5490 5491 r = -EFAULT; 5492 if (copy_from_user(&irq_state, argp, sizeof(irq_state))) 5493 break; 5494 if (irq_state.len > VCPU_IRQS_MAX_BUF || 5495 irq_state.len == 0 || 5496 irq_state.len % sizeof(struct kvm_s390_irq) > 0) { 5497 r = -EINVAL; 5498 break; 5499 } 5500 /* do not use irq_state.flags, it will break old QEMUs */ 5501 r = kvm_s390_set_irq_state(vcpu, 5502 (void __user *) irq_state.buf, 5503 irq_state.len); 5504 break; 5505 } 5506 case KVM_S390_GET_IRQ_STATE: { 5507 struct kvm_s390_irq_state irq_state; 5508 5509 r = -EFAULT; 5510 if (copy_from_user(&irq_state, argp, sizeof(irq_state))) 5511 break; 5512 if (irq_state.len == 0) { 5513 r = -EINVAL; 5514 break; 5515 } 5516 /* do not use irq_state.flags, it will break old QEMUs */ 5517 r = kvm_s390_get_irq_state(vcpu, 5518 (__u8 __user *) irq_state.buf, 5519 irq_state.len); 5520 break; 5521 } 5522 case KVM_S390_PV_CPU_COMMAND: { 5523 struct kvm_pv_cmd cmd; 5524 5525 r = -EINVAL; 5526 if (!is_prot_virt_host()) 5527 break; 5528 5529 r = -EFAULT; 5530 if (copy_from_user(&cmd, argp, sizeof(cmd))) 5531 break; 5532 5533 r = -EINVAL; 5534 if (cmd.flags) 5535 break; 5536 5537 /* We only handle this cmd right now */ 5538 if (cmd.cmd != KVM_PV_DUMP) 5539 break; 5540 5541 r = kvm_s390_handle_pv_vcpu_dump(vcpu, &cmd); 5542 5543 /* Always copy over UV rc / rrc data */ 5544 if (copy_to_user((__u8 __user *)argp, &cmd.rc, 5545 sizeof(cmd.rc) + sizeof(cmd.rrc))) 5546 r = -EFAULT; 5547 break; 5548 } 5549 default: 5550 r = -ENOTTY; 5551 } 5552 5553 vcpu_put(vcpu); 5554 return r; 5555 } 5556 5557 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) 5558 { 5559 #ifdef CONFIG_KVM_S390_UCONTROL 5560 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET) 5561 && (kvm_is_ucontrol(vcpu->kvm))) { 5562 vmf->page = virt_to_page(vcpu->arch.sie_block); 5563 get_page(vmf->page); 5564 return 0; 5565 } 5566 #endif 5567 return VM_FAULT_SIGBUS; 5568 } 5569 5570 /* Section: memory related */ 5571 int kvm_arch_prepare_memory_region(struct kvm *kvm, 5572 const struct kvm_memory_slot *old, 5573 struct kvm_memory_slot *new, 5574 enum kvm_mr_change change) 5575 { 5576 gpa_t size; 5577 5578 /* When we are protected, we should not change the memory slots */ 5579 if (kvm_s390_pv_get_handle(kvm)) 5580 return -EINVAL; 5581 5582 if (change == KVM_MR_DELETE || change == KVM_MR_FLAGS_ONLY) 5583 return 0; 5584 5585 /* A few sanity checks. We can have memory slots which have to be 5586 located/ended at a segment boundary (1MB). The memory in userland is 5587 ok to be fragmented into various different vmas. 
It is okay to mmap() 5588 and munmap() stuff in this slot after doing this call at any time */ 5589 5590 if (new->userspace_addr & 0xffffful) 5591 return -EINVAL; 5592 5593 size = new->npages * PAGE_SIZE; 5594 if (size & 0xffffful) 5595 return -EINVAL; 5596 5597 if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit) 5598 return -EINVAL; 5599 5600 return 0; 5601 } 5602 5603 void kvm_arch_commit_memory_region(struct kvm *kvm, 5604 struct kvm_memory_slot *old, 5605 const struct kvm_memory_slot *new, 5606 enum kvm_mr_change change) 5607 { 5608 int rc = 0; 5609 5610 switch (change) { 5611 case KVM_MR_DELETE: 5612 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE, 5613 old->npages * PAGE_SIZE); 5614 break; 5615 case KVM_MR_MOVE: 5616 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE, 5617 old->npages * PAGE_SIZE); 5618 if (rc) 5619 break; 5620 fallthrough; 5621 case KVM_MR_CREATE: 5622 rc = gmap_map_segment(kvm->arch.gmap, new->userspace_addr, 5623 new->base_gfn * PAGE_SIZE, 5624 new->npages * PAGE_SIZE); 5625 break; 5626 case KVM_MR_FLAGS_ONLY: 5627 break; 5628 default: 5629 WARN(1, "Unknown KVM MR CHANGE: %d\n", change); 5630 } 5631 if (rc) 5632 pr_warn("failed to commit memory region\n"); 5633 return; 5634 } 5635 5636 static inline unsigned long nonhyp_mask(int i) 5637 { 5638 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30; 5639 5640 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4); 5641 } 5642 5643 static int __init kvm_s390_init(void) 5644 { 5645 int i; 5646 5647 if (!sclp.has_sief2) { 5648 pr_info("SIE is not available\n"); 5649 return -ENODEV; 5650 } 5651 5652 if (nested && hpage) { 5653 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n"); 5654 return -EINVAL; 5655 } 5656 5657 for (i = 0; i < 16; i++) 5658 kvm_s390_fac_base[i] |= 5659 stfle_fac_list[i] & nonhyp_mask(i); 5660 5661 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); 5662 } 5663 5664 static void __exit kvm_s390_exit(void) 5665 { 5666 kvm_exit(); 5667 } 5668 5669 module_init(kvm_s390_init); 5670 module_exit(kvm_s390_exit); 5671 5672 /* 5673 * Enable autoloading of the kvm module. 5674 * Note that we add the module alias here instead of virt/kvm/kvm_main.c 5675 * since x86 takes a different approach. 5676 */ 5677 #include <linux/miscdevice.h> 5678 MODULE_ALIAS_MISCDEV(KVM_MINOR); 5679 MODULE_ALIAS("devname:kvm"); 5680