// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2020
 *
 * Author(s): Carsten Otte <cotte@de.ibm.com>
 *            Christian Borntraeger <borntraeger@de.ibm.com>
 *            Christian Ehrhardt <ehrhardt@de.ibm.com>
 *            Jason J. Herne <jjherne@us.ibm.com>
 */

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>
#include <linux/pgtable.h>
#include <linux/mmu_notifier.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include <asm/ap.h>
#include <asm/uv.h>
#include <asm/fpu/api.h>
#include "kvm-s390.h"
#include "gaccess.h"
#include "pci.h"

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
	KVM_GENERIC_VM_STATS(),
	STATS_DESC_COUNTER(VM, inject_io),
	STATS_DESC_COUNTER(VM, inject_float_mchk),
	STATS_DESC_COUNTER(VM, inject_pfault_done),
	STATS_DESC_COUNTER(VM, inject_service_signal),
	STATS_DESC_COUNTER(VM, inject_virtio),
	STATS_DESC_COUNTER(VM, aen_forward)
};

const struct kvm_stats_header kvm_vm_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vm_stats_desc),
};

const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
	KVM_GENERIC_VCPU_STATS(),
	STATS_DESC_COUNTER(VCPU, exit_userspace),
	STATS_DESC_COUNTER(VCPU, exit_null),
	STATS_DESC_COUNTER(VCPU, exit_external_request),
	STATS_DESC_COUNTER(VCPU, exit_io_request),
	STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
	STATS_DESC_COUNTER(VCPU, exit_stop_request),
	STATS_DESC_COUNTER(VCPU, exit_validity),
	STATS_DESC_COUNTER(VCPU, exit_instruction),
	STATS_DESC_COUNTER(VCPU, exit_pei),
	STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
	STATS_DESC_COUNTER(VCPU, instruction_lctl),
	STATS_DESC_COUNTER(VCPU, instruction_lctlg),
	STATS_DESC_COUNTER(VCPU, instruction_stctl),
	STATS_DESC_COUNTER(VCPU, instruction_stctg),
	STATS_DESC_COUNTER(VCPU, exit_program_interruption),
	STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
	STATS_DESC_COUNTER(VCPU, exit_operation_exception),
	STATS_DESC_COUNTER(VCPU, deliver_ckc),
	STATS_DESC_COUNTER(VCPU, deliver_cputm),
	STATS_DESC_COUNTER(VCPU, deliver_external_call),
	STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
	STATS_DESC_COUNTER(VCPU,
deliver_service_signal), 105 STATS_DESC_COUNTER(VCPU, deliver_virtio), 106 STATS_DESC_COUNTER(VCPU, deliver_stop_signal), 107 STATS_DESC_COUNTER(VCPU, deliver_prefix_signal), 108 STATS_DESC_COUNTER(VCPU, deliver_restart_signal), 109 STATS_DESC_COUNTER(VCPU, deliver_program), 110 STATS_DESC_COUNTER(VCPU, deliver_io), 111 STATS_DESC_COUNTER(VCPU, deliver_machine_check), 112 STATS_DESC_COUNTER(VCPU, exit_wait_state), 113 STATS_DESC_COUNTER(VCPU, inject_ckc), 114 STATS_DESC_COUNTER(VCPU, inject_cputm), 115 STATS_DESC_COUNTER(VCPU, inject_external_call), 116 STATS_DESC_COUNTER(VCPU, inject_emergency_signal), 117 STATS_DESC_COUNTER(VCPU, inject_mchk), 118 STATS_DESC_COUNTER(VCPU, inject_pfault_init), 119 STATS_DESC_COUNTER(VCPU, inject_program), 120 STATS_DESC_COUNTER(VCPU, inject_restart), 121 STATS_DESC_COUNTER(VCPU, inject_set_prefix), 122 STATS_DESC_COUNTER(VCPU, inject_stop_signal), 123 STATS_DESC_COUNTER(VCPU, instruction_epsw), 124 STATS_DESC_COUNTER(VCPU, instruction_gs), 125 STATS_DESC_COUNTER(VCPU, instruction_io_other), 126 STATS_DESC_COUNTER(VCPU, instruction_lpsw), 127 STATS_DESC_COUNTER(VCPU, instruction_lpswe), 128 STATS_DESC_COUNTER(VCPU, instruction_pfmf), 129 STATS_DESC_COUNTER(VCPU, instruction_ptff), 130 STATS_DESC_COUNTER(VCPU, instruction_sck), 131 STATS_DESC_COUNTER(VCPU, instruction_sckpf), 132 STATS_DESC_COUNTER(VCPU, instruction_stidp), 133 STATS_DESC_COUNTER(VCPU, instruction_spx), 134 STATS_DESC_COUNTER(VCPU, instruction_stpx), 135 STATS_DESC_COUNTER(VCPU, instruction_stap), 136 STATS_DESC_COUNTER(VCPU, instruction_iske), 137 STATS_DESC_COUNTER(VCPU, instruction_ri), 138 STATS_DESC_COUNTER(VCPU, instruction_rrbe), 139 STATS_DESC_COUNTER(VCPU, instruction_sske), 140 STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock), 141 STATS_DESC_COUNTER(VCPU, instruction_stsi), 142 STATS_DESC_COUNTER(VCPU, instruction_stfl), 143 STATS_DESC_COUNTER(VCPU, instruction_tb), 144 STATS_DESC_COUNTER(VCPU, instruction_tpi), 145 STATS_DESC_COUNTER(VCPU, instruction_tprot), 146 STATS_DESC_COUNTER(VCPU, instruction_tsch), 147 STATS_DESC_COUNTER(VCPU, instruction_sie), 148 STATS_DESC_COUNTER(VCPU, instruction_essa), 149 STATS_DESC_COUNTER(VCPU, instruction_sthyi), 150 STATS_DESC_COUNTER(VCPU, instruction_sigp_sense), 151 STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running), 152 STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call), 153 STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency), 154 STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency), 155 STATS_DESC_COUNTER(VCPU, instruction_sigp_start), 156 STATS_DESC_COUNTER(VCPU, instruction_sigp_stop), 157 STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status), 158 STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status), 159 STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status), 160 STATS_DESC_COUNTER(VCPU, instruction_sigp_arch), 161 STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix), 162 STATS_DESC_COUNTER(VCPU, instruction_sigp_restart), 163 STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset), 164 STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset), 165 STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown), 166 STATS_DESC_COUNTER(VCPU, instruction_diagnose_10), 167 STATS_DESC_COUNTER(VCPU, instruction_diagnose_44), 168 STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c), 169 STATS_DESC_COUNTER(VCPU, diag_9c_ignored), 170 STATS_DESC_COUNTER(VCPU, diag_9c_forward), 171 STATS_DESC_COUNTER(VCPU, instruction_diagnose_258), 172 STATS_DESC_COUNTER(VCPU, instruction_diagnose_308), 173 STATS_DESC_COUNTER(VCPU, 
instruction_diagnose_500),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
	STATS_DESC_COUNTER(VCPU, pfault_sync)
};

const struct kvm_stats_header kvm_vcpu_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vcpu_stats_desc),
};

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* allow 1m huge page guest backing, if !nested */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");

/* maximum percentage of steal time for polling. >100 is treated like 100 */
static u8 halt_poll_max_steal = 10;
module_param(halt_poll_max_steal, byte, 0644);
MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");

/* if set to true, the GISA will be initialized and used if available */
static bool use_gisa = true;
module_param(use_gisa, bool, 0644);
MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");

/* maximum diag9c forwarding per second */
unsigned int diag9c_forwarding_hz;
module_param(diag9c_forwarding_hz, uint, 0644);
MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");

/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, this requires changes to code, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
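 *
 * Facilities from this extended mask are not handed to a guest by default;
 * user space opts in, either through the KVM_S390_VM_CPU_MODEL processor
 * attribute (kvm_s390_set_processor() below copies the supplied fac_list) or
 * through KVM_ENABLE_CAP, where kvm_vm_ioctl_enable_cap() calls
 * set_kvm_facility() on both fac_mask and fac_list (e.g. facility 129 for
 * KVM_CAP_S390_VECTOR_REGISTERS).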
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };

static unsigned long kvm_s390_fac_size(void)
{
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
		     sizeof(stfle_fac_list));

	return SIZE_INTERNAL;
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
debug_info_t *kvm_s390_dbf_uv;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

int kvm_arch_check_processor_compat(void *opaque)
{
	return 0;
}

/* forward declarations */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
static int sca_switch_to_extended(struct kvm *kvm);

static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
	u8 delta_idx = 0;

	/*
	 * The TOD jumps by delta, we have to compensate this by adding
	 * -delta to the epoch.
	 */
	delta = -delta;

	/* sign-extension - we're adding to signed values below */
	if ((s64)delta < 0)
		delta_idx = -1;

	scb->epoch += delta;
	if (scb->ecd & ECD_MEF) {
		scb->epdx += delta_idx;
		if (scb->epoch < delta)
			scb->epdx += 1;
	}
}

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
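 *
 * kvm_clock_sync_scb() above compensates a host TOD step of +delta by adding
 * -delta to the SIE block's epoch. With the multiple-epoch facility enabled
 * (ECD_MEF), delta_idx carries the sign extension into scb->epdx and the
 * "epoch < delta" test adds the unsigned carry, so the 128-bit (epdx, epoch)
 * pair keeps describing the same guest TOD after the step.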
294 */ 295 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val, 296 void *v) 297 { 298 struct kvm *kvm; 299 struct kvm_vcpu *vcpu; 300 unsigned long i; 301 unsigned long long *delta = v; 302 303 list_for_each_entry(kvm, &vm_list, vm_list) { 304 kvm_for_each_vcpu(i, vcpu, kvm) { 305 kvm_clock_sync_scb(vcpu->arch.sie_block, *delta); 306 if (i == 0) { 307 kvm->arch.epoch = vcpu->arch.sie_block->epoch; 308 kvm->arch.epdx = vcpu->arch.sie_block->epdx; 309 } 310 if (vcpu->arch.cputm_enabled) 311 vcpu->arch.cputm_start += *delta; 312 if (vcpu->arch.vsie_block) 313 kvm_clock_sync_scb(vcpu->arch.vsie_block, 314 *delta); 315 } 316 } 317 return NOTIFY_OK; 318 } 319 320 static struct notifier_block kvm_clock_notifier = { 321 .notifier_call = kvm_clock_sync, 322 }; 323 324 int kvm_arch_hardware_setup(void *opaque) 325 { 326 gmap_notifier.notifier_call = kvm_gmap_notifier; 327 gmap_register_pte_notifier(&gmap_notifier); 328 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier; 329 gmap_register_pte_notifier(&vsie_gmap_notifier); 330 atomic_notifier_chain_register(&s390_epoch_delta_notifier, 331 &kvm_clock_notifier); 332 return 0; 333 } 334 335 void kvm_arch_hardware_unsetup(void) 336 { 337 gmap_unregister_pte_notifier(&gmap_notifier); 338 gmap_unregister_pte_notifier(&vsie_gmap_notifier); 339 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier, 340 &kvm_clock_notifier); 341 } 342 343 static void allow_cpu_feat(unsigned long nr) 344 { 345 set_bit_inv(nr, kvm_s390_available_cpu_feat); 346 } 347 348 static inline int plo_test_bit(unsigned char nr) 349 { 350 unsigned long function = (unsigned long)nr | 0x100; 351 int cc; 352 353 asm volatile( 354 " lgr 0,%[function]\n" 355 /* Parameter registers are ignored for "test bit" */ 356 " plo 0,0,0,0(0)\n" 357 " ipm %0\n" 358 " srl %0,28\n" 359 : "=d" (cc) 360 : [function] "d" (function) 361 : "cc", "0"); 362 return cc == 0; 363 } 364 365 static __always_inline void __insn32_query(unsigned int opcode, u8 *query) 366 { 367 asm volatile( 368 " lghi 0,0\n" 369 " lgr 1,%[query]\n" 370 /* Parameter registers are ignored */ 371 " .insn rrf,%[opc] << 16,2,4,6,0\n" 372 : 373 : [query] "d" ((unsigned long)query), [opc] "i" (opcode) 374 : "cc", "memory", "0", "1"); 375 } 376 377 #define INSN_SORTL 0xb938 378 #define INSN_DFLTCC 0xb939 379 380 static void kvm_s390_cpu_feat_init(void) 381 { 382 int i; 383 384 for (i = 0; i < 256; ++i) { 385 if (plo_test_bit(i)) 386 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7); 387 } 388 389 if (test_facility(28)) /* TOD-clock steering */ 390 ptff(kvm_s390_available_subfunc.ptff, 391 sizeof(kvm_s390_available_subfunc.ptff), 392 PTFF_QAF); 393 394 if (test_facility(17)) { /* MSA */ 395 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *) 396 kvm_s390_available_subfunc.kmac); 397 __cpacf_query(CPACF_KMC, (cpacf_mask_t *) 398 kvm_s390_available_subfunc.kmc); 399 __cpacf_query(CPACF_KM, (cpacf_mask_t *) 400 kvm_s390_available_subfunc.km); 401 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *) 402 kvm_s390_available_subfunc.kimd); 403 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *) 404 kvm_s390_available_subfunc.klmd); 405 } 406 if (test_facility(76)) /* MSA3 */ 407 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *) 408 kvm_s390_available_subfunc.pckmo); 409 if (test_facility(77)) { /* MSA4 */ 410 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *) 411 kvm_s390_available_subfunc.kmctr); 412 __cpacf_query(CPACF_KMF, (cpacf_mask_t *) 413 kvm_s390_available_subfunc.kmf); 414 __cpacf_query(CPACF_KMO, (cpacf_mask_t *) 415 
kvm_s390_available_subfunc.kmo); 416 __cpacf_query(CPACF_PCC, (cpacf_mask_t *) 417 kvm_s390_available_subfunc.pcc); 418 } 419 if (test_facility(57)) /* MSA5 */ 420 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *) 421 kvm_s390_available_subfunc.ppno); 422 423 if (test_facility(146)) /* MSA8 */ 424 __cpacf_query(CPACF_KMA, (cpacf_mask_t *) 425 kvm_s390_available_subfunc.kma); 426 427 if (test_facility(155)) /* MSA9 */ 428 __cpacf_query(CPACF_KDSA, (cpacf_mask_t *) 429 kvm_s390_available_subfunc.kdsa); 430 431 if (test_facility(150)) /* SORTL */ 432 __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl); 433 434 if (test_facility(151)) /* DFLTCC */ 435 __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc); 436 437 if (MACHINE_HAS_ESOP) 438 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP); 439 /* 440 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow), 441 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing). 442 */ 443 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao || 444 !test_facility(3) || !nested) 445 return; 446 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2); 447 if (sclp.has_64bscao) 448 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO); 449 if (sclp.has_siif) 450 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF); 451 if (sclp.has_gpere) 452 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE); 453 if (sclp.has_gsls) 454 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS); 455 if (sclp.has_ib) 456 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB); 457 if (sclp.has_cei) 458 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI); 459 if (sclp.has_ibs) 460 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS); 461 if (sclp.has_kss) 462 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS); 463 /* 464 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make 465 * all skey handling functions read/set the skey from the PGSTE 466 * instead of the real storage key. 467 * 468 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make 469 * pages being detected as preserved although they are resident. 470 * 471 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will 472 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY. 473 * 474 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and 475 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be 476 * correctly shadowed. We can do that for the PGSTE but not for PTE.I. 477 * 478 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We 479 * cannot easily shadow the SCA because of the ipte lock. 480 */ 481 } 482 483 int kvm_arch_init(void *opaque) 484 { 485 int rc = -ENOMEM; 486 487 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long)); 488 if (!kvm_s390_dbf) 489 return -ENOMEM; 490 491 kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long)); 492 if (!kvm_s390_dbf_uv) 493 goto out; 494 495 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) || 496 debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view)) 497 goto out; 498 499 kvm_s390_cpu_feat_init(); 500 501 /* Register floating interrupt controller interface. 
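	 * Registering kvm_flic_ops for KVM_DEV_TYPE_FLIC lets user space
	 * create the floating interrupt controller later via
	 * KVM_CREATE_DEVICE.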
*/ 502 rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC); 503 if (rc) { 504 pr_err("A FLIC registration call failed with rc=%d\n", rc); 505 goto out; 506 } 507 508 if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) { 509 rc = kvm_s390_pci_init(); 510 if (rc) { 511 pr_err("Unable to allocate AIFT for PCI\n"); 512 goto out; 513 } 514 } 515 516 rc = kvm_s390_gib_init(GAL_ISC); 517 if (rc) 518 goto out; 519 520 return 0; 521 522 out: 523 kvm_arch_exit(); 524 return rc; 525 } 526 527 void kvm_arch_exit(void) 528 { 529 kvm_s390_gib_destroy(); 530 if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) 531 kvm_s390_pci_exit(); 532 debug_unregister(kvm_s390_dbf); 533 debug_unregister(kvm_s390_dbf_uv); 534 } 535 536 /* Section: device related */ 537 long kvm_arch_dev_ioctl(struct file *filp, 538 unsigned int ioctl, unsigned long arg) 539 { 540 if (ioctl == KVM_S390_ENABLE_SIE) 541 return s390_enable_sie(); 542 return -EINVAL; 543 } 544 545 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) 546 { 547 int r; 548 549 switch (ext) { 550 case KVM_CAP_S390_PSW: 551 case KVM_CAP_S390_GMAP: 552 case KVM_CAP_SYNC_MMU: 553 #ifdef CONFIG_KVM_S390_UCONTROL 554 case KVM_CAP_S390_UCONTROL: 555 #endif 556 case KVM_CAP_ASYNC_PF: 557 case KVM_CAP_SYNC_REGS: 558 case KVM_CAP_ONE_REG: 559 case KVM_CAP_ENABLE_CAP: 560 case KVM_CAP_S390_CSS_SUPPORT: 561 case KVM_CAP_IOEVENTFD: 562 case KVM_CAP_DEVICE_CTRL: 563 case KVM_CAP_S390_IRQCHIP: 564 case KVM_CAP_VM_ATTRIBUTES: 565 case KVM_CAP_MP_STATE: 566 case KVM_CAP_IMMEDIATE_EXIT: 567 case KVM_CAP_S390_INJECT_IRQ: 568 case KVM_CAP_S390_USER_SIGP: 569 case KVM_CAP_S390_USER_STSI: 570 case KVM_CAP_S390_SKEYS: 571 case KVM_CAP_S390_IRQ_STATE: 572 case KVM_CAP_S390_USER_INSTR0: 573 case KVM_CAP_S390_CMMA_MIGRATION: 574 case KVM_CAP_S390_AIS: 575 case KVM_CAP_S390_AIS_MIGRATION: 576 case KVM_CAP_S390_VCPU_RESETS: 577 case KVM_CAP_SET_GUEST_DEBUG: 578 case KVM_CAP_S390_DIAG318: 579 case KVM_CAP_S390_MEM_OP_EXTENSION: 580 r = 1; 581 break; 582 case KVM_CAP_SET_GUEST_DEBUG2: 583 r = KVM_GUESTDBG_VALID_MASK; 584 break; 585 case KVM_CAP_S390_HPAGE_1M: 586 r = 0; 587 if (hpage && !kvm_is_ucontrol(kvm)) 588 r = 1; 589 break; 590 case KVM_CAP_S390_MEM_OP: 591 r = MEM_OP_MAX_SIZE; 592 break; 593 case KVM_CAP_NR_VCPUS: 594 case KVM_CAP_MAX_VCPUS: 595 case KVM_CAP_MAX_VCPU_ID: 596 r = KVM_S390_BSCA_CPU_SLOTS; 597 if (!kvm_s390_use_sca_entries()) 598 r = KVM_MAX_VCPUS; 599 else if (sclp.has_esca && sclp.has_64bscao) 600 r = KVM_S390_ESCA_CPU_SLOTS; 601 if (ext == KVM_CAP_NR_VCPUS) 602 r = min_t(unsigned int, num_online_cpus(), r); 603 break; 604 case KVM_CAP_S390_COW: 605 r = MACHINE_HAS_ESOP; 606 break; 607 case KVM_CAP_S390_VECTOR_REGISTERS: 608 r = MACHINE_HAS_VX; 609 break; 610 case KVM_CAP_S390_RI: 611 r = test_facility(64); 612 break; 613 case KVM_CAP_S390_GS: 614 r = test_facility(133); 615 break; 616 case KVM_CAP_S390_BPB: 617 r = test_facility(82); 618 break; 619 case KVM_CAP_S390_PROTECTED: 620 r = is_prot_virt_host(); 621 break; 622 case KVM_CAP_S390_PROTECTED_DUMP: { 623 u64 pv_cmds_dump[] = { 624 BIT_UVC_CMD_DUMP_INIT, 625 BIT_UVC_CMD_DUMP_CONFIG_STOR_STATE, 626 BIT_UVC_CMD_DUMP_CPU, 627 BIT_UVC_CMD_DUMP_COMPLETE, 628 }; 629 int i; 630 631 r = is_prot_virt_host(); 632 633 for (i = 0; i < ARRAY_SIZE(pv_cmds_dump); i++) { 634 if (!test_bit_inv(pv_cmds_dump[i], 635 (unsigned long *)&uv_info.inst_calls_list)) { 636 r = 0; 637 break; 638 } 639 } 640 break; 641 } 642 case KVM_CAP_S390_ZPCI_OP: 643 r = kvm_s390_pci_interp_allowed(); 644 break; 645 case KVM_CAP_S390_CPU_TOPOLOGY: 646 r 
= test_facility(11); 647 break; 648 default: 649 r = 0; 650 } 651 return r; 652 } 653 654 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) 655 { 656 int i; 657 gfn_t cur_gfn, last_gfn; 658 unsigned long gaddr, vmaddr; 659 struct gmap *gmap = kvm->arch.gmap; 660 DECLARE_BITMAP(bitmap, _PAGE_ENTRIES); 661 662 /* Loop over all guest segments */ 663 cur_gfn = memslot->base_gfn; 664 last_gfn = memslot->base_gfn + memslot->npages; 665 for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) { 666 gaddr = gfn_to_gpa(cur_gfn); 667 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn); 668 if (kvm_is_error_hva(vmaddr)) 669 continue; 670 671 bitmap_zero(bitmap, _PAGE_ENTRIES); 672 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr); 673 for (i = 0; i < _PAGE_ENTRIES; i++) { 674 if (test_bit(i, bitmap)) 675 mark_page_dirty(kvm, cur_gfn + i); 676 } 677 678 if (fatal_signal_pending(current)) 679 return; 680 cond_resched(); 681 } 682 } 683 684 /* Section: vm related */ 685 static void sca_del_vcpu(struct kvm_vcpu *vcpu); 686 687 /* 688 * Get (and clear) the dirty memory log for a memory slot. 689 */ 690 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, 691 struct kvm_dirty_log *log) 692 { 693 int r; 694 unsigned long n; 695 struct kvm_memory_slot *memslot; 696 int is_dirty; 697 698 if (kvm_is_ucontrol(kvm)) 699 return -EINVAL; 700 701 mutex_lock(&kvm->slots_lock); 702 703 r = -EINVAL; 704 if (log->slot >= KVM_USER_MEM_SLOTS) 705 goto out; 706 707 r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot); 708 if (r) 709 goto out; 710 711 /* Clear the dirty log */ 712 if (is_dirty) { 713 n = kvm_dirty_bitmap_bytes(memslot); 714 memset(memslot->dirty_bitmap, 0, n); 715 } 716 r = 0; 717 out: 718 mutex_unlock(&kvm->slots_lock); 719 return r; 720 } 721 722 static void icpt_operexc_on_all_vcpus(struct kvm *kvm) 723 { 724 unsigned long i; 725 struct kvm_vcpu *vcpu; 726 727 kvm_for_each_vcpu(i, vcpu, kvm) { 728 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu); 729 } 730 } 731 732 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) 733 { 734 int r; 735 736 if (cap->flags) 737 return -EINVAL; 738 739 switch (cap->cap) { 740 case KVM_CAP_S390_IRQCHIP: 741 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP"); 742 kvm->arch.use_irqchip = 1; 743 r = 0; 744 break; 745 case KVM_CAP_S390_USER_SIGP: 746 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP"); 747 kvm->arch.user_sigp = 1; 748 r = 0; 749 break; 750 case KVM_CAP_S390_VECTOR_REGISTERS: 751 mutex_lock(&kvm->lock); 752 if (kvm->created_vcpus) { 753 r = -EBUSY; 754 } else if (MACHINE_HAS_VX) { 755 set_kvm_facility(kvm->arch.model.fac_mask, 129); 756 set_kvm_facility(kvm->arch.model.fac_list, 129); 757 if (test_facility(134)) { 758 set_kvm_facility(kvm->arch.model.fac_mask, 134); 759 set_kvm_facility(kvm->arch.model.fac_list, 134); 760 } 761 if (test_facility(135)) { 762 set_kvm_facility(kvm->arch.model.fac_mask, 135); 763 set_kvm_facility(kvm->arch.model.fac_list, 135); 764 } 765 if (test_facility(148)) { 766 set_kvm_facility(kvm->arch.model.fac_mask, 148); 767 set_kvm_facility(kvm->arch.model.fac_list, 148); 768 } 769 if (test_facility(152)) { 770 set_kvm_facility(kvm->arch.model.fac_mask, 152); 771 set_kvm_facility(kvm->arch.model.fac_list, 152); 772 } 773 if (test_facility(192)) { 774 set_kvm_facility(kvm->arch.model.fac_mask, 192); 775 set_kvm_facility(kvm->arch.model.fac_list, 192); 776 } 777 r = 0; 778 } else 779 r = -EINVAL; 780 mutex_unlock(&kvm->lock); 781 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s", 782 r ? 
"(not available)" : "(success)"); 783 break; 784 case KVM_CAP_S390_RI: 785 r = -EINVAL; 786 mutex_lock(&kvm->lock); 787 if (kvm->created_vcpus) { 788 r = -EBUSY; 789 } else if (test_facility(64)) { 790 set_kvm_facility(kvm->arch.model.fac_mask, 64); 791 set_kvm_facility(kvm->arch.model.fac_list, 64); 792 r = 0; 793 } 794 mutex_unlock(&kvm->lock); 795 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s", 796 r ? "(not available)" : "(success)"); 797 break; 798 case KVM_CAP_S390_AIS: 799 mutex_lock(&kvm->lock); 800 if (kvm->created_vcpus) { 801 r = -EBUSY; 802 } else { 803 set_kvm_facility(kvm->arch.model.fac_mask, 72); 804 set_kvm_facility(kvm->arch.model.fac_list, 72); 805 r = 0; 806 } 807 mutex_unlock(&kvm->lock); 808 VM_EVENT(kvm, 3, "ENABLE: AIS %s", 809 r ? "(not available)" : "(success)"); 810 break; 811 case KVM_CAP_S390_GS: 812 r = -EINVAL; 813 mutex_lock(&kvm->lock); 814 if (kvm->created_vcpus) { 815 r = -EBUSY; 816 } else if (test_facility(133)) { 817 set_kvm_facility(kvm->arch.model.fac_mask, 133); 818 set_kvm_facility(kvm->arch.model.fac_list, 133); 819 r = 0; 820 } 821 mutex_unlock(&kvm->lock); 822 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s", 823 r ? "(not available)" : "(success)"); 824 break; 825 case KVM_CAP_S390_HPAGE_1M: 826 mutex_lock(&kvm->lock); 827 if (kvm->created_vcpus) 828 r = -EBUSY; 829 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm)) 830 r = -EINVAL; 831 else { 832 r = 0; 833 mmap_write_lock(kvm->mm); 834 kvm->mm->context.allow_gmap_hpage_1m = 1; 835 mmap_write_unlock(kvm->mm); 836 /* 837 * We might have to create fake 4k page 838 * tables. To avoid that the hardware works on 839 * stale PGSTEs, we emulate these instructions. 840 */ 841 kvm->arch.use_skf = 0; 842 kvm->arch.use_pfmfi = 0; 843 } 844 mutex_unlock(&kvm->lock); 845 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s", 846 r ? "(not available)" : "(success)"); 847 break; 848 case KVM_CAP_S390_USER_STSI: 849 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI"); 850 kvm->arch.user_stsi = 1; 851 r = 0; 852 break; 853 case KVM_CAP_S390_USER_INSTR0: 854 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0"); 855 kvm->arch.user_instr0 = 1; 856 icpt_operexc_on_all_vcpus(kvm); 857 r = 0; 858 break; 859 case KVM_CAP_S390_CPU_TOPOLOGY: 860 r = -EINVAL; 861 mutex_lock(&kvm->lock); 862 if (kvm->created_vcpus) { 863 r = -EBUSY; 864 } else if (test_facility(11)) { 865 set_kvm_facility(kvm->arch.model.fac_mask, 11); 866 set_kvm_facility(kvm->arch.model.fac_list, 11); 867 r = 0; 868 } 869 mutex_unlock(&kvm->lock); 870 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_CPU_TOPOLOGY %s", 871 r ? 
"(not available)" : "(success)"); 872 break; 873 default: 874 r = -EINVAL; 875 break; 876 } 877 return r; 878 } 879 880 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) 881 { 882 int ret; 883 884 switch (attr->attr) { 885 case KVM_S390_VM_MEM_LIMIT_SIZE: 886 ret = 0; 887 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes", 888 kvm->arch.mem_limit); 889 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr)) 890 ret = -EFAULT; 891 break; 892 default: 893 ret = -ENXIO; 894 break; 895 } 896 return ret; 897 } 898 899 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) 900 { 901 int ret; 902 unsigned int idx; 903 switch (attr->attr) { 904 case KVM_S390_VM_MEM_ENABLE_CMMA: 905 ret = -ENXIO; 906 if (!sclp.has_cmma) 907 break; 908 909 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support"); 910 mutex_lock(&kvm->lock); 911 if (kvm->created_vcpus) 912 ret = -EBUSY; 913 else if (kvm->mm->context.allow_gmap_hpage_1m) 914 ret = -EINVAL; 915 else { 916 kvm->arch.use_cmma = 1; 917 /* Not compatible with cmma. */ 918 kvm->arch.use_pfmfi = 0; 919 ret = 0; 920 } 921 mutex_unlock(&kvm->lock); 922 break; 923 case KVM_S390_VM_MEM_CLR_CMMA: 924 ret = -ENXIO; 925 if (!sclp.has_cmma) 926 break; 927 ret = -EINVAL; 928 if (!kvm->arch.use_cmma) 929 break; 930 931 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states"); 932 mutex_lock(&kvm->lock); 933 idx = srcu_read_lock(&kvm->srcu); 934 s390_reset_cmma(kvm->arch.gmap->mm); 935 srcu_read_unlock(&kvm->srcu, idx); 936 mutex_unlock(&kvm->lock); 937 ret = 0; 938 break; 939 case KVM_S390_VM_MEM_LIMIT_SIZE: { 940 unsigned long new_limit; 941 942 if (kvm_is_ucontrol(kvm)) 943 return -EINVAL; 944 945 if (get_user(new_limit, (u64 __user *)attr->addr)) 946 return -EFAULT; 947 948 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT && 949 new_limit > kvm->arch.mem_limit) 950 return -E2BIG; 951 952 if (!new_limit) 953 return -EINVAL; 954 955 /* gmap_create takes last usable address */ 956 if (new_limit != KVM_S390_NO_MEM_LIMIT) 957 new_limit -= 1; 958 959 ret = -EBUSY; 960 mutex_lock(&kvm->lock); 961 if (!kvm->created_vcpus) { 962 /* gmap_create will round the limit up */ 963 struct gmap *new = gmap_create(current->mm, new_limit); 964 965 if (!new) { 966 ret = -ENOMEM; 967 } else { 968 gmap_remove(kvm->arch.gmap); 969 new->private = kvm; 970 kvm->arch.gmap = new; 971 ret = 0; 972 } 973 } 974 mutex_unlock(&kvm->lock); 975 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit); 976 VM_EVENT(kvm, 3, "New guest asce: 0x%pK", 977 (void *) kvm->arch.gmap->asce); 978 break; 979 } 980 default: 981 ret = -ENXIO; 982 break; 983 } 984 return ret; 985 } 986 987 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu); 988 989 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm) 990 { 991 struct kvm_vcpu *vcpu; 992 unsigned long i; 993 994 kvm_s390_vcpu_block_all(kvm); 995 996 kvm_for_each_vcpu(i, vcpu, kvm) { 997 kvm_s390_vcpu_crypto_setup(vcpu); 998 /* recreate the shadow crycb by leaving the VSIE handler */ 999 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu); 1000 } 1001 1002 kvm_s390_vcpu_unblock_all(kvm); 1003 } 1004 1005 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr) 1006 { 1007 mutex_lock(&kvm->lock); 1008 switch (attr->attr) { 1009 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW: 1010 if (!test_kvm_facility(kvm, 76)) { 1011 mutex_unlock(&kvm->lock); 1012 return -EINVAL; 1013 } 1014 get_random_bytes( 1015 kvm->arch.crypto.crycb->aes_wrapping_key_mask, 1016 
sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); 1017 kvm->arch.crypto.aes_kw = 1; 1018 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support"); 1019 break; 1020 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW: 1021 if (!test_kvm_facility(kvm, 76)) { 1022 mutex_unlock(&kvm->lock); 1023 return -EINVAL; 1024 } 1025 get_random_bytes( 1026 kvm->arch.crypto.crycb->dea_wrapping_key_mask, 1027 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); 1028 kvm->arch.crypto.dea_kw = 1; 1029 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support"); 1030 break; 1031 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW: 1032 if (!test_kvm_facility(kvm, 76)) { 1033 mutex_unlock(&kvm->lock); 1034 return -EINVAL; 1035 } 1036 kvm->arch.crypto.aes_kw = 0; 1037 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0, 1038 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); 1039 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support"); 1040 break; 1041 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW: 1042 if (!test_kvm_facility(kvm, 76)) { 1043 mutex_unlock(&kvm->lock); 1044 return -EINVAL; 1045 } 1046 kvm->arch.crypto.dea_kw = 0; 1047 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0, 1048 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); 1049 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support"); 1050 break; 1051 case KVM_S390_VM_CRYPTO_ENABLE_APIE: 1052 if (!ap_instructions_available()) { 1053 mutex_unlock(&kvm->lock); 1054 return -EOPNOTSUPP; 1055 } 1056 kvm->arch.crypto.apie = 1; 1057 break; 1058 case KVM_S390_VM_CRYPTO_DISABLE_APIE: 1059 if (!ap_instructions_available()) { 1060 mutex_unlock(&kvm->lock); 1061 return -EOPNOTSUPP; 1062 } 1063 kvm->arch.crypto.apie = 0; 1064 break; 1065 default: 1066 mutex_unlock(&kvm->lock); 1067 return -ENXIO; 1068 } 1069 1070 kvm_s390_vcpu_crypto_reset_all(kvm); 1071 mutex_unlock(&kvm->lock); 1072 return 0; 1073 } 1074 1075 static void kvm_s390_vcpu_pci_setup(struct kvm_vcpu *vcpu) 1076 { 1077 /* Only set the ECB bits after guest requests zPCI interpretation */ 1078 if (!vcpu->kvm->arch.use_zpci_interp) 1079 return; 1080 1081 vcpu->arch.sie_block->ecb2 |= ECB2_ZPCI_LSI; 1082 vcpu->arch.sie_block->ecb3 |= ECB3_AISII + ECB3_AISI; 1083 } 1084 1085 void kvm_s390_vcpu_pci_enable_interp(struct kvm *kvm) 1086 { 1087 struct kvm_vcpu *vcpu; 1088 unsigned long i; 1089 1090 lockdep_assert_held(&kvm->lock); 1091 1092 if (!kvm_s390_pci_interp_allowed()) 1093 return; 1094 1095 /* 1096 * If host is configured for PCI and the necessary facilities are 1097 * available, turn on interpretation for the life of this guest 1098 */ 1099 kvm->arch.use_zpci_interp = 1; 1100 1101 kvm_s390_vcpu_block_all(kvm); 1102 1103 kvm_for_each_vcpu(i, vcpu, kvm) { 1104 kvm_s390_vcpu_pci_setup(vcpu); 1105 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu); 1106 } 1107 1108 kvm_s390_vcpu_unblock_all(kvm); 1109 } 1110 1111 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req) 1112 { 1113 unsigned long cx; 1114 struct kvm_vcpu *vcpu; 1115 1116 kvm_for_each_vcpu(cx, vcpu, kvm) 1117 kvm_s390_sync_request(req, vcpu); 1118 } 1119 1120 /* 1121 * Must be called with kvm->srcu held to avoid races on memslots, and with 1122 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration. 
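 *
 * Both kvm_s390_vm_start_migration() and kvm_s390_vm_stop_migration() below
 * are reached through kvm_s390_vm_set_migration(), which takes
 * kvm->slots_lock around the switch on KVM_S390_VM_MIGRATION_START/STOP.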
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_memory_slot *ms;
	struct kvm_memslots *slots;
	unsigned long ram_pages = 0;
	int bkt;

	/* migration mode already enabled */
	if (kvm->arch.migration_mode)
		return 0;
	slots = kvm_memslots(kvm);
	if (!slots || kvm_memslots_empty(slots))
		return -EINVAL;

	if (!kvm->arch.use_cmma) {
		kvm->arch.migration_mode = 1;
		return 0;
	}
	/* mark all the pages in active slots as dirty */
	kvm_for_each_memslot(ms, bkt, slots) {
		if (!ms->dirty_bitmap)
			return -EINVAL;
		/*
		 * The second half of the bitmap is only used on x86,
		 * and would be wasted otherwise, so we put it to good
		 * use here to keep track of the state of the storage
		 * attributes.
		 */
		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
		ram_pages += ms->npages;
	}
	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
	kvm->arch.migration_mode = 1;
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	return 0;
}

/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	/* migration mode already disabled */
	if (!kvm->arch.migration_mode)
		return 0;
	kvm->arch.migration_mode = 0;
	if (kvm->arch.use_cmma)
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
	return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int res = -ENXIO;

	mutex_lock(&kvm->slots_lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		res = kvm_s390_vm_start_migration(kvm);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->slots_lock);

	return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = kvm->arch.migration_mode;

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
		return -EINVAL;
	kvm_s390_set_tod_clock(kvm, &gtod);

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);

	return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod = { 0 };

	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
			   sizeof(gtod.tod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, &gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_get_tod_clock(struct kvm *kvm,
				   struct kvm_s390_vm_tod_clock *gtod)
{
	union tod_clock clk;

	preempt_disable();

	store_tod_clock_ext(&clk);

	gtod->tod = clk.tod + kvm->arch.epoch;
	gtod->epoch_idx = 0;
	if (test_kvm_facility(kvm, 139)) {
		gtod->epoch_idx = clk.ei + kvm->arch.epdx;
		if (gtod->tod < clk.tod)
			gtod->epoch_idx += 1;
	}

	preempt_enable();
}

static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));
	kvm_s390_get_tod_clock(kvm, &gtod);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);
	return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
kvm->arch.model.ibc = proc->ibc; 1389 } 1390 memcpy(kvm->arch.model.fac_list, proc->fac_list, 1391 S390_ARCH_FAC_LIST_SIZE_BYTE); 1392 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx", 1393 kvm->arch.model.ibc, 1394 kvm->arch.model.cpuid); 1395 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx", 1396 kvm->arch.model.fac_list[0], 1397 kvm->arch.model.fac_list[1], 1398 kvm->arch.model.fac_list[2]); 1399 } else 1400 ret = -EFAULT; 1401 kfree(proc); 1402 out: 1403 mutex_unlock(&kvm->lock); 1404 return ret; 1405 } 1406 1407 static int kvm_s390_set_processor_feat(struct kvm *kvm, 1408 struct kvm_device_attr *attr) 1409 { 1410 struct kvm_s390_vm_cpu_feat data; 1411 1412 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data))) 1413 return -EFAULT; 1414 if (!bitmap_subset((unsigned long *) data.feat, 1415 kvm_s390_available_cpu_feat, 1416 KVM_S390_VM_CPU_FEAT_NR_BITS)) 1417 return -EINVAL; 1418 1419 mutex_lock(&kvm->lock); 1420 if (kvm->created_vcpus) { 1421 mutex_unlock(&kvm->lock); 1422 return -EBUSY; 1423 } 1424 bitmap_from_arr64(kvm->arch.cpu_feat, data.feat, KVM_S390_VM_CPU_FEAT_NR_BITS); 1425 mutex_unlock(&kvm->lock); 1426 VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx", 1427 data.feat[0], 1428 data.feat[1], 1429 data.feat[2]); 1430 return 0; 1431 } 1432 1433 static int kvm_s390_set_processor_subfunc(struct kvm *kvm, 1434 struct kvm_device_attr *attr) 1435 { 1436 mutex_lock(&kvm->lock); 1437 if (kvm->created_vcpus) { 1438 mutex_unlock(&kvm->lock); 1439 return -EBUSY; 1440 } 1441 1442 if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr, 1443 sizeof(struct kvm_s390_vm_cpu_subfunc))) { 1444 mutex_unlock(&kvm->lock); 1445 return -EFAULT; 1446 } 1447 mutex_unlock(&kvm->lock); 1448 1449 VM_EVENT(kvm, 3, "SET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1450 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0], 1451 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1], 1452 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2], 1453 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]); 1454 VM_EVENT(kvm, 3, "SET: guest PTFF subfunc 0x%16.16lx.%16.16lx", 1455 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0], 1456 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]); 1457 VM_EVENT(kvm, 3, "SET: guest KMAC subfunc 0x%16.16lx.%16.16lx", 1458 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0], 1459 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]); 1460 VM_EVENT(kvm, 3, "SET: guest KMC subfunc 0x%16.16lx.%16.16lx", 1461 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0], 1462 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]); 1463 VM_EVENT(kvm, 3, "SET: guest KM subfunc 0x%16.16lx.%16.16lx", 1464 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0], 1465 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]); 1466 VM_EVENT(kvm, 3, "SET: guest KIMD subfunc 0x%16.16lx.%16.16lx", 1467 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0], 1468 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]); 1469 VM_EVENT(kvm, 3, "SET: guest KLMD subfunc 0x%16.16lx.%16.16lx", 1470 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0], 1471 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]); 1472 VM_EVENT(kvm, 3, "SET: guest PCKMO subfunc 0x%16.16lx.%16.16lx", 1473 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0], 1474 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]); 1475 VM_EVENT(kvm, 3, "SET: guest KMCTR subfunc 0x%16.16lx.%16.16lx", 1476 ((unsigned long *) 
&kvm->arch.model.subfuncs.kmctr)[0], 1477 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]); 1478 VM_EVENT(kvm, 3, "SET: guest KMF subfunc 0x%16.16lx.%16.16lx", 1479 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0], 1480 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]); 1481 VM_EVENT(kvm, 3, "SET: guest KMO subfunc 0x%16.16lx.%16.16lx", 1482 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0], 1483 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]); 1484 VM_EVENT(kvm, 3, "SET: guest PCC subfunc 0x%16.16lx.%16.16lx", 1485 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0], 1486 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]); 1487 VM_EVENT(kvm, 3, "SET: guest PPNO subfunc 0x%16.16lx.%16.16lx", 1488 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0], 1489 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]); 1490 VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx", 1491 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0], 1492 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]); 1493 VM_EVENT(kvm, 3, "SET: guest KDSA subfunc 0x%16.16lx.%16.16lx", 1494 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0], 1495 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]); 1496 VM_EVENT(kvm, 3, "SET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1497 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0], 1498 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1], 1499 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2], 1500 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]); 1501 VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1502 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0], 1503 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1], 1504 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2], 1505 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]); 1506 1507 return 0; 1508 } 1509 1510 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) 1511 { 1512 int ret = -ENXIO; 1513 1514 switch (attr->attr) { 1515 case KVM_S390_VM_CPU_PROCESSOR: 1516 ret = kvm_s390_set_processor(kvm, attr); 1517 break; 1518 case KVM_S390_VM_CPU_PROCESSOR_FEAT: 1519 ret = kvm_s390_set_processor_feat(kvm, attr); 1520 break; 1521 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: 1522 ret = kvm_s390_set_processor_subfunc(kvm, attr); 1523 break; 1524 } 1525 return ret; 1526 } 1527 1528 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr) 1529 { 1530 struct kvm_s390_vm_cpu_processor *proc; 1531 int ret = 0; 1532 1533 proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT); 1534 if (!proc) { 1535 ret = -ENOMEM; 1536 goto out; 1537 } 1538 proc->cpuid = kvm->arch.model.cpuid; 1539 proc->ibc = kvm->arch.model.ibc; 1540 memcpy(&proc->fac_list, kvm->arch.model.fac_list, 1541 S390_ARCH_FAC_LIST_SIZE_BYTE); 1542 VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx", 1543 kvm->arch.model.ibc, 1544 kvm->arch.model.cpuid); 1545 VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx", 1546 kvm->arch.model.fac_list[0], 1547 kvm->arch.model.fac_list[1], 1548 kvm->arch.model.fac_list[2]); 1549 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc))) 1550 ret = -EFAULT; 1551 kfree(proc); 1552 out: 1553 return ret; 1554 } 1555 1556 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr) 1557 { 1558 struct kvm_s390_vm_cpu_machine *mach; 1559 int ret = 0; 1560 1561 mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT); 1562 if 
(!mach) { 1563 ret = -ENOMEM; 1564 goto out; 1565 } 1566 get_cpu_id((struct cpuid *) &mach->cpuid); 1567 mach->ibc = sclp.ibc; 1568 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask, 1569 S390_ARCH_FAC_LIST_SIZE_BYTE); 1570 memcpy((unsigned long *)&mach->fac_list, stfle_fac_list, 1571 sizeof(stfle_fac_list)); 1572 VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx", 1573 kvm->arch.model.ibc, 1574 kvm->arch.model.cpuid); 1575 VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx", 1576 mach->fac_mask[0], 1577 mach->fac_mask[1], 1578 mach->fac_mask[2]); 1579 VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx", 1580 mach->fac_list[0], 1581 mach->fac_list[1], 1582 mach->fac_list[2]); 1583 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach))) 1584 ret = -EFAULT; 1585 kfree(mach); 1586 out: 1587 return ret; 1588 } 1589 1590 static int kvm_s390_get_processor_feat(struct kvm *kvm, 1591 struct kvm_device_attr *attr) 1592 { 1593 struct kvm_s390_vm_cpu_feat data; 1594 1595 bitmap_to_arr64(data.feat, kvm->arch.cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS); 1596 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data))) 1597 return -EFAULT; 1598 VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx", 1599 data.feat[0], 1600 data.feat[1], 1601 data.feat[2]); 1602 return 0; 1603 } 1604 1605 static int kvm_s390_get_machine_feat(struct kvm *kvm, 1606 struct kvm_device_attr *attr) 1607 { 1608 struct kvm_s390_vm_cpu_feat data; 1609 1610 bitmap_to_arr64(data.feat, kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS); 1611 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data))) 1612 return -EFAULT; 1613 VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx", 1614 data.feat[0], 1615 data.feat[1], 1616 data.feat[2]); 1617 return 0; 1618 } 1619 1620 static int kvm_s390_get_processor_subfunc(struct kvm *kvm, 1621 struct kvm_device_attr *attr) 1622 { 1623 if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs, 1624 sizeof(struct kvm_s390_vm_cpu_subfunc))) 1625 return -EFAULT; 1626 1627 VM_EVENT(kvm, 3, "GET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1628 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0], 1629 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1], 1630 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2], 1631 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]); 1632 VM_EVENT(kvm, 3, "GET: guest PTFF subfunc 0x%16.16lx.%16.16lx", 1633 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0], 1634 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]); 1635 VM_EVENT(kvm, 3, "GET: guest KMAC subfunc 0x%16.16lx.%16.16lx", 1636 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0], 1637 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]); 1638 VM_EVENT(kvm, 3, "GET: guest KMC subfunc 0x%16.16lx.%16.16lx", 1639 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0], 1640 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]); 1641 VM_EVENT(kvm, 3, "GET: guest KM subfunc 0x%16.16lx.%16.16lx", 1642 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0], 1643 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]); 1644 VM_EVENT(kvm, 3, "GET: guest KIMD subfunc 0x%16.16lx.%16.16lx", 1645 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0], 1646 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]); 1647 VM_EVENT(kvm, 3, "GET: guest KLMD subfunc 0x%16.16lx.%16.16lx", 1648 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0], 1649 ((unsigned long *) 
&kvm->arch.model.subfuncs.klmd)[1]); 1650 VM_EVENT(kvm, 3, "GET: guest PCKMO subfunc 0x%16.16lx.%16.16lx", 1651 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0], 1652 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]); 1653 VM_EVENT(kvm, 3, "GET: guest KMCTR subfunc 0x%16.16lx.%16.16lx", 1654 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0], 1655 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]); 1656 VM_EVENT(kvm, 3, "GET: guest KMF subfunc 0x%16.16lx.%16.16lx", 1657 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0], 1658 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]); 1659 VM_EVENT(kvm, 3, "GET: guest KMO subfunc 0x%16.16lx.%16.16lx", 1660 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0], 1661 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]); 1662 VM_EVENT(kvm, 3, "GET: guest PCC subfunc 0x%16.16lx.%16.16lx", 1663 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0], 1664 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]); 1665 VM_EVENT(kvm, 3, "GET: guest PPNO subfunc 0x%16.16lx.%16.16lx", 1666 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0], 1667 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]); 1668 VM_EVENT(kvm, 3, "GET: guest KMA subfunc 0x%16.16lx.%16.16lx", 1669 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0], 1670 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]); 1671 VM_EVENT(kvm, 3, "GET: guest KDSA subfunc 0x%16.16lx.%16.16lx", 1672 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0], 1673 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]); 1674 VM_EVENT(kvm, 3, "GET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1675 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0], 1676 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1], 1677 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2], 1678 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]); 1679 VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1680 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0], 1681 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1], 1682 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2], 1683 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]); 1684 1685 return 0; 1686 } 1687 1688 static int kvm_s390_get_machine_subfunc(struct kvm *kvm, 1689 struct kvm_device_attr *attr) 1690 { 1691 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc, 1692 sizeof(struct kvm_s390_vm_cpu_subfunc))) 1693 return -EFAULT; 1694 1695 VM_EVENT(kvm, 3, "GET: host PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1696 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0], 1697 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1], 1698 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2], 1699 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]); 1700 VM_EVENT(kvm, 3, "GET: host PTFF subfunc 0x%16.16lx.%16.16lx", 1701 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0], 1702 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]); 1703 VM_EVENT(kvm, 3, "GET: host KMAC subfunc 0x%16.16lx.%16.16lx", 1704 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0], 1705 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]); 1706 VM_EVENT(kvm, 3, "GET: host KMC subfunc 0x%16.16lx.%16.16lx", 1707 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0], 1708 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]); 1709 VM_EVENT(kvm, 3, "GET: host KM subfunc 0x%16.16lx.%16.16lx", 1710 ((unsigned long *) &kvm_s390_available_subfunc.km)[0], 1711 
((unsigned long *) &kvm_s390_available_subfunc.km)[1]); 1712 VM_EVENT(kvm, 3, "GET: host KIMD subfunc 0x%16.16lx.%16.16lx", 1713 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0], 1714 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]); 1715 VM_EVENT(kvm, 3, "GET: host KLMD subfunc 0x%16.16lx.%16.16lx", 1716 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0], 1717 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]); 1718 VM_EVENT(kvm, 3, "GET: host PCKMO subfunc 0x%16.16lx.%16.16lx", 1719 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0], 1720 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]); 1721 VM_EVENT(kvm, 3, "GET: host KMCTR subfunc 0x%16.16lx.%16.16lx", 1722 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0], 1723 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]); 1724 VM_EVENT(kvm, 3, "GET: host KMF subfunc 0x%16.16lx.%16.16lx", 1725 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0], 1726 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]); 1727 VM_EVENT(kvm, 3, "GET: host KMO subfunc 0x%16.16lx.%16.16lx", 1728 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0], 1729 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]); 1730 VM_EVENT(kvm, 3, "GET: host PCC subfunc 0x%16.16lx.%16.16lx", 1731 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0], 1732 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]); 1733 VM_EVENT(kvm, 3, "GET: host PPNO subfunc 0x%16.16lx.%16.16lx", 1734 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0], 1735 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]); 1736 VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx", 1737 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0], 1738 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]); 1739 VM_EVENT(kvm, 3, "GET: host KDSA subfunc 0x%16.16lx.%16.16lx", 1740 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0], 1741 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]); 1742 VM_EVENT(kvm, 3, "GET: host SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1743 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0], 1744 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1], 1745 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2], 1746 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]); 1747 VM_EVENT(kvm, 3, "GET: host DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1748 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0], 1749 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1], 1750 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2], 1751 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]); 1752 1753 return 0; 1754 } 1755 1756 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) 1757 { 1758 int ret = -ENXIO; 1759 1760 switch (attr->attr) { 1761 case KVM_S390_VM_CPU_PROCESSOR: 1762 ret = kvm_s390_get_processor(kvm, attr); 1763 break; 1764 case KVM_S390_VM_CPU_MACHINE: 1765 ret = kvm_s390_get_machine(kvm, attr); 1766 break; 1767 case KVM_S390_VM_CPU_PROCESSOR_FEAT: 1768 ret = kvm_s390_get_processor_feat(kvm, attr); 1769 break; 1770 case KVM_S390_VM_CPU_MACHINE_FEAT: 1771 ret = kvm_s390_get_machine_feat(kvm, attr); 1772 break; 1773 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: 1774 ret = kvm_s390_get_processor_subfunc(kvm, attr); 1775 break; 1776 case KVM_S390_VM_CPU_MACHINE_SUBFUNC: 1777 ret = kvm_s390_get_machine_subfunc(kvm, attr); 1778 break; 1779 } 1780 return ret; 1781 } 1782 1783 /** 1784 * kvm_s390_update_topology_change_report - update 
CPU topology change report 1785 * @kvm: guest KVM description 1786 * @val: set or clear the MTCR bit 1787 * 1788 * Updates the Multiprocessor Topology-Change-Report bit to signal 1789 * the guest with a topology change. 1790 * This is only relevant if the topology facility is present. 1791 * 1792 * The SCA version, bsca or esca, doesn't matter as offset is the same. 1793 */ 1794 static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val) 1795 { 1796 union sca_utility new, old; 1797 struct bsca_block *sca; 1798 1799 read_lock(&kvm->arch.sca_lock); 1800 sca = kvm->arch.sca; 1801 do { 1802 old = READ_ONCE(sca->utility); 1803 new = old; 1804 new.mtcr = val; 1805 } while (cmpxchg(&sca->utility.val, old.val, new.val) != old.val); 1806 read_unlock(&kvm->arch.sca_lock); 1807 } 1808 1809 static int kvm_s390_set_topo_change_indication(struct kvm *kvm, 1810 struct kvm_device_attr *attr) 1811 { 1812 if (!test_kvm_facility(kvm, 11)) 1813 return -ENXIO; 1814 1815 kvm_s390_update_topology_change_report(kvm, !!attr->attr); 1816 return 0; 1817 } 1818 1819 static int kvm_s390_get_topo_change_indication(struct kvm *kvm, 1820 struct kvm_device_attr *attr) 1821 { 1822 u8 topo; 1823 1824 if (!test_kvm_facility(kvm, 11)) 1825 return -ENXIO; 1826 1827 read_lock(&kvm->arch.sca_lock); 1828 topo = ((struct bsca_block *)kvm->arch.sca)->utility.mtcr; 1829 read_unlock(&kvm->arch.sca_lock); 1830 1831 return put_user(topo, (u8 __user *)attr->addr); 1832 } 1833 1834 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr) 1835 { 1836 int ret; 1837 1838 switch (attr->group) { 1839 case KVM_S390_VM_MEM_CTRL: 1840 ret = kvm_s390_set_mem_control(kvm, attr); 1841 break; 1842 case KVM_S390_VM_TOD: 1843 ret = kvm_s390_set_tod(kvm, attr); 1844 break; 1845 case KVM_S390_VM_CPU_MODEL: 1846 ret = kvm_s390_set_cpu_model(kvm, attr); 1847 break; 1848 case KVM_S390_VM_CRYPTO: 1849 ret = kvm_s390_vm_set_crypto(kvm, attr); 1850 break; 1851 case KVM_S390_VM_MIGRATION: 1852 ret = kvm_s390_vm_set_migration(kvm, attr); 1853 break; 1854 case KVM_S390_VM_CPU_TOPOLOGY: 1855 ret = kvm_s390_set_topo_change_indication(kvm, attr); 1856 break; 1857 default: 1858 ret = -ENXIO; 1859 break; 1860 } 1861 1862 return ret; 1863 } 1864 1865 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr) 1866 { 1867 int ret; 1868 1869 switch (attr->group) { 1870 case KVM_S390_VM_MEM_CTRL: 1871 ret = kvm_s390_get_mem_control(kvm, attr); 1872 break; 1873 case KVM_S390_VM_TOD: 1874 ret = kvm_s390_get_tod(kvm, attr); 1875 break; 1876 case KVM_S390_VM_CPU_MODEL: 1877 ret = kvm_s390_get_cpu_model(kvm, attr); 1878 break; 1879 case KVM_S390_VM_MIGRATION: 1880 ret = kvm_s390_vm_get_migration(kvm, attr); 1881 break; 1882 case KVM_S390_VM_CPU_TOPOLOGY: 1883 ret = kvm_s390_get_topo_change_indication(kvm, attr); 1884 break; 1885 default: 1886 ret = -ENXIO; 1887 break; 1888 } 1889 1890 return ret; 1891 } 1892 1893 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) 1894 { 1895 int ret; 1896 1897 switch (attr->group) { 1898 case KVM_S390_VM_MEM_CTRL: 1899 switch (attr->attr) { 1900 case KVM_S390_VM_MEM_ENABLE_CMMA: 1901 case KVM_S390_VM_MEM_CLR_CMMA: 1902 ret = sclp.has_cmma ? 
0 : -ENXIO; 1903 break; 1904 case KVM_S390_VM_MEM_LIMIT_SIZE: 1905 ret = 0; 1906 break; 1907 default: 1908 ret = -ENXIO; 1909 break; 1910 } 1911 break; 1912 case KVM_S390_VM_TOD: 1913 switch (attr->attr) { 1914 case KVM_S390_VM_TOD_LOW: 1915 case KVM_S390_VM_TOD_HIGH: 1916 ret = 0; 1917 break; 1918 default: 1919 ret = -ENXIO; 1920 break; 1921 } 1922 break; 1923 case KVM_S390_VM_CPU_MODEL: 1924 switch (attr->attr) { 1925 case KVM_S390_VM_CPU_PROCESSOR: 1926 case KVM_S390_VM_CPU_MACHINE: 1927 case KVM_S390_VM_CPU_PROCESSOR_FEAT: 1928 case KVM_S390_VM_CPU_MACHINE_FEAT: 1929 case KVM_S390_VM_CPU_MACHINE_SUBFUNC: 1930 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: 1931 ret = 0; 1932 break; 1933 default: 1934 ret = -ENXIO; 1935 break; 1936 } 1937 break; 1938 case KVM_S390_VM_CRYPTO: 1939 switch (attr->attr) { 1940 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW: 1941 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW: 1942 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW: 1943 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW: 1944 ret = 0; 1945 break; 1946 case KVM_S390_VM_CRYPTO_ENABLE_APIE: 1947 case KVM_S390_VM_CRYPTO_DISABLE_APIE: 1948 ret = ap_instructions_available() ? 0 : -ENXIO; 1949 break; 1950 default: 1951 ret = -ENXIO; 1952 break; 1953 } 1954 break; 1955 case KVM_S390_VM_MIGRATION: 1956 ret = 0; 1957 break; 1958 case KVM_S390_VM_CPU_TOPOLOGY: 1959 ret = test_kvm_facility(kvm, 11) ? 0 : -ENXIO; 1960 break; 1961 default: 1962 ret = -ENXIO; 1963 break; 1964 } 1965 1966 return ret; 1967 } 1968 1969 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) 1970 { 1971 uint8_t *keys; 1972 uint64_t hva; 1973 int srcu_idx, i, r = 0; 1974 1975 if (args->flags != 0) 1976 return -EINVAL; 1977 1978 /* Is this guest using storage keys? */ 1979 if (!mm_uses_skeys(current->mm)) 1980 return KVM_S390_GET_SKEYS_NONE; 1981 1982 /* Enforce sane limit on memory allocation */ 1983 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX) 1984 return -EINVAL; 1985 1986 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT); 1987 if (!keys) 1988 return -ENOMEM; 1989 1990 mmap_read_lock(current->mm); 1991 srcu_idx = srcu_read_lock(&kvm->srcu); 1992 for (i = 0; i < args->count; i++) { 1993 hva = gfn_to_hva(kvm, args->start_gfn + i); 1994 if (kvm_is_error_hva(hva)) { 1995 r = -EFAULT; 1996 break; 1997 } 1998 1999 r = get_guest_storage_key(current->mm, hva, &keys[i]); 2000 if (r) 2001 break; 2002 } 2003 srcu_read_unlock(&kvm->srcu, srcu_idx); 2004 mmap_read_unlock(current->mm); 2005 2006 if (!r) { 2007 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys, 2008 sizeof(uint8_t) * args->count); 2009 if (r) 2010 r = -EFAULT; 2011 } 2012 2013 kvfree(keys); 2014 return r; 2015 } 2016 2017 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) 2018 { 2019 uint8_t *keys; 2020 uint64_t hva; 2021 int srcu_idx, i, r = 0; 2022 bool unlocked; 2023 2024 if (args->flags != 0) 2025 return -EINVAL; 2026 2027 /* Enforce sane limit on memory allocation */ 2028 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX) 2029 return -EINVAL; 2030 2031 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT); 2032 if (!keys) 2033 return -ENOMEM; 2034 2035 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr, 2036 sizeof(uint8_t) * args->count); 2037 if (r) { 2038 r = -EFAULT; 2039 goto out; 2040 } 2041 2042 /* Enable storage key handling for the guest */ 2043 r = s390_enable_skey(); 2044 if (r) 2045 goto out; 2046 2047 i = 0; 2048 mmap_read_lock(current->mm); 2049 srcu_idx = 
srcu_read_lock(&kvm->srcu); 2050 while (i < args->count) { 2051 unlocked = false; 2052 hva = gfn_to_hva(kvm, args->start_gfn + i); 2053 if (kvm_is_error_hva(hva)) { 2054 r = -EFAULT; 2055 break; 2056 } 2057 2058 /* Lowest order bit is reserved */ 2059 if (keys[i] & 0x01) { 2060 r = -EINVAL; 2061 break; 2062 } 2063 2064 r = set_guest_storage_key(current->mm, hva, keys[i], 0); 2065 if (r) { 2066 r = fixup_user_fault(current->mm, hva, 2067 FAULT_FLAG_WRITE, &unlocked); 2068 if (r) 2069 break; 2070 } 2071 if (!r) 2072 i++; 2073 } 2074 srcu_read_unlock(&kvm->srcu, srcu_idx); 2075 mmap_read_unlock(current->mm); 2076 out: 2077 kvfree(keys); 2078 return r; 2079 } 2080 2081 /* 2082 * Base address and length must be sent at the start of each block, therefore 2083 * it's cheaper to send some clean data, as long as it's less than the size of 2084 * two longs. 2085 */ 2086 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *)) 2087 /* for consistency */ 2088 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX) 2089 2090 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args, 2091 u8 *res, unsigned long bufsize) 2092 { 2093 unsigned long pgstev, hva, cur_gfn = args->start_gfn; 2094 2095 args->count = 0; 2096 while (args->count < bufsize) { 2097 hva = gfn_to_hva(kvm, cur_gfn); 2098 /* 2099 * We return an error if the first value was invalid, but we 2100 * return successfully if at least one value was copied. 2101 */ 2102 if (kvm_is_error_hva(hva)) 2103 return args->count ? 0 : -EFAULT; 2104 if (get_pgste(kvm->mm, hva, &pgstev) < 0) 2105 pgstev = 0; 2106 res[args->count++] = (pgstev >> 24) & 0x43; 2107 cur_gfn++; 2108 } 2109 2110 return 0; 2111 } 2112 2113 static struct kvm_memory_slot *gfn_to_memslot_approx(struct kvm_memslots *slots, 2114 gfn_t gfn) 2115 { 2116 return ____gfn_to_memslot(slots, gfn, true); 2117 } 2118 2119 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots, 2120 unsigned long cur_gfn) 2121 { 2122 struct kvm_memory_slot *ms = gfn_to_memslot_approx(slots, cur_gfn); 2123 unsigned long ofs = cur_gfn - ms->base_gfn; 2124 struct rb_node *mnode = &ms->gfn_node[slots->node_idx]; 2125 2126 if (ms->base_gfn + ms->npages <= cur_gfn) { 2127 mnode = rb_next(mnode); 2128 /* If we are above the highest slot, wrap around */ 2129 if (!mnode) 2130 mnode = rb_first(&slots->gfn_tree); 2131 2132 ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]); 2133 ofs = 0; 2134 } 2135 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs); 2136 while (ofs >= ms->npages && (mnode = rb_next(mnode))) { 2137 ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]); 2138 ofs = find_first_bit(kvm_second_dirty_bitmap(ms), ms->npages); 2139 } 2140 return ms->base_gfn + ofs; 2141 } 2142 2143 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args, 2144 u8 *res, unsigned long bufsize) 2145 { 2146 unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev; 2147 struct kvm_memslots *slots = kvm_memslots(kvm); 2148 struct kvm_memory_slot *ms; 2149 2150 if (unlikely(kvm_memslots_empty(slots))) 2151 return 0; 2152 2153 cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn); 2154 ms = gfn_to_memslot(kvm, cur_gfn); 2155 args->count = 0; 2156 args->start_gfn = cur_gfn; 2157 if (!ms) 2158 return 0; 2159 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1); 2160 mem_end = kvm_s390_get_gfn_end(slots); 2161 2162 while (args->count < bufsize) { 2163 hva = gfn_to_hva(kvm, cur_gfn); 2164 if (kvm_is_error_hva(hva)) 2165 
return 0; 2166 /* Decrement only if we actually flipped the bit to 0 */ 2167 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms))) 2168 atomic64_dec(&kvm->arch.cmma_dirty_pages); 2169 if (get_pgste(kvm->mm, hva, &pgstev) < 0) 2170 pgstev = 0; 2171 /* Save the value */ 2172 res[args->count++] = (pgstev >> 24) & 0x43; 2173 /* If the next bit is too far away, stop. */ 2174 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE) 2175 return 0; 2176 /* If we reached the previous "next", find the next one */ 2177 if (cur_gfn == next_gfn) 2178 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1); 2179 /* Reached the end of memory or of the buffer, stop */ 2180 if ((next_gfn >= mem_end) || 2181 (next_gfn - args->start_gfn >= bufsize)) 2182 return 0; 2183 cur_gfn++; 2184 /* Reached the end of the current memslot, take the next one. */ 2185 if (cur_gfn - ms->base_gfn >= ms->npages) { 2186 ms = gfn_to_memslot(kvm, cur_gfn); 2187 if (!ms) 2188 return 0; 2189 } 2190 } 2191 return 0; 2192 } 2193 2194 /* 2195 * This function searches for the next page with dirty CMMA attributes, and 2196 * saves the attributes in the buffer up to either the end of the buffer or 2197 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found; 2198 * no trailing clean bytes are saved. 2199 * In case no dirty bits were found, or if CMMA was not enabled or used, the 2200 * output buffer will indicate 0 as length. 2201 */ 2202 static int kvm_s390_get_cmma_bits(struct kvm *kvm, 2203 struct kvm_s390_cmma_log *args) 2204 { 2205 unsigned long bufsize; 2206 int srcu_idx, peek, ret; 2207 u8 *values; 2208 2209 if (!kvm->arch.use_cmma) 2210 return -ENXIO; 2211 /* Invalid/unsupported flags were specified */ 2212 if (args->flags & ~KVM_S390_CMMA_PEEK) 2213 return -EINVAL; 2214 /* Migration mode query, and we are not doing a migration */ 2215 peek = !!(args->flags & KVM_S390_CMMA_PEEK); 2216 if (!peek && !kvm->arch.migration_mode) 2217 return -EINVAL; 2218 /* CMMA is disabled or was not used, or the buffer has length zero */ 2219 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX); 2220 if (!bufsize || !kvm->mm->context.uses_cmm) { 2221 memset(args, 0, sizeof(*args)); 2222 return 0; 2223 } 2224 /* We are not peeking, and there are no dirty pages */ 2225 if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) { 2226 memset(args, 0, sizeof(*args)); 2227 return 0; 2228 } 2229 2230 values = vmalloc(bufsize); 2231 if (!values) 2232 return -ENOMEM; 2233 2234 mmap_read_lock(kvm->mm); 2235 srcu_idx = srcu_read_lock(&kvm->srcu); 2236 if (peek) 2237 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize); 2238 else 2239 ret = kvm_s390_get_cmma(kvm, args, values, bufsize); 2240 srcu_read_unlock(&kvm->srcu, srcu_idx); 2241 mmap_read_unlock(kvm->mm); 2242 2243 if (kvm->arch.migration_mode) 2244 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages); 2245 else 2246 args->remaining = 0; 2247 2248 if (copy_to_user((void __user *)args->values, values, args->count)) 2249 ret = -EFAULT; 2250 2251 vfree(values); 2252 return ret; 2253 } 2254 2255 /* 2256 * This function sets the CMMA attributes for the given pages. If the input 2257 * buffer has zero length, no action is taken, otherwise the attributes are 2258 * set and the mm->context.uses_cmm flag is set. 
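 * Note that only the usage-state and NODAT bits of the supplied mask are
 * applied; everything else is masked off via _PGSTE_GPS_USAGE_MASK and
 * _PGSTE_GPS_NODAT before the PGSTEs are updated.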
2259 */ 2260 static int kvm_s390_set_cmma_bits(struct kvm *kvm, 2261 const struct kvm_s390_cmma_log *args) 2262 { 2263 unsigned long hva, mask, pgstev, i; 2264 uint8_t *bits; 2265 int srcu_idx, r = 0; 2266 2267 mask = args->mask; 2268 2269 if (!kvm->arch.use_cmma) 2270 return -ENXIO; 2271 /* invalid/unsupported flags */ 2272 if (args->flags != 0) 2273 return -EINVAL; 2274 /* Enforce sane limit on memory allocation */ 2275 if (args->count > KVM_S390_CMMA_SIZE_MAX) 2276 return -EINVAL; 2277 /* Nothing to do */ 2278 if (args->count == 0) 2279 return 0; 2280 2281 bits = vmalloc(array_size(sizeof(*bits), args->count)); 2282 if (!bits) 2283 return -ENOMEM; 2284 2285 r = copy_from_user(bits, (void __user *)args->values, args->count); 2286 if (r) { 2287 r = -EFAULT; 2288 goto out; 2289 } 2290 2291 mmap_read_lock(kvm->mm); 2292 srcu_idx = srcu_read_lock(&kvm->srcu); 2293 for (i = 0; i < args->count; i++) { 2294 hva = gfn_to_hva(kvm, args->start_gfn + i); 2295 if (kvm_is_error_hva(hva)) { 2296 r = -EFAULT; 2297 break; 2298 } 2299 2300 pgstev = bits[i]; 2301 pgstev = pgstev << 24; 2302 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT; 2303 set_pgste_bits(kvm->mm, hva, mask, pgstev); 2304 } 2305 srcu_read_unlock(&kvm->srcu, srcu_idx); 2306 mmap_read_unlock(kvm->mm); 2307 2308 if (!kvm->mm->context.uses_cmm) { 2309 mmap_write_lock(kvm->mm); 2310 kvm->mm->context.uses_cmm = 1; 2311 mmap_write_unlock(kvm->mm); 2312 } 2313 out: 2314 vfree(bits); 2315 return r; 2316 } 2317 2318 /** 2319 * kvm_s390_cpus_from_pv - Convert all protected vCPUs in a protected VM to 2320 * non protected. 2321 * @kvm: the VM whose protected vCPUs are to be converted 2322 * @rc: return value for the RC field of the UVC (in case of error) 2323 * @rrc: return value for the RRC field of the UVC (in case of error) 2324 * 2325 * Does not stop in case of error, tries to convert as many 2326 * CPUs as possible. In case of error, the RC and RRC of the last error are 2327 * returned. 2328 * 2329 * Return: 0 in case of success, otherwise -EIO 2330 */ 2331 int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc) 2332 { 2333 struct kvm_vcpu *vcpu; 2334 unsigned long i; 2335 u16 _rc, _rrc; 2336 int ret = 0; 2337 2338 /* 2339 * We ignore failures and try to destroy as many CPUs as possible. 2340 * At the same time we must not free the assigned resources when 2341 * this fails, as the ultravisor has still access to that memory. 2342 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak 2343 * behind. 2344 * We want to return the first failure rc and rrc, though. 2345 */ 2346 kvm_for_each_vcpu(i, vcpu, kvm) { 2347 mutex_lock(&vcpu->mutex); 2348 if (kvm_s390_pv_destroy_cpu(vcpu, &_rc, &_rrc) && !ret) { 2349 *rc = _rc; 2350 *rrc = _rrc; 2351 ret = -EIO; 2352 } 2353 mutex_unlock(&vcpu->mutex); 2354 } 2355 /* Ensure that we re-enable gisa if the non-PV guest used it but the PV guest did not. */ 2356 if (use_gisa) 2357 kvm_s390_gisa_enable(kvm); 2358 return ret; 2359 } 2360 2361 /** 2362 * kvm_s390_cpus_to_pv - Convert all non-protected vCPUs in a protected VM 2363 * to protected. 2364 * @kvm: the VM whose protected vCPUs are to be converted 2365 * @rc: return value for the RC field of the UVC (in case of error) 2366 * @rrc: return value for the RRC field of the UVC (in case of error) 2367 * 2368 * Tries to undo the conversion in case of error. 
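 * If creating the protected counterpart of any vCPU fails, all vCPUs
 * that were already converted are switched back to non-protected via
 * kvm_s390_cpus_from_pv().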
2369 * 2370 * Return: 0 in case of success, otherwise -EIO 2371 */ 2372 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc) 2373 { 2374 unsigned long i; 2375 int r = 0; 2376 u16 dummy; 2377 2378 struct kvm_vcpu *vcpu; 2379 2380 /* Disable the GISA if the ultravisor does not support AIV. */ 2381 if (!test_bit_inv(BIT_UV_FEAT_AIV, &uv_info.uv_feature_indications)) 2382 kvm_s390_gisa_disable(kvm); 2383 2384 kvm_for_each_vcpu(i, vcpu, kvm) { 2385 mutex_lock(&vcpu->mutex); 2386 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc); 2387 mutex_unlock(&vcpu->mutex); 2388 if (r) 2389 break; 2390 } 2391 if (r) 2392 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy); 2393 return r; 2394 } 2395 2396 /* 2397 * Here we provide user space with a direct interface to query UV 2398 * related data like UV maxima and available features as well as 2399 * feature specific data. 2400 * 2401 * To facilitate future extension of the data structures we'll try to 2402 * write data up to the maximum requested length. 2403 */ 2404 static ssize_t kvm_s390_handle_pv_info(struct kvm_s390_pv_info *info) 2405 { 2406 ssize_t len_min; 2407 2408 switch (info->header.id) { 2409 case KVM_PV_INFO_VM: { 2410 len_min = sizeof(info->header) + sizeof(info->vm); 2411 2412 if (info->header.len_max < len_min) 2413 return -EINVAL; 2414 2415 memcpy(info->vm.inst_calls_list, 2416 uv_info.inst_calls_list, 2417 sizeof(uv_info.inst_calls_list)); 2418 2419 /* It's max cpuid not max cpus, so it's off by one */ 2420 info->vm.max_cpus = uv_info.max_guest_cpu_id + 1; 2421 info->vm.max_guests = uv_info.max_num_sec_conf; 2422 info->vm.max_guest_addr = uv_info.max_sec_stor_addr; 2423 info->vm.feature_indication = uv_info.uv_feature_indications; 2424 2425 return len_min; 2426 } 2427 case KVM_PV_INFO_DUMP: { 2428 len_min = sizeof(info->header) + sizeof(info->dump); 2429 2430 if (info->header.len_max < len_min) 2431 return -EINVAL; 2432 2433 info->dump.dump_cpu_buffer_len = uv_info.guest_cpu_stor_len; 2434 info->dump.dump_config_mem_buffer_per_1m = uv_info.conf_dump_storage_state_len; 2435 info->dump.dump_config_finalize_len = uv_info.conf_dump_finalize_len; 2436 return len_min; 2437 } 2438 default: 2439 return -EINVAL; 2440 } 2441 } 2442 2443 static int kvm_s390_pv_dmp(struct kvm *kvm, struct kvm_pv_cmd *cmd, 2444 struct kvm_s390_pv_dmp dmp) 2445 { 2446 int r = -EINVAL; 2447 void __user *result_buff = (void __user *)dmp.buff_addr; 2448 2449 switch (dmp.subcmd) { 2450 case KVM_PV_DUMP_INIT: { 2451 if (kvm->arch.pv.dumping) 2452 break; 2453 2454 /* 2455 * Block SIE entry as concurrent dump UVCs could lead 2456 * to validities. 2457 */ 2458 kvm_s390_vcpu_block_all(kvm); 2459 2460 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), 2461 UVC_CMD_DUMP_INIT, &cmd->rc, &cmd->rrc); 2462 KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP INIT: rc %x rrc %x", 2463 cmd->rc, cmd->rrc); 2464 if (!r) { 2465 kvm->arch.pv.dumping = true; 2466 } else { 2467 kvm_s390_vcpu_unblock_all(kvm); 2468 r = -EINVAL; 2469 } 2470 break; 2471 } 2472 case KVM_PV_DUMP_CONFIG_STOR_STATE: { 2473 if (!kvm->arch.pv.dumping) 2474 break; 2475 2476 /* 2477 * gaddr is an output parameter since we might stop 2478 * early. As dmp will be copied back in our caller, we 2479 * don't need to do it ourselves. 
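 * (The KVM_PV_DUMP handler in kvm_s390_handle_pv() copies the whole
 * struct back to user space on success.)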
2480 */ 2481 r = kvm_s390_pv_dump_stor_state(kvm, result_buff, &dmp.gaddr, dmp.buff_len, 2482 &cmd->rc, &cmd->rrc); 2483 break; 2484 } 2485 case KVM_PV_DUMP_COMPLETE: { 2486 if (!kvm->arch.pv.dumping) 2487 break; 2488 2489 r = -EINVAL; 2490 if (dmp.buff_len < uv_info.conf_dump_finalize_len) 2491 break; 2492 2493 r = kvm_s390_pv_dump_complete(kvm, result_buff, 2494 &cmd->rc, &cmd->rrc); 2495 break; 2496 } 2497 default: 2498 r = -ENOTTY; 2499 break; 2500 } 2501 2502 return r; 2503 } 2504 2505 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) 2506 { 2507 int r = 0; 2508 u16 dummy; 2509 void __user *argp = (void __user *)cmd->data; 2510 2511 switch (cmd->cmd) { 2512 case KVM_PV_ENABLE: { 2513 r = -EINVAL; 2514 if (kvm_s390_pv_is_protected(kvm)) 2515 break; 2516 2517 /* 2518 * FMT 4 SIE needs esca. As we never switch back to bsca from 2519 * esca, we need no cleanup in the error cases below 2520 */ 2521 r = sca_switch_to_extended(kvm); 2522 if (r) 2523 break; 2524 2525 mmap_write_lock(current->mm); 2526 r = gmap_mark_unmergeable(); 2527 mmap_write_unlock(current->mm); 2528 if (r) 2529 break; 2530 2531 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc); 2532 if (r) 2533 break; 2534 2535 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc); 2536 if (r) 2537 kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy); 2538 2539 /* we need to block service interrupts from now on */ 2540 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs); 2541 break; 2542 } 2543 case KVM_PV_DISABLE: { 2544 r = -EINVAL; 2545 if (!kvm_s390_pv_is_protected(kvm)) 2546 break; 2547 2548 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc); 2549 /* 2550 * If a CPU could not be destroyed, destroy VM will also fail. 2551 * There is no point in trying to destroy it. Instead return 2552 * the rc and rrc from the first CPU that failed destroying. 
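 * Only once all CPUs have been destroyed successfully is the VM itself
 * deinitialized and the service-interrupt blocking lifted again.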
2553 */ 2554 if (r) 2555 break; 2556 r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc); 2557 2558 /* no need to block service interrupts any more */ 2559 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs); 2560 break; 2561 } 2562 case KVM_PV_SET_SEC_PARMS: { 2563 struct kvm_s390_pv_sec_parm parms = {}; 2564 void *hdr; 2565 2566 r = -EINVAL; 2567 if (!kvm_s390_pv_is_protected(kvm)) 2568 break; 2569 2570 r = -EFAULT; 2571 if (copy_from_user(&parms, argp, sizeof(parms))) 2572 break; 2573 2574 /* Currently restricted to 8KB */ 2575 r = -EINVAL; 2576 if (parms.length > PAGE_SIZE * 2) 2577 break; 2578 2579 r = -ENOMEM; 2580 hdr = vmalloc(parms.length); 2581 if (!hdr) 2582 break; 2583 2584 r = -EFAULT; 2585 if (!copy_from_user(hdr, (void __user *)parms.origin, 2586 parms.length)) 2587 r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length, 2588 &cmd->rc, &cmd->rrc); 2589 2590 vfree(hdr); 2591 break; 2592 } 2593 case KVM_PV_UNPACK: { 2594 struct kvm_s390_pv_unp unp = {}; 2595 2596 r = -EINVAL; 2597 if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm)) 2598 break; 2599 2600 r = -EFAULT; 2601 if (copy_from_user(&unp, argp, sizeof(unp))) 2602 break; 2603 2604 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak, 2605 &cmd->rc, &cmd->rrc); 2606 break; 2607 } 2608 case KVM_PV_VERIFY: { 2609 r = -EINVAL; 2610 if (!kvm_s390_pv_is_protected(kvm)) 2611 break; 2612 2613 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), 2614 UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc); 2615 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc, 2616 cmd->rrc); 2617 break; 2618 } 2619 case KVM_PV_PREP_RESET: { 2620 r = -EINVAL; 2621 if (!kvm_s390_pv_is_protected(kvm)) 2622 break; 2623 2624 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), 2625 UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc); 2626 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x", 2627 cmd->rc, cmd->rrc); 2628 break; 2629 } 2630 case KVM_PV_UNSHARE_ALL: { 2631 r = -EINVAL; 2632 if (!kvm_s390_pv_is_protected(kvm)) 2633 break; 2634 2635 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), 2636 UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc); 2637 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x", 2638 cmd->rc, cmd->rrc); 2639 break; 2640 } 2641 case KVM_PV_INFO: { 2642 struct kvm_s390_pv_info info = {}; 2643 ssize_t data_len; 2644 2645 /* 2646 * No need to check the VM protection here. 2647 * 2648 * Maybe user space wants to query some of the data 2649 * when the VM is still unprotected. If we see the 2650 * need to fence a new data command we can still 2651 * return an error in the info handler. 2652 */ 2653 2654 r = -EFAULT; 2655 if (copy_from_user(&info, argp, sizeof(info.header))) 2656 break; 2657 2658 r = -EINVAL; 2659 if (info.header.len_max < sizeof(info.header)) 2660 break; 2661 2662 data_len = kvm_s390_handle_pv_info(&info); 2663 if (data_len < 0) { 2664 r = data_len; 2665 break; 2666 } 2667 /* 2668 * If a data command struct is extended (multiple 2669 * times) this can be used to determine how much of it 2670 * is valid. 
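 * len_written below therefore reflects the number of bytes the info
 * handler actually filled in.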
2671 */ 2672 info.header.len_written = data_len; 2673 2674 r = -EFAULT; 2675 if (copy_to_user(argp, &info, data_len)) 2676 break; 2677 2678 r = 0; 2679 break; 2680 } 2681 case KVM_PV_DUMP: { 2682 struct kvm_s390_pv_dmp dmp; 2683 2684 r = -EINVAL; 2685 if (!kvm_s390_pv_is_protected(kvm)) 2686 break; 2687 2688 r = -EFAULT; 2689 if (copy_from_user(&dmp, argp, sizeof(dmp))) 2690 break; 2691 2692 r = kvm_s390_pv_dmp(kvm, cmd, dmp); 2693 if (r) 2694 break; 2695 2696 if (copy_to_user(argp, &dmp, sizeof(dmp))) { 2697 r = -EFAULT; 2698 break; 2699 } 2700 2701 break; 2702 } 2703 default: 2704 r = -ENOTTY; 2705 } 2706 return r; 2707 } 2708 2709 static bool access_key_invalid(u8 access_key) 2710 { 2711 return access_key > 0xf; 2712 } 2713 2714 static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop) 2715 { 2716 void __user *uaddr = (void __user *)mop->buf; 2717 u64 supported_flags; 2718 void *tmpbuf = NULL; 2719 int r, srcu_idx; 2720 2721 supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION 2722 | KVM_S390_MEMOP_F_CHECK_ONLY; 2723 if (mop->flags & ~supported_flags || !mop->size) 2724 return -EINVAL; 2725 if (mop->size > MEM_OP_MAX_SIZE) 2726 return -E2BIG; 2727 /* 2728 * This is technically a heuristic only, if the kvm->lock is not 2729 * taken, it is not guaranteed that the vm is/remains non-protected. 2730 * This is ok from a kernel perspective, wrongdoing is detected 2731 * on the access, -EFAULT is returned and the vm may crash the 2732 * next time it accesses the memory in question. 2733 * There is no sane usecase to do switching and a memop on two 2734 * different CPUs at the same time. 2735 */ 2736 if (kvm_s390_pv_get_handle(kvm)) 2737 return -EINVAL; 2738 if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) { 2739 if (access_key_invalid(mop->key)) 2740 return -EINVAL; 2741 } else { 2742 mop->key = 0; 2743 } 2744 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) { 2745 tmpbuf = vmalloc(mop->size); 2746 if (!tmpbuf) 2747 return -ENOMEM; 2748 } 2749 2750 srcu_idx = srcu_read_lock(&kvm->srcu); 2751 2752 if (kvm_is_error_gpa(kvm, mop->gaddr)) { 2753 r = PGM_ADDRESSING; 2754 goto out_unlock; 2755 } 2756 2757 switch (mop->op) { 2758 case KVM_S390_MEMOP_ABSOLUTE_READ: { 2759 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { 2760 r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_FETCH, mop->key); 2761 } else { 2762 r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf, 2763 mop->size, GACC_FETCH, mop->key); 2764 if (r == 0) { 2765 if (copy_to_user(uaddr, tmpbuf, mop->size)) 2766 r = -EFAULT; 2767 } 2768 } 2769 break; 2770 } 2771 case KVM_S390_MEMOP_ABSOLUTE_WRITE: { 2772 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { 2773 r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_STORE, mop->key); 2774 } else { 2775 if (copy_from_user(tmpbuf, uaddr, mop->size)) { 2776 r = -EFAULT; 2777 break; 2778 } 2779 r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf, 2780 mop->size, GACC_STORE, mop->key); 2781 } 2782 break; 2783 } 2784 default: 2785 r = -EINVAL; 2786 } 2787 2788 out_unlock: 2789 srcu_read_unlock(&kvm->srcu, srcu_idx); 2790 2791 vfree(tmpbuf); 2792 return r; 2793 } 2794 2795 long kvm_arch_vm_ioctl(struct file *filp, 2796 unsigned int ioctl, unsigned long arg) 2797 { 2798 struct kvm *kvm = filp->private_data; 2799 void __user *argp = (void __user *)arg; 2800 struct kvm_device_attr attr; 2801 int r; 2802 2803 switch (ioctl) { 2804 case KVM_S390_INTERRUPT: { 2805 struct kvm_s390_interrupt s390int; 2806 2807 r = -EFAULT; 2808 if (copy_from_user(&s390int, argp, sizeof(s390int))) 2809 break; 
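		/*
		 * Hypothetical user-space sketch of how this path is reached
		 * (vm_fd and cookie are assumed names, and the type/parm
		 * values must be ones kvm_s390_inject_vm() accepts):
		 *
		 *	struct kvm_s390_interrupt s390int = {
		 *		.type = KVM_S390_INT_VIRTIO,
		 *		.parm64 = cookie,
		 *	};
		 *	ioctl(vm_fd, KVM_S390_INTERRUPT, &s390int);
		 */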
2810 r = kvm_s390_inject_vm(kvm, &s390int); 2811 break; 2812 } 2813 case KVM_CREATE_IRQCHIP: { 2814 struct kvm_irq_routing_entry routing; 2815 2816 r = -EINVAL; 2817 if (kvm->arch.use_irqchip) { 2818 /* Set up dummy routing. */ 2819 memset(&routing, 0, sizeof(routing)); 2820 r = kvm_set_irq_routing(kvm, &routing, 0, 0); 2821 } 2822 break; 2823 } 2824 case KVM_SET_DEVICE_ATTR: { 2825 r = -EFAULT; 2826 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 2827 break; 2828 r = kvm_s390_vm_set_attr(kvm, &attr); 2829 break; 2830 } 2831 case KVM_GET_DEVICE_ATTR: { 2832 r = -EFAULT; 2833 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 2834 break; 2835 r = kvm_s390_vm_get_attr(kvm, &attr); 2836 break; 2837 } 2838 case KVM_HAS_DEVICE_ATTR: { 2839 r = -EFAULT; 2840 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 2841 break; 2842 r = kvm_s390_vm_has_attr(kvm, &attr); 2843 break; 2844 } 2845 case KVM_S390_GET_SKEYS: { 2846 struct kvm_s390_skeys args; 2847 2848 r = -EFAULT; 2849 if (copy_from_user(&args, argp, 2850 sizeof(struct kvm_s390_skeys))) 2851 break; 2852 r = kvm_s390_get_skeys(kvm, &args); 2853 break; 2854 } 2855 case KVM_S390_SET_SKEYS: { 2856 struct kvm_s390_skeys args; 2857 2858 r = -EFAULT; 2859 if (copy_from_user(&args, argp, 2860 sizeof(struct kvm_s390_skeys))) 2861 break; 2862 r = kvm_s390_set_skeys(kvm, &args); 2863 break; 2864 } 2865 case KVM_S390_GET_CMMA_BITS: { 2866 struct kvm_s390_cmma_log args; 2867 2868 r = -EFAULT; 2869 if (copy_from_user(&args, argp, sizeof(args))) 2870 break; 2871 mutex_lock(&kvm->slots_lock); 2872 r = kvm_s390_get_cmma_bits(kvm, &args); 2873 mutex_unlock(&kvm->slots_lock); 2874 if (!r) { 2875 r = copy_to_user(argp, &args, sizeof(args)); 2876 if (r) 2877 r = -EFAULT; 2878 } 2879 break; 2880 } 2881 case KVM_S390_SET_CMMA_BITS: { 2882 struct kvm_s390_cmma_log args; 2883 2884 r = -EFAULT; 2885 if (copy_from_user(&args, argp, sizeof(args))) 2886 break; 2887 mutex_lock(&kvm->slots_lock); 2888 r = kvm_s390_set_cmma_bits(kvm, &args); 2889 mutex_unlock(&kvm->slots_lock); 2890 break; 2891 } 2892 case KVM_S390_PV_COMMAND: { 2893 struct kvm_pv_cmd args; 2894 2895 /* protvirt means user cpu state */ 2896 kvm_s390_set_user_cpu_state_ctrl(kvm); 2897 r = 0; 2898 if (!is_prot_virt_host()) { 2899 r = -EINVAL; 2900 break; 2901 } 2902 if (copy_from_user(&args, argp, sizeof(args))) { 2903 r = -EFAULT; 2904 break; 2905 } 2906 if (args.flags) { 2907 r = -EINVAL; 2908 break; 2909 } 2910 mutex_lock(&kvm->lock); 2911 r = kvm_s390_handle_pv(kvm, &args); 2912 mutex_unlock(&kvm->lock); 2913 if (copy_to_user(argp, &args, sizeof(args))) { 2914 r = -EFAULT; 2915 break; 2916 } 2917 break; 2918 } 2919 case KVM_S390_MEM_OP: { 2920 struct kvm_s390_mem_op mem_op; 2921 2922 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0) 2923 r = kvm_s390_vm_mem_op(kvm, &mem_op); 2924 else 2925 r = -EFAULT; 2926 break; 2927 } 2928 case KVM_S390_ZPCI_OP: { 2929 struct kvm_s390_zpci_op args; 2930 2931 r = -EINVAL; 2932 if (!IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) 2933 break; 2934 if (copy_from_user(&args, argp, sizeof(args))) { 2935 r = -EFAULT; 2936 break; 2937 } 2938 r = kvm_s390_pci_zpci_op(kvm, &args); 2939 break; 2940 } 2941 default: 2942 r = -ENOTTY; 2943 } 2944 2945 return r; 2946 } 2947 2948 static int kvm_s390_apxa_installed(void) 2949 { 2950 struct ap_config_info info; 2951 2952 if (ap_instructions_available()) { 2953 if (ap_qci(&info) == 0) 2954 return info.apxa; 2955 } 2956 2957 return 0; 2958 } 2959 2960 /* 2961 * The format of the crypto control block (CRYCB) is 
specified in the 3 low 2962 * order bits of the CRYCB designation (CRYCBD) field as follows: 2963 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the 2964 * AP extended addressing (APXA) facility is installed. 2965 * Format 1: The APXA facility is not installed but the MSAX3 facility is. 2966 * Format 2: Both the APXA and MSAX3 facilities are installed. 2967 */ 2968 static void kvm_s390_set_crycb_format(struct kvm *kvm) 2969 { 2970 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb; 2971 2972 /* Clear the CRYCB format bits - i.e., set format 0 by default */ 2973 kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK); 2974 2975 /* Check whether MSAX3 is installed */ 2976 if (!test_kvm_facility(kvm, 76)) 2977 return; 2978 2979 if (kvm_s390_apxa_installed()) 2980 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2; 2981 else 2982 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1; 2983 } 2984 2985 /* 2986 * kvm_arch_crypto_set_masks 2987 * 2988 * @kvm: pointer to the target guest's KVM struct containing the crypto masks 2989 * to be set. 2990 * @apm: the mask identifying the accessible AP adapters 2991 * @aqm: the mask identifying the accessible AP domains 2992 * @adm: the mask identifying the accessible AP control domains 2993 * 2994 * Set the masks that identify the adapters, domains and control domains to 2995 * which the KVM guest is granted access. 2996 * 2997 * Note: The kvm->lock mutex must be locked by the caller before invoking this 2998 * function. 2999 */ 3000 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm, 3001 unsigned long *aqm, unsigned long *adm) 3002 { 3003 struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb; 3004 3005 kvm_s390_vcpu_block_all(kvm); 3006 3007 switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) { 3008 case CRYCB_FORMAT2: /* APCB1 uses 256 bits */ 3009 memcpy(crycb->apcb1.apm, apm, 32); 3010 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx", 3011 apm[0], apm[1], apm[2], apm[3]); 3012 memcpy(crycb->apcb1.aqm, aqm, 32); 3013 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx", 3014 aqm[0], aqm[1], aqm[2], aqm[3]); 3015 memcpy(crycb->apcb1.adm, adm, 32); 3016 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx", 3017 adm[0], adm[1], adm[2], adm[3]); 3018 break; 3019 case CRYCB_FORMAT1: 3020 case CRYCB_FORMAT0: /* Fall through - both use APCB0 */ 3021 memcpy(crycb->apcb0.apm, apm, 8); 3022 memcpy(crycb->apcb0.aqm, aqm, 2); 3023 memcpy(crycb->apcb0.adm, adm, 2); 3024 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x", 3025 apm[0], *((unsigned short *)aqm), 3026 *((unsigned short *)adm)); 3027 break; 3028 default: /* Cannot happen */ 3029 break; 3030 } 3031 3032 /* recreate the shadow crycb for each vcpu */ 3033 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART); 3034 kvm_s390_vcpu_unblock_all(kvm); 3035 } 3036 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks); 3037 3038 /* 3039 * kvm_arch_crypto_clear_masks 3040 * 3041 * @kvm: pointer to the target guest's KVM struct containing the crypto masks 3042 * to be cleared. 3043 * 3044 * Clear the masks that identify the adapters, domains and control domains to 3045 * which the KVM guest is granted access. 3046 * 3047 * Note: The kvm->lock mutex must be locked by the caller before invoking this 3048 * function.
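 *
 * All vCPUs are blocked while the masks are cleared, and a VSIE restart
 * request is broadcast afterwards so that the shadow CRYCBs get rebuilt.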
3049 */ 3050 void kvm_arch_crypto_clear_masks(struct kvm *kvm) 3051 { 3052 kvm_s390_vcpu_block_all(kvm); 3053 3054 memset(&kvm->arch.crypto.crycb->apcb0, 0, 3055 sizeof(kvm->arch.crypto.crycb->apcb0)); 3056 memset(&kvm->arch.crypto.crycb->apcb1, 0, 3057 sizeof(kvm->arch.crypto.crycb->apcb1)); 3058 3059 VM_EVENT(kvm, 3, "%s", "CLR CRYCB:"); 3060 /* recreate the shadow crycb for each vcpu */ 3061 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART); 3062 kvm_s390_vcpu_unblock_all(kvm); 3063 } 3064 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks); 3065 3066 static u64 kvm_s390_get_initial_cpuid(void) 3067 { 3068 struct cpuid cpuid; 3069 3070 get_cpu_id(&cpuid); 3071 cpuid.version = 0xff; 3072 return *((u64 *) &cpuid); 3073 } 3074 3075 static void kvm_s390_crypto_init(struct kvm *kvm) 3076 { 3077 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb; 3078 kvm_s390_set_crycb_format(kvm); 3079 init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem); 3080 3081 if (!test_kvm_facility(kvm, 76)) 3082 return; 3083 3084 /* Enable AES/DEA protected key functions by default */ 3085 kvm->arch.crypto.aes_kw = 1; 3086 kvm->arch.crypto.dea_kw = 1; 3087 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 3088 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); 3089 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 3090 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); 3091 } 3092 3093 static void sca_dispose(struct kvm *kvm) 3094 { 3095 if (kvm->arch.use_esca) 3096 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block)); 3097 else 3098 free_page((unsigned long)(kvm->arch.sca)); 3099 kvm->arch.sca = NULL; 3100 } 3101 3102 void kvm_arch_free_vm(struct kvm *kvm) 3103 { 3104 if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) 3105 kvm_s390_pci_clear_list(kvm); 3106 3107 __kvm_arch_free_vm(kvm); 3108 } 3109 3110 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) 3111 { 3112 gfp_t alloc_flags = GFP_KERNEL_ACCOUNT; 3113 int i, rc; 3114 char debug_name[16]; 3115 static unsigned long sca_offset; 3116 3117 rc = -EINVAL; 3118 #ifdef CONFIG_KVM_S390_UCONTROL 3119 if (type & ~KVM_VM_S390_UCONTROL) 3120 goto out_err; 3121 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN))) 3122 goto out_err; 3123 #else 3124 if (type) 3125 goto out_err; 3126 #endif 3127 3128 rc = s390_enable_sie(); 3129 if (rc) 3130 goto out_err; 3131 3132 rc = -ENOMEM; 3133 3134 if (!sclp.has_64bscao) 3135 alloc_flags |= GFP_DMA; 3136 rwlock_init(&kvm->arch.sca_lock); 3137 /* start with basic SCA */ 3138 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags); 3139 if (!kvm->arch.sca) 3140 goto out_err; 3141 mutex_lock(&kvm_lock); 3142 sca_offset += 16; 3143 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE) 3144 sca_offset = 0; 3145 kvm->arch.sca = (struct bsca_block *) 3146 ((char *) kvm->arch.sca + sca_offset); 3147 mutex_unlock(&kvm_lock); 3148 3149 sprintf(debug_name, "kvm-%u", current->pid); 3150 3151 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long)); 3152 if (!kvm->arch.dbf) 3153 goto out_err; 3154 3155 BUILD_BUG_ON(sizeof(struct sie_page2) != 4096); 3156 kvm->arch.sie_page2 = 3157 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA); 3158 if (!kvm->arch.sie_page2) 3159 goto out_err; 3160 3161 kvm->arch.sie_page2->kvm = kvm; 3162 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list; 3163 3164 for (i = 0; i < kvm_s390_fac_size(); i++) { 3165 kvm->arch.model.fac_mask[i] = stfle_fac_list[i] & 3166 (kvm_s390_fac_base[i] | 3167 kvm_s390_fac_ext[i]); 3168 
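		/*
		 * Facilities in kvm_s390_fac_base end up in both the mask and
		 * the default guest facility list below; kvm_s390_fac_ext
		 * facilities are only added to the mask.
		 */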
kvm->arch.model.fac_list[i] = stfle_fac_list[i] & 3169 kvm_s390_fac_base[i]; 3170 } 3171 kvm->arch.model.subfuncs = kvm_s390_available_subfunc; 3172 3173 /* we are always in czam mode - even on pre z14 machines */ 3174 set_kvm_facility(kvm->arch.model.fac_mask, 138); 3175 set_kvm_facility(kvm->arch.model.fac_list, 138); 3176 /* we emulate STHYI in kvm */ 3177 set_kvm_facility(kvm->arch.model.fac_mask, 74); 3178 set_kvm_facility(kvm->arch.model.fac_list, 74); 3179 if (MACHINE_HAS_TLB_GUEST) { 3180 set_kvm_facility(kvm->arch.model.fac_mask, 147); 3181 set_kvm_facility(kvm->arch.model.fac_list, 147); 3182 } 3183 3184 if (css_general_characteristics.aiv && test_facility(65)) 3185 set_kvm_facility(kvm->arch.model.fac_mask, 65); 3186 3187 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid(); 3188 kvm->arch.model.ibc = sclp.ibc & 0x0fff; 3189 3190 kvm_s390_crypto_init(kvm); 3191 3192 if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) { 3193 mutex_lock(&kvm->lock); 3194 kvm_s390_pci_init_list(kvm); 3195 kvm_s390_vcpu_pci_enable_interp(kvm); 3196 mutex_unlock(&kvm->lock); 3197 } 3198 3199 mutex_init(&kvm->arch.float_int.ais_lock); 3200 spin_lock_init(&kvm->arch.float_int.lock); 3201 for (i = 0; i < FIRQ_LIST_COUNT; i++) 3202 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]); 3203 init_waitqueue_head(&kvm->arch.ipte_wq); 3204 mutex_init(&kvm->arch.ipte_mutex); 3205 3206 debug_register_view(kvm->arch.dbf, &debug_sprintf_view); 3207 VM_EVENT(kvm, 3, "vm created with type %lu", type); 3208 3209 if (type & KVM_VM_S390_UCONTROL) { 3210 kvm->arch.gmap = NULL; 3211 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT; 3212 } else { 3213 if (sclp.hamax == U64_MAX) 3214 kvm->arch.mem_limit = TASK_SIZE_MAX; 3215 else 3216 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX, 3217 sclp.hamax + 1); 3218 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1); 3219 if (!kvm->arch.gmap) 3220 goto out_err; 3221 kvm->arch.gmap->private = kvm; 3222 kvm->arch.gmap->pfault_enabled = 0; 3223 } 3224 3225 kvm->arch.use_pfmfi = sclp.has_pfmfi; 3226 kvm->arch.use_skf = sclp.has_skey; 3227 spin_lock_init(&kvm->arch.start_stop_lock); 3228 kvm_s390_vsie_init(kvm); 3229 if (use_gisa) 3230 kvm_s390_gisa_init(kvm); 3231 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid); 3232 3233 return 0; 3234 out_err: 3235 free_page((unsigned long)kvm->arch.sie_page2); 3236 debug_unregister(kvm->arch.dbf); 3237 sca_dispose(kvm); 3238 KVM_EVENT(3, "creation of vm failed: %d", rc); 3239 return rc; 3240 } 3241 3242 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) 3243 { 3244 u16 rc, rrc; 3245 3246 VCPU_EVENT(vcpu, 3, "%s", "free cpu"); 3247 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id); 3248 kvm_s390_clear_local_irqs(vcpu); 3249 kvm_clear_async_pf_completion_queue(vcpu); 3250 if (!kvm_is_ucontrol(vcpu->kvm)) 3251 sca_del_vcpu(vcpu); 3252 kvm_s390_update_topology_change_report(vcpu->kvm, 1); 3253 3254 if (kvm_is_ucontrol(vcpu->kvm)) 3255 gmap_remove(vcpu->arch.gmap); 3256 3257 if (vcpu->kvm->arch.use_cmma) 3258 kvm_s390_vcpu_unsetup_cmma(vcpu); 3259 /* We can not hold the vcpu mutex here, we are already dying */ 3260 if (kvm_s390_pv_cpu_get_handle(vcpu)) 3261 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc); 3262 free_page((unsigned long)(vcpu->arch.sie_block)); 3263 } 3264 3265 void kvm_arch_destroy_vm(struct kvm *kvm) 3266 { 3267 u16 rc, rrc; 3268 3269 kvm_destroy_vcpus(kvm); 3270 sca_dispose(kvm); 3271 kvm_s390_gisa_destroy(kvm); 3272 /* 3273 * We are already at the end of life and kvm->lock is not taken. 
3274 * This is ok as the file descriptor is closed by now and nobody 3275 * can mess with the pv state. To avoid lockdep_assert_held from 3276 * complaining we do not use kvm_s390_pv_is_protected. 3277 */ 3278 if (kvm_s390_pv_get_handle(kvm)) 3279 kvm_s390_pv_deinit_vm(kvm, &rc, &rrc); 3280 /* 3281 * Remove the mmu notifier only when the whole KVM VM is torn down, 3282 * and only if one was registered to begin with. If the VM is 3283 * currently not protected, but has been previously been protected, 3284 * then it's possible that the notifier is still registered. 3285 */ 3286 if (kvm->arch.pv.mmu_notifier.ops) 3287 mmu_notifier_unregister(&kvm->arch.pv.mmu_notifier, kvm->mm); 3288 3289 debug_unregister(kvm->arch.dbf); 3290 free_page((unsigned long)kvm->arch.sie_page2); 3291 if (!kvm_is_ucontrol(kvm)) 3292 gmap_remove(kvm->arch.gmap); 3293 kvm_s390_destroy_adapters(kvm); 3294 kvm_s390_clear_float_irqs(kvm); 3295 kvm_s390_vsie_destroy(kvm); 3296 KVM_EVENT(3, "vm 0x%pK destroyed", kvm); 3297 } 3298 3299 /* Section: vcpu related */ 3300 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu) 3301 { 3302 vcpu->arch.gmap = gmap_create(current->mm, -1UL); 3303 if (!vcpu->arch.gmap) 3304 return -ENOMEM; 3305 vcpu->arch.gmap->private = vcpu->kvm; 3306 3307 return 0; 3308 } 3309 3310 static void sca_del_vcpu(struct kvm_vcpu *vcpu) 3311 { 3312 if (!kvm_s390_use_sca_entries()) 3313 return; 3314 read_lock(&vcpu->kvm->arch.sca_lock); 3315 if (vcpu->kvm->arch.use_esca) { 3316 struct esca_block *sca = vcpu->kvm->arch.sca; 3317 3318 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); 3319 sca->cpu[vcpu->vcpu_id].sda = 0; 3320 } else { 3321 struct bsca_block *sca = vcpu->kvm->arch.sca; 3322 3323 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); 3324 sca->cpu[vcpu->vcpu_id].sda = 0; 3325 } 3326 read_unlock(&vcpu->kvm->arch.sca_lock); 3327 } 3328 3329 static void sca_add_vcpu(struct kvm_vcpu *vcpu) 3330 { 3331 if (!kvm_s390_use_sca_entries()) { 3332 struct bsca_block *sca = vcpu->kvm->arch.sca; 3333 3334 /* we still need the basic sca for the ipte control */ 3335 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 3336 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; 3337 return; 3338 } 3339 read_lock(&vcpu->kvm->arch.sca_lock); 3340 if (vcpu->kvm->arch.use_esca) { 3341 struct esca_block *sca = vcpu->kvm->arch.sca; 3342 3343 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; 3344 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 3345 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU; 3346 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; 3347 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); 3348 } else { 3349 struct bsca_block *sca = vcpu->kvm->arch.sca; 3350 3351 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; 3352 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 3353 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; 3354 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); 3355 } 3356 read_unlock(&vcpu->kvm->arch.sca_lock); 3357 } 3358 3359 /* Basic SCA to Extended SCA data copy routines */ 3360 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s) 3361 { 3362 d->sda = s->sda; 3363 d->sigp_ctrl.c = s->sigp_ctrl.c; 3364 d->sigp_ctrl.scn = s->sigp_ctrl.scn; 3365 } 3366 3367 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s) 3368 { 3369 int i; 3370 3371 d->ipte_control = s->ipte_control; 3372 d->mcn[0] = s->mcn; 3373 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++) 3374 sca_copy_entry(&d->cpu[i], 
&s->cpu[i]); 3375 } 3376 3377 static int sca_switch_to_extended(struct kvm *kvm) 3378 { 3379 struct bsca_block *old_sca = kvm->arch.sca; 3380 struct esca_block *new_sca; 3381 struct kvm_vcpu *vcpu; 3382 unsigned long vcpu_idx; 3383 u32 scaol, scaoh; 3384 3385 if (kvm->arch.use_esca) 3386 return 0; 3387 3388 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO); 3389 if (!new_sca) 3390 return -ENOMEM; 3391 3392 scaoh = (u32)((u64)(new_sca) >> 32); 3393 scaol = (u32)(u64)(new_sca) & ~0x3fU; 3394 3395 kvm_s390_vcpu_block_all(kvm); 3396 write_lock(&kvm->arch.sca_lock); 3397 3398 sca_copy_b_to_e(new_sca, old_sca); 3399 3400 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) { 3401 vcpu->arch.sie_block->scaoh = scaoh; 3402 vcpu->arch.sie_block->scaol = scaol; 3403 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; 3404 } 3405 kvm->arch.sca = new_sca; 3406 kvm->arch.use_esca = 1; 3407 3408 write_unlock(&kvm->arch.sca_lock); 3409 kvm_s390_vcpu_unblock_all(kvm); 3410 3411 free_page((unsigned long)old_sca); 3412 3413 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)", 3414 old_sca, kvm->arch.sca); 3415 return 0; 3416 } 3417 3418 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id) 3419 { 3420 int rc; 3421 3422 if (!kvm_s390_use_sca_entries()) { 3423 if (id < KVM_MAX_VCPUS) 3424 return true; 3425 return false; 3426 } 3427 if (id < KVM_S390_BSCA_CPU_SLOTS) 3428 return true; 3429 if (!sclp.has_esca || !sclp.has_64bscao) 3430 return false; 3431 3432 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm); 3433 3434 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS; 3435 } 3436 3437 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 3438 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3439 { 3440 WARN_ON_ONCE(vcpu->arch.cputm_start != 0); 3441 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 3442 vcpu->arch.cputm_start = get_tod_clock_fast(); 3443 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 3444 } 3445 3446 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 3447 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3448 { 3449 WARN_ON_ONCE(vcpu->arch.cputm_start == 0); 3450 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 3451 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start; 3452 vcpu->arch.cputm_start = 0; 3453 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 3454 } 3455 3456 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 3457 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3458 { 3459 WARN_ON_ONCE(vcpu->arch.cputm_enabled); 3460 vcpu->arch.cputm_enabled = true; 3461 __start_cpu_timer_accounting(vcpu); 3462 } 3463 3464 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 3465 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3466 { 3467 WARN_ON_ONCE(!vcpu->arch.cputm_enabled); 3468 __stop_cpu_timer_accounting(vcpu); 3469 vcpu->arch.cputm_enabled = false; 3470 } 3471 3472 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3473 { 3474 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3475 __enable_cpu_timer_accounting(vcpu); 3476 preempt_enable(); 3477 } 3478 3479 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3480 { 3481 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3482 __disable_cpu_timer_accounting(vcpu); 3483 preempt_enable(); 3484 } 3485 3486 /* set the cpu timer - may only be called from the VCPU thread itself */ 3487 
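/*
 * The cputm_seqcount write section below, together with the retry loop in
 * kvm_s390_get_cpu_timer(), lets other threads read a consistent timer value
 * without taking a lock; preemption is disabled to protect against TOD sync
 * and vcpu_load/put.
 */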
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm) 3488 { 3489 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3490 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 3491 if (vcpu->arch.cputm_enabled) 3492 vcpu->arch.cputm_start = get_tod_clock_fast(); 3493 vcpu->arch.sie_block->cputm = cputm; 3494 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 3495 preempt_enable(); 3496 } 3497 3498 /* update and get the cpu timer - can also be called from other VCPU threads */ 3499 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu) 3500 { 3501 unsigned int seq; 3502 __u64 value; 3503 3504 if (unlikely(!vcpu->arch.cputm_enabled)) 3505 return vcpu->arch.sie_block->cputm; 3506 3507 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3508 do { 3509 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount); 3510 /* 3511 * If the writer would ever execute a read in the critical 3512 * section, e.g. in irq context, we have a deadlock. 3513 */ 3514 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu); 3515 value = vcpu->arch.sie_block->cputm; 3516 /* if cputm_start is 0, accounting is being started/stopped */ 3517 if (likely(vcpu->arch.cputm_start)) 3518 value -= get_tod_clock_fast() - vcpu->arch.cputm_start; 3519 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1)); 3520 preempt_enable(); 3521 return value; 3522 } 3523 3524 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 3525 { 3526 3527 gmap_enable(vcpu->arch.enabled_gmap); 3528 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING); 3529 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) 3530 __start_cpu_timer_accounting(vcpu); 3531 vcpu->cpu = cpu; 3532 } 3533 3534 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 3535 { 3536 vcpu->cpu = -1; 3537 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) 3538 __stop_cpu_timer_accounting(vcpu); 3539 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING); 3540 vcpu->arch.enabled_gmap = gmap_get_enabled(); 3541 gmap_disable(vcpu->arch.enabled_gmap); 3542 3543 } 3544 3545 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) 3546 { 3547 mutex_lock(&vcpu->kvm->lock); 3548 preempt_disable(); 3549 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch; 3550 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx; 3551 preempt_enable(); 3552 mutex_unlock(&vcpu->kvm->lock); 3553 if (!kvm_is_ucontrol(vcpu->kvm)) { 3554 vcpu->arch.gmap = vcpu->kvm->arch.gmap; 3555 sca_add_vcpu(vcpu); 3556 } 3557 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0) 3558 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC; 3559 /* make vcpu_load load the right gmap on the first trigger */ 3560 vcpu->arch.enabled_gmap = vcpu->arch.gmap; 3561 } 3562 3563 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr) 3564 { 3565 if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) && 3566 test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo)) 3567 return true; 3568 return false; 3569 } 3570 3571 static bool kvm_has_pckmo_ecc(struct kvm *kvm) 3572 { 3573 /* At least one ECC subfunction must be present */ 3574 return kvm_has_pckmo_subfunc(kvm, 32) || 3575 kvm_has_pckmo_subfunc(kvm, 33) || 3576 kvm_has_pckmo_subfunc(kvm, 34) || 3577 kvm_has_pckmo_subfunc(kvm, 40) || 3578 kvm_has_pckmo_subfunc(kvm, 41); 3579 3580 } 3581 3582 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu) 3583 { 3584 /* 3585 * If the AP instructions are not being interpreted and the MSAX3 3586 * facility is not configured for the guest, there is nothing to set up. 
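 * Otherwise the CRYCB origin is (re)loaded and the APIE, AES/DEA
 * protected-key and ECC controls are recomputed from the VM-wide crypto
 * settings below.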
3587 */ 3588 if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76)) 3589 return; 3590 3591 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd; 3592 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA); 3593 vcpu->arch.sie_block->eca &= ~ECA_APIE; 3594 vcpu->arch.sie_block->ecd &= ~ECD_ECC; 3595 3596 if (vcpu->kvm->arch.crypto.apie) 3597 vcpu->arch.sie_block->eca |= ECA_APIE; 3598 3599 /* Set up protected key support */ 3600 if (vcpu->kvm->arch.crypto.aes_kw) { 3601 vcpu->arch.sie_block->ecb3 |= ECB3_AES; 3602 /* ecc is also wrapped with AES key */ 3603 if (kvm_has_pckmo_ecc(vcpu->kvm)) 3604 vcpu->arch.sie_block->ecd |= ECD_ECC; 3605 } 3606 3607 if (vcpu->kvm->arch.crypto.dea_kw) 3608 vcpu->arch.sie_block->ecb3 |= ECB3_DEA; 3609 } 3610 3611 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu) 3612 { 3613 free_page(vcpu->arch.sie_block->cbrlo); 3614 vcpu->arch.sie_block->cbrlo = 0; 3615 } 3616 3617 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu) 3618 { 3619 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT); 3620 if (!vcpu->arch.sie_block->cbrlo) 3621 return -ENOMEM; 3622 return 0; 3623 } 3624 3625 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu) 3626 { 3627 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model; 3628 3629 vcpu->arch.sie_block->ibc = model->ibc; 3630 if (test_kvm_facility(vcpu->kvm, 7)) 3631 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list; 3632 } 3633 3634 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu) 3635 { 3636 int rc = 0; 3637 u16 uvrc, uvrrc; 3638 3639 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | 3640 CPUSTAT_SM | 3641 CPUSTAT_STOPPED); 3642 3643 if (test_kvm_facility(vcpu->kvm, 78)) 3644 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2); 3645 else if (test_kvm_facility(vcpu->kvm, 8)) 3646 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED); 3647 3648 kvm_s390_vcpu_setup_model(vcpu); 3649 3650 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */ 3651 if (MACHINE_HAS_ESOP) 3652 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT; 3653 if (test_kvm_facility(vcpu->kvm, 9)) 3654 vcpu->arch.sie_block->ecb |= ECB_SRSI; 3655 if (test_kvm_facility(vcpu->kvm, 11)) 3656 vcpu->arch.sie_block->ecb |= ECB_PTF; 3657 if (test_kvm_facility(vcpu->kvm, 73)) 3658 vcpu->arch.sie_block->ecb |= ECB_TE; 3659 if (!kvm_is_ucontrol(vcpu->kvm)) 3660 vcpu->arch.sie_block->ecb |= ECB_SPECI; 3661 3662 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi) 3663 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI; 3664 if (test_kvm_facility(vcpu->kvm, 130)) 3665 vcpu->arch.sie_block->ecb2 |= ECB2_IEP; 3666 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI; 3667 if (sclp.has_cei) 3668 vcpu->arch.sie_block->eca |= ECA_CEI; 3669 if (sclp.has_ib) 3670 vcpu->arch.sie_block->eca |= ECA_IB; 3671 if (sclp.has_siif) 3672 vcpu->arch.sie_block->eca |= ECA_SII; 3673 if (sclp.has_sigpif) 3674 vcpu->arch.sie_block->eca |= ECA_SIGPI; 3675 if (test_kvm_facility(vcpu->kvm, 129)) { 3676 vcpu->arch.sie_block->eca |= ECA_VX; 3677 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; 3678 } 3679 if (test_kvm_facility(vcpu->kvm, 139)) 3680 vcpu->arch.sie_block->ecd |= ECD_MEF; 3681 if (test_kvm_facility(vcpu->kvm, 156)) 3682 vcpu->arch.sie_block->ecd |= ECD_ETOKENF; 3683 if (vcpu->arch.sie_block->gd) { 3684 vcpu->arch.sie_block->eca |= ECA_AIV; 3685 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u", 3686 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id); 3687 } 3688 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx) 3689 | 
SDNXC; 3690 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb; 3691 3692 if (sclp.has_kss) 3693 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS); 3694 else 3695 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; 3696 3697 if (vcpu->kvm->arch.use_cmma) { 3698 rc = kvm_s390_vcpu_setup_cmma(vcpu); 3699 if (rc) 3700 return rc; 3701 } 3702 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 3703 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; 3704 3705 vcpu->arch.sie_block->hpid = HPID_KVM; 3706 3707 kvm_s390_vcpu_crypto_setup(vcpu); 3708 3709 kvm_s390_vcpu_pci_setup(vcpu); 3710 3711 mutex_lock(&vcpu->kvm->lock); 3712 if (kvm_s390_pv_is_protected(vcpu->kvm)) { 3713 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc); 3714 if (rc) 3715 kvm_s390_vcpu_unsetup_cmma(vcpu); 3716 } 3717 mutex_unlock(&vcpu->kvm->lock); 3718 3719 return rc; 3720 } 3721 3722 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) 3723 { 3724 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id)) 3725 return -EINVAL; 3726 return 0; 3727 } 3728 3729 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) 3730 { 3731 struct sie_page *sie_page; 3732 int rc; 3733 3734 BUILD_BUG_ON(sizeof(struct sie_page) != 4096); 3735 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT); 3736 if (!sie_page) 3737 return -ENOMEM; 3738 3739 vcpu->arch.sie_block = &sie_page->sie_block; 3740 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb; 3741 3742 /* the real guest size will always be smaller than msl */ 3743 vcpu->arch.sie_block->mso = 0; 3744 vcpu->arch.sie_block->msl = sclp.hamax; 3745 3746 vcpu->arch.sie_block->icpua = vcpu->vcpu_id; 3747 spin_lock_init(&vcpu->arch.local_int.lock); 3748 vcpu->arch.sie_block->gd = kvm_s390_get_gisa_desc(vcpu->kvm); 3749 seqcount_init(&vcpu->arch.cputm_seqcount); 3750 3751 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 3752 kvm_clear_async_pf_completion_queue(vcpu); 3753 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX | 3754 KVM_SYNC_GPRS | 3755 KVM_SYNC_ACRS | 3756 KVM_SYNC_CRS | 3757 KVM_SYNC_ARCH0 | 3758 KVM_SYNC_PFAULT | 3759 KVM_SYNC_DIAG318; 3760 kvm_s390_set_prefix(vcpu, 0); 3761 if (test_kvm_facility(vcpu->kvm, 64)) 3762 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; 3763 if (test_kvm_facility(vcpu->kvm, 82)) 3764 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC; 3765 if (test_kvm_facility(vcpu->kvm, 133)) 3766 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB; 3767 if (test_kvm_facility(vcpu->kvm, 156)) 3768 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN; 3769 /* fprs can be synchronized via vrs, even if the guest has no vx. With 3770 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format. 
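 * Depending on MACHINE_HAS_VX, exactly one of KVM_SYNC_VRS or
 * KVM_SYNC_FPRS is advertised in kvm_valid_regs below.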
3771 */ 3772 if (MACHINE_HAS_VX) 3773 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS; 3774 else 3775 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS; 3776 3777 if (kvm_is_ucontrol(vcpu->kvm)) { 3778 rc = __kvm_ucontrol_vcpu_init(vcpu); 3779 if (rc) 3780 goto out_free_sie_block; 3781 } 3782 3783 VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", 3784 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block); 3785 trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block); 3786 3787 rc = kvm_s390_vcpu_setup(vcpu); 3788 if (rc) 3789 goto out_ucontrol_uninit; 3790 3791 kvm_s390_update_topology_change_report(vcpu->kvm, 1); 3792 return 0; 3793 3794 out_ucontrol_uninit: 3795 if (kvm_is_ucontrol(vcpu->kvm)) 3796 gmap_remove(vcpu->arch.gmap); 3797 out_free_sie_block: 3798 free_page((unsigned long)(vcpu->arch.sie_block)); 3799 return rc; 3800 } 3801 3802 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 3803 { 3804 clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask); 3805 return kvm_s390_vcpu_has_irq(vcpu, 0); 3806 } 3807 3808 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) 3809 { 3810 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE); 3811 } 3812 3813 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu) 3814 { 3815 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); 3816 exit_sie(vcpu); 3817 } 3818 3819 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu) 3820 { 3821 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); 3822 } 3823 3824 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu) 3825 { 3826 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20); 3827 exit_sie(vcpu); 3828 } 3829 3830 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu) 3831 { 3832 return atomic_read(&vcpu->arch.sie_block->prog20) & 3833 (PROG_BLOCK_SIE | PROG_REQUEST); 3834 } 3835 3836 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu) 3837 { 3838 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20); 3839 } 3840 3841 /* 3842 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running. 3843 * If the CPU is not running (e.g. waiting as idle) the function will 3844 * return immediately. 
*/ 3845 void exit_sie(struct kvm_vcpu *vcpu) 3846 { 3847 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT); 3848 kvm_s390_vsie_kick(vcpu); 3849 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE) 3850 cpu_relax(); 3851 } 3852 3853 /* Kick a guest cpu out of SIE to process a request synchronously */ 3854 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu) 3855 { 3856 __kvm_make_request(req, vcpu); 3857 kvm_s390_vcpu_request(vcpu); 3858 } 3859 3860 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, 3861 unsigned long end) 3862 { 3863 struct kvm *kvm = gmap->private; 3864 struct kvm_vcpu *vcpu; 3865 unsigned long prefix; 3866 unsigned long i; 3867 3868 if (gmap_is_shadow(gmap)) 3869 return; 3870 if (start >= 1UL << 31) 3871 /* We are only interested in prefix pages */ 3872 return; 3873 kvm_for_each_vcpu(i, vcpu, kvm) { 3874 /* match against both prefix pages */ 3875 prefix = kvm_s390_get_prefix(vcpu); 3876 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) { 3877 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx", 3878 start, end); 3879 kvm_s390_sync_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu); 3880 } 3881 } 3882 } 3883 3884 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) 3885 { 3886 /* do not poll with more than halt_poll_max_steal percent of steal time */ 3887 if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >= 3888 READ_ONCE(halt_poll_max_steal)) { 3889 vcpu->stat.halt_no_poll_steal++; 3890 return true; 3891 } 3892 return false; 3893 } 3894 3895 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) 3896 { 3897 /* kvm common code refers to this, but never calls it */ 3898 BUG(); 3899 return 0; 3900 } 3901 3902 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, 3903 struct kvm_one_reg *reg) 3904 { 3905 int r = -EINVAL; 3906 3907 switch (reg->id) { 3908 case KVM_REG_S390_TODPR: 3909 r = put_user(vcpu->arch.sie_block->todpr, 3910 (u32 __user *)reg->addr); 3911 break; 3912 case KVM_REG_S390_EPOCHDIFF: 3913 r = put_user(vcpu->arch.sie_block->epoch, 3914 (u64 __user *)reg->addr); 3915 break; 3916 case KVM_REG_S390_CPU_TIMER: 3917 r = put_user(kvm_s390_get_cpu_timer(vcpu), 3918 (u64 __user *)reg->addr); 3919 break; 3920 case KVM_REG_S390_CLOCK_COMP: 3921 r = put_user(vcpu->arch.sie_block->ckc, 3922 (u64 __user *)reg->addr); 3923 break; 3924 case KVM_REG_S390_PFTOKEN: 3925 r = put_user(vcpu->arch.pfault_token, 3926 (u64 __user *)reg->addr); 3927 break; 3928 case KVM_REG_S390_PFCOMPARE: 3929 r = put_user(vcpu->arch.pfault_compare, 3930 (u64 __user *)reg->addr); 3931 break; 3932 case KVM_REG_S390_PFSELECT: 3933 r = put_user(vcpu->arch.pfault_select, 3934 (u64 __user *)reg->addr); 3935 break; 3936 case KVM_REG_S390_PP: 3937 r = put_user(vcpu->arch.sie_block->pp, 3938 (u64 __user *)reg->addr); 3939 break; 3940 case KVM_REG_S390_GBEA: 3941 r = put_user(vcpu->arch.sie_block->gbea, 3942 (u64 __user *)reg->addr); 3943 break; 3944 default: 3945 break; 3946 } 3947 3948 return r; 3949 } 3950 3951 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, 3952 struct kvm_one_reg *reg) 3953 { 3954 int r = -EINVAL; 3955 __u64 val; 3956 3957 switch (reg->id) { 3958 case KVM_REG_S390_TODPR: 3959 r = get_user(vcpu->arch.sie_block->todpr, 3960 (u32 __user *)reg->addr); 3961 break; 3962 case KVM_REG_S390_EPOCHDIFF: 3963 r = get_user(vcpu->arch.sie_block->epoch, 3964 (u64 __user *)reg->addr); 3965 break; 3966 case KVM_REG_S390_CPU_TIMER: 3967 r = get_user(val, (u64 __user *)reg->addr); 3968 if (!r) 3969 kvm_s390_set_cpu_timer(vcpu, val); 3970 break; 3971 case 
KVM_REG_S390_CLOCK_COMP: 3972 r = get_user(vcpu->arch.sie_block->ckc, 3973 (u64 __user *)reg->addr); 3974 break; 3975 case KVM_REG_S390_PFTOKEN: 3976 r = get_user(vcpu->arch.pfault_token, 3977 (u64 __user *)reg->addr); 3978 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 3979 kvm_clear_async_pf_completion_queue(vcpu); 3980 break; 3981 case KVM_REG_S390_PFCOMPARE: 3982 r = get_user(vcpu->arch.pfault_compare, 3983 (u64 __user *)reg->addr); 3984 break; 3985 case KVM_REG_S390_PFSELECT: 3986 r = get_user(vcpu->arch.pfault_select, 3987 (u64 __user *)reg->addr); 3988 break; 3989 case KVM_REG_S390_PP: 3990 r = get_user(vcpu->arch.sie_block->pp, 3991 (u64 __user *)reg->addr); 3992 break; 3993 case KVM_REG_S390_GBEA: 3994 r = get_user(vcpu->arch.sie_block->gbea, 3995 (u64 __user *)reg->addr); 3996 break; 3997 default: 3998 break; 3999 } 4000 4001 return r; 4002 } 4003 4004 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu) 4005 { 4006 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI; 4007 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 4008 memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb)); 4009 4010 kvm_clear_async_pf_completion_queue(vcpu); 4011 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) 4012 kvm_s390_vcpu_stop(vcpu); 4013 kvm_s390_clear_local_irqs(vcpu); 4014 } 4015 4016 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) 4017 { 4018 /* Initial reset is a superset of the normal reset */ 4019 kvm_arch_vcpu_ioctl_normal_reset(vcpu); 4020 4021 /* 4022 * This equals initial cpu reset in pop, but we don't switch to ESA. 4023 * We do not only reset the internal data, but also ... 4024 */ 4025 vcpu->arch.sie_block->gpsw.mask = 0; 4026 vcpu->arch.sie_block->gpsw.addr = 0; 4027 kvm_s390_set_prefix(vcpu, 0); 4028 kvm_s390_set_cpu_timer(vcpu, 0); 4029 vcpu->arch.sie_block->ckc = 0; 4030 memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr)); 4031 vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK; 4032 vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK; 4033 4034 /* ... the data in sync regs */ 4035 memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs)); 4036 vcpu->run->s.regs.ckc = 0; 4037 vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK; 4038 vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK; 4039 vcpu->run->psw_addr = 0; 4040 vcpu->run->psw_mask = 0; 4041 vcpu->run->s.regs.todpr = 0; 4042 vcpu->run->s.regs.cputm = 0; 4043 vcpu->run->s.regs.ckc = 0; 4044 vcpu->run->s.regs.pp = 0; 4045 vcpu->run->s.regs.gbea = 1; 4046 vcpu->run->s.regs.fpc = 0; 4047 /* 4048 * Do not reset these registers in the protected case, as some of 4049 * them are overlayed and they are not accessible in this case 4050 * anyway. 
	 */
	if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
		vcpu->arch.sie_block->gbea = 1;
		vcpu->arch.sie_block->pp = 0;
		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
		vcpu->arch.sie_block->todpr = 0;
	}
}

static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
{
	struct kvm_sync_regs *regs = &vcpu->run->s.regs;

	/* Clear reset is a superset of the initial reset */
	kvm_arch_vcpu_ioctl_initial_reset(vcpu);

	memset(&regs->gprs, 0, sizeof(regs->gprs));
	memset(&regs->vrs, 0, sizeof(regs->vrs));
	memset(&regs->acrs, 0, sizeof(regs->acrs));
	memset(&regs->gscb, 0, sizeof(regs->gscb));

	regs->etoken = 0;
	regs->etoken_extension = 0;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	vcpu_load(vcpu);
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	vcpu_load(vcpu);
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	vcpu_load(vcpu);

	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));

	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	vcpu_load(vcpu);

	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));

	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	int ret = 0;

	vcpu_load(vcpu);

	if (test_fp_ctl(fpu->fpc)) {
		ret = -EINVAL;
		goto out;
	}
	vcpu->run->s.regs.fpc = fpu->fpc;
	if (MACHINE_HAS_VX)
		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
				 (freg_t *) fpu->fprs);
	else
		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));

out:
	vcpu_put(vcpu);
	return ret;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	vcpu_load(vcpu);

	/* make sure we have the latest values */
	save_fpu_regs();
	if (MACHINE_HAS_VX)
		convert_vx_to_fp((freg_t *) fpu->fprs,
				 (__vector128 *) vcpu->run->s.regs.vrs);
	else
		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->run->s.regs.fpc;

	vcpu_put(vcpu);
	return 0;
}

static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!is_vcpu_stopped(vcpu))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}

#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	int rc = 0;

	vcpu_load(vcpu);

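	/*
	 * Start from a clean debug state below: the code first clears any
	 * previous breakpoint data, then rejects control flags outside
	 * VALID_GUESTDBG_FLAGS and hosts without guest PER support
	 * (sclp.has_gpere) before enabling debugging for this vcpu.
	 */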
4185 vcpu->guest_debug = 0; 4186 kvm_s390_clear_bp_data(vcpu); 4187 4188 if (dbg->control & ~VALID_GUESTDBG_FLAGS) { 4189 rc = -EINVAL; 4190 goto out; 4191 } 4192 if (!sclp.has_gpere) { 4193 rc = -EINVAL; 4194 goto out; 4195 } 4196 4197 if (dbg->control & KVM_GUESTDBG_ENABLE) { 4198 vcpu->guest_debug = dbg->control; 4199 /* enforce guest PER */ 4200 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P); 4201 4202 if (dbg->control & KVM_GUESTDBG_USE_HW_BP) 4203 rc = kvm_s390_import_bp_data(vcpu, dbg); 4204 } else { 4205 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P); 4206 vcpu->arch.guestdbg.last_bp = 0; 4207 } 4208 4209 if (rc) { 4210 vcpu->guest_debug = 0; 4211 kvm_s390_clear_bp_data(vcpu); 4212 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P); 4213 } 4214 4215 out: 4216 vcpu_put(vcpu); 4217 return rc; 4218 } 4219 4220 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, 4221 struct kvm_mp_state *mp_state) 4222 { 4223 int ret; 4224 4225 vcpu_load(vcpu); 4226 4227 /* CHECK_STOP and LOAD are not supported yet */ 4228 ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED : 4229 KVM_MP_STATE_OPERATING; 4230 4231 vcpu_put(vcpu); 4232 return ret; 4233 } 4234 4235 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, 4236 struct kvm_mp_state *mp_state) 4237 { 4238 int rc = 0; 4239 4240 vcpu_load(vcpu); 4241 4242 /* user space knows about this interface - let it control the state */ 4243 kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm); 4244 4245 switch (mp_state->mp_state) { 4246 case KVM_MP_STATE_STOPPED: 4247 rc = kvm_s390_vcpu_stop(vcpu); 4248 break; 4249 case KVM_MP_STATE_OPERATING: 4250 rc = kvm_s390_vcpu_start(vcpu); 4251 break; 4252 case KVM_MP_STATE_LOAD: 4253 if (!kvm_s390_pv_cpu_is_protected(vcpu)) { 4254 rc = -ENXIO; 4255 break; 4256 } 4257 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD); 4258 break; 4259 case KVM_MP_STATE_CHECK_STOP: 4260 fallthrough; /* CHECK_STOP and LOAD are not supported yet */ 4261 default: 4262 rc = -ENXIO; 4263 } 4264 4265 vcpu_put(vcpu); 4266 return rc; 4267 } 4268 4269 static bool ibs_enabled(struct kvm_vcpu *vcpu) 4270 { 4271 return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS); 4272 } 4273 4274 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) 4275 { 4276 retry: 4277 kvm_s390_vcpu_request_handled(vcpu); 4278 if (!kvm_request_pending(vcpu)) 4279 return 0; 4280 /* 4281 * If the guest prefix changed, re-arm the ipte notifier for the 4282 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock. 4283 * This ensures that the ipte instruction for this request has 4284 * already finished. We might race against a second unmapper that 4285 * wants to set the blocking bit. Lets just retry the request loop. 
4286 */ 4287 if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) { 4288 int rc; 4289 rc = gmap_mprotect_notify(vcpu->arch.gmap, 4290 kvm_s390_get_prefix(vcpu), 4291 PAGE_SIZE * 2, PROT_WRITE); 4292 if (rc) { 4293 kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu); 4294 return rc; 4295 } 4296 goto retry; 4297 } 4298 4299 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) { 4300 vcpu->arch.sie_block->ihcpu = 0xffff; 4301 goto retry; 4302 } 4303 4304 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) { 4305 if (!ibs_enabled(vcpu)) { 4306 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1); 4307 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS); 4308 } 4309 goto retry; 4310 } 4311 4312 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) { 4313 if (ibs_enabled(vcpu)) { 4314 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0); 4315 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS); 4316 } 4317 goto retry; 4318 } 4319 4320 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) { 4321 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC; 4322 goto retry; 4323 } 4324 4325 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) { 4326 /* 4327 * Disable CMM virtualization; we will emulate the ESSA 4328 * instruction manually, in order to provide additional 4329 * functionalities needed for live migration. 4330 */ 4331 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA; 4332 goto retry; 4333 } 4334 4335 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) { 4336 /* 4337 * Re-enable CMM virtualization if CMMA is available and 4338 * CMM has been used. 4339 */ 4340 if ((vcpu->kvm->arch.use_cmma) && 4341 (vcpu->kvm->mm->context.uses_cmm)) 4342 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA; 4343 goto retry; 4344 } 4345 4346 /* we left the vsie handler, nothing to do, just clear the request */ 4347 kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu); 4348 4349 return 0; 4350 } 4351 4352 static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) 4353 { 4354 struct kvm_vcpu *vcpu; 4355 union tod_clock clk; 4356 unsigned long i; 4357 4358 preempt_disable(); 4359 4360 store_tod_clock_ext(&clk); 4361 4362 kvm->arch.epoch = gtod->tod - clk.tod; 4363 kvm->arch.epdx = 0; 4364 if (test_kvm_facility(kvm, 139)) { 4365 kvm->arch.epdx = gtod->epoch_idx - clk.ei; 4366 if (kvm->arch.epoch > gtod->tod) 4367 kvm->arch.epdx -= 1; 4368 } 4369 4370 kvm_s390_vcpu_block_all(kvm); 4371 kvm_for_each_vcpu(i, vcpu, kvm) { 4372 vcpu->arch.sie_block->epoch = kvm->arch.epoch; 4373 vcpu->arch.sie_block->epdx = kvm->arch.epdx; 4374 } 4375 4376 kvm_s390_vcpu_unblock_all(kvm); 4377 preempt_enable(); 4378 } 4379 4380 void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) 4381 { 4382 mutex_lock(&kvm->lock); 4383 __kvm_s390_set_tod_clock(kvm, gtod); 4384 mutex_unlock(&kvm->lock); 4385 } 4386 4387 int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) 4388 { 4389 if (!mutex_trylock(&kvm->lock)) 4390 return 0; 4391 __kvm_s390_set_tod_clock(kvm, gtod); 4392 mutex_unlock(&kvm->lock); 4393 return 1; 4394 } 4395 4396 /** 4397 * kvm_arch_fault_in_page - fault-in guest page if necessary 4398 * @vcpu: The corresponding virtual cpu 4399 * @gpa: Guest physical address 4400 * @writable: Whether the page should be writable or not 4401 * 4402 * Make sure that a guest page has been faulted-in on the host. 4403 * 4404 * Return: Zero on success, negative error code otherwise. 
4405 */ 4406 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable) 4407 { 4408 return gmap_fault(vcpu->arch.gmap, gpa, 4409 writable ? FAULT_FLAG_WRITE : 0); 4410 } 4411 4412 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token, 4413 unsigned long token) 4414 { 4415 struct kvm_s390_interrupt inti; 4416 struct kvm_s390_irq irq; 4417 4418 if (start_token) { 4419 irq.u.ext.ext_params2 = token; 4420 irq.type = KVM_S390_INT_PFAULT_INIT; 4421 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq)); 4422 } else { 4423 inti.type = KVM_S390_INT_PFAULT_DONE; 4424 inti.parm64 = token; 4425 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti)); 4426 } 4427 } 4428 4429 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, 4430 struct kvm_async_pf *work) 4431 { 4432 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token); 4433 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token); 4434 4435 return true; 4436 } 4437 4438 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, 4439 struct kvm_async_pf *work) 4440 { 4441 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token); 4442 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token); 4443 } 4444 4445 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, 4446 struct kvm_async_pf *work) 4447 { 4448 /* s390 will always inject the page directly */ 4449 } 4450 4451 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu) 4452 { 4453 /* 4454 * s390 will always inject the page directly, 4455 * but we still want check_async_completion to cleanup 4456 */ 4457 return true; 4458 } 4459 4460 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu) 4461 { 4462 hva_t hva; 4463 struct kvm_arch_async_pf arch; 4464 4465 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 4466 return false; 4467 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) != 4468 vcpu->arch.pfault_compare) 4469 return false; 4470 if (psw_extint_disabled(vcpu)) 4471 return false; 4472 if (kvm_s390_vcpu_has_irq(vcpu, 0)) 4473 return false; 4474 if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK)) 4475 return false; 4476 if (!vcpu->arch.gmap->pfault_enabled) 4477 return false; 4478 4479 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr)); 4480 hva += current->thread.gmap_addr & ~PAGE_MASK; 4481 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8)) 4482 return false; 4483 4484 return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch); 4485 } 4486 4487 static int vcpu_pre_run(struct kvm_vcpu *vcpu) 4488 { 4489 int rc, cpuflags; 4490 4491 /* 4492 * On s390 notifications for arriving pages will be delivered directly 4493 * to the guest but the house keeping for completed pfaults is 4494 * handled outside the worker. 
4495 */ 4496 kvm_check_async_pf_completion(vcpu); 4497 4498 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14]; 4499 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15]; 4500 4501 if (need_resched()) 4502 schedule(); 4503 4504 if (!kvm_is_ucontrol(vcpu->kvm)) { 4505 rc = kvm_s390_deliver_pending_interrupts(vcpu); 4506 if (rc) 4507 return rc; 4508 } 4509 4510 rc = kvm_s390_handle_requests(vcpu); 4511 if (rc) 4512 return rc; 4513 4514 if (guestdbg_enabled(vcpu)) { 4515 kvm_s390_backup_guest_per_regs(vcpu); 4516 kvm_s390_patch_guest_per_regs(vcpu); 4517 } 4518 4519 clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask); 4520 4521 vcpu->arch.sie_block->icptcode = 0; 4522 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags); 4523 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags); 4524 trace_kvm_s390_sie_enter(vcpu, cpuflags); 4525 4526 return 0; 4527 } 4528 4529 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) 4530 { 4531 struct kvm_s390_pgm_info pgm_info = { 4532 .code = PGM_ADDRESSING, 4533 }; 4534 u8 opcode, ilen; 4535 int rc; 4536 4537 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); 4538 trace_kvm_s390_sie_fault(vcpu); 4539 4540 /* 4541 * We want to inject an addressing exception, which is defined as a 4542 * suppressing or terminating exception. However, since we came here 4543 * by a DAT access exception, the PSW still points to the faulting 4544 * instruction since DAT exceptions are nullifying. So we've got 4545 * to look up the current opcode to get the length of the instruction 4546 * to be able to forward the PSW. 4547 */ 4548 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1); 4549 ilen = insn_length(opcode); 4550 if (rc < 0) { 4551 return rc; 4552 } else if (rc) { 4553 /* Instruction-Fetching Exceptions - we can't detect the ilen. 4554 * Forward by arbitrary ilc, injection will take care of 4555 * nullification if necessary. 
4556 */ 4557 pgm_info = vcpu->arch.pgm; 4558 ilen = 4; 4559 } 4560 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID; 4561 kvm_s390_forward_psw(vcpu, ilen); 4562 return kvm_s390_inject_prog_irq(vcpu, &pgm_info); 4563 } 4564 4565 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) 4566 { 4567 struct mcck_volatile_info *mcck_info; 4568 struct sie_page *sie_page; 4569 4570 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", 4571 vcpu->arch.sie_block->icptcode); 4572 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); 4573 4574 if (guestdbg_enabled(vcpu)) 4575 kvm_s390_restore_guest_per_regs(vcpu); 4576 4577 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14; 4578 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15; 4579 4580 if (exit_reason == -EINTR) { 4581 VCPU_EVENT(vcpu, 3, "%s", "machine check"); 4582 sie_page = container_of(vcpu->arch.sie_block, 4583 struct sie_page, sie_block); 4584 mcck_info = &sie_page->mcck_info; 4585 kvm_s390_reinject_machine_check(vcpu, mcck_info); 4586 return 0; 4587 } 4588 4589 if (vcpu->arch.sie_block->icptcode > 0) { 4590 int rc = kvm_handle_sie_intercept(vcpu); 4591 4592 if (rc != -EOPNOTSUPP) 4593 return rc; 4594 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC; 4595 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; 4596 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; 4597 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; 4598 return -EREMOTE; 4599 } else if (exit_reason != -EFAULT) { 4600 vcpu->stat.exit_null++; 4601 return 0; 4602 } else if (kvm_is_ucontrol(vcpu->kvm)) { 4603 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; 4604 vcpu->run->s390_ucontrol.trans_exc_code = 4605 current->thread.gmap_addr; 4606 vcpu->run->s390_ucontrol.pgm_code = 0x10; 4607 return -EREMOTE; 4608 } else if (current->thread.gmap_pfault) { 4609 trace_kvm_s390_major_guest_pfault(vcpu); 4610 current->thread.gmap_pfault = 0; 4611 if (kvm_arch_setup_async_pf(vcpu)) 4612 return 0; 4613 vcpu->stat.pfault_sync++; 4614 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1); 4615 } 4616 return vcpu_post_run_fault_in_sie(vcpu); 4617 } 4618 4619 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK) 4620 static int __vcpu_run(struct kvm_vcpu *vcpu) 4621 { 4622 int rc, exit_reason; 4623 struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block; 4624 4625 /* 4626 * We try to hold kvm->srcu during most of vcpu_run (except when run- 4627 * ning the guest), so that memslots (and other stuff) are protected 4628 */ 4629 kvm_vcpu_srcu_read_lock(vcpu); 4630 4631 do { 4632 rc = vcpu_pre_run(vcpu); 4633 if (rc) 4634 break; 4635 4636 kvm_vcpu_srcu_read_unlock(vcpu); 4637 /* 4638 * As PF_VCPU will be used in fault handler, between 4639 * guest_enter and guest_exit should be no uaccess. 4640 */ 4641 local_irq_disable(); 4642 guest_enter_irqoff(); 4643 __disable_cpu_timer_accounting(vcpu); 4644 local_irq_enable(); 4645 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4646 memcpy(sie_page->pv_grregs, 4647 vcpu->run->s.regs.gprs, 4648 sizeof(sie_page->pv_grregs)); 4649 } 4650 if (test_cpu_flag(CIF_FPU)) 4651 load_fpu_regs(); 4652 exit_reason = sie64a(vcpu->arch.sie_block, 4653 vcpu->run->s.regs.gprs); 4654 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4655 memcpy(vcpu->run->s.regs.gprs, 4656 sie_page->pv_grregs, 4657 sizeof(sie_page->pv_grregs)); 4658 /* 4659 * We're not allowed to inject interrupts on intercepts 4660 * that leave the guest state in an "in-between" state 4661 * where the next SIE entry will do a continuation. 
4662 * Fence interrupts in our "internal" PSW. 4663 */ 4664 if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR || 4665 vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) { 4666 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK; 4667 } 4668 } 4669 local_irq_disable(); 4670 __enable_cpu_timer_accounting(vcpu); 4671 guest_exit_irqoff(); 4672 local_irq_enable(); 4673 kvm_vcpu_srcu_read_lock(vcpu); 4674 4675 rc = vcpu_post_run(vcpu, exit_reason); 4676 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc); 4677 4678 kvm_vcpu_srcu_read_unlock(vcpu); 4679 return rc; 4680 } 4681 4682 static void sync_regs_fmt2(struct kvm_vcpu *vcpu) 4683 { 4684 struct kvm_run *kvm_run = vcpu->run; 4685 struct runtime_instr_cb *riccb; 4686 struct gs_cb *gscb; 4687 4688 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb; 4689 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb; 4690 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; 4691 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; 4692 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) { 4693 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr; 4694 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp; 4695 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea; 4696 } 4697 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) { 4698 vcpu->arch.pfault_token = kvm_run->s.regs.pft; 4699 vcpu->arch.pfault_select = kvm_run->s.regs.pfs; 4700 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc; 4701 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 4702 kvm_clear_async_pf_completion_queue(vcpu); 4703 } 4704 if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) { 4705 vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318; 4706 vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc; 4707 VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc); 4708 } 4709 /* 4710 * If userspace sets the riccb (e.g. after migration) to a valid state, 4711 * we should enable RI here instead of doing the lazy enablement. 4712 */ 4713 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) && 4714 test_kvm_facility(vcpu->kvm, 64) && 4715 riccb->v && 4716 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) { 4717 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)"); 4718 vcpu->arch.sie_block->ecb3 |= ECB3_RI; 4719 } 4720 /* 4721 * If userspace sets the gscb (e.g. after migration) to non-zero, 4722 * we should enable GS here instead of doing the lazy enablement. 4723 */ 4724 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) && 4725 test_kvm_facility(vcpu->kvm, 133) && 4726 gscb->gssm && 4727 !vcpu->arch.gs_enabled) { 4728 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)"); 4729 vcpu->arch.sie_block->ecb |= ECB_GS; 4730 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; 4731 vcpu->arch.gs_enabled = 1; 4732 } 4733 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) && 4734 test_kvm_facility(vcpu->kvm, 82)) { 4735 vcpu->arch.sie_block->fpf &= ~FPF_BPBC; 4736 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? 
FPF_BPBC : 0; 4737 } 4738 if (MACHINE_HAS_GS) { 4739 preempt_disable(); 4740 __ctl_set_bit(2, 4); 4741 if (current->thread.gs_cb) { 4742 vcpu->arch.host_gscb = current->thread.gs_cb; 4743 save_gs_cb(vcpu->arch.host_gscb); 4744 } 4745 if (vcpu->arch.gs_enabled) { 4746 current->thread.gs_cb = (struct gs_cb *) 4747 &vcpu->run->s.regs.gscb; 4748 restore_gs_cb(current->thread.gs_cb); 4749 } 4750 preempt_enable(); 4751 } 4752 /* SIE will load etoken directly from SDNX and therefore kvm_run */ 4753 } 4754 4755 static void sync_regs(struct kvm_vcpu *vcpu) 4756 { 4757 struct kvm_run *kvm_run = vcpu->run; 4758 4759 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) 4760 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); 4761 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) { 4762 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128); 4763 /* some control register changes require a tlb flush */ 4764 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 4765 } 4766 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) { 4767 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm); 4768 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc; 4769 } 4770 save_access_regs(vcpu->arch.host_acrs); 4771 restore_access_regs(vcpu->run->s.regs.acrs); 4772 /* save host (userspace) fprs/vrs */ 4773 save_fpu_regs(); 4774 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc; 4775 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs; 4776 if (MACHINE_HAS_VX) 4777 current->thread.fpu.regs = vcpu->run->s.regs.vrs; 4778 else 4779 current->thread.fpu.regs = vcpu->run->s.regs.fprs; 4780 current->thread.fpu.fpc = vcpu->run->s.regs.fpc; 4781 if (test_fp_ctl(current->thread.fpu.fpc)) 4782 /* User space provided an invalid FPC, let's clear it */ 4783 current->thread.fpu.fpc = 0; 4784 4785 /* Sync fmt2 only data */ 4786 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) { 4787 sync_regs_fmt2(vcpu); 4788 } else { 4789 /* 4790 * In several places we have to modify our internal view to 4791 * not do things that are disallowed by the ultravisor. For 4792 * example we must not inject interrupts after specific exits 4793 * (e.g. 112 prefix page not secure). We do this by turning 4794 * off the machine check, external and I/O interrupt bits 4795 * of our PSW copy. To avoid getting validity intercepts, we 4796 * do only accept the condition code from userspace. 
4797 */ 4798 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC; 4799 vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask & 4800 PSW_MASK_CC; 4801 } 4802 4803 kvm_run->kvm_dirty_regs = 0; 4804 } 4805 4806 static void store_regs_fmt2(struct kvm_vcpu *vcpu) 4807 { 4808 struct kvm_run *kvm_run = vcpu->run; 4809 4810 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr; 4811 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp; 4812 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea; 4813 kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC; 4814 kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val; 4815 if (MACHINE_HAS_GS) { 4816 preempt_disable(); 4817 __ctl_set_bit(2, 4); 4818 if (vcpu->arch.gs_enabled) 4819 save_gs_cb(current->thread.gs_cb); 4820 current->thread.gs_cb = vcpu->arch.host_gscb; 4821 restore_gs_cb(vcpu->arch.host_gscb); 4822 if (!vcpu->arch.host_gscb) 4823 __ctl_clear_bit(2, 4); 4824 vcpu->arch.host_gscb = NULL; 4825 preempt_enable(); 4826 } 4827 /* SIE will save etoken directly into SDNX and therefore kvm_run */ 4828 } 4829 4830 static void store_regs(struct kvm_vcpu *vcpu) 4831 { 4832 struct kvm_run *kvm_run = vcpu->run; 4833 4834 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask; 4835 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; 4836 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu); 4837 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128); 4838 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu); 4839 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc; 4840 kvm_run->s.regs.pft = vcpu->arch.pfault_token; 4841 kvm_run->s.regs.pfs = vcpu->arch.pfault_select; 4842 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare; 4843 save_access_regs(vcpu->run->s.regs.acrs); 4844 restore_access_regs(vcpu->arch.host_acrs); 4845 /* Save guest register state */ 4846 save_fpu_regs(); 4847 vcpu->run->s.regs.fpc = current->thread.fpu.fpc; 4848 /* Restore will be done lazily at return */ 4849 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc; 4850 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs; 4851 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) 4852 store_regs_fmt2(vcpu); 4853 } 4854 4855 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) 4856 { 4857 struct kvm_run *kvm_run = vcpu->run; 4858 int rc; 4859 4860 /* 4861 * Running a VM while dumping always has the potential to 4862 * produce inconsistent dump data. But for PV vcpus a SIE 4863 * entry while dumping could also lead to a fatal validity 4864 * intercept which we absolutely want to avoid. 
	 */
	if (vcpu->kvm->arch.pv.dumping)
		return -EINVAL;

	if (kvm_run->immediate_exit)
		return -EINTR;

	if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
	    kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
		return -EINVAL;

	vcpu_load(vcpu);

	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
		goto out;
	}

	kvm_sigset_activate(vcpu);

	/*
	 * no need to check the return value of vcpu_start as it can only have
	 * an error for protvirt, but protvirt means user cpu state
	 */
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		rc = -EINVAL;
		goto out;
	}

	sync_regs(vcpu);
	enable_cpu_timer_accounting(vcpu);

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}

	disable_cpu_timer_accounting(vcpu);
	store_regs(vcpu);

	kvm_sigset_deactivate(vcpu);

	vcpu->stat.exit_userspace++;
out:
	vcpu_put(vcpu);
	return rc;
}

/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp, cputm;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else
		gpa -= __LC_FPREGS_SAVE_AREA;

	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	cputm = kvm_s390_get_cpu_timer(vcpu);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
&vcpu->arch.sie_block->gcr, 128); 4986 return rc ? -EFAULT : 0; 4987 } 4988 4989 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) 4990 { 4991 /* 4992 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy 4993 * switch in the run ioctl. Let's update our copies before we save 4994 * it into the save area 4995 */ 4996 save_fpu_regs(); 4997 vcpu->run->s.regs.fpc = current->thread.fpu.fpc; 4998 save_access_regs(vcpu->run->s.regs.acrs); 4999 5000 return kvm_s390_store_status_unloaded(vcpu, addr); 5001 } 5002 5003 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu) 5004 { 5005 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu); 5006 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu); 5007 } 5008 5009 static void __disable_ibs_on_all_vcpus(struct kvm *kvm) 5010 { 5011 unsigned long i; 5012 struct kvm_vcpu *vcpu; 5013 5014 kvm_for_each_vcpu(i, vcpu, kvm) { 5015 __disable_ibs_on_vcpu(vcpu); 5016 } 5017 } 5018 5019 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu) 5020 { 5021 if (!sclp.has_ibs) 5022 return; 5023 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu); 5024 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu); 5025 } 5026 5027 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu) 5028 { 5029 int i, online_vcpus, r = 0, started_vcpus = 0; 5030 5031 if (!is_vcpu_stopped(vcpu)) 5032 return 0; 5033 5034 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1); 5035 /* Only one cpu at a time may enter/leave the STOPPED state. */ 5036 spin_lock(&vcpu->kvm->arch.start_stop_lock); 5037 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); 5038 5039 /* Let's tell the UV that we want to change into the operating state */ 5040 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 5041 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR); 5042 if (r) { 5043 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 5044 return r; 5045 } 5046 } 5047 5048 for (i = 0; i < online_vcpus; i++) { 5049 if (!is_vcpu_stopped(kvm_get_vcpu(vcpu->kvm, i))) 5050 started_vcpus++; 5051 } 5052 5053 if (started_vcpus == 0) { 5054 /* we're the only active VCPU -> speed it up */ 5055 __enable_ibs_on_vcpu(vcpu); 5056 } else if (started_vcpus == 1) { 5057 /* 5058 * As we are starting a second VCPU, we have to disable 5059 * the IBS facility on all VCPUs to remove potentially 5060 * outstanding ENABLE requests. 5061 */ 5062 __disable_ibs_on_all_vcpus(vcpu->kvm); 5063 } 5064 5065 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED); 5066 /* 5067 * The real PSW might have changed due to a RESTART interpreted by the 5068 * ultravisor. We block all interrupts and let the next sie exit 5069 * refresh our view. 5070 */ 5071 if (kvm_s390_pv_cpu_is_protected(vcpu)) 5072 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK; 5073 /* 5074 * Another VCPU might have used IBS while we were offline. 5075 * Let's play safe and flush the VCPU at startup. 5076 */ 5077 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 5078 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 5079 return 0; 5080 } 5081 5082 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu) 5083 { 5084 int i, online_vcpus, r = 0, started_vcpus = 0; 5085 struct kvm_vcpu *started_vcpu = NULL; 5086 5087 if (is_vcpu_stopped(vcpu)) 5088 return 0; 5089 5090 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0); 5091 /* Only one cpu at a time may enter/leave the STOPPED state. 
*/ 5092 spin_lock(&vcpu->kvm->arch.start_stop_lock); 5093 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); 5094 5095 /* Let's tell the UV that we want to change into the stopped state */ 5096 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 5097 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP); 5098 if (r) { 5099 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 5100 return r; 5101 } 5102 } 5103 5104 /* 5105 * Set the VCPU to STOPPED and THEN clear the interrupt flag, 5106 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders 5107 * have been fully processed. This will ensure that the VCPU 5108 * is kept BUSY if another VCPU is inquiring with SIGP SENSE. 5109 */ 5110 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED); 5111 kvm_s390_clear_stop_irq(vcpu); 5112 5113 __disable_ibs_on_vcpu(vcpu); 5114 5115 for (i = 0; i < online_vcpus; i++) { 5116 struct kvm_vcpu *tmp = kvm_get_vcpu(vcpu->kvm, i); 5117 5118 if (!is_vcpu_stopped(tmp)) { 5119 started_vcpus++; 5120 started_vcpu = tmp; 5121 } 5122 } 5123 5124 if (started_vcpus == 1) { 5125 /* 5126 * As we only have one VCPU left, we want to enable the 5127 * IBS facility for that VCPU to speed it up. 5128 */ 5129 __enable_ibs_on_vcpu(started_vcpu); 5130 } 5131 5132 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 5133 return 0; 5134 } 5135 5136 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, 5137 struct kvm_enable_cap *cap) 5138 { 5139 int r; 5140 5141 if (cap->flags) 5142 return -EINVAL; 5143 5144 switch (cap->cap) { 5145 case KVM_CAP_S390_CSS_SUPPORT: 5146 if (!vcpu->kvm->arch.css_support) { 5147 vcpu->kvm->arch.css_support = 1; 5148 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support"); 5149 trace_kvm_s390_enable_css(vcpu->kvm); 5150 } 5151 r = 0; 5152 break; 5153 default: 5154 r = -EINVAL; 5155 break; 5156 } 5157 return r; 5158 } 5159 5160 static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu, 5161 struct kvm_s390_mem_op *mop) 5162 { 5163 void __user *uaddr = (void __user *)mop->buf; 5164 int r = 0; 5165 5166 if (mop->flags || !mop->size) 5167 return -EINVAL; 5168 if (mop->size + mop->sida_offset < mop->size) 5169 return -EINVAL; 5170 if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block)) 5171 return -E2BIG; 5172 if (!kvm_s390_pv_cpu_is_protected(vcpu)) 5173 return -EINVAL; 5174 5175 switch (mop->op) { 5176 case KVM_S390_MEMOP_SIDA_READ: 5177 if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) + 5178 mop->sida_offset), mop->size)) 5179 r = -EFAULT; 5180 5181 break; 5182 case KVM_S390_MEMOP_SIDA_WRITE: 5183 if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) + 5184 mop->sida_offset), uaddr, mop->size)) 5185 r = -EFAULT; 5186 break; 5187 } 5188 return r; 5189 } 5190 5191 static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu, 5192 struct kvm_s390_mem_op *mop) 5193 { 5194 void __user *uaddr = (void __user *)mop->buf; 5195 void *tmpbuf = NULL; 5196 int r = 0; 5197 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION 5198 | KVM_S390_MEMOP_F_CHECK_ONLY 5199 | KVM_S390_MEMOP_F_SKEY_PROTECTION; 5200 5201 if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size) 5202 return -EINVAL; 5203 if (mop->size > MEM_OP_MAX_SIZE) 5204 return -E2BIG; 5205 if (kvm_s390_pv_cpu_is_protected(vcpu)) 5206 return -EINVAL; 5207 if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) { 5208 if (access_key_invalid(mop->key)) 5209 return -EINVAL; 5210 } else { 5211 mop->key = 0; 5212 } 5213 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) { 5214 tmpbuf = vmalloc(mop->size); 5215 if (!tmpbuf) 5216 return 
-ENOMEM; 5217 } 5218 5219 switch (mop->op) { 5220 case KVM_S390_MEMOP_LOGICAL_READ: 5221 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { 5222 r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, 5223 GACC_FETCH, mop->key); 5224 break; 5225 } 5226 r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf, 5227 mop->size, mop->key); 5228 if (r == 0) { 5229 if (copy_to_user(uaddr, tmpbuf, mop->size)) 5230 r = -EFAULT; 5231 } 5232 break; 5233 case KVM_S390_MEMOP_LOGICAL_WRITE: 5234 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { 5235 r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, 5236 GACC_STORE, mop->key); 5237 break; 5238 } 5239 if (copy_from_user(tmpbuf, uaddr, mop->size)) { 5240 r = -EFAULT; 5241 break; 5242 } 5243 r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf, 5244 mop->size, mop->key); 5245 break; 5246 } 5247 5248 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0) 5249 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); 5250 5251 vfree(tmpbuf); 5252 return r; 5253 } 5254 5255 static long kvm_s390_vcpu_memsida_op(struct kvm_vcpu *vcpu, 5256 struct kvm_s390_mem_op *mop) 5257 { 5258 int r, srcu_idx; 5259 5260 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 5261 5262 switch (mop->op) { 5263 case KVM_S390_MEMOP_LOGICAL_READ: 5264 case KVM_S390_MEMOP_LOGICAL_WRITE: 5265 r = kvm_s390_vcpu_mem_op(vcpu, mop); 5266 break; 5267 case KVM_S390_MEMOP_SIDA_READ: 5268 case KVM_S390_MEMOP_SIDA_WRITE: 5269 /* we are locked against sida going away by the vcpu->mutex */ 5270 r = kvm_s390_vcpu_sida_op(vcpu, mop); 5271 break; 5272 default: 5273 r = -EINVAL; 5274 } 5275 5276 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); 5277 return r; 5278 } 5279 5280 long kvm_arch_vcpu_async_ioctl(struct file *filp, 5281 unsigned int ioctl, unsigned long arg) 5282 { 5283 struct kvm_vcpu *vcpu = filp->private_data; 5284 void __user *argp = (void __user *)arg; 5285 5286 switch (ioctl) { 5287 case KVM_S390_IRQ: { 5288 struct kvm_s390_irq s390irq; 5289 5290 if (copy_from_user(&s390irq, argp, sizeof(s390irq))) 5291 return -EFAULT; 5292 return kvm_s390_inject_vcpu(vcpu, &s390irq); 5293 } 5294 case KVM_S390_INTERRUPT: { 5295 struct kvm_s390_interrupt s390int; 5296 struct kvm_s390_irq s390irq = {}; 5297 5298 if (copy_from_user(&s390int, argp, sizeof(s390int))) 5299 return -EFAULT; 5300 if (s390int_to_s390irq(&s390int, &s390irq)) 5301 return -EINVAL; 5302 return kvm_s390_inject_vcpu(vcpu, &s390irq); 5303 } 5304 } 5305 return -ENOIOCTLCMD; 5306 } 5307 5308 static int kvm_s390_handle_pv_vcpu_dump(struct kvm_vcpu *vcpu, 5309 struct kvm_pv_cmd *cmd) 5310 { 5311 struct kvm_s390_pv_dmp dmp; 5312 void *data; 5313 int ret; 5314 5315 /* Dump initialization is a prerequisite */ 5316 if (!vcpu->kvm->arch.pv.dumping) 5317 return -EINVAL; 5318 5319 if (copy_from_user(&dmp, (__u8 __user *)cmd->data, sizeof(dmp))) 5320 return -EFAULT; 5321 5322 /* We only handle this subcmd right now */ 5323 if (dmp.subcmd != KVM_PV_DUMP_CPU) 5324 return -EINVAL; 5325 5326 /* CPU dump length is the same as create cpu storage donation. 
	 */
	if (dmp.buff_len != uv_info.guest_cpu_stor_len)
		return -EINVAL;

	data = kvzalloc(uv_info.guest_cpu_stor_len, GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	ret = kvm_s390_pv_dump_cpu(vcpu, data, &cmd->rc, &cmd->rrc);

	VCPU_EVENT(vcpu, 3, "PROTVIRT DUMP CPU %d rc %x rrc %x",
		   vcpu->vcpu_id, cmd->rc, cmd->rrc);

	if (ret)
		ret = -EINVAL;

	/* On success copy over the dump data */
	if (!ret && copy_to_user((__u8 __user *)dmp.buff_addr, data, uv_info.guest_cpu_stor_len))
		ret = -EFAULT;

	kvfree(data);
	return ret;
}

long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;
	u16 rc, rrc;

	vcpu_load(vcpu);

	switch (ioctl) {
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_store_status_unloaded(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_CLEAR_RESET:
		r = 0;
		kvm_arch_vcpu_ioctl_clear_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_S390_INITIAL_RESET:
		r = 0;
		kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET_INITIAL,
					  &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_S390_NORMAL_RESET:
		r = 0;
		kvm_arch_vcpu_ioctl_normal_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET, &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;
		r = -EINVAL;
		if (kvm_s390_pv_cpu_is_protected(vcpu))
			break;
		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case
KVM_S390_VCPU_FAULT: { 5459 r = gmap_fault(vcpu->arch.gmap, arg, 0); 5460 break; 5461 } 5462 case KVM_ENABLE_CAP: 5463 { 5464 struct kvm_enable_cap cap; 5465 r = -EFAULT; 5466 if (copy_from_user(&cap, argp, sizeof(cap))) 5467 break; 5468 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); 5469 break; 5470 } 5471 case KVM_S390_MEM_OP: { 5472 struct kvm_s390_mem_op mem_op; 5473 5474 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0) 5475 r = kvm_s390_vcpu_memsida_op(vcpu, &mem_op); 5476 else 5477 r = -EFAULT; 5478 break; 5479 } 5480 case KVM_S390_SET_IRQ_STATE: { 5481 struct kvm_s390_irq_state irq_state; 5482 5483 r = -EFAULT; 5484 if (copy_from_user(&irq_state, argp, sizeof(irq_state))) 5485 break; 5486 if (irq_state.len > VCPU_IRQS_MAX_BUF || 5487 irq_state.len == 0 || 5488 irq_state.len % sizeof(struct kvm_s390_irq) > 0) { 5489 r = -EINVAL; 5490 break; 5491 } 5492 /* do not use irq_state.flags, it will break old QEMUs */ 5493 r = kvm_s390_set_irq_state(vcpu, 5494 (void __user *) irq_state.buf, 5495 irq_state.len); 5496 break; 5497 } 5498 case KVM_S390_GET_IRQ_STATE: { 5499 struct kvm_s390_irq_state irq_state; 5500 5501 r = -EFAULT; 5502 if (copy_from_user(&irq_state, argp, sizeof(irq_state))) 5503 break; 5504 if (irq_state.len == 0) { 5505 r = -EINVAL; 5506 break; 5507 } 5508 /* do not use irq_state.flags, it will break old QEMUs */ 5509 r = kvm_s390_get_irq_state(vcpu, 5510 (__u8 __user *) irq_state.buf, 5511 irq_state.len); 5512 break; 5513 } 5514 case KVM_S390_PV_CPU_COMMAND: { 5515 struct kvm_pv_cmd cmd; 5516 5517 r = -EINVAL; 5518 if (!is_prot_virt_host()) 5519 break; 5520 5521 r = -EFAULT; 5522 if (copy_from_user(&cmd, argp, sizeof(cmd))) 5523 break; 5524 5525 r = -EINVAL; 5526 if (cmd.flags) 5527 break; 5528 5529 /* We only handle this cmd right now */ 5530 if (cmd.cmd != KVM_PV_DUMP) 5531 break; 5532 5533 r = kvm_s390_handle_pv_vcpu_dump(vcpu, &cmd); 5534 5535 /* Always copy over UV rc / rrc data */ 5536 if (copy_to_user((__u8 __user *)argp, &cmd.rc, 5537 sizeof(cmd.rc) + sizeof(cmd.rrc))) 5538 r = -EFAULT; 5539 break; 5540 } 5541 default: 5542 r = -ENOTTY; 5543 } 5544 5545 vcpu_put(vcpu); 5546 return r; 5547 } 5548 5549 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) 5550 { 5551 #ifdef CONFIG_KVM_S390_UCONTROL 5552 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET) 5553 && (kvm_is_ucontrol(vcpu->kvm))) { 5554 vmf->page = virt_to_page(vcpu->arch.sie_block); 5555 get_page(vmf->page); 5556 return 0; 5557 } 5558 #endif 5559 return VM_FAULT_SIGBUS; 5560 } 5561 5562 /* Section: memory related */ 5563 int kvm_arch_prepare_memory_region(struct kvm *kvm, 5564 const struct kvm_memory_slot *old, 5565 struct kvm_memory_slot *new, 5566 enum kvm_mr_change change) 5567 { 5568 gpa_t size; 5569 5570 /* When we are protected, we should not change the memory slots */ 5571 if (kvm_s390_pv_get_handle(kvm)) 5572 return -EINVAL; 5573 5574 if (change == KVM_MR_DELETE || change == KVM_MR_FLAGS_ONLY) 5575 return 0; 5576 5577 /* A few sanity checks. We can have memory slots which have to be 5578 located/ended at a segment boundary (1MB). The memory in userland is 5579 ok to be fragmented into various different vmas. 
It is okay to mmap() 5580 and munmap() stuff in this slot after doing this call at any time */ 5581 5582 if (new->userspace_addr & 0xffffful) 5583 return -EINVAL; 5584 5585 size = new->npages * PAGE_SIZE; 5586 if (size & 0xffffful) 5587 return -EINVAL; 5588 5589 if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit) 5590 return -EINVAL; 5591 5592 return 0; 5593 } 5594 5595 void kvm_arch_commit_memory_region(struct kvm *kvm, 5596 struct kvm_memory_slot *old, 5597 const struct kvm_memory_slot *new, 5598 enum kvm_mr_change change) 5599 { 5600 int rc = 0; 5601 5602 switch (change) { 5603 case KVM_MR_DELETE: 5604 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE, 5605 old->npages * PAGE_SIZE); 5606 break; 5607 case KVM_MR_MOVE: 5608 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE, 5609 old->npages * PAGE_SIZE); 5610 if (rc) 5611 break; 5612 fallthrough; 5613 case KVM_MR_CREATE: 5614 rc = gmap_map_segment(kvm->arch.gmap, new->userspace_addr, 5615 new->base_gfn * PAGE_SIZE, 5616 new->npages * PAGE_SIZE); 5617 break; 5618 case KVM_MR_FLAGS_ONLY: 5619 break; 5620 default: 5621 WARN(1, "Unknown KVM MR CHANGE: %d\n", change); 5622 } 5623 if (rc) 5624 pr_warn("failed to commit memory region\n"); 5625 return; 5626 } 5627 5628 static inline unsigned long nonhyp_mask(int i) 5629 { 5630 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30; 5631 5632 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4); 5633 } 5634 5635 static int __init kvm_s390_init(void) 5636 { 5637 int i; 5638 5639 if (!sclp.has_sief2) { 5640 pr_info("SIE is not available\n"); 5641 return -ENODEV; 5642 } 5643 5644 if (nested && hpage) { 5645 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n"); 5646 return -EINVAL; 5647 } 5648 5649 for (i = 0; i < 16; i++) 5650 kvm_s390_fac_base[i] |= 5651 stfle_fac_list[i] & nonhyp_mask(i); 5652 5653 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); 5654 } 5655 5656 static void __exit kvm_s390_exit(void) 5657 { 5658 kvm_exit(); 5659 } 5660 5661 module_init(kvm_s390_init); 5662 module_exit(kvm_s390_exit); 5663 5664 /* 5665 * Enable autoloading of the kvm module. 5666 * Note that we add the module alias here instead of virt/kvm/kvm_main.c 5667 * since x86 takes a different approach. 5668 */ 5669 #include <linux/miscdevice.h> 5670 MODULE_ALIAS_MISCDEV(KVM_MINOR); 5671 MODULE_ALIAS("devname:kvm"); 5672
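/*
 * Illustrative sketch only, not used by the kernel build: one way a userspace
 * VMM could drive the KVM_S390_MEM_OP vcpu ioctl that is dispatched above via
 * kvm_s390_vcpu_memsida_op(). Field names match struct kvm_s390_mem_op as it
 * is used in this file; the vcpu file descriptor, buffer and error handling
 * are assumed userspace boilerplate, not taken from this source.
 *
 *	char buffer[512];
 *	struct kvm_s390_mem_op mop = {
 *		.gaddr = 0x1000,			// guest logical address (example value)
 *		.size  = sizeof(buffer),		// must be > 0 and <= MEM_OP_MAX_SIZE
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,	// plain logical read, no flags
 *		.buf   = (__u64)(unsigned long)buffer,	// userspace destination
 *		.ar    = 0,				// access register number
 *	};
 *
 *	if (ioctl(vcpu_fd, KVM_S390_MEM_OP, &mop) < 0)
 *		perror("KVM_S390_MEM_OP");
 */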