// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2020
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>
#include <linux/pgtable.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include <asm/ap.h>
#include <asm/uv.h>
#include <asm/fpu/api.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
	KVM_GENERIC_VM_STATS(),
	STATS_DESC_COUNTER(VM, inject_io),
	STATS_DESC_COUNTER(VM, inject_float_mchk),
	STATS_DESC_COUNTER(VM, inject_pfault_done),
	STATS_DESC_COUNTER(VM, inject_service_signal),
	STATS_DESC_COUNTER(VM, inject_virtio)
};

const struct kvm_stats_header kvm_vm_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vm_stats_desc),
};
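/*
 * Note: the offsets above follow the layout of the binary statistics file
 * exposed by the generic KVM code (the header, then the id string, then
 * the descriptor array, then the data values, all relative to the start
 * of the file).  The vcpu descriptors and header below are laid out the
 * same way.
 */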

const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
	KVM_GENERIC_VCPU_STATS(),
	STATS_DESC_COUNTER(VCPU, exit_userspace),
	STATS_DESC_COUNTER(VCPU, exit_null),
	STATS_DESC_COUNTER(VCPU, exit_external_request),
	STATS_DESC_COUNTER(VCPU, exit_io_request),
	STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
	STATS_DESC_COUNTER(VCPU, exit_stop_request),
	STATS_DESC_COUNTER(VCPU, exit_validity),
	STATS_DESC_COUNTER(VCPU, exit_instruction),
	STATS_DESC_COUNTER(VCPU, exit_pei),
	STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
	STATS_DESC_COUNTER(VCPU, instruction_lctl),
	STATS_DESC_COUNTER(VCPU, instruction_lctlg),
	STATS_DESC_COUNTER(VCPU, instruction_stctl),
	STATS_DESC_COUNTER(VCPU, instruction_stctg),
	STATS_DESC_COUNTER(VCPU, exit_program_interruption),
	STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
	STATS_DESC_COUNTER(VCPU, exit_operation_exception),
	STATS_DESC_COUNTER(VCPU, deliver_ckc),
	STATS_DESC_COUNTER(VCPU, deliver_cputm),
	STATS_DESC_COUNTER(VCPU, deliver_external_call),
	STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
	STATS_DESC_COUNTER(VCPU, deliver_service_signal),
	STATS_DESC_COUNTER(VCPU, deliver_virtio),
	STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
	STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
	STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
	STATS_DESC_COUNTER(VCPU, deliver_program),
	STATS_DESC_COUNTER(VCPU, deliver_io),
	STATS_DESC_COUNTER(VCPU, deliver_machine_check),
	STATS_DESC_COUNTER(VCPU, exit_wait_state),
	STATS_DESC_COUNTER(VCPU, inject_ckc),
	STATS_DESC_COUNTER(VCPU, inject_cputm),
	STATS_DESC_COUNTER(VCPU, inject_external_call),
	STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
	STATS_DESC_COUNTER(VCPU, inject_mchk),
	STATS_DESC_COUNTER(VCPU, inject_pfault_init),
	STATS_DESC_COUNTER(VCPU, inject_program),
	STATS_DESC_COUNTER(VCPU, inject_restart),
	STATS_DESC_COUNTER(VCPU, inject_set_prefix),
	STATS_DESC_COUNTER(VCPU, inject_stop_signal),
	STATS_DESC_COUNTER(VCPU, instruction_epsw),
	STATS_DESC_COUNTER(VCPU, instruction_gs),
	STATS_DESC_COUNTER(VCPU, instruction_io_other),
	STATS_DESC_COUNTER(VCPU, instruction_lpsw),
	STATS_DESC_COUNTER(VCPU, instruction_lpswe),
	STATS_DESC_COUNTER(VCPU, instruction_pfmf),
	STATS_DESC_COUNTER(VCPU, instruction_ptff),
	STATS_DESC_COUNTER(VCPU, instruction_sck),
	STATS_DESC_COUNTER(VCPU, instruction_sckpf),
	STATS_DESC_COUNTER(VCPU, instruction_stidp),
	STATS_DESC_COUNTER(VCPU, instruction_spx),
	STATS_DESC_COUNTER(VCPU, instruction_stpx),
	STATS_DESC_COUNTER(VCPU, instruction_stap),
	STATS_DESC_COUNTER(VCPU, instruction_iske),
	STATS_DESC_COUNTER(VCPU, instruction_ri),
	STATS_DESC_COUNTER(VCPU, instruction_rrbe),
	STATS_DESC_COUNTER(VCPU, instruction_sske),
	STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
	STATS_DESC_COUNTER(VCPU, instruction_stsi),
	STATS_DESC_COUNTER(VCPU, instruction_stfl),
	STATS_DESC_COUNTER(VCPU, instruction_tb),
	STATS_DESC_COUNTER(VCPU, instruction_tpi),
	STATS_DESC_COUNTER(VCPU, instruction_tprot),
	STATS_DESC_COUNTER(VCPU, instruction_tsch),
	STATS_DESC_COUNTER(VCPU, instruction_sie),
	STATS_DESC_COUNTER(VCPU, instruction_essa),
	STATS_DESC_COUNTER(VCPU, instruction_sthyi),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
	STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
	STATS_DESC_COUNTER(VCPU, diag_9c_forward),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
	STATS_DESC_COUNTER(VCPU, pfault_sync)
};

const struct kvm_stats_header kvm_vcpu_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vcpu_stats_desc),
};

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* allow 1m huge page guest backing, if !nested */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");

/* maximum percentage of steal time for polling.  >100 is treated like 100 */
static u8 halt_poll_max_steal = 10;
module_param(halt_poll_max_steal, byte, 0644);
MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");

/* if set to true, the GISA will be initialized and used if available */
static bool use_gisa = true;
module_param(use_gisa, bool, 0644);
MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");

/* maximum diag9c forwarding per second */
unsigned int diag9c_forwarding_hz;
module_param(diag9c_forwarding_hz, uint, 0644);
MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
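/*
 * The parameters above can be set at load time, e.g. (values purely
 * illustrative) via "kvm.nested=1 kvm.hpage=1" on the kernel command line
 * or, if kvm has been built as a module, via "modprobe kvm nested=1
 * hpage=1".  As the comments note, 1m huge page backing is only intended
 * for the !nested case, and diag9c forwarding stays off while
 * diag9c_forwarding_hz is 0.
 */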

/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, this requires changes to code, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };

static unsigned long kvm_s390_fac_size(void)
{
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
		sizeof(stfle_fac_list));

	return SIZE_INTERNAL;
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
debug_info_t *kvm_s390_dbf_uv;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

int kvm_arch_check_processor_compat(void *opaque)
{
	return 0;
}

/* forward declarations */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
static int sca_switch_to_extended(struct kvm *kvm);

static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
	u8 delta_idx = 0;

	/*
	 * The TOD jumps by delta, we have to compensate this by adding
	 * -delta to the epoch.
	 */
	delta = -delta;

	/* sign-extension - we're adding to signed values below */
	if ((s64)delta < 0)
		delta_idx = -1;

	scb->epoch += delta;
	if (scb->ecd & ECD_MEF) {
		scb->epdx += delta_idx;
		if (scb->epoch < delta)
			scb->epdx += 1;
	}
}
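/*
 * kvm_clock_sync_scb() above is in effect a 128-bit two's complement
 * addition of (delta_idx:delta) to (epdx:epoch): delta_idx is the sign
 * extension of the (already negated) delta into the upper doubleword and
 * the "epoch < delta" test detects the unsigned carry out of the lower
 * doubleword.  Illustrative example (values assumed): with epoch equal to
 * 0xffffffffffffffffULL, epdx == 0 and delta == 1, the epoch wraps to 0
 * and the carry increments epdx to 1.
 */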

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	unsigned long i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm_for_each_vcpu(i, vcpu, kvm) {
			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
			if (i == 0) {
				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
			}
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				kvm_clock_sync_scb(vcpu->arch.vsie_block,
						   *delta);
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void *opaque)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

static inline int plo_test_bit(unsigned char nr)
{
	unsigned long function = (unsigned long)nr | 0x100;
	int cc;

	asm volatile(
		"	lgr	0,%[function]\n"
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: [function] "d" (function)
		: "cc", "0");
	return cc == 0;
}

static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
{
	asm volatile(
		"	lghi	0,0\n"
		"	lgr	1,%[query]\n"
		/* Parameter registers are ignored */
		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
		:
		: [query] "d" ((unsigned long)query), [opc] "i" (opcode)
		: "cc", "memory", "0", "1");
}
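/*
 * Both helpers above use the same query convention: general register 0
 * holds the function code (for PLO, or'ing in 0x100 selects the
 * "test bit" form, and cc == 0 means the subfunction is installed), while
 * __insn32_query() issues the query function (code 0) of a 32-bit opcode
 * with general register 1 pointing to the buffer that receives the query
 * result.  kvm_s390_cpu_feat_init() below uses both to populate
 * kvm_s390_available_subfunc.
 */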

#define INSN_SORTL 0xb938
#define INSN_DFLTCC 0xb939

static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);

	if (test_facility(155)) /* MSA9 */
		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kdsa);

	if (test_facility(150)) /* SORTL */
		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);

	if (test_facility(151)) /* DFLTCC */
		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}
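/*
 * Everything recorded above (kvm_s390_available_cpu_feat and
 * kvm_s390_available_subfunc) only describes host capabilities.  It is
 * reported to user space through the KVM_S390_VM_CPU_MACHINE_FEAT and
 * KVM_S390_VM_CPU_MACHINE_SUBFUNC attributes (see
 * kvm_s390_get_machine_feat() and kvm_s390_get_machine_subfunc() below),
 * while the guest view is chosen via the corresponding PROCESSOR
 * attributes.
 */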

int kvm_arch_init(void *opaque)
{
	int rc = -ENOMEM;

	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf_uv)
		goto out;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
		goto out;

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
	if (rc) {
		pr_err("A FLIC registration call failed with rc=%d\n", rc);
		goto out;
	}

	rc = kvm_s390_gib_init(GAL_ISC);
	if (rc)
		goto out;

	return 0;

out:
	kvm_arch_exit();
	return rc;
}

void kvm_arch_exit(void)
{
	kvm_s390_gib_destroy();
	debug_unregister(kvm_s390_dbf);
	debug_unregister(kvm_s390_dbf_uv);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
	case KVM_CAP_S390_AIS_MIGRATION:
	case KVM_CAP_S390_VCPU_RESETS:
	case KVM_CAP_SET_GUEST_DEBUG:
	case KVM_CAP_S390_DIAG318:
		r = 1;
		break;
	case KVM_CAP_SET_GUEST_DEBUG2:
		r = KVM_GUESTDBG_VALID_MASK;
		break;
	case KVM_CAP_S390_HPAGE_1M:
		r = 0;
		if (hpage && !kvm_is_ucontrol(kvm))
			r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
	case KVM_CAP_MAX_VCPU_ID:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		if (ext == KVM_CAP_NR_VCPUS)
			r = min_t(unsigned int, num_online_cpus(), r);
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	case KVM_CAP_S390_BPB:
		r = test_facility(82);
		break;
	case KVM_CAP_S390_PROTECTED:
		r = is_prot_virt_host();
		break;
	default:
		r = 0;
	}
	return r;
}

void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
	int i;
	gfn_t cur_gfn, last_gfn;
	unsigned long gaddr, vmaddr;
	struct gmap *gmap = kvm->arch.gmap;
	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);

	/* Loop over all guest segments */
	cur_gfn = memslot->base_gfn;
	last_gfn = memslot->base_gfn + memslot->npages;
	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
		gaddr = gfn_to_gpa(cur_gfn);
		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
		if (kvm_is_error_hva(vmaddr))
			continue;

		bitmap_zero(bitmap, _PAGE_ENTRIES);
		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
		for (i = 0; i < _PAGE_ENTRIES; i++) {
			if (test_bit(i, bitmap))
				mark_page_dirty(kvm, cur_gfn + i);
		}

639 if (fatal_signal_pending(current)) 640 return; 641 cond_resched(); 642 } 643 } 644 645 /* Section: vm related */ 646 static void sca_del_vcpu(struct kvm_vcpu *vcpu); 647 648 /* 649 * Get (and clear) the dirty memory log for a memory slot. 650 */ 651 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, 652 struct kvm_dirty_log *log) 653 { 654 int r; 655 unsigned long n; 656 struct kvm_memory_slot *memslot; 657 int is_dirty; 658 659 if (kvm_is_ucontrol(kvm)) 660 return -EINVAL; 661 662 mutex_lock(&kvm->slots_lock); 663 664 r = -EINVAL; 665 if (log->slot >= KVM_USER_MEM_SLOTS) 666 goto out; 667 668 r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot); 669 if (r) 670 goto out; 671 672 /* Clear the dirty log */ 673 if (is_dirty) { 674 n = kvm_dirty_bitmap_bytes(memslot); 675 memset(memslot->dirty_bitmap, 0, n); 676 } 677 r = 0; 678 out: 679 mutex_unlock(&kvm->slots_lock); 680 return r; 681 } 682 683 static void icpt_operexc_on_all_vcpus(struct kvm *kvm) 684 { 685 unsigned long i; 686 struct kvm_vcpu *vcpu; 687 688 kvm_for_each_vcpu(i, vcpu, kvm) { 689 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu); 690 } 691 } 692 693 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) 694 { 695 int r; 696 697 if (cap->flags) 698 return -EINVAL; 699 700 switch (cap->cap) { 701 case KVM_CAP_S390_IRQCHIP: 702 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP"); 703 kvm->arch.use_irqchip = 1; 704 r = 0; 705 break; 706 case KVM_CAP_S390_USER_SIGP: 707 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP"); 708 kvm->arch.user_sigp = 1; 709 r = 0; 710 break; 711 case KVM_CAP_S390_VECTOR_REGISTERS: 712 mutex_lock(&kvm->lock); 713 if (kvm->created_vcpus) { 714 r = -EBUSY; 715 } else if (MACHINE_HAS_VX) { 716 set_kvm_facility(kvm->arch.model.fac_mask, 129); 717 set_kvm_facility(kvm->arch.model.fac_list, 129); 718 if (test_facility(134)) { 719 set_kvm_facility(kvm->arch.model.fac_mask, 134); 720 set_kvm_facility(kvm->arch.model.fac_list, 134); 721 } 722 if (test_facility(135)) { 723 set_kvm_facility(kvm->arch.model.fac_mask, 135); 724 set_kvm_facility(kvm->arch.model.fac_list, 135); 725 } 726 if (test_facility(148)) { 727 set_kvm_facility(kvm->arch.model.fac_mask, 148); 728 set_kvm_facility(kvm->arch.model.fac_list, 148); 729 } 730 if (test_facility(152)) { 731 set_kvm_facility(kvm->arch.model.fac_mask, 152); 732 set_kvm_facility(kvm->arch.model.fac_list, 152); 733 } 734 if (test_facility(192)) { 735 set_kvm_facility(kvm->arch.model.fac_mask, 192); 736 set_kvm_facility(kvm->arch.model.fac_list, 192); 737 } 738 r = 0; 739 } else 740 r = -EINVAL; 741 mutex_unlock(&kvm->lock); 742 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s", 743 r ? "(not available)" : "(success)"); 744 break; 745 case KVM_CAP_S390_RI: 746 r = -EINVAL; 747 mutex_lock(&kvm->lock); 748 if (kvm->created_vcpus) { 749 r = -EBUSY; 750 } else if (test_facility(64)) { 751 set_kvm_facility(kvm->arch.model.fac_mask, 64); 752 set_kvm_facility(kvm->arch.model.fac_list, 64); 753 r = 0; 754 } 755 mutex_unlock(&kvm->lock); 756 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s", 757 r ? "(not available)" : "(success)"); 758 break; 759 case KVM_CAP_S390_AIS: 760 mutex_lock(&kvm->lock); 761 if (kvm->created_vcpus) { 762 r = -EBUSY; 763 } else { 764 set_kvm_facility(kvm->arch.model.fac_mask, 72); 765 set_kvm_facility(kvm->arch.model.fac_list, 72); 766 r = 0; 767 } 768 mutex_unlock(&kvm->lock); 769 VM_EVENT(kvm, 3, "ENABLE: AIS %s", 770 r ? 
"(not available)" : "(success)"); 771 break; 772 case KVM_CAP_S390_GS: 773 r = -EINVAL; 774 mutex_lock(&kvm->lock); 775 if (kvm->created_vcpus) { 776 r = -EBUSY; 777 } else if (test_facility(133)) { 778 set_kvm_facility(kvm->arch.model.fac_mask, 133); 779 set_kvm_facility(kvm->arch.model.fac_list, 133); 780 r = 0; 781 } 782 mutex_unlock(&kvm->lock); 783 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s", 784 r ? "(not available)" : "(success)"); 785 break; 786 case KVM_CAP_S390_HPAGE_1M: 787 mutex_lock(&kvm->lock); 788 if (kvm->created_vcpus) 789 r = -EBUSY; 790 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm)) 791 r = -EINVAL; 792 else { 793 r = 0; 794 mmap_write_lock(kvm->mm); 795 kvm->mm->context.allow_gmap_hpage_1m = 1; 796 mmap_write_unlock(kvm->mm); 797 /* 798 * We might have to create fake 4k page 799 * tables. To avoid that the hardware works on 800 * stale PGSTEs, we emulate these instructions. 801 */ 802 kvm->arch.use_skf = 0; 803 kvm->arch.use_pfmfi = 0; 804 } 805 mutex_unlock(&kvm->lock); 806 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s", 807 r ? "(not available)" : "(success)"); 808 break; 809 case KVM_CAP_S390_USER_STSI: 810 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI"); 811 kvm->arch.user_stsi = 1; 812 r = 0; 813 break; 814 case KVM_CAP_S390_USER_INSTR0: 815 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0"); 816 kvm->arch.user_instr0 = 1; 817 icpt_operexc_on_all_vcpus(kvm); 818 r = 0; 819 break; 820 default: 821 r = -EINVAL; 822 break; 823 } 824 return r; 825 } 826 827 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) 828 { 829 int ret; 830 831 switch (attr->attr) { 832 case KVM_S390_VM_MEM_LIMIT_SIZE: 833 ret = 0; 834 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes", 835 kvm->arch.mem_limit); 836 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr)) 837 ret = -EFAULT; 838 break; 839 default: 840 ret = -ENXIO; 841 break; 842 } 843 return ret; 844 } 845 846 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) 847 { 848 int ret; 849 unsigned int idx; 850 switch (attr->attr) { 851 case KVM_S390_VM_MEM_ENABLE_CMMA: 852 ret = -ENXIO; 853 if (!sclp.has_cmma) 854 break; 855 856 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support"); 857 mutex_lock(&kvm->lock); 858 if (kvm->created_vcpus) 859 ret = -EBUSY; 860 else if (kvm->mm->context.allow_gmap_hpage_1m) 861 ret = -EINVAL; 862 else { 863 kvm->arch.use_cmma = 1; 864 /* Not compatible with cmma. 
*/ 865 kvm->arch.use_pfmfi = 0; 866 ret = 0; 867 } 868 mutex_unlock(&kvm->lock); 869 break; 870 case KVM_S390_VM_MEM_CLR_CMMA: 871 ret = -ENXIO; 872 if (!sclp.has_cmma) 873 break; 874 ret = -EINVAL; 875 if (!kvm->arch.use_cmma) 876 break; 877 878 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states"); 879 mutex_lock(&kvm->lock); 880 idx = srcu_read_lock(&kvm->srcu); 881 s390_reset_cmma(kvm->arch.gmap->mm); 882 srcu_read_unlock(&kvm->srcu, idx); 883 mutex_unlock(&kvm->lock); 884 ret = 0; 885 break; 886 case KVM_S390_VM_MEM_LIMIT_SIZE: { 887 unsigned long new_limit; 888 889 if (kvm_is_ucontrol(kvm)) 890 return -EINVAL; 891 892 if (get_user(new_limit, (u64 __user *)attr->addr)) 893 return -EFAULT; 894 895 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT && 896 new_limit > kvm->arch.mem_limit) 897 return -E2BIG; 898 899 if (!new_limit) 900 return -EINVAL; 901 902 /* gmap_create takes last usable address */ 903 if (new_limit != KVM_S390_NO_MEM_LIMIT) 904 new_limit -= 1; 905 906 ret = -EBUSY; 907 mutex_lock(&kvm->lock); 908 if (!kvm->created_vcpus) { 909 /* gmap_create will round the limit up */ 910 struct gmap *new = gmap_create(current->mm, new_limit); 911 912 if (!new) { 913 ret = -ENOMEM; 914 } else { 915 gmap_remove(kvm->arch.gmap); 916 new->private = kvm; 917 kvm->arch.gmap = new; 918 ret = 0; 919 } 920 } 921 mutex_unlock(&kvm->lock); 922 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit); 923 VM_EVENT(kvm, 3, "New guest asce: 0x%pK", 924 (void *) kvm->arch.gmap->asce); 925 break; 926 } 927 default: 928 ret = -ENXIO; 929 break; 930 } 931 return ret; 932 } 933 934 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu); 935 936 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm) 937 { 938 struct kvm_vcpu *vcpu; 939 unsigned long i; 940 941 kvm_s390_vcpu_block_all(kvm); 942 943 kvm_for_each_vcpu(i, vcpu, kvm) { 944 kvm_s390_vcpu_crypto_setup(vcpu); 945 /* recreate the shadow crycb by leaving the VSIE handler */ 946 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu); 947 } 948 949 kvm_s390_vcpu_unblock_all(kvm); 950 } 951 952 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr) 953 { 954 mutex_lock(&kvm->lock); 955 switch (attr->attr) { 956 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW: 957 if (!test_kvm_facility(kvm, 76)) { 958 mutex_unlock(&kvm->lock); 959 return -EINVAL; 960 } 961 get_random_bytes( 962 kvm->arch.crypto.crycb->aes_wrapping_key_mask, 963 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); 964 kvm->arch.crypto.aes_kw = 1; 965 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support"); 966 break; 967 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW: 968 if (!test_kvm_facility(kvm, 76)) { 969 mutex_unlock(&kvm->lock); 970 return -EINVAL; 971 } 972 get_random_bytes( 973 kvm->arch.crypto.crycb->dea_wrapping_key_mask, 974 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); 975 kvm->arch.crypto.dea_kw = 1; 976 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support"); 977 break; 978 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW: 979 if (!test_kvm_facility(kvm, 76)) { 980 mutex_unlock(&kvm->lock); 981 return -EINVAL; 982 } 983 kvm->arch.crypto.aes_kw = 0; 984 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0, 985 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); 986 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support"); 987 break; 988 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW: 989 if (!test_kvm_facility(kvm, 76)) { 990 mutex_unlock(&kvm->lock); 991 return -EINVAL; 992 } 993 kvm->arch.crypto.dea_kw = 0; 994 
memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0, 995 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); 996 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support"); 997 break; 998 case KVM_S390_VM_CRYPTO_ENABLE_APIE: 999 if (!ap_instructions_available()) { 1000 mutex_unlock(&kvm->lock); 1001 return -EOPNOTSUPP; 1002 } 1003 kvm->arch.crypto.apie = 1; 1004 break; 1005 case KVM_S390_VM_CRYPTO_DISABLE_APIE: 1006 if (!ap_instructions_available()) { 1007 mutex_unlock(&kvm->lock); 1008 return -EOPNOTSUPP; 1009 } 1010 kvm->arch.crypto.apie = 0; 1011 break; 1012 default: 1013 mutex_unlock(&kvm->lock); 1014 return -ENXIO; 1015 } 1016 1017 kvm_s390_vcpu_crypto_reset_all(kvm); 1018 mutex_unlock(&kvm->lock); 1019 return 0; 1020 } 1021 1022 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req) 1023 { 1024 unsigned long cx; 1025 struct kvm_vcpu *vcpu; 1026 1027 kvm_for_each_vcpu(cx, vcpu, kvm) 1028 kvm_s390_sync_request(req, vcpu); 1029 } 1030 1031 /* 1032 * Must be called with kvm->srcu held to avoid races on memslots, and with 1033 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration. 1034 */ 1035 static int kvm_s390_vm_start_migration(struct kvm *kvm) 1036 { 1037 struct kvm_memory_slot *ms; 1038 struct kvm_memslots *slots; 1039 unsigned long ram_pages = 0; 1040 int bkt; 1041 1042 /* migration mode already enabled */ 1043 if (kvm->arch.migration_mode) 1044 return 0; 1045 slots = kvm_memslots(kvm); 1046 if (!slots || kvm_memslots_empty(slots)) 1047 return -EINVAL; 1048 1049 if (!kvm->arch.use_cmma) { 1050 kvm->arch.migration_mode = 1; 1051 return 0; 1052 } 1053 /* mark all the pages in active slots as dirty */ 1054 kvm_for_each_memslot(ms, bkt, slots) { 1055 if (!ms->dirty_bitmap) 1056 return -EINVAL; 1057 /* 1058 * The second half of the bitmap is only used on x86, 1059 * and would be wasted otherwise, so we put it to good 1060 * use here to keep track of the state of the storage 1061 * attributes. 1062 */ 1063 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms)); 1064 ram_pages += ms->npages; 1065 } 1066 atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages); 1067 kvm->arch.migration_mode = 1; 1068 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION); 1069 return 0; 1070 } 1071 1072 /* 1073 * Must be called with kvm->slots_lock to avoid races with ourselves and 1074 * kvm_s390_vm_start_migration. 
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	/* migration mode already disabled */
	if (!kvm->arch.migration_mode)
		return 0;
	kvm->arch.migration_mode = 0;
	if (kvm->arch.use_cmma)
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
	return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int res = -ENXIO;

	mutex_lock(&kvm->slots_lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		res = kvm_s390_vm_start_migration(kvm);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->slots_lock);

	return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = kvm->arch.migration_mode;

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
		return -EINVAL;
	kvm_s390_set_tod_clock(kvm, &gtod);

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		gtod.epoch_idx, gtod.tod);

	return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod = { 0 };

	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
			   sizeof(gtod.tod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, &gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_get_tod_clock(struct kvm *kvm,
				   struct kvm_s390_vm_tod_clock *gtod)
{
	union tod_clock clk;

	preempt_disable();

	store_tod_clock_ext(&clk);

	gtod->tod = clk.tod + kvm->arch.epoch;
	gtod->epoch_idx = 0;
	if (test_kvm_facility(kvm, 139)) {
		gtod->epoch_idx = clk.ei + kvm->arch.epdx;
		if (gtod->tod < clk.tod)
			gtod->epoch_idx += 1;
	}

	preempt_enable();
}
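/*
 * As computed above, the guest TOD is the host TOD plus the per-VM epoch
 * (modulo 2^64).  With the multiple-epoch facility (139) the epoch index
 * extends this beyond 64 bits, and the "gtod->tod < clk.tod" test adds
 * the carry out of the 64-bit addition, mirroring kvm_clock_sync_scb().
 */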

static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));
	kvm_s390_get_tod_clock(kvm, &gtod);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);
	return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}
	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	mutex_unlock(&kvm->lock);
	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
data.feat[0], 1340 data.feat[1], 1341 data.feat[2]); 1342 return 0; 1343 } 1344 1345 static int kvm_s390_set_processor_subfunc(struct kvm *kvm, 1346 struct kvm_device_attr *attr) 1347 { 1348 mutex_lock(&kvm->lock); 1349 if (kvm->created_vcpus) { 1350 mutex_unlock(&kvm->lock); 1351 return -EBUSY; 1352 } 1353 1354 if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr, 1355 sizeof(struct kvm_s390_vm_cpu_subfunc))) { 1356 mutex_unlock(&kvm->lock); 1357 return -EFAULT; 1358 } 1359 mutex_unlock(&kvm->lock); 1360 1361 VM_EVENT(kvm, 3, "SET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1362 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0], 1363 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1], 1364 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2], 1365 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]); 1366 VM_EVENT(kvm, 3, "SET: guest PTFF subfunc 0x%16.16lx.%16.16lx", 1367 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0], 1368 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]); 1369 VM_EVENT(kvm, 3, "SET: guest KMAC subfunc 0x%16.16lx.%16.16lx", 1370 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0], 1371 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]); 1372 VM_EVENT(kvm, 3, "SET: guest KMC subfunc 0x%16.16lx.%16.16lx", 1373 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0], 1374 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]); 1375 VM_EVENT(kvm, 3, "SET: guest KM subfunc 0x%16.16lx.%16.16lx", 1376 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0], 1377 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]); 1378 VM_EVENT(kvm, 3, "SET: guest KIMD subfunc 0x%16.16lx.%16.16lx", 1379 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0], 1380 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]); 1381 VM_EVENT(kvm, 3, "SET: guest KLMD subfunc 0x%16.16lx.%16.16lx", 1382 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0], 1383 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]); 1384 VM_EVENT(kvm, 3, "SET: guest PCKMO subfunc 0x%16.16lx.%16.16lx", 1385 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0], 1386 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]); 1387 VM_EVENT(kvm, 3, "SET: guest KMCTR subfunc 0x%16.16lx.%16.16lx", 1388 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0], 1389 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]); 1390 VM_EVENT(kvm, 3, "SET: guest KMF subfunc 0x%16.16lx.%16.16lx", 1391 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0], 1392 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]); 1393 VM_EVENT(kvm, 3, "SET: guest KMO subfunc 0x%16.16lx.%16.16lx", 1394 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0], 1395 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]); 1396 VM_EVENT(kvm, 3, "SET: guest PCC subfunc 0x%16.16lx.%16.16lx", 1397 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0], 1398 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]); 1399 VM_EVENT(kvm, 3, "SET: guest PPNO subfunc 0x%16.16lx.%16.16lx", 1400 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0], 1401 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]); 1402 VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx", 1403 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0], 1404 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]); 1405 VM_EVENT(kvm, 3, "SET: guest KDSA subfunc 0x%16.16lx.%16.16lx", 1406 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0], 1407 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]); 1408 VM_EVENT(kvm, 3, "SET: guest SORTL subfunc 
0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1409 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0], 1410 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1], 1411 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2], 1412 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]); 1413 VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1414 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0], 1415 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1], 1416 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2], 1417 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]); 1418 1419 return 0; 1420 } 1421 1422 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) 1423 { 1424 int ret = -ENXIO; 1425 1426 switch (attr->attr) { 1427 case KVM_S390_VM_CPU_PROCESSOR: 1428 ret = kvm_s390_set_processor(kvm, attr); 1429 break; 1430 case KVM_S390_VM_CPU_PROCESSOR_FEAT: 1431 ret = kvm_s390_set_processor_feat(kvm, attr); 1432 break; 1433 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: 1434 ret = kvm_s390_set_processor_subfunc(kvm, attr); 1435 break; 1436 } 1437 return ret; 1438 } 1439 1440 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr) 1441 { 1442 struct kvm_s390_vm_cpu_processor *proc; 1443 int ret = 0; 1444 1445 proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT); 1446 if (!proc) { 1447 ret = -ENOMEM; 1448 goto out; 1449 } 1450 proc->cpuid = kvm->arch.model.cpuid; 1451 proc->ibc = kvm->arch.model.ibc; 1452 memcpy(&proc->fac_list, kvm->arch.model.fac_list, 1453 S390_ARCH_FAC_LIST_SIZE_BYTE); 1454 VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx", 1455 kvm->arch.model.ibc, 1456 kvm->arch.model.cpuid); 1457 VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx", 1458 kvm->arch.model.fac_list[0], 1459 kvm->arch.model.fac_list[1], 1460 kvm->arch.model.fac_list[2]); 1461 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc))) 1462 ret = -EFAULT; 1463 kfree(proc); 1464 out: 1465 return ret; 1466 } 1467 1468 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr) 1469 { 1470 struct kvm_s390_vm_cpu_machine *mach; 1471 int ret = 0; 1472 1473 mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT); 1474 if (!mach) { 1475 ret = -ENOMEM; 1476 goto out; 1477 } 1478 get_cpu_id((struct cpuid *) &mach->cpuid); 1479 mach->ibc = sclp.ibc; 1480 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask, 1481 S390_ARCH_FAC_LIST_SIZE_BYTE); 1482 memcpy((unsigned long *)&mach->fac_list, stfle_fac_list, 1483 sizeof(stfle_fac_list)); 1484 VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx", 1485 kvm->arch.model.ibc, 1486 kvm->arch.model.cpuid); 1487 VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx", 1488 mach->fac_mask[0], 1489 mach->fac_mask[1], 1490 mach->fac_mask[2]); 1491 VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx", 1492 mach->fac_list[0], 1493 mach->fac_list[1], 1494 mach->fac_list[2]); 1495 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach))) 1496 ret = -EFAULT; 1497 kfree(mach); 1498 out: 1499 return ret; 1500 } 1501 1502 static int kvm_s390_get_processor_feat(struct kvm *kvm, 1503 struct kvm_device_attr *attr) 1504 { 1505 struct kvm_s390_vm_cpu_feat data; 1506 1507 bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat, 1508 KVM_S390_VM_CPU_FEAT_NR_BITS); 1509 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data))) 1510 return -EFAULT; 1511 VM_EVENT(kvm, 3, "GET: guest feat: 
0x%16.16llx.0x%16.16llx.0x%16.16llx", 1512 data.feat[0], 1513 data.feat[1], 1514 data.feat[2]); 1515 return 0; 1516 } 1517 1518 static int kvm_s390_get_machine_feat(struct kvm *kvm, 1519 struct kvm_device_attr *attr) 1520 { 1521 struct kvm_s390_vm_cpu_feat data; 1522 1523 bitmap_copy((unsigned long *) data.feat, 1524 kvm_s390_available_cpu_feat, 1525 KVM_S390_VM_CPU_FEAT_NR_BITS); 1526 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data))) 1527 return -EFAULT; 1528 VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx", 1529 data.feat[0], 1530 data.feat[1], 1531 data.feat[2]); 1532 return 0; 1533 } 1534 1535 static int kvm_s390_get_processor_subfunc(struct kvm *kvm, 1536 struct kvm_device_attr *attr) 1537 { 1538 if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs, 1539 sizeof(struct kvm_s390_vm_cpu_subfunc))) 1540 return -EFAULT; 1541 1542 VM_EVENT(kvm, 3, "GET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1543 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0], 1544 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1], 1545 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2], 1546 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]); 1547 VM_EVENT(kvm, 3, "GET: guest PTFF subfunc 0x%16.16lx.%16.16lx", 1548 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0], 1549 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]); 1550 VM_EVENT(kvm, 3, "GET: guest KMAC subfunc 0x%16.16lx.%16.16lx", 1551 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0], 1552 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]); 1553 VM_EVENT(kvm, 3, "GET: guest KMC subfunc 0x%16.16lx.%16.16lx", 1554 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0], 1555 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]); 1556 VM_EVENT(kvm, 3, "GET: guest KM subfunc 0x%16.16lx.%16.16lx", 1557 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0], 1558 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]); 1559 VM_EVENT(kvm, 3, "GET: guest KIMD subfunc 0x%16.16lx.%16.16lx", 1560 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0], 1561 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]); 1562 VM_EVENT(kvm, 3, "GET: guest KLMD subfunc 0x%16.16lx.%16.16lx", 1563 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0], 1564 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]); 1565 VM_EVENT(kvm, 3, "GET: guest PCKMO subfunc 0x%16.16lx.%16.16lx", 1566 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0], 1567 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]); 1568 VM_EVENT(kvm, 3, "GET: guest KMCTR subfunc 0x%16.16lx.%16.16lx", 1569 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0], 1570 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]); 1571 VM_EVENT(kvm, 3, "GET: guest KMF subfunc 0x%16.16lx.%16.16lx", 1572 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0], 1573 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]); 1574 VM_EVENT(kvm, 3, "GET: guest KMO subfunc 0x%16.16lx.%16.16lx", 1575 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0], 1576 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]); 1577 VM_EVENT(kvm, 3, "GET: guest PCC subfunc 0x%16.16lx.%16.16lx", 1578 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0], 1579 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]); 1580 VM_EVENT(kvm, 3, "GET: guest PPNO subfunc 0x%16.16lx.%16.16lx", 1581 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0], 1582 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]); 1583 VM_EVENT(kvm, 3, "GET: guest KMA subfunc 0x%16.16lx.%16.16lx", 
1584 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0], 1585 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]); 1586 VM_EVENT(kvm, 3, "GET: guest KDSA subfunc 0x%16.16lx.%16.16lx", 1587 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0], 1588 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]); 1589 VM_EVENT(kvm, 3, "GET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1590 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0], 1591 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1], 1592 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2], 1593 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]); 1594 VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1595 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0], 1596 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1], 1597 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2], 1598 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]); 1599 1600 return 0; 1601 } 1602 1603 static int kvm_s390_get_machine_subfunc(struct kvm *kvm, 1604 struct kvm_device_attr *attr) 1605 { 1606 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc, 1607 sizeof(struct kvm_s390_vm_cpu_subfunc))) 1608 return -EFAULT; 1609 1610 VM_EVENT(kvm, 3, "GET: host PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1611 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0], 1612 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1], 1613 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2], 1614 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]); 1615 VM_EVENT(kvm, 3, "GET: host PTFF subfunc 0x%16.16lx.%16.16lx", 1616 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0], 1617 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]); 1618 VM_EVENT(kvm, 3, "GET: host KMAC subfunc 0x%16.16lx.%16.16lx", 1619 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0], 1620 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]); 1621 VM_EVENT(kvm, 3, "GET: host KMC subfunc 0x%16.16lx.%16.16lx", 1622 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0], 1623 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]); 1624 VM_EVENT(kvm, 3, "GET: host KM subfunc 0x%16.16lx.%16.16lx", 1625 ((unsigned long *) &kvm_s390_available_subfunc.km)[0], 1626 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]); 1627 VM_EVENT(kvm, 3, "GET: host KIMD subfunc 0x%16.16lx.%16.16lx", 1628 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0], 1629 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]); 1630 VM_EVENT(kvm, 3, "GET: host KLMD subfunc 0x%16.16lx.%16.16lx", 1631 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0], 1632 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]); 1633 VM_EVENT(kvm, 3, "GET: host PCKMO subfunc 0x%16.16lx.%16.16lx", 1634 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0], 1635 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]); 1636 VM_EVENT(kvm, 3, "GET: host KMCTR subfunc 0x%16.16lx.%16.16lx", 1637 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0], 1638 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]); 1639 VM_EVENT(kvm, 3, "GET: host KMF subfunc 0x%16.16lx.%16.16lx", 1640 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0], 1641 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]); 1642 VM_EVENT(kvm, 3, "GET: host KMO subfunc 0x%16.16lx.%16.16lx", 1643 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0], 1644 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]); 1645 VM_EVENT(kvm, 3, "GET: host PCC 
subfunc 0x%16.16lx.%16.16lx", 1646 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0], 1647 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]); 1648 VM_EVENT(kvm, 3, "GET: host PPNO subfunc 0x%16.16lx.%16.16lx", 1649 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0], 1650 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]); 1651 VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx", 1652 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0], 1653 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]); 1654 VM_EVENT(kvm, 3, "GET: host KDSA subfunc 0x%16.16lx.%16.16lx", 1655 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0], 1656 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]); 1657 VM_EVENT(kvm, 3, "GET: host SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1658 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0], 1659 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1], 1660 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2], 1661 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]); 1662 VM_EVENT(kvm, 3, "GET: host DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1663 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0], 1664 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1], 1665 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2], 1666 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]); 1667 1668 return 0; 1669 } 1670 1671 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) 1672 { 1673 int ret = -ENXIO; 1674 1675 switch (attr->attr) { 1676 case KVM_S390_VM_CPU_PROCESSOR: 1677 ret = kvm_s390_get_processor(kvm, attr); 1678 break; 1679 case KVM_S390_VM_CPU_MACHINE: 1680 ret = kvm_s390_get_machine(kvm, attr); 1681 break; 1682 case KVM_S390_VM_CPU_PROCESSOR_FEAT: 1683 ret = kvm_s390_get_processor_feat(kvm, attr); 1684 break; 1685 case KVM_S390_VM_CPU_MACHINE_FEAT: 1686 ret = kvm_s390_get_machine_feat(kvm, attr); 1687 break; 1688 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: 1689 ret = kvm_s390_get_processor_subfunc(kvm, attr); 1690 break; 1691 case KVM_S390_VM_CPU_MACHINE_SUBFUNC: 1692 ret = kvm_s390_get_machine_subfunc(kvm, attr); 1693 break; 1694 } 1695 return ret; 1696 } 1697 1698 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr) 1699 { 1700 int ret; 1701 1702 switch (attr->group) { 1703 case KVM_S390_VM_MEM_CTRL: 1704 ret = kvm_s390_set_mem_control(kvm, attr); 1705 break; 1706 case KVM_S390_VM_TOD: 1707 ret = kvm_s390_set_tod(kvm, attr); 1708 break; 1709 case KVM_S390_VM_CPU_MODEL: 1710 ret = kvm_s390_set_cpu_model(kvm, attr); 1711 break; 1712 case KVM_S390_VM_CRYPTO: 1713 ret = kvm_s390_vm_set_crypto(kvm, attr); 1714 break; 1715 case KVM_S390_VM_MIGRATION: 1716 ret = kvm_s390_vm_set_migration(kvm, attr); 1717 break; 1718 default: 1719 ret = -ENXIO; 1720 break; 1721 } 1722 1723 return ret; 1724 } 1725 1726 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr) 1727 { 1728 int ret; 1729 1730 switch (attr->group) { 1731 case KVM_S390_VM_MEM_CTRL: 1732 ret = kvm_s390_get_mem_control(kvm, attr); 1733 break; 1734 case KVM_S390_VM_TOD: 1735 ret = kvm_s390_get_tod(kvm, attr); 1736 break; 1737 case KVM_S390_VM_CPU_MODEL: 1738 ret = kvm_s390_get_cpu_model(kvm, attr); 1739 break; 1740 case KVM_S390_VM_MIGRATION: 1741 ret = kvm_s390_vm_get_migration(kvm, attr); 1742 break; 1743 default: 1744 ret = -ENXIO; 1745 break; 1746 } 1747 1748 return ret; 1749 } 1750 1751 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct 
kvm_device_attr *attr) 1752 { 1753 int ret; 1754 1755 switch (attr->group) { 1756 case KVM_S390_VM_MEM_CTRL: 1757 switch (attr->attr) { 1758 case KVM_S390_VM_MEM_ENABLE_CMMA: 1759 case KVM_S390_VM_MEM_CLR_CMMA: 1760 ret = sclp.has_cmma ? 0 : -ENXIO; 1761 break; 1762 case KVM_S390_VM_MEM_LIMIT_SIZE: 1763 ret = 0; 1764 break; 1765 default: 1766 ret = -ENXIO; 1767 break; 1768 } 1769 break; 1770 case KVM_S390_VM_TOD: 1771 switch (attr->attr) { 1772 case KVM_S390_VM_TOD_LOW: 1773 case KVM_S390_VM_TOD_HIGH: 1774 ret = 0; 1775 break; 1776 default: 1777 ret = -ENXIO; 1778 break; 1779 } 1780 break; 1781 case KVM_S390_VM_CPU_MODEL: 1782 switch (attr->attr) { 1783 case KVM_S390_VM_CPU_PROCESSOR: 1784 case KVM_S390_VM_CPU_MACHINE: 1785 case KVM_S390_VM_CPU_PROCESSOR_FEAT: 1786 case KVM_S390_VM_CPU_MACHINE_FEAT: 1787 case KVM_S390_VM_CPU_MACHINE_SUBFUNC: 1788 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: 1789 ret = 0; 1790 break; 1791 default: 1792 ret = -ENXIO; 1793 break; 1794 } 1795 break; 1796 case KVM_S390_VM_CRYPTO: 1797 switch (attr->attr) { 1798 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW: 1799 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW: 1800 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW: 1801 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW: 1802 ret = 0; 1803 break; 1804 case KVM_S390_VM_CRYPTO_ENABLE_APIE: 1805 case KVM_S390_VM_CRYPTO_DISABLE_APIE: 1806 ret = ap_instructions_available() ? 0 : -ENXIO; 1807 break; 1808 default: 1809 ret = -ENXIO; 1810 break; 1811 } 1812 break; 1813 case KVM_S390_VM_MIGRATION: 1814 ret = 0; 1815 break; 1816 default: 1817 ret = -ENXIO; 1818 break; 1819 } 1820 1821 return ret; 1822 } 1823 1824 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) 1825 { 1826 uint8_t *keys; 1827 uint64_t hva; 1828 int srcu_idx, i, r = 0; 1829 1830 if (args->flags != 0) 1831 return -EINVAL; 1832 1833 /* Is this guest using storage keys? 
*/ 1834 if (!mm_uses_skeys(current->mm)) 1835 return KVM_S390_GET_SKEYS_NONE; 1836 1837 /* Enforce sane limit on memory allocation */ 1838 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX) 1839 return -EINVAL; 1840 1841 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT); 1842 if (!keys) 1843 return -ENOMEM; 1844 1845 mmap_read_lock(current->mm); 1846 srcu_idx = srcu_read_lock(&kvm->srcu); 1847 for (i = 0; i < args->count; i++) { 1848 hva = gfn_to_hva(kvm, args->start_gfn + i); 1849 if (kvm_is_error_hva(hva)) { 1850 r = -EFAULT; 1851 break; 1852 } 1853 1854 r = get_guest_storage_key(current->mm, hva, &keys[i]); 1855 if (r) 1856 break; 1857 } 1858 srcu_read_unlock(&kvm->srcu, srcu_idx); 1859 mmap_read_unlock(current->mm); 1860 1861 if (!r) { 1862 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys, 1863 sizeof(uint8_t) * args->count); 1864 if (r) 1865 r = -EFAULT; 1866 } 1867 1868 kvfree(keys); 1869 return r; 1870 } 1871 1872 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) 1873 { 1874 uint8_t *keys; 1875 uint64_t hva; 1876 int srcu_idx, i, r = 0; 1877 bool unlocked; 1878 1879 if (args->flags != 0) 1880 return -EINVAL; 1881 1882 /* Enforce sane limit on memory allocation */ 1883 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX) 1884 return -EINVAL; 1885 1886 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT); 1887 if (!keys) 1888 return -ENOMEM; 1889 1890 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr, 1891 sizeof(uint8_t) * args->count); 1892 if (r) { 1893 r = -EFAULT; 1894 goto out; 1895 } 1896 1897 /* Enable storage key handling for the guest */ 1898 r = s390_enable_skey(); 1899 if (r) 1900 goto out; 1901 1902 i = 0; 1903 mmap_read_lock(current->mm); 1904 srcu_idx = srcu_read_lock(&kvm->srcu); 1905 while (i < args->count) { 1906 unlocked = false; 1907 hva = gfn_to_hva(kvm, args->start_gfn + i); 1908 if (kvm_is_error_hva(hva)) { 1909 r = -EFAULT; 1910 break; 1911 } 1912 1913 /* Lowest order bit is reserved */ 1914 if (keys[i] & 0x01) { 1915 r = -EINVAL; 1916 break; 1917 } 1918 1919 r = set_guest_storage_key(current->mm, hva, keys[i], 0); 1920 if (r) { 1921 r = fixup_user_fault(current->mm, hva, 1922 FAULT_FLAG_WRITE, &unlocked); 1923 if (r) 1924 break; 1925 } 1926 if (!r) 1927 i++; 1928 } 1929 srcu_read_unlock(&kvm->srcu, srcu_idx); 1930 mmap_read_unlock(current->mm); 1931 out: 1932 kvfree(keys); 1933 return r; 1934 } 1935 1936 /* 1937 * Base address and length must be sent at the start of each block, therefore 1938 * it's cheaper to send some clean data, as long as it's less than the size of 1939 * two longs. 1940 */ 1941 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *)) 1942 /* for consistency */ 1943 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX) 1944 1945 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args, 1946 u8 *res, unsigned long bufsize) 1947 { 1948 unsigned long pgstev, hva, cur_gfn = args->start_gfn; 1949 1950 args->count = 0; 1951 while (args->count < bufsize) { 1952 hva = gfn_to_hva(kvm, cur_gfn); 1953 /* 1954 * We return an error if the first value was invalid, but we 1955 * return successfully if at least one value was copied. 1956 */ 1957 if (kvm_is_error_hva(hva)) 1958 return args->count ? 
0 : -EFAULT; 1959 if (get_pgste(kvm->mm, hva, &pgstev) < 0) 1960 pgstev = 0; 1961 res[args->count++] = (pgstev >> 24) & 0x43; 1962 cur_gfn++; 1963 } 1964 1965 return 0; 1966 } 1967 1968 static struct kvm_memory_slot *gfn_to_memslot_approx(struct kvm_memslots *slots, 1969 gfn_t gfn) 1970 { 1971 return ____gfn_to_memslot(slots, gfn, true); 1972 } 1973 1974 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots, 1975 unsigned long cur_gfn) 1976 { 1977 struct kvm_memory_slot *ms = gfn_to_memslot_approx(slots, cur_gfn); 1978 unsigned long ofs = cur_gfn - ms->base_gfn; 1979 struct rb_node *mnode = &ms->gfn_node[slots->node_idx]; 1980 1981 if (ms->base_gfn + ms->npages <= cur_gfn) { 1982 mnode = rb_next(mnode); 1983 /* If we are above the highest slot, wrap around */ 1984 if (!mnode) 1985 mnode = rb_first(&slots->gfn_tree); 1986 1987 ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]); 1988 ofs = 0; 1989 } 1990 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs); 1991 while (ofs >= ms->npages && (mnode = rb_next(mnode))) { 1992 ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]); 1993 ofs = find_first_bit(kvm_second_dirty_bitmap(ms), ms->npages); 1994 } 1995 return ms->base_gfn + ofs; 1996 } 1997 1998 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args, 1999 u8 *res, unsigned long bufsize) 2000 { 2001 unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev; 2002 struct kvm_memslots *slots = kvm_memslots(kvm); 2003 struct kvm_memory_slot *ms; 2004 2005 if (unlikely(kvm_memslots_empty(slots))) 2006 return 0; 2007 2008 cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn); 2009 ms = gfn_to_memslot(kvm, cur_gfn); 2010 args->count = 0; 2011 args->start_gfn = cur_gfn; 2012 if (!ms) 2013 return 0; 2014 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1); 2015 mem_end = kvm_s390_get_gfn_end(slots); 2016 2017 while (args->count < bufsize) { 2018 hva = gfn_to_hva(kvm, cur_gfn); 2019 if (kvm_is_error_hva(hva)) 2020 return 0; 2021 /* Decrement only if we actually flipped the bit to 0 */ 2022 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms))) 2023 atomic64_dec(&kvm->arch.cmma_dirty_pages); 2024 if (get_pgste(kvm->mm, hva, &pgstev) < 0) 2025 pgstev = 0; 2026 /* Save the value */ 2027 res[args->count++] = (pgstev >> 24) & 0x43; 2028 /* If the next bit is too far away, stop. */ 2029 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE) 2030 return 0; 2031 /* If we reached the previous "next", find the next one */ 2032 if (cur_gfn == next_gfn) 2033 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1); 2034 /* Reached the end of memory or of the buffer, stop */ 2035 if ((next_gfn >= mem_end) || 2036 (next_gfn - args->start_gfn >= bufsize)) 2037 return 0; 2038 cur_gfn++; 2039 /* Reached the end of the current memslot, take the next one. */ 2040 if (cur_gfn - ms->base_gfn >= ms->npages) { 2041 ms = gfn_to_memslot(kvm, cur_gfn); 2042 if (!ms) 2043 return 0; 2044 } 2045 } 2046 return 0; 2047 } 2048 2049 /* 2050 * This function searches for the next page with dirty CMMA attributes, and 2051 * saves the attributes in the buffer up to either the end of the buffer or 2052 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found; 2053 * no trailing clean bytes are saved. 2054 * In case no dirty bits were found, or if CMMA was not enabled or used, the 2055 * output buffer will indicate 0 as length. 
2056 */ 2057 static int kvm_s390_get_cmma_bits(struct kvm *kvm, 2058 struct kvm_s390_cmma_log *args) 2059 { 2060 unsigned long bufsize; 2061 int srcu_idx, peek, ret; 2062 u8 *values; 2063 2064 if (!kvm->arch.use_cmma) 2065 return -ENXIO; 2066 /* Invalid/unsupported flags were specified */ 2067 if (args->flags & ~KVM_S390_CMMA_PEEK) 2068 return -EINVAL; 2069 /* Migration mode query, and we are not doing a migration */ 2070 peek = !!(args->flags & KVM_S390_CMMA_PEEK); 2071 if (!peek && !kvm->arch.migration_mode) 2072 return -EINVAL; 2073 /* CMMA is disabled or was not used, or the buffer has length zero */ 2074 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX); 2075 if (!bufsize || !kvm->mm->context.uses_cmm) { 2076 memset(args, 0, sizeof(*args)); 2077 return 0; 2078 } 2079 /* We are not peeking, and there are no dirty pages */ 2080 if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) { 2081 memset(args, 0, sizeof(*args)); 2082 return 0; 2083 } 2084 2085 values = vmalloc(bufsize); 2086 if (!values) 2087 return -ENOMEM; 2088 2089 mmap_read_lock(kvm->mm); 2090 srcu_idx = srcu_read_lock(&kvm->srcu); 2091 if (peek) 2092 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize); 2093 else 2094 ret = kvm_s390_get_cmma(kvm, args, values, bufsize); 2095 srcu_read_unlock(&kvm->srcu, srcu_idx); 2096 mmap_read_unlock(kvm->mm); 2097 2098 if (kvm->arch.migration_mode) 2099 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages); 2100 else 2101 args->remaining = 0; 2102 2103 if (copy_to_user((void __user *)args->values, values, args->count)) 2104 ret = -EFAULT; 2105 2106 vfree(values); 2107 return ret; 2108 } 2109 2110 /* 2111 * This function sets the CMMA attributes for the given pages. If the input 2112 * buffer has zero length, no action is taken, otherwise the attributes are 2113 * set and the mm->context.uses_cmm flag is set. 
2114 */ 2115 static int kvm_s390_set_cmma_bits(struct kvm *kvm, 2116 const struct kvm_s390_cmma_log *args) 2117 { 2118 unsigned long hva, mask, pgstev, i; 2119 uint8_t *bits; 2120 int srcu_idx, r = 0; 2121 2122 mask = args->mask; 2123 2124 if (!kvm->arch.use_cmma) 2125 return -ENXIO; 2126 /* invalid/unsupported flags */ 2127 if (args->flags != 0) 2128 return -EINVAL; 2129 /* Enforce sane limit on memory allocation */ 2130 if (args->count > KVM_S390_CMMA_SIZE_MAX) 2131 return -EINVAL; 2132 /* Nothing to do */ 2133 if (args->count == 0) 2134 return 0; 2135 2136 bits = vmalloc(array_size(sizeof(*bits), args->count)); 2137 if (!bits) 2138 return -ENOMEM; 2139 2140 r = copy_from_user(bits, (void __user *)args->values, args->count); 2141 if (r) { 2142 r = -EFAULT; 2143 goto out; 2144 } 2145 2146 mmap_read_lock(kvm->mm); 2147 srcu_idx = srcu_read_lock(&kvm->srcu); 2148 for (i = 0; i < args->count; i++) { 2149 hva = gfn_to_hva(kvm, args->start_gfn + i); 2150 if (kvm_is_error_hva(hva)) { 2151 r = -EFAULT; 2152 break; 2153 } 2154 2155 pgstev = bits[i]; 2156 pgstev = pgstev << 24; 2157 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT; 2158 set_pgste_bits(kvm->mm, hva, mask, pgstev); 2159 } 2160 srcu_read_unlock(&kvm->srcu, srcu_idx); 2161 mmap_read_unlock(kvm->mm); 2162 2163 if (!kvm->mm->context.uses_cmm) { 2164 mmap_write_lock(kvm->mm); 2165 kvm->mm->context.uses_cmm = 1; 2166 mmap_write_unlock(kvm->mm); 2167 } 2168 out: 2169 vfree(bits); 2170 return r; 2171 } 2172 2173 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp) 2174 { 2175 struct kvm_vcpu *vcpu; 2176 u16 rc, rrc; 2177 int ret = 0; 2178 unsigned long i; 2179 2180 /* 2181 * We ignore failures and try to destroy as many CPUs as possible. 2182 * At the same time we must not free the assigned resources when 2183 * this fails, as the ultravisor has still access to that memory. 2184 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak 2185 * behind. 2186 * We want to return the first failure rc and rrc, though. 2187 */ 2188 kvm_for_each_vcpu(i, vcpu, kvm) { 2189 mutex_lock(&vcpu->mutex); 2190 if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) { 2191 *rcp = rc; 2192 *rrcp = rrc; 2193 ret = -EIO; 2194 } 2195 mutex_unlock(&vcpu->mutex); 2196 } 2197 return ret; 2198 } 2199 2200 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc) 2201 { 2202 unsigned long i; 2203 int r = 0; 2204 u16 dummy; 2205 2206 struct kvm_vcpu *vcpu; 2207 2208 kvm_for_each_vcpu(i, vcpu, kvm) { 2209 mutex_lock(&vcpu->mutex); 2210 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc); 2211 mutex_unlock(&vcpu->mutex); 2212 if (r) 2213 break; 2214 } 2215 if (r) 2216 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy); 2217 return r; 2218 } 2219 2220 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) 2221 { 2222 int r = 0; 2223 u16 dummy; 2224 void __user *argp = (void __user *)cmd->data; 2225 2226 switch (cmd->cmd) { 2227 case KVM_PV_ENABLE: { 2228 r = -EINVAL; 2229 if (kvm_s390_pv_is_protected(kvm)) 2230 break; 2231 2232 /* 2233 * FMT 4 SIE needs esca. 
As we never switch back to bsca from 2234 * esca, we need no cleanup in the error cases below 2235 */ 2236 r = sca_switch_to_extended(kvm); 2237 if (r) 2238 break; 2239 2240 mmap_write_lock(current->mm); 2241 r = gmap_mark_unmergeable(); 2242 mmap_write_unlock(current->mm); 2243 if (r) 2244 break; 2245 2246 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc); 2247 if (r) 2248 break; 2249 2250 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc); 2251 if (r) 2252 kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy); 2253 2254 /* we need to block service interrupts from now on */ 2255 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs); 2256 break; 2257 } 2258 case KVM_PV_DISABLE: { 2259 r = -EINVAL; 2260 if (!kvm_s390_pv_is_protected(kvm)) 2261 break; 2262 2263 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc); 2264 /* 2265 * If a CPU could not be destroyed, destroy VM will also fail. 2266 * There is no point in trying to destroy it. Instead return 2267 * the rc and rrc from the first CPU that failed destroying. 2268 */ 2269 if (r) 2270 break; 2271 r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc); 2272 2273 /* no need to block service interrupts any more */ 2274 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs); 2275 break; 2276 } 2277 case KVM_PV_SET_SEC_PARMS: { 2278 struct kvm_s390_pv_sec_parm parms = {}; 2279 void *hdr; 2280 2281 r = -EINVAL; 2282 if (!kvm_s390_pv_is_protected(kvm)) 2283 break; 2284 2285 r = -EFAULT; 2286 if (copy_from_user(&parms, argp, sizeof(parms))) 2287 break; 2288 2289 /* Currently restricted to 8KB */ 2290 r = -EINVAL; 2291 if (parms.length > PAGE_SIZE * 2) 2292 break; 2293 2294 r = -ENOMEM; 2295 hdr = vmalloc(parms.length); 2296 if (!hdr) 2297 break; 2298 2299 r = -EFAULT; 2300 if (!copy_from_user(hdr, (void __user *)parms.origin, 2301 parms.length)) 2302 r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length, 2303 &cmd->rc, &cmd->rrc); 2304 2305 vfree(hdr); 2306 break; 2307 } 2308 case KVM_PV_UNPACK: { 2309 struct kvm_s390_pv_unp unp = {}; 2310 2311 r = -EINVAL; 2312 if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm)) 2313 break; 2314 2315 r = -EFAULT; 2316 if (copy_from_user(&unp, argp, sizeof(unp))) 2317 break; 2318 2319 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak, 2320 &cmd->rc, &cmd->rrc); 2321 break; 2322 } 2323 case KVM_PV_VERIFY: { 2324 r = -EINVAL; 2325 if (!kvm_s390_pv_is_protected(kvm)) 2326 break; 2327 2328 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), 2329 UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc); 2330 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc, 2331 cmd->rrc); 2332 break; 2333 } 2334 case KVM_PV_PREP_RESET: { 2335 r = -EINVAL; 2336 if (!kvm_s390_pv_is_protected(kvm)) 2337 break; 2338 2339 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), 2340 UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc); 2341 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x", 2342 cmd->rc, cmd->rrc); 2343 break; 2344 } 2345 case KVM_PV_UNSHARE_ALL: { 2346 r = -EINVAL; 2347 if (!kvm_s390_pv_is_protected(kvm)) 2348 break; 2349 2350 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), 2351 UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc); 2352 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x", 2353 cmd->rc, cmd->rrc); 2354 break; 2355 } 2356 default: 2357 r = -ENOTTY; 2358 } 2359 return r; 2360 } 2361 2362 static bool access_key_invalid(u8 access_key) 2363 { 2364 return access_key > 0xf; 2365 } 2366 2367 static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop) 2368 { 2369 void __user *uaddr = (void __user 
*)mop->buf; 2370 u64 supported_flags; 2371 void *tmpbuf = NULL; 2372 int r, srcu_idx; 2373 2374 supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION 2375 | KVM_S390_MEMOP_F_CHECK_ONLY; 2376 if (mop->flags & ~supported_flags) 2377 return -EINVAL; 2378 if (mop->size > MEM_OP_MAX_SIZE) 2379 return -E2BIG; 2380 if (kvm_s390_pv_is_protected(kvm)) 2381 return -EINVAL; 2382 if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) { 2383 if (access_key_invalid(mop->key)) 2384 return -EINVAL; 2385 } else { 2386 mop->key = 0; 2387 } 2388 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) { 2389 tmpbuf = vmalloc(mop->size); 2390 if (!tmpbuf) 2391 return -ENOMEM; 2392 } 2393 2394 srcu_idx = srcu_read_lock(&kvm->srcu); 2395 2396 if (kvm_is_error_gpa(kvm, mop->gaddr)) { 2397 r = PGM_ADDRESSING; 2398 goto out_unlock; 2399 } 2400 2401 switch (mop->op) { 2402 case KVM_S390_MEMOP_ABSOLUTE_READ: { 2403 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { 2404 r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_FETCH, mop->key); 2405 } else { 2406 r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf, 2407 mop->size, GACC_FETCH, mop->key); 2408 if (r == 0) { 2409 if (copy_to_user(uaddr, tmpbuf, mop->size)) 2410 r = -EFAULT; 2411 } 2412 } 2413 break; 2414 } 2415 case KVM_S390_MEMOP_ABSOLUTE_WRITE: { 2416 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { 2417 r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_STORE, mop->key); 2418 } else { 2419 if (copy_from_user(tmpbuf, uaddr, mop->size)) { 2420 r = -EFAULT; 2421 break; 2422 } 2423 r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf, 2424 mop->size, GACC_STORE, mop->key); 2425 } 2426 break; 2427 } 2428 default: 2429 r = -EINVAL; 2430 } 2431 2432 out_unlock: 2433 srcu_read_unlock(&kvm->srcu, srcu_idx); 2434 2435 vfree(tmpbuf); 2436 return r; 2437 } 2438 2439 long kvm_arch_vm_ioctl(struct file *filp, 2440 unsigned int ioctl, unsigned long arg) 2441 { 2442 struct kvm *kvm = filp->private_data; 2443 void __user *argp = (void __user *)arg; 2444 struct kvm_device_attr attr; 2445 int r; 2446 2447 switch (ioctl) { 2448 case KVM_S390_INTERRUPT: { 2449 struct kvm_s390_interrupt s390int; 2450 2451 r = -EFAULT; 2452 if (copy_from_user(&s390int, argp, sizeof(s390int))) 2453 break; 2454 r = kvm_s390_inject_vm(kvm, &s390int); 2455 break; 2456 } 2457 case KVM_CREATE_IRQCHIP: { 2458 struct kvm_irq_routing_entry routing; 2459 2460 r = -EINVAL; 2461 if (kvm->arch.use_irqchip) { 2462 /* Set up dummy routing. 
*/ 2463 memset(&routing, 0, sizeof(routing)); 2464 r = kvm_set_irq_routing(kvm, &routing, 0, 0); 2465 } 2466 break; 2467 } 2468 case KVM_SET_DEVICE_ATTR: { 2469 r = -EFAULT; 2470 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 2471 break; 2472 r = kvm_s390_vm_set_attr(kvm, &attr); 2473 break; 2474 } 2475 case KVM_GET_DEVICE_ATTR: { 2476 r = -EFAULT; 2477 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 2478 break; 2479 r = kvm_s390_vm_get_attr(kvm, &attr); 2480 break; 2481 } 2482 case KVM_HAS_DEVICE_ATTR: { 2483 r = -EFAULT; 2484 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 2485 break; 2486 r = kvm_s390_vm_has_attr(kvm, &attr); 2487 break; 2488 } 2489 case KVM_S390_GET_SKEYS: { 2490 struct kvm_s390_skeys args; 2491 2492 r = -EFAULT; 2493 if (copy_from_user(&args, argp, 2494 sizeof(struct kvm_s390_skeys))) 2495 break; 2496 r = kvm_s390_get_skeys(kvm, &args); 2497 break; 2498 } 2499 case KVM_S390_SET_SKEYS: { 2500 struct kvm_s390_skeys args; 2501 2502 r = -EFAULT; 2503 if (copy_from_user(&args, argp, 2504 sizeof(struct kvm_s390_skeys))) 2505 break; 2506 r = kvm_s390_set_skeys(kvm, &args); 2507 break; 2508 } 2509 case KVM_S390_GET_CMMA_BITS: { 2510 struct kvm_s390_cmma_log args; 2511 2512 r = -EFAULT; 2513 if (copy_from_user(&args, argp, sizeof(args))) 2514 break; 2515 mutex_lock(&kvm->slots_lock); 2516 r = kvm_s390_get_cmma_bits(kvm, &args); 2517 mutex_unlock(&kvm->slots_lock); 2518 if (!r) { 2519 r = copy_to_user(argp, &args, sizeof(args)); 2520 if (r) 2521 r = -EFAULT; 2522 } 2523 break; 2524 } 2525 case KVM_S390_SET_CMMA_BITS: { 2526 struct kvm_s390_cmma_log args; 2527 2528 r = -EFAULT; 2529 if (copy_from_user(&args, argp, sizeof(args))) 2530 break; 2531 mutex_lock(&kvm->slots_lock); 2532 r = kvm_s390_set_cmma_bits(kvm, &args); 2533 mutex_unlock(&kvm->slots_lock); 2534 break; 2535 } 2536 case KVM_S390_PV_COMMAND: { 2537 struct kvm_pv_cmd args; 2538 2539 /* protvirt means user cpu state */ 2540 kvm_s390_set_user_cpu_state_ctrl(kvm); 2541 r = 0; 2542 if (!is_prot_virt_host()) { 2543 r = -EINVAL; 2544 break; 2545 } 2546 if (copy_from_user(&args, argp, sizeof(args))) { 2547 r = -EFAULT; 2548 break; 2549 } 2550 if (args.flags) { 2551 r = -EINVAL; 2552 break; 2553 } 2554 mutex_lock(&kvm->lock); 2555 r = kvm_s390_handle_pv(kvm, &args); 2556 mutex_unlock(&kvm->lock); 2557 if (copy_to_user(argp, &args, sizeof(args))) { 2558 r = -EFAULT; 2559 break; 2560 } 2561 break; 2562 } 2563 case KVM_S390_MEM_OP: { 2564 struct kvm_s390_mem_op mem_op; 2565 2566 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0) 2567 r = kvm_s390_vm_mem_op(kvm, &mem_op); 2568 else 2569 r = -EFAULT; 2570 break; 2571 } 2572 default: 2573 r = -ENOTTY; 2574 } 2575 2576 return r; 2577 } 2578 2579 static int kvm_s390_apxa_installed(void) 2580 { 2581 struct ap_config_info info; 2582 2583 if (ap_instructions_available()) { 2584 if (ap_qci(&info) == 0) 2585 return info.apxa; 2586 } 2587 2588 return 0; 2589 } 2590 2591 /* 2592 * The format of the crypto control block (CRYCB) is specified in the 3 low 2593 * order bits of the CRYCB designation (CRYCBD) field as follows: 2594 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the 2595 * AP extended addressing (APXA) facility are installed. 2596 * Format 1: The APXA facility is not installed but the MSAX3 facility is. 
2597 * Format 2: Both the APXA and MSAX3 facilities are installed 2598 */ 2599 static void kvm_s390_set_crycb_format(struct kvm *kvm) 2600 { 2601 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb; 2602 2603 /* Clear the CRYCB format bits - i.e., set format 0 by default */ 2604 kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK); 2605 2606 /* Check whether MSAX3 is installed */ 2607 if (!test_kvm_facility(kvm, 76)) 2608 return; 2609 2610 if (kvm_s390_apxa_installed()) 2611 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2; 2612 else 2613 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1; 2614 } 2615 2616 /* 2617 * kvm_arch_crypto_set_masks 2618 * 2619 * @kvm: pointer to the target guest's KVM struct containing the crypto masks 2620 * to be set. 2621 * @apm: the mask identifying the accessible AP adapters 2622 * @aqm: the mask identifying the accessible AP domains 2623 * @adm: the mask identifying the accessible AP control domains 2624 * 2625 * Set the masks that identify the adapters, domains and control domains to 2626 * which the KVM guest is granted access. 2627 * 2628 * Note: The kvm->lock mutex must be locked by the caller before invoking this 2629 * function. 2630 */ 2631 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm, 2632 unsigned long *aqm, unsigned long *adm) 2633 { 2634 struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb; 2635 2636 kvm_s390_vcpu_block_all(kvm); 2637 2638 switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) { 2639 case CRYCB_FORMAT2: /* APCB1 use 256 bits */ 2640 memcpy(crycb->apcb1.apm, apm, 32); 2641 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx", 2642 apm[0], apm[1], apm[2], apm[3]); 2643 memcpy(crycb->apcb1.aqm, aqm, 32); 2644 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx", 2645 aqm[0], aqm[1], aqm[2], aqm[3]); 2646 memcpy(crycb->apcb1.adm, adm, 32); 2647 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx", 2648 adm[0], adm[1], adm[2], adm[3]); 2649 break; 2650 case CRYCB_FORMAT1: 2651 case CRYCB_FORMAT0: /* Fall through both use APCB0 */ 2652 memcpy(crycb->apcb0.apm, apm, 8); 2653 memcpy(crycb->apcb0.aqm, aqm, 2); 2654 memcpy(crycb->apcb0.adm, adm, 2); 2655 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x", 2656 apm[0], *((unsigned short *)aqm), 2657 *((unsigned short *)adm)); 2658 break; 2659 default: /* Can not happen */ 2660 break; 2661 } 2662 2663 /* recreate the shadow crycb for each vcpu */ 2664 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART); 2665 kvm_s390_vcpu_unblock_all(kvm); 2666 } 2667 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks); 2668 2669 /* 2670 * kvm_arch_crypto_clear_masks 2671 * 2672 * @kvm: pointer to the target guest's KVM struct containing the crypto masks 2673 * to be cleared. 2674 * 2675 * Clear the masks that identify the adapters, domains and control domains to 2676 * which the KVM guest is granted access. 2677 * 2678 * Note: The kvm->lock mutex must be locked by the caller before invoking this 2679 * function. 
2680 */ 2681 void kvm_arch_crypto_clear_masks(struct kvm *kvm) 2682 { 2683 kvm_s390_vcpu_block_all(kvm); 2684 2685 memset(&kvm->arch.crypto.crycb->apcb0, 0, 2686 sizeof(kvm->arch.crypto.crycb->apcb0)); 2687 memset(&kvm->arch.crypto.crycb->apcb1, 0, 2688 sizeof(kvm->arch.crypto.crycb->apcb1)); 2689 2690 VM_EVENT(kvm, 3, "%s", "CLR CRYCB:"); 2691 /* recreate the shadow crycb for each vcpu */ 2692 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART); 2693 kvm_s390_vcpu_unblock_all(kvm); 2694 } 2695 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks); 2696 2697 static u64 kvm_s390_get_initial_cpuid(void) 2698 { 2699 struct cpuid cpuid; 2700 2701 get_cpu_id(&cpuid); 2702 cpuid.version = 0xff; 2703 return *((u64 *) &cpuid); 2704 } 2705 2706 static void kvm_s390_crypto_init(struct kvm *kvm) 2707 { 2708 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb; 2709 kvm_s390_set_crycb_format(kvm); 2710 init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem); 2711 2712 if (!test_kvm_facility(kvm, 76)) 2713 return; 2714 2715 /* Enable AES/DEA protected key functions by default */ 2716 kvm->arch.crypto.aes_kw = 1; 2717 kvm->arch.crypto.dea_kw = 1; 2718 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 2719 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); 2720 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 2721 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); 2722 } 2723 2724 static void sca_dispose(struct kvm *kvm) 2725 { 2726 if (kvm->arch.use_esca) 2727 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block)); 2728 else 2729 free_page((unsigned long)(kvm->arch.sca)); 2730 kvm->arch.sca = NULL; 2731 } 2732 2733 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) 2734 { 2735 gfp_t alloc_flags = GFP_KERNEL_ACCOUNT; 2736 int i, rc; 2737 char debug_name[16]; 2738 static unsigned long sca_offset; 2739 2740 rc = -EINVAL; 2741 #ifdef CONFIG_KVM_S390_UCONTROL 2742 if (type & ~KVM_VM_S390_UCONTROL) 2743 goto out_err; 2744 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN))) 2745 goto out_err; 2746 #else 2747 if (type) 2748 goto out_err; 2749 #endif 2750 2751 rc = s390_enable_sie(); 2752 if (rc) 2753 goto out_err; 2754 2755 rc = -ENOMEM; 2756 2757 if (!sclp.has_64bscao) 2758 alloc_flags |= GFP_DMA; 2759 rwlock_init(&kvm->arch.sca_lock); 2760 /* start with basic SCA */ 2761 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags); 2762 if (!kvm->arch.sca) 2763 goto out_err; 2764 mutex_lock(&kvm_lock); 2765 sca_offset += 16; 2766 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE) 2767 sca_offset = 0; 2768 kvm->arch.sca = (struct bsca_block *) 2769 ((char *) kvm->arch.sca + sca_offset); 2770 mutex_unlock(&kvm_lock); 2771 2772 sprintf(debug_name, "kvm-%u", current->pid); 2773 2774 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long)); 2775 if (!kvm->arch.dbf) 2776 goto out_err; 2777 2778 BUILD_BUG_ON(sizeof(struct sie_page2) != 4096); 2779 kvm->arch.sie_page2 = 2780 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA); 2781 if (!kvm->arch.sie_page2) 2782 goto out_err; 2783 2784 kvm->arch.sie_page2->kvm = kvm; 2785 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list; 2786 2787 for (i = 0; i < kvm_s390_fac_size(); i++) { 2788 kvm->arch.model.fac_mask[i] = stfle_fac_list[i] & 2789 (kvm_s390_fac_base[i] | 2790 kvm_s390_fac_ext[i]); 2791 kvm->arch.model.fac_list[i] = stfle_fac_list[i] & 2792 kvm_s390_fac_base[i]; 2793 } 2794 kvm->arch.model.subfuncs = kvm_s390_available_subfunc; 2795 2796 /* we are always in czam mode - 
even on pre z14 machines */ 2797 set_kvm_facility(kvm->arch.model.fac_mask, 138); 2798 set_kvm_facility(kvm->arch.model.fac_list, 138); 2799 /* we emulate STHYI in kvm */ 2800 set_kvm_facility(kvm->arch.model.fac_mask, 74); 2801 set_kvm_facility(kvm->arch.model.fac_list, 74); 2802 if (MACHINE_HAS_TLB_GUEST) { 2803 set_kvm_facility(kvm->arch.model.fac_mask, 147); 2804 set_kvm_facility(kvm->arch.model.fac_list, 147); 2805 } 2806 2807 if (css_general_characteristics.aiv && test_facility(65)) 2808 set_kvm_facility(kvm->arch.model.fac_mask, 65); 2809 2810 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid(); 2811 kvm->arch.model.ibc = sclp.ibc & 0x0fff; 2812 2813 kvm_s390_crypto_init(kvm); 2814 2815 mutex_init(&kvm->arch.float_int.ais_lock); 2816 spin_lock_init(&kvm->arch.float_int.lock); 2817 for (i = 0; i < FIRQ_LIST_COUNT; i++) 2818 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]); 2819 init_waitqueue_head(&kvm->arch.ipte_wq); 2820 mutex_init(&kvm->arch.ipte_mutex); 2821 2822 debug_register_view(kvm->arch.dbf, &debug_sprintf_view); 2823 VM_EVENT(kvm, 3, "vm created with type %lu", type); 2824 2825 if (type & KVM_VM_S390_UCONTROL) { 2826 kvm->arch.gmap = NULL; 2827 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT; 2828 } else { 2829 if (sclp.hamax == U64_MAX) 2830 kvm->arch.mem_limit = TASK_SIZE_MAX; 2831 else 2832 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX, 2833 sclp.hamax + 1); 2834 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1); 2835 if (!kvm->arch.gmap) 2836 goto out_err; 2837 kvm->arch.gmap->private = kvm; 2838 kvm->arch.gmap->pfault_enabled = 0; 2839 } 2840 2841 kvm->arch.use_pfmfi = sclp.has_pfmfi; 2842 kvm->arch.use_skf = sclp.has_skey; 2843 spin_lock_init(&kvm->arch.start_stop_lock); 2844 kvm_s390_vsie_init(kvm); 2845 if (use_gisa) 2846 kvm_s390_gisa_init(kvm); 2847 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid); 2848 2849 return 0; 2850 out_err: 2851 free_page((unsigned long)kvm->arch.sie_page2); 2852 debug_unregister(kvm->arch.dbf); 2853 sca_dispose(kvm); 2854 KVM_EVENT(3, "creation of vm failed: %d", rc); 2855 return rc; 2856 } 2857 2858 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) 2859 { 2860 u16 rc, rrc; 2861 2862 VCPU_EVENT(vcpu, 3, "%s", "free cpu"); 2863 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id); 2864 kvm_s390_clear_local_irqs(vcpu); 2865 kvm_clear_async_pf_completion_queue(vcpu); 2866 if (!kvm_is_ucontrol(vcpu->kvm)) 2867 sca_del_vcpu(vcpu); 2868 2869 if (kvm_is_ucontrol(vcpu->kvm)) 2870 gmap_remove(vcpu->arch.gmap); 2871 2872 if (vcpu->kvm->arch.use_cmma) 2873 kvm_s390_vcpu_unsetup_cmma(vcpu); 2874 /* We can not hold the vcpu mutex here, we are already dying */ 2875 if (kvm_s390_pv_cpu_get_handle(vcpu)) 2876 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc); 2877 free_page((unsigned long)(vcpu->arch.sie_block)); 2878 } 2879 2880 void kvm_arch_destroy_vm(struct kvm *kvm) 2881 { 2882 u16 rc, rrc; 2883 2884 kvm_destroy_vcpus(kvm); 2885 sca_dispose(kvm); 2886 kvm_s390_gisa_destroy(kvm); 2887 /* 2888 * We are already at the end of life and kvm->lock is not taken. 2889 * This is ok as the file descriptor is closed by now and nobody 2890 * can mess with the pv state. To avoid lockdep_assert_held from 2891 * complaining we do not use kvm_s390_pv_is_protected. 
2892 */ 2893 if (kvm_s390_pv_get_handle(kvm)) 2894 kvm_s390_pv_deinit_vm(kvm, &rc, &rrc); 2895 debug_unregister(kvm->arch.dbf); 2896 free_page((unsigned long)kvm->arch.sie_page2); 2897 if (!kvm_is_ucontrol(kvm)) 2898 gmap_remove(kvm->arch.gmap); 2899 kvm_s390_destroy_adapters(kvm); 2900 kvm_s390_clear_float_irqs(kvm); 2901 kvm_s390_vsie_destroy(kvm); 2902 KVM_EVENT(3, "vm 0x%pK destroyed", kvm); 2903 } 2904 2905 /* Section: vcpu related */ 2906 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu) 2907 { 2908 vcpu->arch.gmap = gmap_create(current->mm, -1UL); 2909 if (!vcpu->arch.gmap) 2910 return -ENOMEM; 2911 vcpu->arch.gmap->private = vcpu->kvm; 2912 2913 return 0; 2914 } 2915 2916 static void sca_del_vcpu(struct kvm_vcpu *vcpu) 2917 { 2918 if (!kvm_s390_use_sca_entries()) 2919 return; 2920 read_lock(&vcpu->kvm->arch.sca_lock); 2921 if (vcpu->kvm->arch.use_esca) { 2922 struct esca_block *sca = vcpu->kvm->arch.sca; 2923 2924 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); 2925 sca->cpu[vcpu->vcpu_id].sda = 0; 2926 } else { 2927 struct bsca_block *sca = vcpu->kvm->arch.sca; 2928 2929 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); 2930 sca->cpu[vcpu->vcpu_id].sda = 0; 2931 } 2932 read_unlock(&vcpu->kvm->arch.sca_lock); 2933 } 2934 2935 static void sca_add_vcpu(struct kvm_vcpu *vcpu) 2936 { 2937 if (!kvm_s390_use_sca_entries()) { 2938 struct bsca_block *sca = vcpu->kvm->arch.sca; 2939 2940 /* we still need the basic sca for the ipte control */ 2941 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2942 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; 2943 return; 2944 } 2945 read_lock(&vcpu->kvm->arch.sca_lock); 2946 if (vcpu->kvm->arch.use_esca) { 2947 struct esca_block *sca = vcpu->kvm->arch.sca; 2948 2949 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; 2950 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2951 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU; 2952 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; 2953 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); 2954 } else { 2955 struct bsca_block *sca = vcpu->kvm->arch.sca; 2956 2957 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; 2958 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2959 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; 2960 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); 2961 } 2962 read_unlock(&vcpu->kvm->arch.sca_lock); 2963 } 2964 2965 /* Basic SCA to Extended SCA data copy routines */ 2966 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s) 2967 { 2968 d->sda = s->sda; 2969 d->sigp_ctrl.c = s->sigp_ctrl.c; 2970 d->sigp_ctrl.scn = s->sigp_ctrl.scn; 2971 } 2972 2973 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s) 2974 { 2975 int i; 2976 2977 d->ipte_control = s->ipte_control; 2978 d->mcn[0] = s->mcn; 2979 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++) 2980 sca_copy_entry(&d->cpu[i], &s->cpu[i]); 2981 } 2982 2983 static int sca_switch_to_extended(struct kvm *kvm) 2984 { 2985 struct bsca_block *old_sca = kvm->arch.sca; 2986 struct esca_block *new_sca; 2987 struct kvm_vcpu *vcpu; 2988 unsigned long vcpu_idx; 2989 u32 scaol, scaoh; 2990 2991 if (kvm->arch.use_esca) 2992 return 0; 2993 2994 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO); 2995 if (!new_sca) 2996 return -ENOMEM; 2997 2998 scaoh = (u32)((u64)(new_sca) >> 32); 2999 scaol = (u32)(u64)(new_sca) & ~0x3fU; 3000 3001 kvm_s390_vcpu_block_all(kvm); 3002 write_lock(&kvm->arch.sca_lock); 
3003 3004 sca_copy_b_to_e(new_sca, old_sca); 3005 3006 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) { 3007 vcpu->arch.sie_block->scaoh = scaoh; 3008 vcpu->arch.sie_block->scaol = scaol; 3009 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; 3010 } 3011 kvm->arch.sca = new_sca; 3012 kvm->arch.use_esca = 1; 3013 3014 write_unlock(&kvm->arch.sca_lock); 3015 kvm_s390_vcpu_unblock_all(kvm); 3016 3017 free_page((unsigned long)old_sca); 3018 3019 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)", 3020 old_sca, kvm->arch.sca); 3021 return 0; 3022 } 3023 3024 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id) 3025 { 3026 int rc; 3027 3028 if (!kvm_s390_use_sca_entries()) { 3029 if (id < KVM_MAX_VCPUS) 3030 return true; 3031 return false; 3032 } 3033 if (id < KVM_S390_BSCA_CPU_SLOTS) 3034 return true; 3035 if (!sclp.has_esca || !sclp.has_64bscao) 3036 return false; 3037 3038 mutex_lock(&kvm->lock); 3039 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm); 3040 mutex_unlock(&kvm->lock); 3041 3042 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS; 3043 } 3044 3045 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 3046 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3047 { 3048 WARN_ON_ONCE(vcpu->arch.cputm_start != 0); 3049 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 3050 vcpu->arch.cputm_start = get_tod_clock_fast(); 3051 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 3052 } 3053 3054 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 3055 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3056 { 3057 WARN_ON_ONCE(vcpu->arch.cputm_start == 0); 3058 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 3059 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start; 3060 vcpu->arch.cputm_start = 0; 3061 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 3062 } 3063 3064 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 3065 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3066 { 3067 WARN_ON_ONCE(vcpu->arch.cputm_enabled); 3068 vcpu->arch.cputm_enabled = true; 3069 __start_cpu_timer_accounting(vcpu); 3070 } 3071 3072 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 3073 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3074 { 3075 WARN_ON_ONCE(!vcpu->arch.cputm_enabled); 3076 __stop_cpu_timer_accounting(vcpu); 3077 vcpu->arch.cputm_enabled = false; 3078 } 3079 3080 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3081 { 3082 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3083 __enable_cpu_timer_accounting(vcpu); 3084 preempt_enable(); 3085 } 3086 3087 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3088 { 3089 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3090 __disable_cpu_timer_accounting(vcpu); 3091 preempt_enable(); 3092 } 3093 3094 /* set the cpu timer - may only be called from the VCPU thread itself */ 3095 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm) 3096 { 3097 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3098 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 3099 if (vcpu->arch.cputm_enabled) 3100 vcpu->arch.cputm_start = get_tod_clock_fast(); 3101 vcpu->arch.sie_block->cputm = cputm; 3102 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 3103 preempt_enable(); 3104 } 3105 3106 /* update and get the cpu timer - can also be called from other VCPU threads */ 3107 __u64 
kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu) 3108 { 3109 unsigned int seq; 3110 __u64 value; 3111 3112 if (unlikely(!vcpu->arch.cputm_enabled)) 3113 return vcpu->arch.sie_block->cputm; 3114 3115 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3116 do { 3117 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount); 3118 /* 3119 * If the writer would ever execute a read in the critical 3120 * section, e.g. in irq context, we have a deadlock. 3121 */ 3122 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu); 3123 value = vcpu->arch.sie_block->cputm; 3124 /* if cputm_start is 0, accounting is being started/stopped */ 3125 if (likely(vcpu->arch.cputm_start)) 3126 value -= get_tod_clock_fast() - vcpu->arch.cputm_start; 3127 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1)); 3128 preempt_enable(); 3129 return value; 3130 } 3131 3132 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 3133 { 3134 3135 gmap_enable(vcpu->arch.enabled_gmap); 3136 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING); 3137 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) 3138 __start_cpu_timer_accounting(vcpu); 3139 vcpu->cpu = cpu; 3140 } 3141 3142 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 3143 { 3144 vcpu->cpu = -1; 3145 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) 3146 __stop_cpu_timer_accounting(vcpu); 3147 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING); 3148 vcpu->arch.enabled_gmap = gmap_get_enabled(); 3149 gmap_disable(vcpu->arch.enabled_gmap); 3150 3151 } 3152 3153 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) 3154 { 3155 mutex_lock(&vcpu->kvm->lock); 3156 preempt_disable(); 3157 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch; 3158 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx; 3159 preempt_enable(); 3160 mutex_unlock(&vcpu->kvm->lock); 3161 if (!kvm_is_ucontrol(vcpu->kvm)) { 3162 vcpu->arch.gmap = vcpu->kvm->arch.gmap; 3163 sca_add_vcpu(vcpu); 3164 } 3165 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0) 3166 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC; 3167 /* make vcpu_load load the right gmap on the first trigger */ 3168 vcpu->arch.enabled_gmap = vcpu->arch.gmap; 3169 } 3170 3171 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr) 3172 { 3173 if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) && 3174 test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo)) 3175 return true; 3176 return false; 3177 } 3178 3179 static bool kvm_has_pckmo_ecc(struct kvm *kvm) 3180 { 3181 /* At least one ECC subfunction must be present */ 3182 return kvm_has_pckmo_subfunc(kvm, 32) || 3183 kvm_has_pckmo_subfunc(kvm, 33) || 3184 kvm_has_pckmo_subfunc(kvm, 34) || 3185 kvm_has_pckmo_subfunc(kvm, 40) || 3186 kvm_has_pckmo_subfunc(kvm, 41); 3187 3188 } 3189 3190 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu) 3191 { 3192 /* 3193 * If the AP instructions are not being interpreted and the MSAX3 3194 * facility is not configured for the guest, there is nothing to set up. 
3195 */ 3196 if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76)) 3197 return; 3198 3199 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd; 3200 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA); 3201 vcpu->arch.sie_block->eca &= ~ECA_APIE; 3202 vcpu->arch.sie_block->ecd &= ~ECD_ECC; 3203 3204 if (vcpu->kvm->arch.crypto.apie) 3205 vcpu->arch.sie_block->eca |= ECA_APIE; 3206 3207 /* Set up protected key support */ 3208 if (vcpu->kvm->arch.crypto.aes_kw) { 3209 vcpu->arch.sie_block->ecb3 |= ECB3_AES; 3210 /* ecc is also wrapped with AES key */ 3211 if (kvm_has_pckmo_ecc(vcpu->kvm)) 3212 vcpu->arch.sie_block->ecd |= ECD_ECC; 3213 } 3214 3215 if (vcpu->kvm->arch.crypto.dea_kw) 3216 vcpu->arch.sie_block->ecb3 |= ECB3_DEA; 3217 } 3218 3219 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu) 3220 { 3221 free_page(vcpu->arch.sie_block->cbrlo); 3222 vcpu->arch.sie_block->cbrlo = 0; 3223 } 3224 3225 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu) 3226 { 3227 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT); 3228 if (!vcpu->arch.sie_block->cbrlo) 3229 return -ENOMEM; 3230 return 0; 3231 } 3232 3233 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu) 3234 { 3235 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model; 3236 3237 vcpu->arch.sie_block->ibc = model->ibc; 3238 if (test_kvm_facility(vcpu->kvm, 7)) 3239 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list; 3240 } 3241 3242 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu) 3243 { 3244 int rc = 0; 3245 u16 uvrc, uvrrc; 3246 3247 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | 3248 CPUSTAT_SM | 3249 CPUSTAT_STOPPED); 3250 3251 if (test_kvm_facility(vcpu->kvm, 78)) 3252 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2); 3253 else if (test_kvm_facility(vcpu->kvm, 8)) 3254 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED); 3255 3256 kvm_s390_vcpu_setup_model(vcpu); 3257 3258 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */ 3259 if (MACHINE_HAS_ESOP) 3260 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT; 3261 if (test_kvm_facility(vcpu->kvm, 9)) 3262 vcpu->arch.sie_block->ecb |= ECB_SRSI; 3263 if (test_kvm_facility(vcpu->kvm, 73)) 3264 vcpu->arch.sie_block->ecb |= ECB_TE; 3265 if (!kvm_is_ucontrol(vcpu->kvm)) 3266 vcpu->arch.sie_block->ecb |= ECB_SPECI; 3267 3268 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi) 3269 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI; 3270 if (test_kvm_facility(vcpu->kvm, 130)) 3271 vcpu->arch.sie_block->ecb2 |= ECB2_IEP; 3272 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI; 3273 if (sclp.has_cei) 3274 vcpu->arch.sie_block->eca |= ECA_CEI; 3275 if (sclp.has_ib) 3276 vcpu->arch.sie_block->eca |= ECA_IB; 3277 if (sclp.has_siif) 3278 vcpu->arch.sie_block->eca |= ECA_SII; 3279 if (sclp.has_sigpif) 3280 vcpu->arch.sie_block->eca |= ECA_SIGPI; 3281 if (test_kvm_facility(vcpu->kvm, 129)) { 3282 vcpu->arch.sie_block->eca |= ECA_VX; 3283 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; 3284 } 3285 if (test_kvm_facility(vcpu->kvm, 139)) 3286 vcpu->arch.sie_block->ecd |= ECD_MEF; 3287 if (test_kvm_facility(vcpu->kvm, 156)) 3288 vcpu->arch.sie_block->ecd |= ECD_ETOKENF; 3289 if (vcpu->arch.sie_block->gd) { 3290 vcpu->arch.sie_block->eca |= ECA_AIV; 3291 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u", 3292 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id); 3293 } 3294 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx) 3295 | SDNXC; 3296 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb; 
3297 3298 if (sclp.has_kss) 3299 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS); 3300 else 3301 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; 3302 3303 if (vcpu->kvm->arch.use_cmma) { 3304 rc = kvm_s390_vcpu_setup_cmma(vcpu); 3305 if (rc) 3306 return rc; 3307 } 3308 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 3309 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; 3310 3311 vcpu->arch.sie_block->hpid = HPID_KVM; 3312 3313 kvm_s390_vcpu_crypto_setup(vcpu); 3314 3315 mutex_lock(&vcpu->kvm->lock); 3316 if (kvm_s390_pv_is_protected(vcpu->kvm)) { 3317 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc); 3318 if (rc) 3319 kvm_s390_vcpu_unsetup_cmma(vcpu); 3320 } 3321 mutex_unlock(&vcpu->kvm->lock); 3322 3323 return rc; 3324 } 3325 3326 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) 3327 { 3328 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id)) 3329 return -EINVAL; 3330 return 0; 3331 } 3332 3333 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) 3334 { 3335 struct sie_page *sie_page; 3336 int rc; 3337 3338 BUILD_BUG_ON(sizeof(struct sie_page) != 4096); 3339 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT); 3340 if (!sie_page) 3341 return -ENOMEM; 3342 3343 vcpu->arch.sie_block = &sie_page->sie_block; 3344 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb; 3345 3346 /* the real guest size will always be smaller than msl */ 3347 vcpu->arch.sie_block->mso = 0; 3348 vcpu->arch.sie_block->msl = sclp.hamax; 3349 3350 vcpu->arch.sie_block->icpua = vcpu->vcpu_id; 3351 spin_lock_init(&vcpu->arch.local_int.lock); 3352 vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin; 3353 if (vcpu->arch.sie_block->gd && sclp.has_gisaf) 3354 vcpu->arch.sie_block->gd |= GISA_FORMAT1; 3355 seqcount_init(&vcpu->arch.cputm_seqcount); 3356 3357 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 3358 kvm_clear_async_pf_completion_queue(vcpu); 3359 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX | 3360 KVM_SYNC_GPRS | 3361 KVM_SYNC_ACRS | 3362 KVM_SYNC_CRS | 3363 KVM_SYNC_ARCH0 | 3364 KVM_SYNC_PFAULT | 3365 KVM_SYNC_DIAG318; 3366 kvm_s390_set_prefix(vcpu, 0); 3367 if (test_kvm_facility(vcpu->kvm, 64)) 3368 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; 3369 if (test_kvm_facility(vcpu->kvm, 82)) 3370 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC; 3371 if (test_kvm_facility(vcpu->kvm, 133)) 3372 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB; 3373 if (test_kvm_facility(vcpu->kvm, 156)) 3374 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN; 3375 /* fprs can be synchronized via vrs, even if the guest has no vx. With 3376 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format. 
3377 */ 3378 if (MACHINE_HAS_VX) 3379 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS; 3380 else 3381 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS; 3382 3383 if (kvm_is_ucontrol(vcpu->kvm)) { 3384 rc = __kvm_ucontrol_vcpu_init(vcpu); 3385 if (rc) 3386 goto out_free_sie_block; 3387 } 3388 3389 VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", 3390 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block); 3391 trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block); 3392 3393 rc = kvm_s390_vcpu_setup(vcpu); 3394 if (rc) 3395 goto out_ucontrol_uninit; 3396 return 0; 3397 3398 out_ucontrol_uninit: 3399 if (kvm_is_ucontrol(vcpu->kvm)) 3400 gmap_remove(vcpu->arch.gmap); 3401 out_free_sie_block: 3402 free_page((unsigned long)(vcpu->arch.sie_block)); 3403 return rc; 3404 } 3405 3406 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 3407 { 3408 clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask); 3409 return kvm_s390_vcpu_has_irq(vcpu, 0); 3410 } 3411 3412 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) 3413 { 3414 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE); 3415 } 3416 3417 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu) 3418 { 3419 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); 3420 exit_sie(vcpu); 3421 } 3422 3423 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu) 3424 { 3425 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); 3426 } 3427 3428 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu) 3429 { 3430 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20); 3431 exit_sie(vcpu); 3432 } 3433 3434 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu) 3435 { 3436 return atomic_read(&vcpu->arch.sie_block->prog20) & 3437 (PROG_BLOCK_SIE | PROG_REQUEST); 3438 } 3439 3440 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu) 3441 { 3442 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20); 3443 } 3444 3445 /* 3446 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running. 3447 * If the CPU is not running (e.g. waiting as idle) the function will 3448 * return immediately. 
*/ 3449 void exit_sie(struct kvm_vcpu *vcpu) 3450 { 3451 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT); 3452 kvm_s390_vsie_kick(vcpu); 3453 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE) 3454 cpu_relax(); 3455 } 3456 3457 /* Kick a guest cpu out of SIE to process a request synchronously */ 3458 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu) 3459 { 3460 kvm_make_request(req, vcpu); 3461 kvm_s390_vcpu_request(vcpu); 3462 } 3463 3464 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, 3465 unsigned long end) 3466 { 3467 struct kvm *kvm = gmap->private; 3468 struct kvm_vcpu *vcpu; 3469 unsigned long prefix; 3470 unsigned long i; 3471 3472 if (gmap_is_shadow(gmap)) 3473 return; 3474 if (start >= 1UL << 31) 3475 /* We are only interested in prefix pages */ 3476 return; 3477 kvm_for_each_vcpu(i, vcpu, kvm) { 3478 /* match against both prefix pages */ 3479 prefix = kvm_s390_get_prefix(vcpu); 3480 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) { 3481 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx", 3482 start, end); 3483 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu); 3484 } 3485 } 3486 } 3487 3488 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) 3489 { 3490 /* do not poll with more than halt_poll_max_steal percent of steal time */ 3491 if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >= 3492 READ_ONCE(halt_poll_max_steal)) { 3493 vcpu->stat.halt_no_poll_steal++; 3494 return true; 3495 } 3496 return false; 3497 } 3498 3499 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) 3500 { 3501 /* kvm common code refers to this, but never calls it */ 3502 BUG(); 3503 return 0; 3504 } 3505 3506 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, 3507 struct kvm_one_reg *reg) 3508 { 3509 int r = -EINVAL; 3510 3511 switch (reg->id) { 3512 case KVM_REG_S390_TODPR: 3513 r = put_user(vcpu->arch.sie_block->todpr, 3514 (u32 __user *)reg->addr); 3515 break; 3516 case KVM_REG_S390_EPOCHDIFF: 3517 r = put_user(vcpu->arch.sie_block->epoch, 3518 (u64 __user *)reg->addr); 3519 break; 3520 case KVM_REG_S390_CPU_TIMER: 3521 r = put_user(kvm_s390_get_cpu_timer(vcpu), 3522 (u64 __user *)reg->addr); 3523 break; 3524 case KVM_REG_S390_CLOCK_COMP: 3525 r = put_user(vcpu->arch.sie_block->ckc, 3526 (u64 __user *)reg->addr); 3527 break; 3528 case KVM_REG_S390_PFTOKEN: 3529 r = put_user(vcpu->arch.pfault_token, 3530 (u64 __user *)reg->addr); 3531 break; 3532 case KVM_REG_S390_PFCOMPARE: 3533 r = put_user(vcpu->arch.pfault_compare, 3534 (u64 __user *)reg->addr); 3535 break; 3536 case KVM_REG_S390_PFSELECT: 3537 r = put_user(vcpu->arch.pfault_select, 3538 (u64 __user *)reg->addr); 3539 break; 3540 case KVM_REG_S390_PP: 3541 r = put_user(vcpu->arch.sie_block->pp, 3542 (u64 __user *)reg->addr); 3543 break; 3544 case KVM_REG_S390_GBEA: 3545 r = put_user(vcpu->arch.sie_block->gbea, 3546 (u64 __user *)reg->addr); 3547 break; 3548 default: 3549 break; 3550 } 3551 3552 return r; 3553 } 3554 3555 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, 3556 struct kvm_one_reg *reg) 3557 { 3558 int r = -EINVAL; 3559 __u64 val; 3560 3561 switch (reg->id) { 3562 case KVM_REG_S390_TODPR: 3563 r = get_user(vcpu->arch.sie_block->todpr, 3564 (u32 __user *)reg->addr); 3565 break; 3566 case KVM_REG_S390_EPOCHDIFF: 3567 r = get_user(vcpu->arch.sie_block->epoch, 3568 (u64 __user *)reg->addr); 3569 break; 3570 case KVM_REG_S390_CPU_TIMER: 3571 r = get_user(val, (u64 __user *)reg->addr); 3572 if (!r) 3573 kvm_s390_set_cpu_timer(vcpu, val); 3574 break; 3575 case 
KVM_REG_S390_CLOCK_COMP: 3576 r = get_user(vcpu->arch.sie_block->ckc, 3577 (u64 __user *)reg->addr); 3578 break; 3579 case KVM_REG_S390_PFTOKEN: 3580 r = get_user(vcpu->arch.pfault_token, 3581 (u64 __user *)reg->addr); 3582 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 3583 kvm_clear_async_pf_completion_queue(vcpu); 3584 break; 3585 case KVM_REG_S390_PFCOMPARE: 3586 r = get_user(vcpu->arch.pfault_compare, 3587 (u64 __user *)reg->addr); 3588 break; 3589 case KVM_REG_S390_PFSELECT: 3590 r = get_user(vcpu->arch.pfault_select, 3591 (u64 __user *)reg->addr); 3592 break; 3593 case KVM_REG_S390_PP: 3594 r = get_user(vcpu->arch.sie_block->pp, 3595 (u64 __user *)reg->addr); 3596 break; 3597 case KVM_REG_S390_GBEA: 3598 r = get_user(vcpu->arch.sie_block->gbea, 3599 (u64 __user *)reg->addr); 3600 break; 3601 default: 3602 break; 3603 } 3604 3605 return r; 3606 } 3607 3608 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu) 3609 { 3610 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI; 3611 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 3612 memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb)); 3613 3614 kvm_clear_async_pf_completion_queue(vcpu); 3615 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) 3616 kvm_s390_vcpu_stop(vcpu); 3617 kvm_s390_clear_local_irqs(vcpu); 3618 } 3619 3620 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) 3621 { 3622 /* Initial reset is a superset of the normal reset */ 3623 kvm_arch_vcpu_ioctl_normal_reset(vcpu); 3624 3625 /* 3626 * This equals initial cpu reset in pop, but we don't switch to ESA. 3627 * We do not only reset the internal data, but also ... 3628 */ 3629 vcpu->arch.sie_block->gpsw.mask = 0; 3630 vcpu->arch.sie_block->gpsw.addr = 0; 3631 kvm_s390_set_prefix(vcpu, 0); 3632 kvm_s390_set_cpu_timer(vcpu, 0); 3633 vcpu->arch.sie_block->ckc = 0; 3634 memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr)); 3635 vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK; 3636 vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK; 3637 3638 /* ... the data in sync regs */ 3639 memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs)); 3640 vcpu->run->s.regs.ckc = 0; 3641 vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK; 3642 vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK; 3643 vcpu->run->psw_addr = 0; 3644 vcpu->run->psw_mask = 0; 3645 vcpu->run->s.regs.todpr = 0; 3646 vcpu->run->s.regs.cputm = 0; 3647 vcpu->run->s.regs.ckc = 0; 3648 vcpu->run->s.regs.pp = 0; 3649 vcpu->run->s.regs.gbea = 1; 3650 vcpu->run->s.regs.fpc = 0; 3651 /* 3652 * Do not reset these registers in the protected case, as some of 3653 * them are overlayed and they are not accessible in this case 3654 * anyway. 
	 */
	if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
		vcpu->arch.sie_block->gbea = 1;
		vcpu->arch.sie_block->pp = 0;
		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
		vcpu->arch.sie_block->todpr = 0;
	}
}

static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
{
	struct kvm_sync_regs *regs = &vcpu->run->s.regs;

	/* Clear reset is a superset of the initial reset */
	kvm_arch_vcpu_ioctl_initial_reset(vcpu);

	memset(&regs->gprs, 0, sizeof(regs->gprs));
	memset(&regs->vrs, 0, sizeof(regs->vrs));
	memset(&regs->acrs, 0, sizeof(regs->acrs));
	memset(&regs->gscb, 0, sizeof(regs->gscb));

	regs->etoken = 0;
	regs->etoken_extension = 0;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	vcpu_load(vcpu);
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	vcpu_load(vcpu);
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	vcpu_load(vcpu);

	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));

	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	vcpu_load(vcpu);

	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));

	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	int ret = 0;

	vcpu_load(vcpu);

	if (test_fp_ctl(fpu->fpc)) {
		ret = -EINVAL;
		goto out;
	}
	vcpu->run->s.regs.fpc = fpu->fpc;
	if (MACHINE_HAS_VX)
		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
				 (freg_t *) fpu->fprs);
	else
		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));

out:
	vcpu_put(vcpu);
	return ret;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	vcpu_load(vcpu);

	/* make sure we have the latest values */
	save_fpu_regs();
	if (MACHINE_HAS_VX)
		convert_vx_to_fp((freg_t *) fpu->fprs,
				 (__vector128 *) vcpu->run->s.regs.vrs);
	else
		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->run->s.regs.fpc;

	vcpu_put(vcpu);
	return 0;
}

static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!is_vcpu_stopped(vcpu))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}

#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	int rc = 0;

	vcpu_load(vcpu);

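	/*
	 * Illustrative userspace sketch of driving this handler (not part of
	 * the kernel build; "vcpu_fd" is an assumed open vcpu file
	 * descriptor, not an identifier from this file):
	 *
	 *	struct kvm_guest_debug dbg = {
	 *		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
	 *	};
	 *	if (ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg) < 0)
	 *		perror("KVM_SET_GUEST_DEBUG");
	 */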
3789 vcpu->guest_debug = 0; 3790 kvm_s390_clear_bp_data(vcpu); 3791 3792 if (dbg->control & ~VALID_GUESTDBG_FLAGS) { 3793 rc = -EINVAL; 3794 goto out; 3795 } 3796 if (!sclp.has_gpere) { 3797 rc = -EINVAL; 3798 goto out; 3799 } 3800 3801 if (dbg->control & KVM_GUESTDBG_ENABLE) { 3802 vcpu->guest_debug = dbg->control; 3803 /* enforce guest PER */ 3804 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P); 3805 3806 if (dbg->control & KVM_GUESTDBG_USE_HW_BP) 3807 rc = kvm_s390_import_bp_data(vcpu, dbg); 3808 } else { 3809 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P); 3810 vcpu->arch.guestdbg.last_bp = 0; 3811 } 3812 3813 if (rc) { 3814 vcpu->guest_debug = 0; 3815 kvm_s390_clear_bp_data(vcpu); 3816 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P); 3817 } 3818 3819 out: 3820 vcpu_put(vcpu); 3821 return rc; 3822 } 3823 3824 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, 3825 struct kvm_mp_state *mp_state) 3826 { 3827 int ret; 3828 3829 vcpu_load(vcpu); 3830 3831 /* CHECK_STOP and LOAD are not supported yet */ 3832 ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED : 3833 KVM_MP_STATE_OPERATING; 3834 3835 vcpu_put(vcpu); 3836 return ret; 3837 } 3838 3839 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, 3840 struct kvm_mp_state *mp_state) 3841 { 3842 int rc = 0; 3843 3844 vcpu_load(vcpu); 3845 3846 /* user space knows about this interface - let it control the state */ 3847 kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm); 3848 3849 switch (mp_state->mp_state) { 3850 case KVM_MP_STATE_STOPPED: 3851 rc = kvm_s390_vcpu_stop(vcpu); 3852 break; 3853 case KVM_MP_STATE_OPERATING: 3854 rc = kvm_s390_vcpu_start(vcpu); 3855 break; 3856 case KVM_MP_STATE_LOAD: 3857 if (!kvm_s390_pv_cpu_is_protected(vcpu)) { 3858 rc = -ENXIO; 3859 break; 3860 } 3861 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD); 3862 break; 3863 case KVM_MP_STATE_CHECK_STOP: 3864 fallthrough; /* CHECK_STOP and LOAD are not supported yet */ 3865 default: 3866 rc = -ENXIO; 3867 } 3868 3869 vcpu_put(vcpu); 3870 return rc; 3871 } 3872 3873 static bool ibs_enabled(struct kvm_vcpu *vcpu) 3874 { 3875 return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS); 3876 } 3877 3878 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) 3879 { 3880 retry: 3881 kvm_s390_vcpu_request_handled(vcpu); 3882 if (!kvm_request_pending(vcpu)) 3883 return 0; 3884 /* 3885 * We use MMU_RELOAD just to re-arm the ipte notifier for the 3886 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock. 3887 * This ensures that the ipte instruction for this request has 3888 * already finished. We might race against a second unmapper that 3889 * wants to set the blocking bit. Lets just retry the request loop. 
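	 * The gmap_mprotect_notify() call below re-arms that notification for
	 * both prefix pages (2 * PAGE_SIZE starting at the current prefix).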
3890 */ 3891 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) { 3892 int rc; 3893 rc = gmap_mprotect_notify(vcpu->arch.gmap, 3894 kvm_s390_get_prefix(vcpu), 3895 PAGE_SIZE * 2, PROT_WRITE); 3896 if (rc) { 3897 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); 3898 return rc; 3899 } 3900 goto retry; 3901 } 3902 3903 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) { 3904 vcpu->arch.sie_block->ihcpu = 0xffff; 3905 goto retry; 3906 } 3907 3908 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) { 3909 if (!ibs_enabled(vcpu)) { 3910 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1); 3911 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS); 3912 } 3913 goto retry; 3914 } 3915 3916 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) { 3917 if (ibs_enabled(vcpu)) { 3918 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0); 3919 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS); 3920 } 3921 goto retry; 3922 } 3923 3924 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) { 3925 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC; 3926 goto retry; 3927 } 3928 3929 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) { 3930 /* 3931 * Disable CMM virtualization; we will emulate the ESSA 3932 * instruction manually, in order to provide additional 3933 * functionalities needed for live migration. 3934 */ 3935 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA; 3936 goto retry; 3937 } 3938 3939 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) { 3940 /* 3941 * Re-enable CMM virtualization if CMMA is available and 3942 * CMM has been used. 3943 */ 3944 if ((vcpu->kvm->arch.use_cmma) && 3945 (vcpu->kvm->mm->context.uses_cmm)) 3946 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA; 3947 goto retry; 3948 } 3949 3950 /* nothing to do, just clear the request */ 3951 kvm_clear_request(KVM_REQ_UNHALT, vcpu); 3952 /* we left the vsie handler, nothing to do, just clear the request */ 3953 kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu); 3954 3955 return 0; 3956 } 3957 3958 void kvm_s390_set_tod_clock(struct kvm *kvm, 3959 const struct kvm_s390_vm_tod_clock *gtod) 3960 { 3961 struct kvm_vcpu *vcpu; 3962 union tod_clock clk; 3963 unsigned long i; 3964 3965 mutex_lock(&kvm->lock); 3966 preempt_disable(); 3967 3968 store_tod_clock_ext(&clk); 3969 3970 kvm->arch.epoch = gtod->tod - clk.tod; 3971 kvm->arch.epdx = 0; 3972 if (test_kvm_facility(kvm, 139)) { 3973 kvm->arch.epdx = gtod->epoch_idx - clk.ei; 3974 if (kvm->arch.epoch > gtod->tod) 3975 kvm->arch.epdx -= 1; 3976 } 3977 3978 kvm_s390_vcpu_block_all(kvm); 3979 kvm_for_each_vcpu(i, vcpu, kvm) { 3980 vcpu->arch.sie_block->epoch = kvm->arch.epoch; 3981 vcpu->arch.sie_block->epdx = kvm->arch.epdx; 3982 } 3983 3984 kvm_s390_vcpu_unblock_all(kvm); 3985 preempt_enable(); 3986 mutex_unlock(&kvm->lock); 3987 } 3988 3989 /** 3990 * kvm_arch_fault_in_page - fault-in guest page if necessary 3991 * @vcpu: The corresponding virtual cpu 3992 * @gpa: Guest physical address 3993 * @writable: Whether the page should be writable or not 3994 * 3995 * Make sure that a guest page has been faulted-in on the host. 3996 * 3997 * Return: Zero on success, negative error code otherwise. 3998 */ 3999 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable) 4000 { 4001 return gmap_fault(vcpu->arch.gmap, gpa, 4002 writable ? 
FAULT_FLAG_WRITE : 0); 4003 } 4004 4005 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token, 4006 unsigned long token) 4007 { 4008 struct kvm_s390_interrupt inti; 4009 struct kvm_s390_irq irq; 4010 4011 if (start_token) { 4012 irq.u.ext.ext_params2 = token; 4013 irq.type = KVM_S390_INT_PFAULT_INIT; 4014 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq)); 4015 } else { 4016 inti.type = KVM_S390_INT_PFAULT_DONE; 4017 inti.parm64 = token; 4018 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti)); 4019 } 4020 } 4021 4022 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, 4023 struct kvm_async_pf *work) 4024 { 4025 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token); 4026 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token); 4027 4028 return true; 4029 } 4030 4031 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, 4032 struct kvm_async_pf *work) 4033 { 4034 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token); 4035 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token); 4036 } 4037 4038 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, 4039 struct kvm_async_pf *work) 4040 { 4041 /* s390 will always inject the page directly */ 4042 } 4043 4044 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu) 4045 { 4046 /* 4047 * s390 will always inject the page directly, 4048 * but we still want check_async_completion to cleanup 4049 */ 4050 return true; 4051 } 4052 4053 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu) 4054 { 4055 hva_t hva; 4056 struct kvm_arch_async_pf arch; 4057 4058 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 4059 return false; 4060 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) != 4061 vcpu->arch.pfault_compare) 4062 return false; 4063 if (psw_extint_disabled(vcpu)) 4064 return false; 4065 if (kvm_s390_vcpu_has_irq(vcpu, 0)) 4066 return false; 4067 if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK)) 4068 return false; 4069 if (!vcpu->arch.gmap->pfault_enabled) 4070 return false; 4071 4072 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr)); 4073 hva += current->thread.gmap_addr & ~PAGE_MASK; 4074 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8)) 4075 return false; 4076 4077 return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch); 4078 } 4079 4080 static int vcpu_pre_run(struct kvm_vcpu *vcpu) 4081 { 4082 int rc, cpuflags; 4083 4084 /* 4085 * On s390 notifications for arriving pages will be delivered directly 4086 * to the guest but the house keeping for completed pfaults is 4087 * handled outside the worker. 
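	 * kvm_check_async_pf_completion() below performs that housekeeping
	 * once per guest entry.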
4088 */ 4089 kvm_check_async_pf_completion(vcpu); 4090 4091 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14]; 4092 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15]; 4093 4094 if (need_resched()) 4095 schedule(); 4096 4097 if (!kvm_is_ucontrol(vcpu->kvm)) { 4098 rc = kvm_s390_deliver_pending_interrupts(vcpu); 4099 if (rc) 4100 return rc; 4101 } 4102 4103 rc = kvm_s390_handle_requests(vcpu); 4104 if (rc) 4105 return rc; 4106 4107 if (guestdbg_enabled(vcpu)) { 4108 kvm_s390_backup_guest_per_regs(vcpu); 4109 kvm_s390_patch_guest_per_regs(vcpu); 4110 } 4111 4112 clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask); 4113 4114 vcpu->arch.sie_block->icptcode = 0; 4115 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags); 4116 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags); 4117 trace_kvm_s390_sie_enter(vcpu, cpuflags); 4118 4119 return 0; 4120 } 4121 4122 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) 4123 { 4124 struct kvm_s390_pgm_info pgm_info = { 4125 .code = PGM_ADDRESSING, 4126 }; 4127 u8 opcode, ilen; 4128 int rc; 4129 4130 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); 4131 trace_kvm_s390_sie_fault(vcpu); 4132 4133 /* 4134 * We want to inject an addressing exception, which is defined as a 4135 * suppressing or terminating exception. However, since we came here 4136 * by a DAT access exception, the PSW still points to the faulting 4137 * instruction since DAT exceptions are nullifying. So we've got 4138 * to look up the current opcode to get the length of the instruction 4139 * to be able to forward the PSW. 4140 */ 4141 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1); 4142 ilen = insn_length(opcode); 4143 if (rc < 0) { 4144 return rc; 4145 } else if (rc) { 4146 /* Instruction-Fetching Exceptions - we can't detect the ilen. 4147 * Forward by arbitrary ilc, injection will take care of 4148 * nullification if necessary. 
4149 */ 4150 pgm_info = vcpu->arch.pgm; 4151 ilen = 4; 4152 } 4153 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID; 4154 kvm_s390_forward_psw(vcpu, ilen); 4155 return kvm_s390_inject_prog_irq(vcpu, &pgm_info); 4156 } 4157 4158 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) 4159 { 4160 struct mcck_volatile_info *mcck_info; 4161 struct sie_page *sie_page; 4162 4163 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", 4164 vcpu->arch.sie_block->icptcode); 4165 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); 4166 4167 if (guestdbg_enabled(vcpu)) 4168 kvm_s390_restore_guest_per_regs(vcpu); 4169 4170 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14; 4171 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15; 4172 4173 if (exit_reason == -EINTR) { 4174 VCPU_EVENT(vcpu, 3, "%s", "machine check"); 4175 sie_page = container_of(vcpu->arch.sie_block, 4176 struct sie_page, sie_block); 4177 mcck_info = &sie_page->mcck_info; 4178 kvm_s390_reinject_machine_check(vcpu, mcck_info); 4179 return 0; 4180 } 4181 4182 if (vcpu->arch.sie_block->icptcode > 0) { 4183 int rc = kvm_handle_sie_intercept(vcpu); 4184 4185 if (rc != -EOPNOTSUPP) 4186 return rc; 4187 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC; 4188 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; 4189 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; 4190 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; 4191 return -EREMOTE; 4192 } else if (exit_reason != -EFAULT) { 4193 vcpu->stat.exit_null++; 4194 return 0; 4195 } else if (kvm_is_ucontrol(vcpu->kvm)) { 4196 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; 4197 vcpu->run->s390_ucontrol.trans_exc_code = 4198 current->thread.gmap_addr; 4199 vcpu->run->s390_ucontrol.pgm_code = 0x10; 4200 return -EREMOTE; 4201 } else if (current->thread.gmap_pfault) { 4202 trace_kvm_s390_major_guest_pfault(vcpu); 4203 current->thread.gmap_pfault = 0; 4204 if (kvm_arch_setup_async_pf(vcpu)) 4205 return 0; 4206 vcpu->stat.pfault_sync++; 4207 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1); 4208 } 4209 return vcpu_post_run_fault_in_sie(vcpu); 4210 } 4211 4212 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK) 4213 static int __vcpu_run(struct kvm_vcpu *vcpu) 4214 { 4215 int rc, exit_reason; 4216 struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block; 4217 4218 /* 4219 * We try to hold kvm->srcu during most of vcpu_run (except when run- 4220 * ning the guest), so that memslots (and other stuff) are protected 4221 */ 4222 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 4223 4224 do { 4225 rc = vcpu_pre_run(vcpu); 4226 if (rc) 4227 break; 4228 4229 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 4230 /* 4231 * As PF_VCPU will be used in fault handler, between 4232 * guest_enter and guest_exit should be no uaccess. 
4233 */ 4234 local_irq_disable(); 4235 guest_enter_irqoff(); 4236 __disable_cpu_timer_accounting(vcpu); 4237 local_irq_enable(); 4238 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4239 memcpy(sie_page->pv_grregs, 4240 vcpu->run->s.regs.gprs, 4241 sizeof(sie_page->pv_grregs)); 4242 } 4243 if (test_cpu_flag(CIF_FPU)) 4244 load_fpu_regs(); 4245 exit_reason = sie64a(vcpu->arch.sie_block, 4246 vcpu->run->s.regs.gprs); 4247 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4248 memcpy(vcpu->run->s.regs.gprs, 4249 sie_page->pv_grregs, 4250 sizeof(sie_page->pv_grregs)); 4251 /* 4252 * We're not allowed to inject interrupts on intercepts 4253 * that leave the guest state in an "in-between" state 4254 * where the next SIE entry will do a continuation. 4255 * Fence interrupts in our "internal" PSW. 4256 */ 4257 if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR || 4258 vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) { 4259 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK; 4260 } 4261 } 4262 local_irq_disable(); 4263 __enable_cpu_timer_accounting(vcpu); 4264 guest_exit_irqoff(); 4265 local_irq_enable(); 4266 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 4267 4268 rc = vcpu_post_run(vcpu, exit_reason); 4269 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc); 4270 4271 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 4272 return rc; 4273 } 4274 4275 static void sync_regs_fmt2(struct kvm_vcpu *vcpu) 4276 { 4277 struct kvm_run *kvm_run = vcpu->run; 4278 struct runtime_instr_cb *riccb; 4279 struct gs_cb *gscb; 4280 4281 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb; 4282 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb; 4283 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; 4284 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; 4285 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) { 4286 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr; 4287 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp; 4288 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea; 4289 } 4290 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) { 4291 vcpu->arch.pfault_token = kvm_run->s.regs.pft; 4292 vcpu->arch.pfault_select = kvm_run->s.regs.pfs; 4293 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc; 4294 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 4295 kvm_clear_async_pf_completion_queue(vcpu); 4296 } 4297 if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) { 4298 vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318; 4299 vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc; 4300 VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc); 4301 } 4302 /* 4303 * If userspace sets the riccb (e.g. after migration) to a valid state, 4304 * we should enable RI here instead of doing the lazy enablement. 4305 */ 4306 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) && 4307 test_kvm_facility(vcpu->kvm, 64) && 4308 riccb->v && 4309 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) { 4310 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)"); 4311 vcpu->arch.sie_block->ecb3 |= ECB3_RI; 4312 } 4313 /* 4314 * If userspace sets the gscb (e.g. after migration) to non-zero, 4315 * we should enable GS here instead of doing the lazy enablement. 
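	 * This mirrors the riccb handling above: check the facility, check the
	 * synced control block, then set the SIE enablement bits.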
4316 */ 4317 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) && 4318 test_kvm_facility(vcpu->kvm, 133) && 4319 gscb->gssm && 4320 !vcpu->arch.gs_enabled) { 4321 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)"); 4322 vcpu->arch.sie_block->ecb |= ECB_GS; 4323 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; 4324 vcpu->arch.gs_enabled = 1; 4325 } 4326 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) && 4327 test_kvm_facility(vcpu->kvm, 82)) { 4328 vcpu->arch.sie_block->fpf &= ~FPF_BPBC; 4329 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0; 4330 } 4331 if (MACHINE_HAS_GS) { 4332 preempt_disable(); 4333 __ctl_set_bit(2, 4); 4334 if (current->thread.gs_cb) { 4335 vcpu->arch.host_gscb = current->thread.gs_cb; 4336 save_gs_cb(vcpu->arch.host_gscb); 4337 } 4338 if (vcpu->arch.gs_enabled) { 4339 current->thread.gs_cb = (struct gs_cb *) 4340 &vcpu->run->s.regs.gscb; 4341 restore_gs_cb(current->thread.gs_cb); 4342 } 4343 preempt_enable(); 4344 } 4345 /* SIE will load etoken directly from SDNX and therefore kvm_run */ 4346 } 4347 4348 static void sync_regs(struct kvm_vcpu *vcpu) 4349 { 4350 struct kvm_run *kvm_run = vcpu->run; 4351 4352 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) 4353 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); 4354 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) { 4355 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128); 4356 /* some control register changes require a tlb flush */ 4357 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 4358 } 4359 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) { 4360 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm); 4361 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc; 4362 } 4363 save_access_regs(vcpu->arch.host_acrs); 4364 restore_access_regs(vcpu->run->s.regs.acrs); 4365 /* save host (userspace) fprs/vrs */ 4366 save_fpu_regs(); 4367 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc; 4368 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs; 4369 if (MACHINE_HAS_VX) 4370 current->thread.fpu.regs = vcpu->run->s.regs.vrs; 4371 else 4372 current->thread.fpu.regs = vcpu->run->s.regs.fprs; 4373 current->thread.fpu.fpc = vcpu->run->s.regs.fpc; 4374 if (test_fp_ctl(current->thread.fpu.fpc)) 4375 /* User space provided an invalid FPC, let's clear it */ 4376 current->thread.fpu.fpc = 0; 4377 4378 /* Sync fmt2 only data */ 4379 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) { 4380 sync_regs_fmt2(vcpu); 4381 } else { 4382 /* 4383 * In several places we have to modify our internal view to 4384 * not do things that are disallowed by the ultravisor. For 4385 * example we must not inject interrupts after specific exits 4386 * (e.g. 112 prefix page not secure). We do this by turning 4387 * off the machine check, external and I/O interrupt bits 4388 * of our PSW copy. To avoid getting validity intercepts, we 4389 * do only accept the condition code from userspace. 
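		 * The machine check, external and I/O bits themselves are
		 * fenced in __vcpu_run() after ICPT_PV_INSTR/ICPT_PV_PREF
		 * intercepts.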
4390 */ 4391 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC; 4392 vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask & 4393 PSW_MASK_CC; 4394 } 4395 4396 kvm_run->kvm_dirty_regs = 0; 4397 } 4398 4399 static void store_regs_fmt2(struct kvm_vcpu *vcpu) 4400 { 4401 struct kvm_run *kvm_run = vcpu->run; 4402 4403 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr; 4404 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp; 4405 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea; 4406 kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC; 4407 kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val; 4408 if (MACHINE_HAS_GS) { 4409 preempt_disable(); 4410 __ctl_set_bit(2, 4); 4411 if (vcpu->arch.gs_enabled) 4412 save_gs_cb(current->thread.gs_cb); 4413 current->thread.gs_cb = vcpu->arch.host_gscb; 4414 restore_gs_cb(vcpu->arch.host_gscb); 4415 if (!vcpu->arch.host_gscb) 4416 __ctl_clear_bit(2, 4); 4417 vcpu->arch.host_gscb = NULL; 4418 preempt_enable(); 4419 } 4420 /* SIE will save etoken directly into SDNX and therefore kvm_run */ 4421 } 4422 4423 static void store_regs(struct kvm_vcpu *vcpu) 4424 { 4425 struct kvm_run *kvm_run = vcpu->run; 4426 4427 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask; 4428 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; 4429 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu); 4430 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128); 4431 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu); 4432 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc; 4433 kvm_run->s.regs.pft = vcpu->arch.pfault_token; 4434 kvm_run->s.regs.pfs = vcpu->arch.pfault_select; 4435 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare; 4436 save_access_regs(vcpu->run->s.regs.acrs); 4437 restore_access_regs(vcpu->arch.host_acrs); 4438 /* Save guest register state */ 4439 save_fpu_regs(); 4440 vcpu->run->s.regs.fpc = current->thread.fpu.fpc; 4441 /* Restore will be done lazily at return */ 4442 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc; 4443 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs; 4444 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) 4445 store_regs_fmt2(vcpu); 4446 } 4447 4448 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) 4449 { 4450 struct kvm_run *kvm_run = vcpu->run; 4451 int rc; 4452 4453 if (kvm_run->immediate_exit) 4454 return -EINTR; 4455 4456 if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS || 4457 kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS) 4458 return -EINVAL; 4459 4460 vcpu_load(vcpu); 4461 4462 if (guestdbg_exit_pending(vcpu)) { 4463 kvm_s390_prepare_debug_exit(vcpu); 4464 rc = 0; 4465 goto out; 4466 } 4467 4468 kvm_sigset_activate(vcpu); 4469 4470 /* 4471 * no need to check the return value of vcpu_start as it can only have 4472 * an error for protvirt, but protvirt means user cpu state 4473 */ 4474 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) { 4475 kvm_s390_vcpu_start(vcpu); 4476 } else if (is_vcpu_stopped(vcpu)) { 4477 pr_err_ratelimited("can't run stopped vcpu %d\n", 4478 vcpu->vcpu_id); 4479 rc = -EINVAL; 4480 goto out; 4481 } 4482 4483 sync_regs(vcpu); 4484 enable_cpu_timer_accounting(vcpu); 4485 4486 might_fault(); 4487 rc = __vcpu_run(vcpu); 4488 4489 if (signal_pending(current) && !rc) { 4490 kvm_run->exit_reason = KVM_EXIT_INTR; 4491 rc = -EINTR; 4492 } 4493 4494 if (guestdbg_exit_pending(vcpu) && !rc) { 4495 kvm_s390_prepare_debug_exit(vcpu); 4496 rc = 0; 4497 } 4498 4499 if (rc == -EREMOTE) { 4500 /* userspace support is needed, kvm_run has been prepared */ 4501 rc = 0; 4502 } 4503 4504 
disable_cpu_timer_accounting(vcpu); 4505 store_regs(vcpu); 4506 4507 kvm_sigset_deactivate(vcpu); 4508 4509 vcpu->stat.exit_userspace++; 4510 out: 4511 vcpu_put(vcpu); 4512 return rc; 4513 } 4514 4515 /* 4516 * store status at address 4517 * we use have two special cases: 4518 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit 4519 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix 4520 */ 4521 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa) 4522 { 4523 unsigned char archmode = 1; 4524 freg_t fprs[NUM_FPRS]; 4525 unsigned int px; 4526 u64 clkcomp, cputm; 4527 int rc; 4528 4529 px = kvm_s390_get_prefix(vcpu); 4530 if (gpa == KVM_S390_STORE_STATUS_NOADDR) { 4531 if (write_guest_abs(vcpu, 163, &archmode, 1)) 4532 return -EFAULT; 4533 gpa = 0; 4534 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) { 4535 if (write_guest_real(vcpu, 163, &archmode, 1)) 4536 return -EFAULT; 4537 gpa = px; 4538 } else 4539 gpa -= __LC_FPREGS_SAVE_AREA; 4540 4541 /* manually convert vector registers if necessary */ 4542 if (MACHINE_HAS_VX) { 4543 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs); 4544 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, 4545 fprs, 128); 4546 } else { 4547 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, 4548 vcpu->run->s.regs.fprs, 128); 4549 } 4550 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA, 4551 vcpu->run->s.regs.gprs, 128); 4552 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA, 4553 &vcpu->arch.sie_block->gpsw, 16); 4554 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA, 4555 &px, 4); 4556 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA, 4557 &vcpu->run->s.regs.fpc, 4); 4558 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA, 4559 &vcpu->arch.sie_block->todpr, 4); 4560 cputm = kvm_s390_get_cpu_timer(vcpu); 4561 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA, 4562 &cputm, 8); 4563 clkcomp = vcpu->arch.sie_block->ckc >> 8; 4564 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA, 4565 &clkcomp, 8); 4566 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA, 4567 &vcpu->run->s.regs.acrs, 64); 4568 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA, 4569 &vcpu->arch.sie_block->gcr, 128); 4570 return rc ? -EFAULT : 0; 4571 } 4572 4573 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) 4574 { 4575 /* 4576 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy 4577 * switch in the run ioctl. 
Let's update our copies before we save 4578 * it into the save area 4579 */ 4580 save_fpu_regs(); 4581 vcpu->run->s.regs.fpc = current->thread.fpu.fpc; 4582 save_access_regs(vcpu->run->s.regs.acrs); 4583 4584 return kvm_s390_store_status_unloaded(vcpu, addr); 4585 } 4586 4587 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu) 4588 { 4589 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu); 4590 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu); 4591 } 4592 4593 static void __disable_ibs_on_all_vcpus(struct kvm *kvm) 4594 { 4595 unsigned long i; 4596 struct kvm_vcpu *vcpu; 4597 4598 kvm_for_each_vcpu(i, vcpu, kvm) { 4599 __disable_ibs_on_vcpu(vcpu); 4600 } 4601 } 4602 4603 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu) 4604 { 4605 if (!sclp.has_ibs) 4606 return; 4607 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu); 4608 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu); 4609 } 4610 4611 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu) 4612 { 4613 int i, online_vcpus, r = 0, started_vcpus = 0; 4614 4615 if (!is_vcpu_stopped(vcpu)) 4616 return 0; 4617 4618 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1); 4619 /* Only one cpu at a time may enter/leave the STOPPED state. */ 4620 spin_lock(&vcpu->kvm->arch.start_stop_lock); 4621 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); 4622 4623 /* Let's tell the UV that we want to change into the operating state */ 4624 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4625 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR); 4626 if (r) { 4627 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 4628 return r; 4629 } 4630 } 4631 4632 for (i = 0; i < online_vcpus; i++) { 4633 if (!is_vcpu_stopped(kvm_get_vcpu(vcpu->kvm, i))) 4634 started_vcpus++; 4635 } 4636 4637 if (started_vcpus == 0) { 4638 /* we're the only active VCPU -> speed it up */ 4639 __enable_ibs_on_vcpu(vcpu); 4640 } else if (started_vcpus == 1) { 4641 /* 4642 * As we are starting a second VCPU, we have to disable 4643 * the IBS facility on all VCPUs to remove potentially 4644 * outstanding ENABLE requests. 4645 */ 4646 __disable_ibs_on_all_vcpus(vcpu->kvm); 4647 } 4648 4649 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED); 4650 /* 4651 * The real PSW might have changed due to a RESTART interpreted by the 4652 * ultravisor. We block all interrupts and let the next sie exit 4653 * refresh our view. 4654 */ 4655 if (kvm_s390_pv_cpu_is_protected(vcpu)) 4656 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK; 4657 /* 4658 * Another VCPU might have used IBS while we were offline. 4659 * Let's play safe and flush the VCPU at startup. 4660 */ 4661 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 4662 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 4663 return 0; 4664 } 4665 4666 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu) 4667 { 4668 int i, online_vcpus, r = 0, started_vcpus = 0; 4669 struct kvm_vcpu *started_vcpu = NULL; 4670 4671 if (is_vcpu_stopped(vcpu)) 4672 return 0; 4673 4674 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0); 4675 /* Only one cpu at a time may enter/leave the STOPPED state. 
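	 * (serialized by kvm->arch.start_stop_lock, taken right below)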
*/ 4676 spin_lock(&vcpu->kvm->arch.start_stop_lock); 4677 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); 4678 4679 /* Let's tell the UV that we want to change into the stopped state */ 4680 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4681 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP); 4682 if (r) { 4683 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 4684 return r; 4685 } 4686 } 4687 4688 /* 4689 * Set the VCPU to STOPPED and THEN clear the interrupt flag, 4690 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders 4691 * have been fully processed. This will ensure that the VCPU 4692 * is kept BUSY if another VCPU is inquiring with SIGP SENSE. 4693 */ 4694 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED); 4695 kvm_s390_clear_stop_irq(vcpu); 4696 4697 __disable_ibs_on_vcpu(vcpu); 4698 4699 for (i = 0; i < online_vcpus; i++) { 4700 struct kvm_vcpu *tmp = kvm_get_vcpu(vcpu->kvm, i); 4701 4702 if (!is_vcpu_stopped(tmp)) { 4703 started_vcpus++; 4704 started_vcpu = tmp; 4705 } 4706 } 4707 4708 if (started_vcpus == 1) { 4709 /* 4710 * As we only have one VCPU left, we want to enable the 4711 * IBS facility for that VCPU to speed it up. 4712 */ 4713 __enable_ibs_on_vcpu(started_vcpu); 4714 } 4715 4716 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 4717 return 0; 4718 } 4719 4720 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, 4721 struct kvm_enable_cap *cap) 4722 { 4723 int r; 4724 4725 if (cap->flags) 4726 return -EINVAL; 4727 4728 switch (cap->cap) { 4729 case KVM_CAP_S390_CSS_SUPPORT: 4730 if (!vcpu->kvm->arch.css_support) { 4731 vcpu->kvm->arch.css_support = 1; 4732 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support"); 4733 trace_kvm_s390_enable_css(vcpu->kvm); 4734 } 4735 r = 0; 4736 break; 4737 default: 4738 r = -EINVAL; 4739 break; 4740 } 4741 return r; 4742 } 4743 4744 static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu, 4745 struct kvm_s390_mem_op *mop) 4746 { 4747 void __user *uaddr = (void __user *)mop->buf; 4748 int r = 0; 4749 4750 if (mop->flags || !mop->size) 4751 return -EINVAL; 4752 if (mop->size + mop->sida_offset < mop->size) 4753 return -EINVAL; 4754 if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block)) 4755 return -E2BIG; 4756 if (!kvm_s390_pv_cpu_is_protected(vcpu)) 4757 return -EINVAL; 4758 4759 switch (mop->op) { 4760 case KVM_S390_MEMOP_SIDA_READ: 4761 if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) + 4762 mop->sida_offset), mop->size)) 4763 r = -EFAULT; 4764 4765 break; 4766 case KVM_S390_MEMOP_SIDA_WRITE: 4767 if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) + 4768 mop->sida_offset), uaddr, mop->size)) 4769 r = -EFAULT; 4770 break; 4771 } 4772 return r; 4773 } 4774 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, 4775 struct kvm_s390_mem_op *mop) 4776 { 4777 void __user *uaddr = (void __user *)mop->buf; 4778 void *tmpbuf = NULL; 4779 int r = 0; 4780 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION 4781 | KVM_S390_MEMOP_F_CHECK_ONLY 4782 | KVM_S390_MEMOP_F_SKEY_PROTECTION; 4783 4784 if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size) 4785 return -EINVAL; 4786 if (mop->size > MEM_OP_MAX_SIZE) 4787 return -E2BIG; 4788 if (kvm_s390_pv_cpu_is_protected(vcpu)) 4789 return -EINVAL; 4790 if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) { 4791 if (access_key_invalid(mop->key)) 4792 return -EINVAL; 4793 } else { 4794 mop->key = 0; 4795 } 4796 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) { 4797 tmpbuf = vmalloc(mop->size); 4798 if (!tmpbuf) 4799 return 
-ENOMEM; 4800 } 4801 4802 switch (mop->op) { 4803 case KVM_S390_MEMOP_LOGICAL_READ: 4804 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { 4805 r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, 4806 GACC_FETCH, mop->key); 4807 break; 4808 } 4809 r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf, 4810 mop->size, mop->key); 4811 if (r == 0) { 4812 if (copy_to_user(uaddr, tmpbuf, mop->size)) 4813 r = -EFAULT; 4814 } 4815 break; 4816 case KVM_S390_MEMOP_LOGICAL_WRITE: 4817 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { 4818 r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, 4819 GACC_STORE, mop->key); 4820 break; 4821 } 4822 if (copy_from_user(tmpbuf, uaddr, mop->size)) { 4823 r = -EFAULT; 4824 break; 4825 } 4826 r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf, 4827 mop->size, mop->key); 4828 break; 4829 } 4830 4831 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0) 4832 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); 4833 4834 vfree(tmpbuf); 4835 return r; 4836 } 4837 4838 static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu, 4839 struct kvm_s390_mem_op *mop) 4840 { 4841 int r, srcu_idx; 4842 4843 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 4844 4845 switch (mop->op) { 4846 case KVM_S390_MEMOP_LOGICAL_READ: 4847 case KVM_S390_MEMOP_LOGICAL_WRITE: 4848 r = kvm_s390_guest_mem_op(vcpu, mop); 4849 break; 4850 case KVM_S390_MEMOP_SIDA_READ: 4851 case KVM_S390_MEMOP_SIDA_WRITE: 4852 /* we are locked against sida going away by the vcpu->mutex */ 4853 r = kvm_s390_guest_sida_op(vcpu, mop); 4854 break; 4855 default: 4856 r = -EINVAL; 4857 } 4858 4859 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); 4860 return r; 4861 } 4862 4863 long kvm_arch_vcpu_async_ioctl(struct file *filp, 4864 unsigned int ioctl, unsigned long arg) 4865 { 4866 struct kvm_vcpu *vcpu = filp->private_data; 4867 void __user *argp = (void __user *)arg; 4868 4869 switch (ioctl) { 4870 case KVM_S390_IRQ: { 4871 struct kvm_s390_irq s390irq; 4872 4873 if (copy_from_user(&s390irq, argp, sizeof(s390irq))) 4874 return -EFAULT; 4875 return kvm_s390_inject_vcpu(vcpu, &s390irq); 4876 } 4877 case KVM_S390_INTERRUPT: { 4878 struct kvm_s390_interrupt s390int; 4879 struct kvm_s390_irq s390irq = {}; 4880 4881 if (copy_from_user(&s390int, argp, sizeof(s390int))) 4882 return -EFAULT; 4883 if (s390int_to_s390irq(&s390int, &s390irq)) 4884 return -EINVAL; 4885 return kvm_s390_inject_vcpu(vcpu, &s390irq); 4886 } 4887 } 4888 return -ENOIOCTLCMD; 4889 } 4890 4891 long kvm_arch_vcpu_ioctl(struct file *filp, 4892 unsigned int ioctl, unsigned long arg) 4893 { 4894 struct kvm_vcpu *vcpu = filp->private_data; 4895 void __user *argp = (void __user *)arg; 4896 int idx; 4897 long r; 4898 u16 rc, rrc; 4899 4900 vcpu_load(vcpu); 4901 4902 switch (ioctl) { 4903 case KVM_S390_STORE_STATUS: 4904 idx = srcu_read_lock(&vcpu->kvm->srcu); 4905 r = kvm_s390_store_status_unloaded(vcpu, arg); 4906 srcu_read_unlock(&vcpu->kvm->srcu, idx); 4907 break; 4908 case KVM_S390_SET_INITIAL_PSW: { 4909 psw_t psw; 4910 4911 r = -EFAULT; 4912 if (copy_from_user(&psw, argp, sizeof(psw))) 4913 break; 4914 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw); 4915 break; 4916 } 4917 case KVM_S390_CLEAR_RESET: 4918 r = 0; 4919 kvm_arch_vcpu_ioctl_clear_reset(vcpu); 4920 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4921 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), 4922 UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc); 4923 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x", 4924 rc, rrc); 4925 } 4926 break; 4927 case 
KVM_S390_INITIAL_RESET:
		r = 0;
		kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET_INITIAL,
					  &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_S390_NORMAL_RESET:
		r = 0;
		kvm_arch_vcpu_ioctl_normal_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET, &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;
		r = -EINVAL;
		if (kvm_s390_pv_cpu_is_protected(vcpu))
			break;
		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}

	vcpu_put(vcpu);
	return r;
}

vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef 
CONFIG_KVM_S390_UCONTROL 5066 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET) 5067 && (kvm_is_ucontrol(vcpu->kvm))) { 5068 vmf->page = virt_to_page(vcpu->arch.sie_block); 5069 get_page(vmf->page); 5070 return 0; 5071 } 5072 #endif 5073 return VM_FAULT_SIGBUS; 5074 } 5075 5076 /* Section: memory related */ 5077 int kvm_arch_prepare_memory_region(struct kvm *kvm, 5078 const struct kvm_memory_slot *old, 5079 struct kvm_memory_slot *new, 5080 enum kvm_mr_change change) 5081 { 5082 gpa_t size; 5083 5084 /* When we are protected, we should not change the memory slots */ 5085 if (kvm_s390_pv_get_handle(kvm)) 5086 return -EINVAL; 5087 5088 if (change == KVM_MR_DELETE || change == KVM_MR_FLAGS_ONLY) 5089 return 0; 5090 5091 /* A few sanity checks. We can have memory slots which have to be 5092 located/ended at a segment boundary (1MB). The memory in userland is 5093 ok to be fragmented into various different vmas. It is okay to mmap() 5094 and munmap() stuff in this slot after doing this call at any time */ 5095 5096 if (new->userspace_addr & 0xffffful) 5097 return -EINVAL; 5098 5099 size = new->npages * PAGE_SIZE; 5100 if (size & 0xffffful) 5101 return -EINVAL; 5102 5103 if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit) 5104 return -EINVAL; 5105 5106 return 0; 5107 } 5108 5109 void kvm_arch_commit_memory_region(struct kvm *kvm, 5110 struct kvm_memory_slot *old, 5111 const struct kvm_memory_slot *new, 5112 enum kvm_mr_change change) 5113 { 5114 int rc = 0; 5115 5116 switch (change) { 5117 case KVM_MR_DELETE: 5118 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE, 5119 old->npages * PAGE_SIZE); 5120 break; 5121 case KVM_MR_MOVE: 5122 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE, 5123 old->npages * PAGE_SIZE); 5124 if (rc) 5125 break; 5126 fallthrough; 5127 case KVM_MR_CREATE: 5128 rc = gmap_map_segment(kvm->arch.gmap, new->userspace_addr, 5129 new->base_gfn * PAGE_SIZE, 5130 new->npages * PAGE_SIZE); 5131 break; 5132 case KVM_MR_FLAGS_ONLY: 5133 break; 5134 default: 5135 WARN(1, "Unknown KVM MR CHANGE: %d\n", change); 5136 } 5137 if (rc) 5138 pr_warn("failed to commit memory region\n"); 5139 return; 5140 } 5141 5142 static inline unsigned long nonhyp_mask(int i) 5143 { 5144 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30; 5145 5146 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4); 5147 } 5148 5149 static int __init kvm_s390_init(void) 5150 { 5151 int i; 5152 5153 if (!sclp.has_sief2) { 5154 pr_info("SIE is not available\n"); 5155 return -ENODEV; 5156 } 5157 5158 if (nested && hpage) { 5159 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n"); 5160 return -EINVAL; 5161 } 5162 5163 for (i = 0; i < 16; i++) 5164 kvm_s390_fac_base[i] |= 5165 stfle_fac_list[i] & nonhyp_mask(i); 5166 5167 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); 5168 } 5169 5170 static void __exit kvm_s390_exit(void) 5171 { 5172 kvm_exit(); 5173 } 5174 5175 module_init(kvm_s390_init); 5176 module_exit(kvm_s390_exit); 5177 5178 /* 5179 * Enable autoloading of the kvm module. 5180 * Note that we add the module alias here instead of virt/kvm/kvm_main.c 5181 * since x86 takes a different approach. 5182 */ 5183 #include <linux/miscdevice.h> 5184 MODULE_ALIAS_MISCDEV(KVM_MINOR); 5185 MODULE_ALIAS("devname:kvm"); 5186
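
/*
 * Illustrative userspace sketch (not part of the kernel build) of the
 * KVM_S390_MEM_OP vcpu ioctl handled above. "vcpu_fd" and "buf" are
 * assumptions of the example, not identifiers from this file:
 *
 *	__u8 buf[512];
 *	struct kvm_s390_mem_op op = {
 *		.gaddr	= 0x1000,
 *		.size	= sizeof(buf),
 *		.op	= KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf	= (__u64)(unsigned long)buf,
 *		.ar	= 0,
 *	};
 *
 *	if (ioctl(vcpu_fd, KVM_S390_MEM_OP, &op) < 0)
 *		perror("KVM_S390_MEM_OP");
 *
 * Transfers larger than MEM_OP_MAX_SIZE are rejected with -E2BIG by
 * kvm_s390_guest_mem_op() above.
 */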