// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2020
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>
#include <linux/pgtable.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include <asm/ap.h>
#include <asm/uv.h>
#include <asm/fpu/api.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
	KVM_GENERIC_VM_STATS(),
	STATS_DESC_COUNTER(VM, inject_io),
	STATS_DESC_COUNTER(VM, inject_float_mchk),
	STATS_DESC_COUNTER(VM, inject_pfault_done),
	STATS_DESC_COUNTER(VM, inject_service_signal),
	STATS_DESC_COUNTER(VM, inject_virtio)
};

const struct kvm_stats_header kvm_vm_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vm_stats_desc),
};

const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
	KVM_GENERIC_VCPU_STATS(),
	STATS_DESC_COUNTER(VCPU, exit_userspace),
	STATS_DESC_COUNTER(VCPU, exit_null),
	STATS_DESC_COUNTER(VCPU, exit_external_request),
	STATS_DESC_COUNTER(VCPU, exit_io_request),
	STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
	STATS_DESC_COUNTER(VCPU, exit_stop_request),
	STATS_DESC_COUNTER(VCPU, exit_validity),
	STATS_DESC_COUNTER(VCPU, exit_instruction),
	STATS_DESC_COUNTER(VCPU, exit_pei),
	STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
	STATS_DESC_COUNTER(VCPU, instruction_lctl),
	STATS_DESC_COUNTER(VCPU, instruction_lctlg),
	STATS_DESC_COUNTER(VCPU, instruction_stctl),
	STATS_DESC_COUNTER(VCPU, instruction_stctg),
	STATS_DESC_COUNTER(VCPU, exit_program_interruption),
	STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
	STATS_DESC_COUNTER(VCPU, exit_operation_exception),
	STATS_DESC_COUNTER(VCPU, deliver_ckc),
	STATS_DESC_COUNTER(VCPU, deliver_cputm),
	STATS_DESC_COUNTER(VCPU, deliver_external_call),
	STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
	STATS_DESC_COUNTER(VCPU, deliver_service_signal),
	STATS_DESC_COUNTER(VCPU, deliver_virtio),
	STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
	STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
	STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
	STATS_DESC_COUNTER(VCPU, deliver_program),
	STATS_DESC_COUNTER(VCPU, deliver_io),
	STATS_DESC_COUNTER(VCPU, deliver_machine_check),
	STATS_DESC_COUNTER(VCPU, exit_wait_state),
	STATS_DESC_COUNTER(VCPU, inject_ckc),
	STATS_DESC_COUNTER(VCPU, inject_cputm),
	STATS_DESC_COUNTER(VCPU, inject_external_call),
	STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
	STATS_DESC_COUNTER(VCPU, inject_mchk),
	STATS_DESC_COUNTER(VCPU, inject_pfault_init),
	STATS_DESC_COUNTER(VCPU, inject_program),
	STATS_DESC_COUNTER(VCPU, inject_restart),
	STATS_DESC_COUNTER(VCPU, inject_set_prefix),
	STATS_DESC_COUNTER(VCPU, inject_stop_signal),
	STATS_DESC_COUNTER(VCPU, instruction_epsw),
	STATS_DESC_COUNTER(VCPU, instruction_gs),
	STATS_DESC_COUNTER(VCPU, instruction_io_other),
	STATS_DESC_COUNTER(VCPU, instruction_lpsw),
	STATS_DESC_COUNTER(VCPU, instruction_lpswe),
	STATS_DESC_COUNTER(VCPU, instruction_pfmf),
	STATS_DESC_COUNTER(VCPU, instruction_ptff),
	STATS_DESC_COUNTER(VCPU, instruction_sck),
	STATS_DESC_COUNTER(VCPU, instruction_sckpf),
	STATS_DESC_COUNTER(VCPU, instruction_stidp),
	STATS_DESC_COUNTER(VCPU, instruction_spx),
	STATS_DESC_COUNTER(VCPU, instruction_stpx),
	STATS_DESC_COUNTER(VCPU, instruction_stap),
	STATS_DESC_COUNTER(VCPU, instruction_iske),
	STATS_DESC_COUNTER(VCPU, instruction_ri),
	STATS_DESC_COUNTER(VCPU, instruction_rrbe),
	STATS_DESC_COUNTER(VCPU, instruction_sske),
	STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
	STATS_DESC_COUNTER(VCPU, instruction_stsi),
	STATS_DESC_COUNTER(VCPU, instruction_stfl),
	STATS_DESC_COUNTER(VCPU, instruction_tb),
	STATS_DESC_COUNTER(VCPU, instruction_tpi),
	STATS_DESC_COUNTER(VCPU, instruction_tprot),
	STATS_DESC_COUNTER(VCPU, instruction_tsch),
	STATS_DESC_COUNTER(VCPU, instruction_sie),
	STATS_DESC_COUNTER(VCPU, instruction_essa),
	STATS_DESC_COUNTER(VCPU, instruction_sthyi),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
	STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
	STATS_DESC_COUNTER(VCPU, diag_9c_forward),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
	STATS_DESC_COUNTER(VCPU, pfault_sync)
};

const struct kvm_stats_header kvm_vcpu_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vcpu_stats_desc),
};

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* allow 1m huge page guest backing, if !nested */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");

/* maximum percentage of steal time for polling. >100 is treated like 100 */
static u8 halt_poll_max_steal = 10;
module_param(halt_poll_max_steal, byte, 0644);
MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");

/* if set to true, the GISA will be initialized and used if available */
static bool use_gisa = true;
module_param(use_gisa, bool, 0644);
MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");

/* maximum diag9c forwarding per second */
unsigned int diag9c_forwarding_hz;
module_param(diag9c_forwarding_hz, uint, 0644);
MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
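
/*
 * Illustrative note, not part of the original file: assuming the usual
 * configuration where KVM is built as the "kvm" module on s390, the
 * parameters above would typically be set at load time, e.g.
 * "modprobe kvm nested=1 hpage=1", or, for the 0644 ones, adjusted at
 * runtime via /sys/module/kvm/parameters/. Exact module name and paths
 * depend on the kernel configuration.
 */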

/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, this requires changes to code, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };

static unsigned long kvm_s390_fac_size(void)
{
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
		     sizeof(stfle_fac_list));

	return SIZE_INTERNAL;
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
debug_info_t *kvm_s390_dbf_uv;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

int kvm_arch_check_processor_compat(void *opaque)
{
	return 0;
}

/* forward declarations */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
static int sca_switch_to_extended(struct kvm *kvm);

static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
	u8 delta_idx = 0;

	/*
	 * The TOD jumps by delta, we have to compensate this by adding
	 * -delta to the epoch.
	 */
	delta = -delta;

	/* sign-extension - we're adding to signed values below */
	if ((s64)delta < 0)
		delta_idx = -1;

	scb->epoch += delta;
	if (scb->ecd & ECD_MEF) {
		scb->epdx += delta_idx;
		if (scb->epoch < delta)
			scb->epdx += 1;
	}
}

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	unsigned long i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm_for_each_vcpu(i, vcpu, kvm) {
			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
			if (i == 0) {
				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
			}
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				kvm_clock_sync_scb(vcpu->arch.vsie_block,
						   *delta);
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void *opaque)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

static inline int plo_test_bit(unsigned char nr)
{
	unsigned long function = (unsigned long)nr | 0x100;
	int cc;

	asm volatile(
		"	lgr	0,%[function]\n"
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: [function] "d" (function)
		: "cc", "0");
	return cc == 0;
}

static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
{
	asm volatile(
		"	lghi	0,0\n"
		"	lgr	1,%[query]\n"
		/* Parameter registers are ignored */
		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
		:
		: [query] "d" ((unsigned long)query), [opc] "i" (opcode)
		: "cc", "memory", "0", "1");
}

#define INSN_SORTL 0xb938
#define INSN_DFLTCC 0xb939
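
/*
 * Descriptive comment added for clarity: probe which optional CPU features
 * and PLO/CPACF/SORTL/DFLTCC subfunctions the host provides, so they can
 * later be offered to guests through the cpu model interface. The nested
 * (vSIE) related features are only announced when the "nested" module
 * parameter is set and the required SIE facilities are present.
 */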

static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);

	if (test_facility(155)) /* MSA9 */
		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kdsa);

	if (test_facility(150)) /* SORTL */
		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);

	if (test_facility(151)) /* DFLTCC */
		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}

int kvm_arch_init(void *opaque)
{
	int rc = -ENOMEM;

	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf_uv)
		goto out;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
		goto out;

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
	if (rc) {
		pr_err("A FLIC registration call failed with rc=%d\n", rc);
		goto out;
	}

	rc = kvm_s390_gib_init(GAL_ISC);
	if (rc)
		goto out;

	return 0;

out:
	kvm_arch_exit();
	return rc;
}

void kvm_arch_exit(void)
{
	kvm_s390_gib_destroy();
	debug_unregister(kvm_s390_dbf);
	debug_unregister(kvm_s390_dbf_uv);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}
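
/*
 * Illustrative sketch (user-space side, not part of this file): the per-VM
 * capabilities reported by kvm_vm_ioctl_check_extension() below are usually
 * queried with the generic KVM_CHECK_EXTENSION ioctl, e.g.
 *
 *	int has_sigp = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_USER_SIGP);
 *
 * where vm_fd is a VM file descriptor obtained via KVM_CREATE_VM.
 */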

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
	case KVM_CAP_S390_AIS_MIGRATION:
	case KVM_CAP_S390_VCPU_RESETS:
	case KVM_CAP_SET_GUEST_DEBUG:
	case KVM_CAP_S390_DIAG318:
	case KVM_CAP_S390_MEM_OP_EXTENSION:
		r = 1;
		break;
	case KVM_CAP_SET_GUEST_DEBUG2:
		r = KVM_GUESTDBG_VALID_MASK;
		break;
	case KVM_CAP_S390_HPAGE_1M:
		r = 0;
		if (hpage && !kvm_is_ucontrol(kvm))
			r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
	case KVM_CAP_MAX_VCPU_ID:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		if (ext == KVM_CAP_NR_VCPUS)
			r = min_t(unsigned int, num_online_cpus(), r);
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	case KVM_CAP_S390_BPB:
		r = test_facility(82);
		break;
	case KVM_CAP_S390_PROTECTED:
		r = is_prot_virt_host();
		break;
	default:
		r = 0;
	}
	return r;
}
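
/*
 * Descriptive comment added for clarity: walk the memslot in 1 MB (segment)
 * steps and transfer the dirty bits collected in the gmap/PGSTEs into the
 * generic KVM dirty bitmap.
 */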
void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
	int i;
	gfn_t cur_gfn, last_gfn;
	unsigned long gaddr, vmaddr;
	struct gmap *gmap = kvm->arch.gmap;
	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);

	/* Loop over all guest segments */
	cur_gfn = memslot->base_gfn;
	last_gfn = memslot->base_gfn + memslot->npages;
	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
		gaddr = gfn_to_gpa(cur_gfn);
		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
		if (kvm_is_error_hva(vmaddr))
			continue;

		bitmap_zero(bitmap, _PAGE_ENTRIES);
		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
		for (i = 0; i < _PAGE_ENTRIES; i++) {
			if (test_bit(i, bitmap))
				mark_page_dirty(kvm, cur_gfn + i);
		}

		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memory_slot *memslot;
	int is_dirty;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}

static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned long i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}
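
/*
 * Illustrative sketch (user-space side, not part of this file): the VM
 * capabilities handled by kvm_vm_ioctl_enable_cap() below are switched on
 * with the generic KVM_ENABLE_CAP ioctl on the VM fd, e.g.
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *
 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 *
 * Most of these capabilities must be enabled before the first VCPU is
 * created, as the -EBUSY checks below show.
 */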
"(not available)" : "(success)"); 771 break; 772 case KVM_CAP_S390_GS: 773 r = -EINVAL; 774 mutex_lock(&kvm->lock); 775 if (kvm->created_vcpus) { 776 r = -EBUSY; 777 } else if (test_facility(133)) { 778 set_kvm_facility(kvm->arch.model.fac_mask, 133); 779 set_kvm_facility(kvm->arch.model.fac_list, 133); 780 r = 0; 781 } 782 mutex_unlock(&kvm->lock); 783 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s", 784 r ? "(not available)" : "(success)"); 785 break; 786 case KVM_CAP_S390_HPAGE_1M: 787 mutex_lock(&kvm->lock); 788 if (kvm->created_vcpus) 789 r = -EBUSY; 790 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm)) 791 r = -EINVAL; 792 else { 793 r = 0; 794 mmap_write_lock(kvm->mm); 795 kvm->mm->context.allow_gmap_hpage_1m = 1; 796 mmap_write_unlock(kvm->mm); 797 /* 798 * We might have to create fake 4k page 799 * tables. To avoid that the hardware works on 800 * stale PGSTEs, we emulate these instructions. 801 */ 802 kvm->arch.use_skf = 0; 803 kvm->arch.use_pfmfi = 0; 804 } 805 mutex_unlock(&kvm->lock); 806 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s", 807 r ? "(not available)" : "(success)"); 808 break; 809 case KVM_CAP_S390_USER_STSI: 810 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI"); 811 kvm->arch.user_stsi = 1; 812 r = 0; 813 break; 814 case KVM_CAP_S390_USER_INSTR0: 815 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0"); 816 kvm->arch.user_instr0 = 1; 817 icpt_operexc_on_all_vcpus(kvm); 818 r = 0; 819 break; 820 default: 821 r = -EINVAL; 822 break; 823 } 824 return r; 825 } 826 827 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) 828 { 829 int ret; 830 831 switch (attr->attr) { 832 case KVM_S390_VM_MEM_LIMIT_SIZE: 833 ret = 0; 834 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes", 835 kvm->arch.mem_limit); 836 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr)) 837 ret = -EFAULT; 838 break; 839 default: 840 ret = -ENXIO; 841 break; 842 } 843 return ret; 844 } 845 846 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) 847 { 848 int ret; 849 unsigned int idx; 850 switch (attr->attr) { 851 case KVM_S390_VM_MEM_ENABLE_CMMA: 852 ret = -ENXIO; 853 if (!sclp.has_cmma) 854 break; 855 856 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support"); 857 mutex_lock(&kvm->lock); 858 if (kvm->created_vcpus) 859 ret = -EBUSY; 860 else if (kvm->mm->context.allow_gmap_hpage_1m) 861 ret = -EINVAL; 862 else { 863 kvm->arch.use_cmma = 1; 864 /* Not compatible with cmma. 

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;
	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			ret = -EBUSY;
		else if (kvm->mm->context.allow_gmap_hpage_1m)
			ret = -EINVAL;
		else {
			kvm->arch.use_cmma = 1;
			/* Not compatible with cmma. */
			kvm->arch.use_pfmfi = 0;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
{
	struct kvm_vcpu *vcpu;
	unsigned long i;

	kvm_s390_vcpu_block_all(kvm);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		/* recreate the shadow crycb by leaving the VSIE handler */
		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
	}

	kvm_s390_vcpu_unblock_all(kvm);
}
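
/*
 * Descriptive comment added for clarity: handle the KVM_S390_VM_CRYPTO
 * attribute group - toggle the AES/DEA wrapping key masks (generating fresh
 * random wrapping keys when enabling) and the APIE setting, then force all
 * VCPUs to rebuild their crypto control block state via
 * kvm_s390_vcpu_crypto_reset_all().
 */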

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
		       sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
		       sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 1;
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 0;
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_s390_vcpu_crypto_reset_all(kvm);
	mutex_unlock(&kvm->lock);
	return 0;
}

static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
	unsigned long cx;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}

/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_memory_slot *ms;
	struct kvm_memslots *slots;
	unsigned long ram_pages = 0;
	int bkt;

	/* migration mode already enabled */
	if (kvm->arch.migration_mode)
		return 0;
	slots = kvm_memslots(kvm);
	if (!slots || kvm_memslots_empty(slots))
		return -EINVAL;

	if (!kvm->arch.use_cmma) {
		kvm->arch.migration_mode = 1;
		return 0;
	}
	/* mark all the pages in active slots as dirty */
	kvm_for_each_memslot(ms, bkt, slots) {
		if (!ms->dirty_bitmap)
			return -EINVAL;
		/*
		 * The second half of the bitmap is only used on x86,
		 * and would be wasted otherwise, so we put it to good
		 * use here to keep track of the state of the storage
		 * attributes.
		 */
		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
		ram_pages += ms->npages;
	}
	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
	kvm->arch.migration_mode = 1;
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	return 0;
}

/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	/* migration mode already disabled */
	if (!kvm->arch.migration_mode)
		return 0;
	kvm->arch.migration_mode = 0;
	if (kvm->arch.use_cmma)
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
	return 0;
}
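
/*
 * Illustrative sketch (user-space side, not part of this file): migration
 * mode is toggled through the KVM_S390_VM_MIGRATION attribute group, e.g.
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MIGRATION,
 *		.attr  = KVM_S390_VM_MIGRATION_START,
 *	};
 *
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 *
 * KVM_S390_VM_MIGRATION_STATUS can be read back the same way via
 * KVM_GET_DEVICE_ATTR with .addr pointing to a __u64.
 */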

static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int res = -ENXIO;

	mutex_lock(&kvm->slots_lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		res = kvm_s390_vm_start_migration(kvm);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->slots_lock);

	return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = kvm->arch.migration_mode;

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
		return -EINVAL;
	kvm_s390_set_tod_clock(kvm, &gtod);

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);

	return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod = { 0 };

	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
			   sizeof(gtod.tod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, &gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
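
/*
 * Descriptive comment added for clarity: compute the guest view of the TOD
 * clock under preemption protection - guest TOD = host TOD + epoch, carrying
 * an overflow into the epoch index when the multiple-epoch facility (139)
 * is available.
 */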

static void kvm_s390_get_tod_clock(struct kvm *kvm,
				   struct kvm_s390_vm_tod_clock *gtod)
{
	union tod_clock clk;

	preempt_disable();

	store_tod_clock_ext(&clk);

	gtod->tod = clk.tod + kvm->arch.epoch;
	gtod->epoch_idx = 0;
	if (test_kvm_facility(kvm, 139)) {
		gtod->epoch_idx = clk.ei + kvm->arch.epdx;
		if (gtod->tod < clk.tod)
			gtod->epoch_idx += 1;
	}

	preempt_enable();
}

static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));
	kvm_s390_get_tod_clock(kvm, &gtod);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);
	return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}
	bitmap_from_arr64(kvm->arch.cpu_feat, data.feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
	mutex_unlock(&kvm->lock);
	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}

	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
		mutex_unlock(&kvm->lock);
		return -EFAULT;
	}
	mutex_unlock(&kvm->lock);

	VM_EVENT(kvm, 3, "SET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
	VM_EVENT(kvm, 3, "SET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
	VM_EVENT(kvm, 3, "SET: guest KM subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
	VM_EVENT(kvm, 3, "SET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
	VM_EVENT(kvm, 3, "SET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
	VM_EVENT(kvm, 3, "SET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
	VM_EVENT(kvm, 3, "SET: guest PCC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
	VM_EVENT(kvm, 3, "SET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
	VM_EVENT(kvm, 3, "SET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
	VM_EVENT(kvm, 3, "SET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);

	return 0;
}
0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1408 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0], 1409 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1], 1410 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2], 1411 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]); 1412 VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1413 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0], 1414 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1], 1415 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2], 1416 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]); 1417 1418 return 0; 1419 } 1420 1421 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) 1422 { 1423 int ret = -ENXIO; 1424 1425 switch (attr->attr) { 1426 case KVM_S390_VM_CPU_PROCESSOR: 1427 ret = kvm_s390_set_processor(kvm, attr); 1428 break; 1429 case KVM_S390_VM_CPU_PROCESSOR_FEAT: 1430 ret = kvm_s390_set_processor_feat(kvm, attr); 1431 break; 1432 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: 1433 ret = kvm_s390_set_processor_subfunc(kvm, attr); 1434 break; 1435 } 1436 return ret; 1437 } 1438 1439 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr) 1440 { 1441 struct kvm_s390_vm_cpu_processor *proc; 1442 int ret = 0; 1443 1444 proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT); 1445 if (!proc) { 1446 ret = -ENOMEM; 1447 goto out; 1448 } 1449 proc->cpuid = kvm->arch.model.cpuid; 1450 proc->ibc = kvm->arch.model.ibc; 1451 memcpy(&proc->fac_list, kvm->arch.model.fac_list, 1452 S390_ARCH_FAC_LIST_SIZE_BYTE); 1453 VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx", 1454 kvm->arch.model.ibc, 1455 kvm->arch.model.cpuid); 1456 VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx", 1457 kvm->arch.model.fac_list[0], 1458 kvm->arch.model.fac_list[1], 1459 kvm->arch.model.fac_list[2]); 1460 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc))) 1461 ret = -EFAULT; 1462 kfree(proc); 1463 out: 1464 return ret; 1465 } 1466 1467 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr) 1468 { 1469 struct kvm_s390_vm_cpu_machine *mach; 1470 int ret = 0; 1471 1472 mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT); 1473 if (!mach) { 1474 ret = -ENOMEM; 1475 goto out; 1476 } 1477 get_cpu_id((struct cpuid *) &mach->cpuid); 1478 mach->ibc = sclp.ibc; 1479 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask, 1480 S390_ARCH_FAC_LIST_SIZE_BYTE); 1481 memcpy((unsigned long *)&mach->fac_list, stfle_fac_list, 1482 sizeof(stfle_fac_list)); 1483 VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx", 1484 kvm->arch.model.ibc, 1485 kvm->arch.model.cpuid); 1486 VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx", 1487 mach->fac_mask[0], 1488 mach->fac_mask[1], 1489 mach->fac_mask[2]); 1490 VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx", 1491 mach->fac_list[0], 1492 mach->fac_list[1], 1493 mach->fac_list[2]); 1494 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach))) 1495 ret = -EFAULT; 1496 kfree(mach); 1497 out: 1498 return ret; 1499 } 1500 1501 static int kvm_s390_get_processor_feat(struct kvm *kvm, 1502 struct kvm_device_attr *attr) 1503 { 1504 struct kvm_s390_vm_cpu_feat data; 1505 1506 bitmap_to_arr64(data.feat, kvm->arch.cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS); 1507 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data))) 1508 return -EFAULT; 1509 VM_EVENT(kvm, 3, "GET: guest feat: 

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_to_arr64(data.feat, kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}

static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
			 sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "GET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
	VM_EVENT(kvm, 3, "GET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
	VM_EVENT(kvm, 3, "GET: guest KM subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
	VM_EVENT(kvm, 3, "GET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
	VM_EVENT(kvm, 3, "GET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
	VM_EVENT(kvm, 3, "GET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
	VM_EVENT(kvm, 3, "GET: guest PCC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
	VM_EVENT(kvm, 3, "GET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
	VM_EVENT(kvm, 3, "GET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
	VM_EVENT(kvm, 3, "GET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);

	return 0;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
			 sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "GET: host PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
	VM_EVENT(kvm, 3, "GET: host PTFF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
	VM_EVENT(kvm, 3, "GET: host KMAC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
	VM_EVENT(kvm, 3, "GET: host KMC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
	VM_EVENT(kvm, 3, "GET: host KM subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
	VM_EVENT(kvm, 3, "GET: host KIMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
	VM_EVENT(kvm, 3, "GET: host KLMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
	VM_EVENT(kvm, 3, "GET: host PCKMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
	VM_EVENT(kvm, 3, "GET: host KMCTR subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
	VM_EVENT(kvm, 3, "GET: host KMF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
	VM_EVENT(kvm, 3, "GET: host KMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
	VM_EVENT(kvm, 3, "GET: host PCC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
	VM_EVENT(kvm, 3, "GET: host PPNO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
	VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
	VM_EVENT(kvm, 3, "GET: host KDSA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
	VM_EVENT(kvm, 3, "GET: host SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
	VM_EVENT(kvm, 3, "GET: host DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);

	return 0;
}
0x%16.16lx.%16.16lx", 1642 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0], 1643 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]); 1644 VM_EVENT(kvm, 3, "GET: host PPNO subfunc 0x%16.16lx.%16.16lx", 1645 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0], 1646 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]); 1647 VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx", 1648 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0], 1649 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]); 1650 VM_EVENT(kvm, 3, "GET: host KDSA subfunc 0x%16.16lx.%16.16lx", 1651 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0], 1652 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]); 1653 VM_EVENT(kvm, 3, "GET: host SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1654 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0], 1655 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1], 1656 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2], 1657 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]); 1658 VM_EVENT(kvm, 3, "GET: host DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1659 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0], 1660 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1], 1661 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2], 1662 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]); 1663 1664 return 0; 1665 } 1666 1667 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) 1668 { 1669 int ret = -ENXIO; 1670 1671 switch (attr->attr) { 1672 case KVM_S390_VM_CPU_PROCESSOR: 1673 ret = kvm_s390_get_processor(kvm, attr); 1674 break; 1675 case KVM_S390_VM_CPU_MACHINE: 1676 ret = kvm_s390_get_machine(kvm, attr); 1677 break; 1678 case KVM_S390_VM_CPU_PROCESSOR_FEAT: 1679 ret = kvm_s390_get_processor_feat(kvm, attr); 1680 break; 1681 case KVM_S390_VM_CPU_MACHINE_FEAT: 1682 ret = kvm_s390_get_machine_feat(kvm, attr); 1683 break; 1684 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: 1685 ret = kvm_s390_get_processor_subfunc(kvm, attr); 1686 break; 1687 case KVM_S390_VM_CPU_MACHINE_SUBFUNC: 1688 ret = kvm_s390_get_machine_subfunc(kvm, attr); 1689 break; 1690 } 1691 return ret; 1692 } 1693 1694 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr) 1695 { 1696 int ret; 1697 1698 switch (attr->group) { 1699 case KVM_S390_VM_MEM_CTRL: 1700 ret = kvm_s390_set_mem_control(kvm, attr); 1701 break; 1702 case KVM_S390_VM_TOD: 1703 ret = kvm_s390_set_tod(kvm, attr); 1704 break; 1705 case KVM_S390_VM_CPU_MODEL: 1706 ret = kvm_s390_set_cpu_model(kvm, attr); 1707 break; 1708 case KVM_S390_VM_CRYPTO: 1709 ret = kvm_s390_vm_set_crypto(kvm, attr); 1710 break; 1711 case KVM_S390_VM_MIGRATION: 1712 ret = kvm_s390_vm_set_migration(kvm, attr); 1713 break; 1714 default: 1715 ret = -ENXIO; 1716 break; 1717 } 1718 1719 return ret; 1720 } 1721 1722 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr) 1723 { 1724 int ret; 1725 1726 switch (attr->group) { 1727 case KVM_S390_VM_MEM_CTRL: 1728 ret = kvm_s390_get_mem_control(kvm, attr); 1729 break; 1730 case KVM_S390_VM_TOD: 1731 ret = kvm_s390_get_tod(kvm, attr); 1732 break; 1733 case KVM_S390_VM_CPU_MODEL: 1734 ret = kvm_s390_get_cpu_model(kvm, attr); 1735 break; 1736 case KVM_S390_VM_MIGRATION: 1737 ret = kvm_s390_vm_get_migration(kvm, attr); 1738 break; 1739 default: 1740 ret = -ENXIO; 1741 break; 1742 } 1743 1744 return ret; 1745 } 1746 1747 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct 
kvm_device_attr *attr) 1748 { 1749 int ret; 1750 1751 switch (attr->group) { 1752 case KVM_S390_VM_MEM_CTRL: 1753 switch (attr->attr) { 1754 case KVM_S390_VM_MEM_ENABLE_CMMA: 1755 case KVM_S390_VM_MEM_CLR_CMMA: 1756 ret = sclp.has_cmma ? 0 : -ENXIO; 1757 break; 1758 case KVM_S390_VM_MEM_LIMIT_SIZE: 1759 ret = 0; 1760 break; 1761 default: 1762 ret = -ENXIO; 1763 break; 1764 } 1765 break; 1766 case KVM_S390_VM_TOD: 1767 switch (attr->attr) { 1768 case KVM_S390_VM_TOD_LOW: 1769 case KVM_S390_VM_TOD_HIGH: 1770 ret = 0; 1771 break; 1772 default: 1773 ret = -ENXIO; 1774 break; 1775 } 1776 break; 1777 case KVM_S390_VM_CPU_MODEL: 1778 switch (attr->attr) { 1779 case KVM_S390_VM_CPU_PROCESSOR: 1780 case KVM_S390_VM_CPU_MACHINE: 1781 case KVM_S390_VM_CPU_PROCESSOR_FEAT: 1782 case KVM_S390_VM_CPU_MACHINE_FEAT: 1783 case KVM_S390_VM_CPU_MACHINE_SUBFUNC: 1784 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: 1785 ret = 0; 1786 break; 1787 default: 1788 ret = -ENXIO; 1789 break; 1790 } 1791 break; 1792 case KVM_S390_VM_CRYPTO: 1793 switch (attr->attr) { 1794 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW: 1795 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW: 1796 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW: 1797 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW: 1798 ret = 0; 1799 break; 1800 case KVM_S390_VM_CRYPTO_ENABLE_APIE: 1801 case KVM_S390_VM_CRYPTO_DISABLE_APIE: 1802 ret = ap_instructions_available() ? 0 : -ENXIO; 1803 break; 1804 default: 1805 ret = -ENXIO; 1806 break; 1807 } 1808 break; 1809 case KVM_S390_VM_MIGRATION: 1810 ret = 0; 1811 break; 1812 default: 1813 ret = -ENXIO; 1814 break; 1815 } 1816 1817 return ret; 1818 } 1819 1820 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) 1821 { 1822 uint8_t *keys; 1823 uint64_t hva; 1824 int srcu_idx, i, r = 0; 1825 1826 if (args->flags != 0) 1827 return -EINVAL; 1828 1829 /* Is this guest using storage keys? 
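 * If not, there is nothing to transfer and KVM_S390_GET_SKEYS_NONE is
 * reported to userspace instead of an error.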
*/ 1830 if (!mm_uses_skeys(current->mm)) 1831 return KVM_S390_GET_SKEYS_NONE; 1832 1833 /* Enforce sane limit on memory allocation */ 1834 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX) 1835 return -EINVAL; 1836 1837 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT); 1838 if (!keys) 1839 return -ENOMEM; 1840 1841 mmap_read_lock(current->mm); 1842 srcu_idx = srcu_read_lock(&kvm->srcu); 1843 for (i = 0; i < args->count; i++) { 1844 hva = gfn_to_hva(kvm, args->start_gfn + i); 1845 if (kvm_is_error_hva(hva)) { 1846 r = -EFAULT; 1847 break; 1848 } 1849 1850 r = get_guest_storage_key(current->mm, hva, &keys[i]); 1851 if (r) 1852 break; 1853 } 1854 srcu_read_unlock(&kvm->srcu, srcu_idx); 1855 mmap_read_unlock(current->mm); 1856 1857 if (!r) { 1858 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys, 1859 sizeof(uint8_t) * args->count); 1860 if (r) 1861 r = -EFAULT; 1862 } 1863 1864 kvfree(keys); 1865 return r; 1866 } 1867 1868 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) 1869 { 1870 uint8_t *keys; 1871 uint64_t hva; 1872 int srcu_idx, i, r = 0; 1873 bool unlocked; 1874 1875 if (args->flags != 0) 1876 return -EINVAL; 1877 1878 /* Enforce sane limit on memory allocation */ 1879 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX) 1880 return -EINVAL; 1881 1882 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT); 1883 if (!keys) 1884 return -ENOMEM; 1885 1886 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr, 1887 sizeof(uint8_t) * args->count); 1888 if (r) { 1889 r = -EFAULT; 1890 goto out; 1891 } 1892 1893 /* Enable storage key handling for the guest */ 1894 r = s390_enable_skey(); 1895 if (r) 1896 goto out; 1897 1898 i = 0; 1899 mmap_read_lock(current->mm); 1900 srcu_idx = srcu_read_lock(&kvm->srcu); 1901 while (i < args->count) { 1902 unlocked = false; 1903 hva = gfn_to_hva(kvm, args->start_gfn + i); 1904 if (kvm_is_error_hva(hva)) { 1905 r = -EFAULT; 1906 break; 1907 } 1908 1909 /* Lowest order bit is reserved */ 1910 if (keys[i] & 0x01) { 1911 r = -EINVAL; 1912 break; 1913 } 1914 1915 r = set_guest_storage_key(current->mm, hva, keys[i], 0); 1916 if (r) { 1917 r = fixup_user_fault(current->mm, hva, 1918 FAULT_FLAG_WRITE, &unlocked); 1919 if (r) 1920 break; 1921 } 1922 if (!r) 1923 i++; 1924 } 1925 srcu_read_unlock(&kvm->srcu, srcu_idx); 1926 mmap_read_unlock(current->mm); 1927 out: 1928 kvfree(keys); 1929 return r; 1930 } 1931 1932 /* 1933 * Base address and length must be sent at the start of each block, therefore 1934 * it's cheaper to send some clean data, as long as it's less than the size of 1935 * two longs. 1936 */ 1937 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *)) 1938 /* for consistency */ 1939 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX) 1940 1941 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args, 1942 u8 *res, unsigned long bufsize) 1943 { 1944 unsigned long pgstev, hva, cur_gfn = args->start_gfn; 1945 1946 args->count = 0; 1947 while (args->count < bufsize) { 1948 hva = gfn_to_hva(kvm, cur_gfn); 1949 /* 1950 * We return an error if the first value was invalid, but we 1951 * return successfully if at least one value was copied. 1952 */ 1953 if (kvm_is_error_hva(hva)) 1954 return args->count ? 
0 : -EFAULT; 1955 if (get_pgste(kvm->mm, hva, &pgstev) < 0) 1956 pgstev = 0; 1957 res[args->count++] = (pgstev >> 24) & 0x43; 1958 cur_gfn++; 1959 } 1960 1961 return 0; 1962 } 1963 1964 static struct kvm_memory_slot *gfn_to_memslot_approx(struct kvm_memslots *slots, 1965 gfn_t gfn) 1966 { 1967 return ____gfn_to_memslot(slots, gfn, true); 1968 } 1969 1970 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots, 1971 unsigned long cur_gfn) 1972 { 1973 struct kvm_memory_slot *ms = gfn_to_memslot_approx(slots, cur_gfn); 1974 unsigned long ofs = cur_gfn - ms->base_gfn; 1975 struct rb_node *mnode = &ms->gfn_node[slots->node_idx]; 1976 1977 if (ms->base_gfn + ms->npages <= cur_gfn) { 1978 mnode = rb_next(mnode); 1979 /* If we are above the highest slot, wrap around */ 1980 if (!mnode) 1981 mnode = rb_first(&slots->gfn_tree); 1982 1983 ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]); 1984 ofs = 0; 1985 } 1986 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs); 1987 while (ofs >= ms->npages && (mnode = rb_next(mnode))) { 1988 ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]); 1989 ofs = find_first_bit(kvm_second_dirty_bitmap(ms), ms->npages); 1990 } 1991 return ms->base_gfn + ofs; 1992 } 1993 1994 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args, 1995 u8 *res, unsigned long bufsize) 1996 { 1997 unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev; 1998 struct kvm_memslots *slots = kvm_memslots(kvm); 1999 struct kvm_memory_slot *ms; 2000 2001 if (unlikely(kvm_memslots_empty(slots))) 2002 return 0; 2003 2004 cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn); 2005 ms = gfn_to_memslot(kvm, cur_gfn); 2006 args->count = 0; 2007 args->start_gfn = cur_gfn; 2008 if (!ms) 2009 return 0; 2010 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1); 2011 mem_end = kvm_s390_get_gfn_end(slots); 2012 2013 while (args->count < bufsize) { 2014 hva = gfn_to_hva(kvm, cur_gfn); 2015 if (kvm_is_error_hva(hva)) 2016 return 0; 2017 /* Decrement only if we actually flipped the bit to 0 */ 2018 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms))) 2019 atomic64_dec(&kvm->arch.cmma_dirty_pages); 2020 if (get_pgste(kvm->mm, hva, &pgstev) < 0) 2021 pgstev = 0; 2022 /* Save the value */ 2023 res[args->count++] = (pgstev >> 24) & 0x43; 2024 /* If the next bit is too far away, stop. */ 2025 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE) 2026 return 0; 2027 /* If we reached the previous "next", find the next one */ 2028 if (cur_gfn == next_gfn) 2029 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1); 2030 /* Reached the end of memory or of the buffer, stop */ 2031 if ((next_gfn >= mem_end) || 2032 (next_gfn - args->start_gfn >= bufsize)) 2033 return 0; 2034 cur_gfn++; 2035 /* Reached the end of the current memslot, take the next one. */ 2036 if (cur_gfn - ms->base_gfn >= ms->npages) { 2037 ms = gfn_to_memslot(kvm, cur_gfn); 2038 if (!ms) 2039 return 0; 2040 } 2041 } 2042 return 0; 2043 } 2044 2045 /* 2046 * This function searches for the next page with dirty CMMA attributes, and 2047 * saves the attributes in the buffer up to either the end of the buffer or 2048 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found; 2049 * no trailing clean bytes are saved. 2050 * In case no dirty bits were found, or if CMMA was not enabled or used, the 2051 * output buffer will indicate 0 as length. 
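 * With KVM_S390_CMMA_PEEK the values are read starting at start_gfn and the
 * dirty bitmap is left untouched; without it only pages marked in the CMMA
 * dirty bitmap are reported and their dirty bits are cleared along the way.
 * While migration mode is active, args->remaining is set to the number of
 * pages that are still dirty.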
2052 */ 2053 static int kvm_s390_get_cmma_bits(struct kvm *kvm, 2054 struct kvm_s390_cmma_log *args) 2055 { 2056 unsigned long bufsize; 2057 int srcu_idx, peek, ret; 2058 u8 *values; 2059 2060 if (!kvm->arch.use_cmma) 2061 return -ENXIO; 2062 /* Invalid/unsupported flags were specified */ 2063 if (args->flags & ~KVM_S390_CMMA_PEEK) 2064 return -EINVAL; 2065 /* Migration mode query, and we are not doing a migration */ 2066 peek = !!(args->flags & KVM_S390_CMMA_PEEK); 2067 if (!peek && !kvm->arch.migration_mode) 2068 return -EINVAL; 2069 /* CMMA is disabled or was not used, or the buffer has length zero */ 2070 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX); 2071 if (!bufsize || !kvm->mm->context.uses_cmm) { 2072 memset(args, 0, sizeof(*args)); 2073 return 0; 2074 } 2075 /* We are not peeking, and there are no dirty pages */ 2076 if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) { 2077 memset(args, 0, sizeof(*args)); 2078 return 0; 2079 } 2080 2081 values = vmalloc(bufsize); 2082 if (!values) 2083 return -ENOMEM; 2084 2085 mmap_read_lock(kvm->mm); 2086 srcu_idx = srcu_read_lock(&kvm->srcu); 2087 if (peek) 2088 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize); 2089 else 2090 ret = kvm_s390_get_cmma(kvm, args, values, bufsize); 2091 srcu_read_unlock(&kvm->srcu, srcu_idx); 2092 mmap_read_unlock(kvm->mm); 2093 2094 if (kvm->arch.migration_mode) 2095 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages); 2096 else 2097 args->remaining = 0; 2098 2099 if (copy_to_user((void __user *)args->values, values, args->count)) 2100 ret = -EFAULT; 2101 2102 vfree(values); 2103 return ret; 2104 } 2105 2106 /* 2107 * This function sets the CMMA attributes for the given pages. If the input 2108 * buffer has zero length, no action is taken, otherwise the attributes are 2109 * set and the mm->context.uses_cmm flag is set. 
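 * The mask is restricted to the usage and NODAT bits of the PGSTE; each
 * input byte is shifted into position and applied to the PGSTE of the
 * corresponding guest page.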
2110 */ 2111 static int kvm_s390_set_cmma_bits(struct kvm *kvm, 2112 const struct kvm_s390_cmma_log *args) 2113 { 2114 unsigned long hva, mask, pgstev, i; 2115 uint8_t *bits; 2116 int srcu_idx, r = 0; 2117 2118 mask = args->mask; 2119 2120 if (!kvm->arch.use_cmma) 2121 return -ENXIO; 2122 /* invalid/unsupported flags */ 2123 if (args->flags != 0) 2124 return -EINVAL; 2125 /* Enforce sane limit on memory allocation */ 2126 if (args->count > KVM_S390_CMMA_SIZE_MAX) 2127 return -EINVAL; 2128 /* Nothing to do */ 2129 if (args->count == 0) 2130 return 0; 2131 2132 bits = vmalloc(array_size(sizeof(*bits), args->count)); 2133 if (!bits) 2134 return -ENOMEM; 2135 2136 r = copy_from_user(bits, (void __user *)args->values, args->count); 2137 if (r) { 2138 r = -EFAULT; 2139 goto out; 2140 } 2141 2142 mmap_read_lock(kvm->mm); 2143 srcu_idx = srcu_read_lock(&kvm->srcu); 2144 for (i = 0; i < args->count; i++) { 2145 hva = gfn_to_hva(kvm, args->start_gfn + i); 2146 if (kvm_is_error_hva(hva)) { 2147 r = -EFAULT; 2148 break; 2149 } 2150 2151 pgstev = bits[i]; 2152 pgstev = pgstev << 24; 2153 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT; 2154 set_pgste_bits(kvm->mm, hva, mask, pgstev); 2155 } 2156 srcu_read_unlock(&kvm->srcu, srcu_idx); 2157 mmap_read_unlock(kvm->mm); 2158 2159 if (!kvm->mm->context.uses_cmm) { 2160 mmap_write_lock(kvm->mm); 2161 kvm->mm->context.uses_cmm = 1; 2162 mmap_write_unlock(kvm->mm); 2163 } 2164 out: 2165 vfree(bits); 2166 return r; 2167 } 2168 2169 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp) 2170 { 2171 struct kvm_vcpu *vcpu; 2172 u16 rc, rrc; 2173 int ret = 0; 2174 unsigned long i; 2175 2176 /* 2177 * We ignore failures and try to destroy as many CPUs as possible. 2178 * At the same time we must not free the assigned resources when 2179 * this fails, as the ultravisor has still access to that memory. 2180 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak 2181 * behind. 2182 * We want to return the first failure rc and rrc, though. 2183 */ 2184 kvm_for_each_vcpu(i, vcpu, kvm) { 2185 mutex_lock(&vcpu->mutex); 2186 if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) { 2187 *rcp = rc; 2188 *rrcp = rrc; 2189 ret = -EIO; 2190 } 2191 mutex_unlock(&vcpu->mutex); 2192 } 2193 /* Ensure that we re-enable gisa if the non-PV guest used it but the PV guest did not. */ 2194 if (use_gisa) 2195 kvm_s390_gisa_enable(kvm); 2196 return ret; 2197 } 2198 2199 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc) 2200 { 2201 unsigned long i; 2202 int r = 0; 2203 u16 dummy; 2204 2205 struct kvm_vcpu *vcpu; 2206 2207 /* Disable the GISA if the ultravisor does not support AIV. */ 2208 if (!test_bit_inv(BIT_UV_FEAT_AIV, &uv_info.uv_feature_indications)) 2209 kvm_s390_gisa_disable(kvm); 2210 2211 kvm_for_each_vcpu(i, vcpu, kvm) { 2212 mutex_lock(&vcpu->mutex); 2213 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc); 2214 mutex_unlock(&vcpu->mutex); 2215 if (r) 2216 break; 2217 } 2218 if (r) 2219 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy); 2220 return r; 2221 } 2222 2223 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) 2224 { 2225 int r = 0; 2226 u16 dummy; 2227 void __user *argp = (void __user *)cmd->data; 2228 2229 switch (cmd->cmd) { 2230 case KVM_PV_ENABLE: { 2231 r = -EINVAL; 2232 if (kvm_s390_pv_is_protected(kvm)) 2233 break; 2234 2235 /* 2236 * FMT 4 SIE needs esca. 
As we never switch back to bsca from 2237 * esca, we need no cleanup in the error cases below 2238 */ 2239 r = sca_switch_to_extended(kvm); 2240 if (r) 2241 break; 2242 2243 mmap_write_lock(current->mm); 2244 r = gmap_mark_unmergeable(); 2245 mmap_write_unlock(current->mm); 2246 if (r) 2247 break; 2248 2249 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc); 2250 if (r) 2251 break; 2252 2253 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc); 2254 if (r) 2255 kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy); 2256 2257 /* we need to block service interrupts from now on */ 2258 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs); 2259 break; 2260 } 2261 case KVM_PV_DISABLE: { 2262 r = -EINVAL; 2263 if (!kvm_s390_pv_is_protected(kvm)) 2264 break; 2265 2266 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc); 2267 /* 2268 * If a CPU could not be destroyed, destroy VM will also fail. 2269 * There is no point in trying to destroy it. Instead return 2270 * the rc and rrc from the first CPU that failed destroying. 2271 */ 2272 if (r) 2273 break; 2274 r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc); 2275 2276 /* no need to block service interrupts any more */ 2277 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs); 2278 break; 2279 } 2280 case KVM_PV_SET_SEC_PARMS: { 2281 struct kvm_s390_pv_sec_parm parms = {}; 2282 void *hdr; 2283 2284 r = -EINVAL; 2285 if (!kvm_s390_pv_is_protected(kvm)) 2286 break; 2287 2288 r = -EFAULT; 2289 if (copy_from_user(&parms, argp, sizeof(parms))) 2290 break; 2291 2292 /* Currently restricted to 8KB */ 2293 r = -EINVAL; 2294 if (parms.length > PAGE_SIZE * 2) 2295 break; 2296 2297 r = -ENOMEM; 2298 hdr = vmalloc(parms.length); 2299 if (!hdr) 2300 break; 2301 2302 r = -EFAULT; 2303 if (!copy_from_user(hdr, (void __user *)parms.origin, 2304 parms.length)) 2305 r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length, 2306 &cmd->rc, &cmd->rrc); 2307 2308 vfree(hdr); 2309 break; 2310 } 2311 case KVM_PV_UNPACK: { 2312 struct kvm_s390_pv_unp unp = {}; 2313 2314 r = -EINVAL; 2315 if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm)) 2316 break; 2317 2318 r = -EFAULT; 2319 if (copy_from_user(&unp, argp, sizeof(unp))) 2320 break; 2321 2322 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak, 2323 &cmd->rc, &cmd->rrc); 2324 break; 2325 } 2326 case KVM_PV_VERIFY: { 2327 r = -EINVAL; 2328 if (!kvm_s390_pv_is_protected(kvm)) 2329 break; 2330 2331 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), 2332 UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc); 2333 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc, 2334 cmd->rrc); 2335 break; 2336 } 2337 case KVM_PV_PREP_RESET: { 2338 r = -EINVAL; 2339 if (!kvm_s390_pv_is_protected(kvm)) 2340 break; 2341 2342 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), 2343 UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc); 2344 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x", 2345 cmd->rc, cmd->rrc); 2346 break; 2347 } 2348 case KVM_PV_UNSHARE_ALL: { 2349 r = -EINVAL; 2350 if (!kvm_s390_pv_is_protected(kvm)) 2351 break; 2352 2353 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), 2354 UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc); 2355 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x", 2356 cmd->rc, cmd->rrc); 2357 break; 2358 } 2359 default: 2360 r = -ENOTTY; 2361 } 2362 return r; 2363 } 2364 2365 static bool access_key_invalid(u8 access_key) 2366 { 2367 return access_key > 0xf; 2368 } 2369 2370 static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop) 2371 { 2372 void __user *uaddr = (void __user 
*)mop->buf; 2373 u64 supported_flags; 2374 void *tmpbuf = NULL; 2375 int r, srcu_idx; 2376 2377 supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION 2378 | KVM_S390_MEMOP_F_CHECK_ONLY; 2379 if (mop->flags & ~supported_flags || !mop->size) 2380 return -EINVAL; 2381 if (mop->size > MEM_OP_MAX_SIZE) 2382 return -E2BIG; 2383 /* 2384 * This is technically a heuristic only, if the kvm->lock is not 2385 * taken, it is not guaranteed that the vm is/remains non-protected. 2386 * This is ok from a kernel perspective, wrongdoing is detected 2387 * on the access, -EFAULT is returned and the vm may crash the 2388 * next time it accesses the memory in question. 2389 * There is no sane usecase to do switching and a memop on two 2390 * different CPUs at the same time. 2391 */ 2392 if (kvm_s390_pv_get_handle(kvm)) 2393 return -EINVAL; 2394 if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) { 2395 if (access_key_invalid(mop->key)) 2396 return -EINVAL; 2397 } else { 2398 mop->key = 0; 2399 } 2400 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) { 2401 tmpbuf = vmalloc(mop->size); 2402 if (!tmpbuf) 2403 return -ENOMEM; 2404 } 2405 2406 srcu_idx = srcu_read_lock(&kvm->srcu); 2407 2408 if (kvm_is_error_gpa(kvm, mop->gaddr)) { 2409 r = PGM_ADDRESSING; 2410 goto out_unlock; 2411 } 2412 2413 switch (mop->op) { 2414 case KVM_S390_MEMOP_ABSOLUTE_READ: { 2415 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { 2416 r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_FETCH, mop->key); 2417 } else { 2418 r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf, 2419 mop->size, GACC_FETCH, mop->key); 2420 if (r == 0) { 2421 if (copy_to_user(uaddr, tmpbuf, mop->size)) 2422 r = -EFAULT; 2423 } 2424 } 2425 break; 2426 } 2427 case KVM_S390_MEMOP_ABSOLUTE_WRITE: { 2428 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { 2429 r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_STORE, mop->key); 2430 } else { 2431 if (copy_from_user(tmpbuf, uaddr, mop->size)) { 2432 r = -EFAULT; 2433 break; 2434 } 2435 r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf, 2436 mop->size, GACC_STORE, mop->key); 2437 } 2438 break; 2439 } 2440 default: 2441 r = -EINVAL; 2442 } 2443 2444 out_unlock: 2445 srcu_read_unlock(&kvm->srcu, srcu_idx); 2446 2447 vfree(tmpbuf); 2448 return r; 2449 } 2450 2451 long kvm_arch_vm_ioctl(struct file *filp, 2452 unsigned int ioctl, unsigned long arg) 2453 { 2454 struct kvm *kvm = filp->private_data; 2455 void __user *argp = (void __user *)arg; 2456 struct kvm_device_attr attr; 2457 int r; 2458 2459 switch (ioctl) { 2460 case KVM_S390_INTERRUPT: { 2461 struct kvm_s390_interrupt s390int; 2462 2463 r = -EFAULT; 2464 if (copy_from_user(&s390int, argp, sizeof(s390int))) 2465 break; 2466 r = kvm_s390_inject_vm(kvm, &s390int); 2467 break; 2468 } 2469 case KVM_CREATE_IRQCHIP: { 2470 struct kvm_irq_routing_entry routing; 2471 2472 r = -EINVAL; 2473 if (kvm->arch.use_irqchip) { 2474 /* Set up dummy routing. 
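 * An empty table is sufficient here; routes for adapter interrupts are
 * expected to be added later via KVM_SET_GSI_ROUTING.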
*/ 2475 memset(&routing, 0, sizeof(routing)); 2476 r = kvm_set_irq_routing(kvm, &routing, 0, 0); 2477 } 2478 break; 2479 } 2480 case KVM_SET_DEVICE_ATTR: { 2481 r = -EFAULT; 2482 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 2483 break; 2484 r = kvm_s390_vm_set_attr(kvm, &attr); 2485 break; 2486 } 2487 case KVM_GET_DEVICE_ATTR: { 2488 r = -EFAULT; 2489 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 2490 break; 2491 r = kvm_s390_vm_get_attr(kvm, &attr); 2492 break; 2493 } 2494 case KVM_HAS_DEVICE_ATTR: { 2495 r = -EFAULT; 2496 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 2497 break; 2498 r = kvm_s390_vm_has_attr(kvm, &attr); 2499 break; 2500 } 2501 case KVM_S390_GET_SKEYS: { 2502 struct kvm_s390_skeys args; 2503 2504 r = -EFAULT; 2505 if (copy_from_user(&args, argp, 2506 sizeof(struct kvm_s390_skeys))) 2507 break; 2508 r = kvm_s390_get_skeys(kvm, &args); 2509 break; 2510 } 2511 case KVM_S390_SET_SKEYS: { 2512 struct kvm_s390_skeys args; 2513 2514 r = -EFAULT; 2515 if (copy_from_user(&args, argp, 2516 sizeof(struct kvm_s390_skeys))) 2517 break; 2518 r = kvm_s390_set_skeys(kvm, &args); 2519 break; 2520 } 2521 case KVM_S390_GET_CMMA_BITS: { 2522 struct kvm_s390_cmma_log args; 2523 2524 r = -EFAULT; 2525 if (copy_from_user(&args, argp, sizeof(args))) 2526 break; 2527 mutex_lock(&kvm->slots_lock); 2528 r = kvm_s390_get_cmma_bits(kvm, &args); 2529 mutex_unlock(&kvm->slots_lock); 2530 if (!r) { 2531 r = copy_to_user(argp, &args, sizeof(args)); 2532 if (r) 2533 r = -EFAULT; 2534 } 2535 break; 2536 } 2537 case KVM_S390_SET_CMMA_BITS: { 2538 struct kvm_s390_cmma_log args; 2539 2540 r = -EFAULT; 2541 if (copy_from_user(&args, argp, sizeof(args))) 2542 break; 2543 mutex_lock(&kvm->slots_lock); 2544 r = kvm_s390_set_cmma_bits(kvm, &args); 2545 mutex_unlock(&kvm->slots_lock); 2546 break; 2547 } 2548 case KVM_S390_PV_COMMAND: { 2549 struct kvm_pv_cmd args; 2550 2551 /* protvirt means user cpu state */ 2552 kvm_s390_set_user_cpu_state_ctrl(kvm); 2553 r = 0; 2554 if (!is_prot_virt_host()) { 2555 r = -EINVAL; 2556 break; 2557 } 2558 if (copy_from_user(&args, argp, sizeof(args))) { 2559 r = -EFAULT; 2560 break; 2561 } 2562 if (args.flags) { 2563 r = -EINVAL; 2564 break; 2565 } 2566 mutex_lock(&kvm->lock); 2567 r = kvm_s390_handle_pv(kvm, &args); 2568 mutex_unlock(&kvm->lock); 2569 if (copy_to_user(argp, &args, sizeof(args))) { 2570 r = -EFAULT; 2571 break; 2572 } 2573 break; 2574 } 2575 case KVM_S390_MEM_OP: { 2576 struct kvm_s390_mem_op mem_op; 2577 2578 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0) 2579 r = kvm_s390_vm_mem_op(kvm, &mem_op); 2580 else 2581 r = -EFAULT; 2582 break; 2583 } 2584 default: 2585 r = -ENOTTY; 2586 } 2587 2588 return r; 2589 } 2590 2591 static int kvm_s390_apxa_installed(void) 2592 { 2593 struct ap_config_info info; 2594 2595 if (ap_instructions_available()) { 2596 if (ap_qci(&info) == 0) 2597 return info.apxa; 2598 } 2599 2600 return 0; 2601 } 2602 2603 /* 2604 * The format of the crypto control block (CRYCB) is specified in the 3 low 2605 * order bits of the CRYCB designation (CRYCBD) field as follows: 2606 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the 2607 * AP extended addressing (APXA) facility are installed. 2608 * Format 1: The APXA facility is not installed but the MSAX3 facility is. 
2609 * Format 2: Both the APXA and MSAX3 facilities are installed 2610 */ 2611 static void kvm_s390_set_crycb_format(struct kvm *kvm) 2612 { 2613 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb; 2614 2615 /* Clear the CRYCB format bits - i.e., set format 0 by default */ 2616 kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK); 2617 2618 /* Check whether MSAX3 is installed */ 2619 if (!test_kvm_facility(kvm, 76)) 2620 return; 2621 2622 if (kvm_s390_apxa_installed()) 2623 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2; 2624 else 2625 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1; 2626 } 2627 2628 /* 2629 * kvm_arch_crypto_set_masks 2630 * 2631 * @kvm: pointer to the target guest's KVM struct containing the crypto masks 2632 * to be set. 2633 * @apm: the mask identifying the accessible AP adapters 2634 * @aqm: the mask identifying the accessible AP domains 2635 * @adm: the mask identifying the accessible AP control domains 2636 * 2637 * Set the masks that identify the adapters, domains and control domains to 2638 * which the KVM guest is granted access. 2639 * 2640 * Note: The kvm->lock mutex must be locked by the caller before invoking this 2641 * function. 2642 */ 2643 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm, 2644 unsigned long *aqm, unsigned long *adm) 2645 { 2646 struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb; 2647 2648 kvm_s390_vcpu_block_all(kvm); 2649 2650 switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) { 2651 case CRYCB_FORMAT2: /* APCB1 use 256 bits */ 2652 memcpy(crycb->apcb1.apm, apm, 32); 2653 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx", 2654 apm[0], apm[1], apm[2], apm[3]); 2655 memcpy(crycb->apcb1.aqm, aqm, 32); 2656 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx", 2657 aqm[0], aqm[1], aqm[2], aqm[3]); 2658 memcpy(crycb->apcb1.adm, adm, 32); 2659 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx", 2660 adm[0], adm[1], adm[2], adm[3]); 2661 break; 2662 case CRYCB_FORMAT1: 2663 case CRYCB_FORMAT0: /* Fall through both use APCB0 */ 2664 memcpy(crycb->apcb0.apm, apm, 8); 2665 memcpy(crycb->apcb0.aqm, aqm, 2); 2666 memcpy(crycb->apcb0.adm, adm, 2); 2667 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x", 2668 apm[0], *((unsigned short *)aqm), 2669 *((unsigned short *)adm)); 2670 break; 2671 default: /* Can not happen */ 2672 break; 2673 } 2674 2675 /* recreate the shadow crycb for each vcpu */ 2676 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART); 2677 kvm_s390_vcpu_unblock_all(kvm); 2678 } 2679 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks); 2680 2681 /* 2682 * kvm_arch_crypto_clear_masks 2683 * 2684 * @kvm: pointer to the target guest's KVM struct containing the crypto masks 2685 * to be cleared. 2686 * 2687 * Clear the masks that identify the adapters, domains and control domains to 2688 * which the KVM guest is granted access. 2689 * 2690 * Note: The kvm->lock mutex must be locked by the caller before invoking this 2691 * function. 
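 * All vcpus are blocked while the APCB masks in the CRYCB are zeroed and a
 * KVM_REQ_VSIE_RESTART request is broadcast afterwards, so that the shadow
 * CRYCBs are rebuilt before the vcpus resume.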
2692 */ 2693 void kvm_arch_crypto_clear_masks(struct kvm *kvm) 2694 { 2695 kvm_s390_vcpu_block_all(kvm); 2696 2697 memset(&kvm->arch.crypto.crycb->apcb0, 0, 2698 sizeof(kvm->arch.crypto.crycb->apcb0)); 2699 memset(&kvm->arch.crypto.crycb->apcb1, 0, 2700 sizeof(kvm->arch.crypto.crycb->apcb1)); 2701 2702 VM_EVENT(kvm, 3, "%s", "CLR CRYCB:"); 2703 /* recreate the shadow crycb for each vcpu */ 2704 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART); 2705 kvm_s390_vcpu_unblock_all(kvm); 2706 } 2707 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks); 2708 2709 static u64 kvm_s390_get_initial_cpuid(void) 2710 { 2711 struct cpuid cpuid; 2712 2713 get_cpu_id(&cpuid); 2714 cpuid.version = 0xff; 2715 return *((u64 *) &cpuid); 2716 } 2717 2718 static void kvm_s390_crypto_init(struct kvm *kvm) 2719 { 2720 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb; 2721 kvm_s390_set_crycb_format(kvm); 2722 init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem); 2723 2724 if (!test_kvm_facility(kvm, 76)) 2725 return; 2726 2727 /* Enable AES/DEA protected key functions by default */ 2728 kvm->arch.crypto.aes_kw = 1; 2729 kvm->arch.crypto.dea_kw = 1; 2730 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 2731 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); 2732 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 2733 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); 2734 } 2735 2736 static void sca_dispose(struct kvm *kvm) 2737 { 2738 if (kvm->arch.use_esca) 2739 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block)); 2740 else 2741 free_page((unsigned long)(kvm->arch.sca)); 2742 kvm->arch.sca = NULL; 2743 } 2744 2745 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) 2746 { 2747 gfp_t alloc_flags = GFP_KERNEL_ACCOUNT; 2748 int i, rc; 2749 char debug_name[16]; 2750 static unsigned long sca_offset; 2751 2752 rc = -EINVAL; 2753 #ifdef CONFIG_KVM_S390_UCONTROL 2754 if (type & ~KVM_VM_S390_UCONTROL) 2755 goto out_err; 2756 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN))) 2757 goto out_err; 2758 #else 2759 if (type) 2760 goto out_err; 2761 #endif 2762 2763 rc = s390_enable_sie(); 2764 if (rc) 2765 goto out_err; 2766 2767 rc = -ENOMEM; 2768 2769 if (!sclp.has_64bscao) 2770 alloc_flags |= GFP_DMA; 2771 rwlock_init(&kvm->arch.sca_lock); 2772 /* start with basic SCA */ 2773 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags); 2774 if (!kvm->arch.sca) 2775 goto out_err; 2776 mutex_lock(&kvm_lock); 2777 sca_offset += 16; 2778 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE) 2779 sca_offset = 0; 2780 kvm->arch.sca = (struct bsca_block *) 2781 ((char *) kvm->arch.sca + sca_offset); 2782 mutex_unlock(&kvm_lock); 2783 2784 sprintf(debug_name, "kvm-%u", current->pid); 2785 2786 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long)); 2787 if (!kvm->arch.dbf) 2788 goto out_err; 2789 2790 BUILD_BUG_ON(sizeof(struct sie_page2) != 4096); 2791 kvm->arch.sie_page2 = 2792 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA); 2793 if (!kvm->arch.sie_page2) 2794 goto out_err; 2795 2796 kvm->arch.sie_page2->kvm = kvm; 2797 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list; 2798 2799 for (i = 0; i < kvm_s390_fac_size(); i++) { 2800 kvm->arch.model.fac_mask[i] = stfle_fac_list[i] & 2801 (kvm_s390_fac_base[i] | 2802 kvm_s390_fac_ext[i]); 2803 kvm->arch.model.fac_list[i] = stfle_fac_list[i] & 2804 kvm_s390_fac_base[i]; 2805 } 2806 kvm->arch.model.subfuncs = kvm_s390_available_subfunc; 2807 2808 /* we are always in czam mode - 
even on pre z14 machines */ 2809 set_kvm_facility(kvm->arch.model.fac_mask, 138); 2810 set_kvm_facility(kvm->arch.model.fac_list, 138); 2811 /* we emulate STHYI in kvm */ 2812 set_kvm_facility(kvm->arch.model.fac_mask, 74); 2813 set_kvm_facility(kvm->arch.model.fac_list, 74); 2814 if (MACHINE_HAS_TLB_GUEST) { 2815 set_kvm_facility(kvm->arch.model.fac_mask, 147); 2816 set_kvm_facility(kvm->arch.model.fac_list, 147); 2817 } 2818 2819 if (css_general_characteristics.aiv && test_facility(65)) 2820 set_kvm_facility(kvm->arch.model.fac_mask, 65); 2821 2822 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid(); 2823 kvm->arch.model.ibc = sclp.ibc & 0x0fff; 2824 2825 kvm_s390_crypto_init(kvm); 2826 2827 mutex_init(&kvm->arch.float_int.ais_lock); 2828 spin_lock_init(&kvm->arch.float_int.lock); 2829 for (i = 0; i < FIRQ_LIST_COUNT; i++) 2830 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]); 2831 init_waitqueue_head(&kvm->arch.ipte_wq); 2832 mutex_init(&kvm->arch.ipte_mutex); 2833 2834 debug_register_view(kvm->arch.dbf, &debug_sprintf_view); 2835 VM_EVENT(kvm, 3, "vm created with type %lu", type); 2836 2837 if (type & KVM_VM_S390_UCONTROL) { 2838 kvm->arch.gmap = NULL; 2839 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT; 2840 } else { 2841 if (sclp.hamax == U64_MAX) 2842 kvm->arch.mem_limit = TASK_SIZE_MAX; 2843 else 2844 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX, 2845 sclp.hamax + 1); 2846 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1); 2847 if (!kvm->arch.gmap) 2848 goto out_err; 2849 kvm->arch.gmap->private = kvm; 2850 kvm->arch.gmap->pfault_enabled = 0; 2851 } 2852 2853 kvm->arch.use_pfmfi = sclp.has_pfmfi; 2854 kvm->arch.use_skf = sclp.has_skey; 2855 spin_lock_init(&kvm->arch.start_stop_lock); 2856 kvm_s390_vsie_init(kvm); 2857 if (use_gisa) 2858 kvm_s390_gisa_init(kvm); 2859 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid); 2860 2861 return 0; 2862 out_err: 2863 free_page((unsigned long)kvm->arch.sie_page2); 2864 debug_unregister(kvm->arch.dbf); 2865 sca_dispose(kvm); 2866 KVM_EVENT(3, "creation of vm failed: %d", rc); 2867 return rc; 2868 } 2869 2870 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) 2871 { 2872 u16 rc, rrc; 2873 2874 VCPU_EVENT(vcpu, 3, "%s", "free cpu"); 2875 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id); 2876 kvm_s390_clear_local_irqs(vcpu); 2877 kvm_clear_async_pf_completion_queue(vcpu); 2878 if (!kvm_is_ucontrol(vcpu->kvm)) 2879 sca_del_vcpu(vcpu); 2880 2881 if (kvm_is_ucontrol(vcpu->kvm)) 2882 gmap_remove(vcpu->arch.gmap); 2883 2884 if (vcpu->kvm->arch.use_cmma) 2885 kvm_s390_vcpu_unsetup_cmma(vcpu); 2886 /* We can not hold the vcpu mutex here, we are already dying */ 2887 if (kvm_s390_pv_cpu_get_handle(vcpu)) 2888 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc); 2889 free_page((unsigned long)(vcpu->arch.sie_block)); 2890 } 2891 2892 void kvm_arch_destroy_vm(struct kvm *kvm) 2893 { 2894 u16 rc, rrc; 2895 2896 kvm_destroy_vcpus(kvm); 2897 sca_dispose(kvm); 2898 kvm_s390_gisa_destroy(kvm); 2899 /* 2900 * We are already at the end of life and kvm->lock is not taken. 2901 * This is ok as the file descriptor is closed by now and nobody 2902 * can mess with the pv state. To avoid lockdep_assert_held from 2903 * complaining we do not use kvm_s390_pv_is_protected. 
2904 */ 2905 if (kvm_s390_pv_get_handle(kvm)) 2906 kvm_s390_pv_deinit_vm(kvm, &rc, &rrc); 2907 debug_unregister(kvm->arch.dbf); 2908 free_page((unsigned long)kvm->arch.sie_page2); 2909 if (!kvm_is_ucontrol(kvm)) 2910 gmap_remove(kvm->arch.gmap); 2911 kvm_s390_destroy_adapters(kvm); 2912 kvm_s390_clear_float_irqs(kvm); 2913 kvm_s390_vsie_destroy(kvm); 2914 KVM_EVENT(3, "vm 0x%pK destroyed", kvm); 2915 } 2916 2917 /* Section: vcpu related */ 2918 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu) 2919 { 2920 vcpu->arch.gmap = gmap_create(current->mm, -1UL); 2921 if (!vcpu->arch.gmap) 2922 return -ENOMEM; 2923 vcpu->arch.gmap->private = vcpu->kvm; 2924 2925 return 0; 2926 } 2927 2928 static void sca_del_vcpu(struct kvm_vcpu *vcpu) 2929 { 2930 if (!kvm_s390_use_sca_entries()) 2931 return; 2932 read_lock(&vcpu->kvm->arch.sca_lock); 2933 if (vcpu->kvm->arch.use_esca) { 2934 struct esca_block *sca = vcpu->kvm->arch.sca; 2935 2936 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); 2937 sca->cpu[vcpu->vcpu_id].sda = 0; 2938 } else { 2939 struct bsca_block *sca = vcpu->kvm->arch.sca; 2940 2941 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); 2942 sca->cpu[vcpu->vcpu_id].sda = 0; 2943 } 2944 read_unlock(&vcpu->kvm->arch.sca_lock); 2945 } 2946 2947 static void sca_add_vcpu(struct kvm_vcpu *vcpu) 2948 { 2949 if (!kvm_s390_use_sca_entries()) { 2950 struct bsca_block *sca = vcpu->kvm->arch.sca; 2951 2952 /* we still need the basic sca for the ipte control */ 2953 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2954 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; 2955 return; 2956 } 2957 read_lock(&vcpu->kvm->arch.sca_lock); 2958 if (vcpu->kvm->arch.use_esca) { 2959 struct esca_block *sca = vcpu->kvm->arch.sca; 2960 2961 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; 2962 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2963 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU; 2964 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; 2965 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); 2966 } else { 2967 struct bsca_block *sca = vcpu->kvm->arch.sca; 2968 2969 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; 2970 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2971 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; 2972 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); 2973 } 2974 read_unlock(&vcpu->kvm->arch.sca_lock); 2975 } 2976 2977 /* Basic SCA to Extended SCA data copy routines */ 2978 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s) 2979 { 2980 d->sda = s->sda; 2981 d->sigp_ctrl.c = s->sigp_ctrl.c; 2982 d->sigp_ctrl.scn = s->sigp_ctrl.scn; 2983 } 2984 2985 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s) 2986 { 2987 int i; 2988 2989 d->ipte_control = s->ipte_control; 2990 d->mcn[0] = s->mcn; 2991 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++) 2992 sca_copy_entry(&d->cpu[i], &s->cpu[i]); 2993 } 2994 2995 static int sca_switch_to_extended(struct kvm *kvm) 2996 { 2997 struct bsca_block *old_sca = kvm->arch.sca; 2998 struct esca_block *new_sca; 2999 struct kvm_vcpu *vcpu; 3000 unsigned long vcpu_idx; 3001 u32 scaol, scaoh; 3002 3003 if (kvm->arch.use_esca) 3004 return 0; 3005 3006 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO); 3007 if (!new_sca) 3008 return -ENOMEM; 3009 3010 scaoh = (u32)((u64)(new_sca) >> 32); 3011 scaol = (u32)(u64)(new_sca) & ~0x3fU; 3012 3013 kvm_s390_vcpu_block_all(kvm); 3014 write_lock(&kvm->arch.sca_lock); 
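/*
 * With all vcpus blocked and the SCA lock held, the basic SCA can be copied
 * into the freshly allocated extended SCA and every SIE control block can be
 * pointed at the new origin before anybody re-enters SIE.
 */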
3015 3016 sca_copy_b_to_e(new_sca, old_sca); 3017 3018 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) { 3019 vcpu->arch.sie_block->scaoh = scaoh; 3020 vcpu->arch.sie_block->scaol = scaol; 3021 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; 3022 } 3023 kvm->arch.sca = new_sca; 3024 kvm->arch.use_esca = 1; 3025 3026 write_unlock(&kvm->arch.sca_lock); 3027 kvm_s390_vcpu_unblock_all(kvm); 3028 3029 free_page((unsigned long)old_sca); 3030 3031 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)", 3032 old_sca, kvm->arch.sca); 3033 return 0; 3034 } 3035 3036 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id) 3037 { 3038 int rc; 3039 3040 if (!kvm_s390_use_sca_entries()) { 3041 if (id < KVM_MAX_VCPUS) 3042 return true; 3043 return false; 3044 } 3045 if (id < KVM_S390_BSCA_CPU_SLOTS) 3046 return true; 3047 if (!sclp.has_esca || !sclp.has_64bscao) 3048 return false; 3049 3050 mutex_lock(&kvm->lock); 3051 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm); 3052 mutex_unlock(&kvm->lock); 3053 3054 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS; 3055 } 3056 3057 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 3058 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3059 { 3060 WARN_ON_ONCE(vcpu->arch.cputm_start != 0); 3061 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 3062 vcpu->arch.cputm_start = get_tod_clock_fast(); 3063 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 3064 } 3065 3066 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 3067 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3068 { 3069 WARN_ON_ONCE(vcpu->arch.cputm_start == 0); 3070 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 3071 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start; 3072 vcpu->arch.cputm_start = 0; 3073 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 3074 } 3075 3076 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 3077 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3078 { 3079 WARN_ON_ONCE(vcpu->arch.cputm_enabled); 3080 vcpu->arch.cputm_enabled = true; 3081 __start_cpu_timer_accounting(vcpu); 3082 } 3083 3084 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 3085 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3086 { 3087 WARN_ON_ONCE(!vcpu->arch.cputm_enabled); 3088 __stop_cpu_timer_accounting(vcpu); 3089 vcpu->arch.cputm_enabled = false; 3090 } 3091 3092 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3093 { 3094 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3095 __enable_cpu_timer_accounting(vcpu); 3096 preempt_enable(); 3097 } 3098 3099 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3100 { 3101 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3102 __disable_cpu_timer_accounting(vcpu); 3103 preempt_enable(); 3104 } 3105 3106 /* set the cpu timer - may only be called from the VCPU thread itself */ 3107 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm) 3108 { 3109 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3110 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 3111 if (vcpu->arch.cputm_enabled) 3112 vcpu->arch.cputm_start = get_tod_clock_fast(); 3113 vcpu->arch.sie_block->cputm = cputm; 3114 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 3115 preempt_enable(); 3116 } 3117 3118 /* update and get the cpu timer - can also be called from other VCPU threads */ 3119 __u64 
kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu) 3120 { 3121 unsigned int seq; 3122 __u64 value; 3123 3124 if (unlikely(!vcpu->arch.cputm_enabled)) 3125 return vcpu->arch.sie_block->cputm; 3126 3127 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3128 do { 3129 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount); 3130 /* 3131 * If the writer would ever execute a read in the critical 3132 * section, e.g. in irq context, we have a deadlock. 3133 */ 3134 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu); 3135 value = vcpu->arch.sie_block->cputm; 3136 /* if cputm_start is 0, accounting is being started/stopped */ 3137 if (likely(vcpu->arch.cputm_start)) 3138 value -= get_tod_clock_fast() - vcpu->arch.cputm_start; 3139 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1)); 3140 preempt_enable(); 3141 return value; 3142 } 3143 3144 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 3145 { 3146 3147 gmap_enable(vcpu->arch.enabled_gmap); 3148 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING); 3149 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) 3150 __start_cpu_timer_accounting(vcpu); 3151 vcpu->cpu = cpu; 3152 } 3153 3154 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 3155 { 3156 vcpu->cpu = -1; 3157 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) 3158 __stop_cpu_timer_accounting(vcpu); 3159 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING); 3160 vcpu->arch.enabled_gmap = gmap_get_enabled(); 3161 gmap_disable(vcpu->arch.enabled_gmap); 3162 3163 } 3164 3165 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) 3166 { 3167 mutex_lock(&vcpu->kvm->lock); 3168 preempt_disable(); 3169 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch; 3170 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx; 3171 preempt_enable(); 3172 mutex_unlock(&vcpu->kvm->lock); 3173 if (!kvm_is_ucontrol(vcpu->kvm)) { 3174 vcpu->arch.gmap = vcpu->kvm->arch.gmap; 3175 sca_add_vcpu(vcpu); 3176 } 3177 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0) 3178 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC; 3179 /* make vcpu_load load the right gmap on the first trigger */ 3180 vcpu->arch.enabled_gmap = vcpu->arch.gmap; 3181 } 3182 3183 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr) 3184 { 3185 if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) && 3186 test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo)) 3187 return true; 3188 return false; 3189 } 3190 3191 static bool kvm_has_pckmo_ecc(struct kvm *kvm) 3192 { 3193 /* At least one ECC subfunction must be present */ 3194 return kvm_has_pckmo_subfunc(kvm, 32) || 3195 kvm_has_pckmo_subfunc(kvm, 33) || 3196 kvm_has_pckmo_subfunc(kvm, 34) || 3197 kvm_has_pckmo_subfunc(kvm, 40) || 3198 kvm_has_pckmo_subfunc(kvm, 41); 3199 3200 } 3201 3202 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu) 3203 { 3204 /* 3205 * If the AP instructions are not being interpreted and the MSAX3 3206 * facility is not configured for the guest, there is nothing to set up. 
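 * Otherwise the CRYCB origin and the ECB3/ECA/ECD bits for protected key
 * (AES/DEA), ECC and AP interpretation are recomputed below from the VM wide
 * crypto settings.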
3207 */ 3208 if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76)) 3209 return; 3210 3211 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd; 3212 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA); 3213 vcpu->arch.sie_block->eca &= ~ECA_APIE; 3214 vcpu->arch.sie_block->ecd &= ~ECD_ECC; 3215 3216 if (vcpu->kvm->arch.crypto.apie) 3217 vcpu->arch.sie_block->eca |= ECA_APIE; 3218 3219 /* Set up protected key support */ 3220 if (vcpu->kvm->arch.crypto.aes_kw) { 3221 vcpu->arch.sie_block->ecb3 |= ECB3_AES; 3222 /* ecc is also wrapped with AES key */ 3223 if (kvm_has_pckmo_ecc(vcpu->kvm)) 3224 vcpu->arch.sie_block->ecd |= ECD_ECC; 3225 } 3226 3227 if (vcpu->kvm->arch.crypto.dea_kw) 3228 vcpu->arch.sie_block->ecb3 |= ECB3_DEA; 3229 } 3230 3231 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu) 3232 { 3233 free_page(vcpu->arch.sie_block->cbrlo); 3234 vcpu->arch.sie_block->cbrlo = 0; 3235 } 3236 3237 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu) 3238 { 3239 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT); 3240 if (!vcpu->arch.sie_block->cbrlo) 3241 return -ENOMEM; 3242 return 0; 3243 } 3244 3245 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu) 3246 { 3247 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model; 3248 3249 vcpu->arch.sie_block->ibc = model->ibc; 3250 if (test_kvm_facility(vcpu->kvm, 7)) 3251 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list; 3252 } 3253 3254 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu) 3255 { 3256 int rc = 0; 3257 u16 uvrc, uvrrc; 3258 3259 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | 3260 CPUSTAT_SM | 3261 CPUSTAT_STOPPED); 3262 3263 if (test_kvm_facility(vcpu->kvm, 78)) 3264 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2); 3265 else if (test_kvm_facility(vcpu->kvm, 8)) 3266 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED); 3267 3268 kvm_s390_vcpu_setup_model(vcpu); 3269 3270 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */ 3271 if (MACHINE_HAS_ESOP) 3272 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT; 3273 if (test_kvm_facility(vcpu->kvm, 9)) 3274 vcpu->arch.sie_block->ecb |= ECB_SRSI; 3275 if (test_kvm_facility(vcpu->kvm, 73)) 3276 vcpu->arch.sie_block->ecb |= ECB_TE; 3277 if (!kvm_is_ucontrol(vcpu->kvm)) 3278 vcpu->arch.sie_block->ecb |= ECB_SPECI; 3279 3280 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi) 3281 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI; 3282 if (test_kvm_facility(vcpu->kvm, 130)) 3283 vcpu->arch.sie_block->ecb2 |= ECB2_IEP; 3284 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI; 3285 if (sclp.has_cei) 3286 vcpu->arch.sie_block->eca |= ECA_CEI; 3287 if (sclp.has_ib) 3288 vcpu->arch.sie_block->eca |= ECA_IB; 3289 if (sclp.has_siif) 3290 vcpu->arch.sie_block->eca |= ECA_SII; 3291 if (sclp.has_sigpif) 3292 vcpu->arch.sie_block->eca |= ECA_SIGPI; 3293 if (test_kvm_facility(vcpu->kvm, 129)) { 3294 vcpu->arch.sie_block->eca |= ECA_VX; 3295 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; 3296 } 3297 if (test_kvm_facility(vcpu->kvm, 139)) 3298 vcpu->arch.sie_block->ecd |= ECD_MEF; 3299 if (test_kvm_facility(vcpu->kvm, 156)) 3300 vcpu->arch.sie_block->ecd |= ECD_ETOKENF; 3301 if (vcpu->arch.sie_block->gd) { 3302 vcpu->arch.sie_block->eca |= ECA_AIV; 3303 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u", 3304 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id); 3305 } 3306 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx) 3307 | SDNXC; 3308 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb; 
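/*
 * Use the keyless subset facility if the machine offers it, otherwise the
 * storage key instructions (ISKE, SSKE, RRBE) have to be intercepted.
 */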
3309 3310 if (sclp.has_kss) 3311 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS); 3312 else 3313 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; 3314 3315 if (vcpu->kvm->arch.use_cmma) { 3316 rc = kvm_s390_vcpu_setup_cmma(vcpu); 3317 if (rc) 3318 return rc; 3319 } 3320 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 3321 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; 3322 3323 vcpu->arch.sie_block->hpid = HPID_KVM; 3324 3325 kvm_s390_vcpu_crypto_setup(vcpu); 3326 3327 mutex_lock(&vcpu->kvm->lock); 3328 if (kvm_s390_pv_is_protected(vcpu->kvm)) { 3329 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc); 3330 if (rc) 3331 kvm_s390_vcpu_unsetup_cmma(vcpu); 3332 } 3333 mutex_unlock(&vcpu->kvm->lock); 3334 3335 return rc; 3336 } 3337 3338 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) 3339 { 3340 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id)) 3341 return -EINVAL; 3342 return 0; 3343 } 3344 3345 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) 3346 { 3347 struct sie_page *sie_page; 3348 int rc; 3349 3350 BUILD_BUG_ON(sizeof(struct sie_page) != 4096); 3351 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT); 3352 if (!sie_page) 3353 return -ENOMEM; 3354 3355 vcpu->arch.sie_block = &sie_page->sie_block; 3356 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb; 3357 3358 /* the real guest size will always be smaller than msl */ 3359 vcpu->arch.sie_block->mso = 0; 3360 vcpu->arch.sie_block->msl = sclp.hamax; 3361 3362 vcpu->arch.sie_block->icpua = vcpu->vcpu_id; 3363 spin_lock_init(&vcpu->arch.local_int.lock); 3364 vcpu->arch.sie_block->gd = kvm_s390_get_gisa_desc(vcpu->kvm); 3365 seqcount_init(&vcpu->arch.cputm_seqcount); 3366 3367 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 3368 kvm_clear_async_pf_completion_queue(vcpu); 3369 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX | 3370 KVM_SYNC_GPRS | 3371 KVM_SYNC_ACRS | 3372 KVM_SYNC_CRS | 3373 KVM_SYNC_ARCH0 | 3374 KVM_SYNC_PFAULT | 3375 KVM_SYNC_DIAG318; 3376 kvm_s390_set_prefix(vcpu, 0); 3377 if (test_kvm_facility(vcpu->kvm, 64)) 3378 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; 3379 if (test_kvm_facility(vcpu->kvm, 82)) 3380 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC; 3381 if (test_kvm_facility(vcpu->kvm, 133)) 3382 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB; 3383 if (test_kvm_facility(vcpu->kvm, 156)) 3384 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN; 3385 /* fprs can be synchronized via vrs, even if the guest has no vx. With 3386 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format. 
3387 */ 3388 if (MACHINE_HAS_VX) 3389 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS; 3390 else 3391 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS; 3392 3393 if (kvm_is_ucontrol(vcpu->kvm)) { 3394 rc = __kvm_ucontrol_vcpu_init(vcpu); 3395 if (rc) 3396 goto out_free_sie_block; 3397 } 3398 3399 VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", 3400 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block); 3401 trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block); 3402 3403 rc = kvm_s390_vcpu_setup(vcpu); 3404 if (rc) 3405 goto out_ucontrol_uninit; 3406 return 0; 3407 3408 out_ucontrol_uninit: 3409 if (kvm_is_ucontrol(vcpu->kvm)) 3410 gmap_remove(vcpu->arch.gmap); 3411 out_free_sie_block: 3412 free_page((unsigned long)(vcpu->arch.sie_block)); 3413 return rc; 3414 } 3415 3416 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 3417 { 3418 clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask); 3419 return kvm_s390_vcpu_has_irq(vcpu, 0); 3420 } 3421 3422 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) 3423 { 3424 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE); 3425 } 3426 3427 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu) 3428 { 3429 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); 3430 exit_sie(vcpu); 3431 } 3432 3433 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu) 3434 { 3435 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); 3436 } 3437 3438 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu) 3439 { 3440 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20); 3441 exit_sie(vcpu); 3442 } 3443 3444 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu) 3445 { 3446 return atomic_read(&vcpu->arch.sie_block->prog20) & 3447 (PROG_BLOCK_SIE | PROG_REQUEST); 3448 } 3449 3450 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu) 3451 { 3452 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20); 3453 } 3454 3455 /* 3456 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running. 3457 * If the CPU is not running (e.g. waiting as idle) the function will 3458 * return immediately. 
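 * The caller only returns once the PROG_IN_SIE flag in the SIE control block
 * has been cleared, i.e. the cpu has really left SIE.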
*/ 3459 void exit_sie(struct kvm_vcpu *vcpu) 3460 { 3461 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT); 3462 kvm_s390_vsie_kick(vcpu); 3463 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE) 3464 cpu_relax(); 3465 } 3466 3467 /* Kick a guest cpu out of SIE to process a request synchronously */ 3468 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu) 3469 { 3470 __kvm_make_request(req, vcpu); 3471 kvm_s390_vcpu_request(vcpu); 3472 } 3473 3474 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, 3475 unsigned long end) 3476 { 3477 struct kvm *kvm = gmap->private; 3478 struct kvm_vcpu *vcpu; 3479 unsigned long prefix; 3480 unsigned long i; 3481 3482 if (gmap_is_shadow(gmap)) 3483 return; 3484 if (start >= 1UL << 31) 3485 /* We are only interested in prefix pages */ 3486 return; 3487 kvm_for_each_vcpu(i, vcpu, kvm) { 3488 /* match against both prefix pages */ 3489 prefix = kvm_s390_get_prefix(vcpu); 3490 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) { 3491 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx", 3492 start, end); 3493 kvm_s390_sync_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu); 3494 } 3495 } 3496 } 3497 3498 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) 3499 { 3500 /* do not poll with more than halt_poll_max_steal percent of steal time */ 3501 if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >= 3502 READ_ONCE(halt_poll_max_steal)) { 3503 vcpu->stat.halt_no_poll_steal++; 3504 return true; 3505 } 3506 return false; 3507 } 3508 3509 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) 3510 { 3511 /* kvm common code refers to this, but never calls it */ 3512 BUG(); 3513 return 0; 3514 } 3515 3516 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, 3517 struct kvm_one_reg *reg) 3518 { 3519 int r = -EINVAL; 3520 3521 switch (reg->id) { 3522 case KVM_REG_S390_TODPR: 3523 r = put_user(vcpu->arch.sie_block->todpr, 3524 (u32 __user *)reg->addr); 3525 break; 3526 case KVM_REG_S390_EPOCHDIFF: 3527 r = put_user(vcpu->arch.sie_block->epoch, 3528 (u64 __user *)reg->addr); 3529 break; 3530 case KVM_REG_S390_CPU_TIMER: 3531 r = put_user(kvm_s390_get_cpu_timer(vcpu), 3532 (u64 __user *)reg->addr); 3533 break; 3534 case KVM_REG_S390_CLOCK_COMP: 3535 r = put_user(vcpu->arch.sie_block->ckc, 3536 (u64 __user *)reg->addr); 3537 break; 3538 case KVM_REG_S390_PFTOKEN: 3539 r = put_user(vcpu->arch.pfault_token, 3540 (u64 __user *)reg->addr); 3541 break; 3542 case KVM_REG_S390_PFCOMPARE: 3543 r = put_user(vcpu->arch.pfault_compare, 3544 (u64 __user *)reg->addr); 3545 break; 3546 case KVM_REG_S390_PFSELECT: 3547 r = put_user(vcpu->arch.pfault_select, 3548 (u64 __user *)reg->addr); 3549 break; 3550 case KVM_REG_S390_PP: 3551 r = put_user(vcpu->arch.sie_block->pp, 3552 (u64 __user *)reg->addr); 3553 break; 3554 case KVM_REG_S390_GBEA: 3555 r = put_user(vcpu->arch.sie_block->gbea, 3556 (u64 __user *)reg->addr); 3557 break; 3558 default: 3559 break; 3560 } 3561 3562 return r; 3563 } 3564 3565 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, 3566 struct kvm_one_reg *reg) 3567 { 3568 int r = -EINVAL; 3569 __u64 val; 3570 3571 switch (reg->id) { 3572 case KVM_REG_S390_TODPR: 3573 r = get_user(vcpu->arch.sie_block->todpr, 3574 (u32 __user *)reg->addr); 3575 break; 3576 case KVM_REG_S390_EPOCHDIFF: 3577 r = get_user(vcpu->arch.sie_block->epoch, 3578 (u64 __user *)reg->addr); 3579 break; 3580 case KVM_REG_S390_CPU_TIMER: 3581 r = get_user(val, (u64 __user *)reg->addr); 3582 if (!r) 3583 kvm_s390_set_cpu_timer(vcpu, val); 3584 break; 3585 case 
KVM_REG_S390_CLOCK_COMP: 3586 r = get_user(vcpu->arch.sie_block->ckc, 3587 (u64 __user *)reg->addr); 3588 break; 3589 case KVM_REG_S390_PFTOKEN: 3590 r = get_user(vcpu->arch.pfault_token, 3591 (u64 __user *)reg->addr); 3592 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 3593 kvm_clear_async_pf_completion_queue(vcpu); 3594 break; 3595 case KVM_REG_S390_PFCOMPARE: 3596 r = get_user(vcpu->arch.pfault_compare, 3597 (u64 __user *)reg->addr); 3598 break; 3599 case KVM_REG_S390_PFSELECT: 3600 r = get_user(vcpu->arch.pfault_select, 3601 (u64 __user *)reg->addr); 3602 break; 3603 case KVM_REG_S390_PP: 3604 r = get_user(vcpu->arch.sie_block->pp, 3605 (u64 __user *)reg->addr); 3606 break; 3607 case KVM_REG_S390_GBEA: 3608 r = get_user(vcpu->arch.sie_block->gbea, 3609 (u64 __user *)reg->addr); 3610 break; 3611 default: 3612 break; 3613 } 3614 3615 return r; 3616 } 3617 3618 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu) 3619 { 3620 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI; 3621 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 3622 memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb)); 3623 3624 kvm_clear_async_pf_completion_queue(vcpu); 3625 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) 3626 kvm_s390_vcpu_stop(vcpu); 3627 kvm_s390_clear_local_irqs(vcpu); 3628 } 3629 3630 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) 3631 { 3632 /* Initial reset is a superset of the normal reset */ 3633 kvm_arch_vcpu_ioctl_normal_reset(vcpu); 3634 3635 /* 3636 * This equals initial cpu reset in pop, but we don't switch to ESA. 3637 * We do not only reset the internal data, but also ... 3638 */ 3639 vcpu->arch.sie_block->gpsw.mask = 0; 3640 vcpu->arch.sie_block->gpsw.addr = 0; 3641 kvm_s390_set_prefix(vcpu, 0); 3642 kvm_s390_set_cpu_timer(vcpu, 0); 3643 vcpu->arch.sie_block->ckc = 0; 3644 memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr)); 3645 vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK; 3646 vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK; 3647 3648 /* ... the data in sync regs */ 3649 memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs)); 3650 vcpu->run->s.regs.ckc = 0; 3651 vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK; 3652 vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK; 3653 vcpu->run->psw_addr = 0; 3654 vcpu->run->psw_mask = 0; 3655 vcpu->run->s.regs.todpr = 0; 3656 vcpu->run->s.regs.cputm = 0; 3657 vcpu->run->s.regs.ckc = 0; 3658 vcpu->run->s.regs.pp = 0; 3659 vcpu->run->s.regs.gbea = 1; 3660 vcpu->run->s.regs.fpc = 0; 3661 /* 3662 * Do not reset these registers in the protected case, as some of 3663 * them are overlayed and they are not accessible in this case 3664 * anyway. 
3665 */ 3666 if (!kvm_s390_pv_cpu_is_protected(vcpu)) { 3667 vcpu->arch.sie_block->gbea = 1; 3668 vcpu->arch.sie_block->pp = 0; 3669 vcpu->arch.sie_block->fpf &= ~FPF_BPBC; 3670 vcpu->arch.sie_block->todpr = 0; 3671 } 3672 } 3673 3674 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu) 3675 { 3676 struct kvm_sync_regs *regs = &vcpu->run->s.regs; 3677 3678 /* Clear reset is a superset of the initial reset */ 3679 kvm_arch_vcpu_ioctl_initial_reset(vcpu); 3680 3681 memset(&regs->gprs, 0, sizeof(regs->gprs)); 3682 memset(&regs->vrs, 0, sizeof(regs->vrs)); 3683 memset(&regs->acrs, 0, sizeof(regs->acrs)); 3684 memset(&regs->gscb, 0, sizeof(regs->gscb)); 3685 3686 regs->etoken = 0; 3687 regs->etoken_extension = 0; 3688 } 3689 3690 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 3691 { 3692 vcpu_load(vcpu); 3693 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs)); 3694 vcpu_put(vcpu); 3695 return 0; 3696 } 3697 3698 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 3699 { 3700 vcpu_load(vcpu); 3701 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs)); 3702 vcpu_put(vcpu); 3703 return 0; 3704 } 3705 3706 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 3707 struct kvm_sregs *sregs) 3708 { 3709 vcpu_load(vcpu); 3710 3711 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs)); 3712 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs)); 3713 3714 vcpu_put(vcpu); 3715 return 0; 3716 } 3717 3718 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, 3719 struct kvm_sregs *sregs) 3720 { 3721 vcpu_load(vcpu); 3722 3723 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs)); 3724 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs)); 3725 3726 vcpu_put(vcpu); 3727 return 0; 3728 } 3729 3730 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 3731 { 3732 int ret = 0; 3733 3734 vcpu_load(vcpu); 3735 3736 if (test_fp_ctl(fpu->fpc)) { 3737 ret = -EINVAL; 3738 goto out; 3739 } 3740 vcpu->run->s.regs.fpc = fpu->fpc; 3741 if (MACHINE_HAS_VX) 3742 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs, 3743 (freg_t *) fpu->fprs); 3744 else 3745 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs)); 3746 3747 out: 3748 vcpu_put(vcpu); 3749 return ret; 3750 } 3751 3752 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 3753 { 3754 vcpu_load(vcpu); 3755 3756 /* make sure we have the latest values */ 3757 save_fpu_regs(); 3758 if (MACHINE_HAS_VX) 3759 convert_vx_to_fp((freg_t *) fpu->fprs, 3760 (__vector128 *) vcpu->run->s.regs.vrs); 3761 else 3762 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs)); 3763 fpu->fpc = vcpu->run->s.regs.fpc; 3764 3765 vcpu_put(vcpu); 3766 return 0; 3767 } 3768 3769 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw) 3770 { 3771 int rc = 0; 3772 3773 if (!is_vcpu_stopped(vcpu)) 3774 rc = -EBUSY; 3775 else { 3776 vcpu->run->psw_mask = psw.mask; 3777 vcpu->run->psw_addr = psw.addr; 3778 } 3779 return rc; 3780 } 3781 3782 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, 3783 struct kvm_translation *tr) 3784 { 3785 return -EINVAL; /* not implemented yet */ 3786 } 3787 3788 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \ 3789 KVM_GUESTDBG_USE_HW_BP | \ 3790 KVM_GUESTDBG_ENABLE) 3791 3792 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, 3793 struct kvm_guest_debug *dbg) 3794 { 3795 int rc = 0; 3796 3797 vcpu_load(vcpu); 3798
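/* Clear any previously configured debug state first, so that a request rejected by the checks below leaves guest debugging fully disabled. */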
3799 vcpu->guest_debug = 0; 3800 kvm_s390_clear_bp_data(vcpu); 3801 3802 if (dbg->control & ~VALID_GUESTDBG_FLAGS) { 3803 rc = -EINVAL; 3804 goto out; 3805 } 3806 if (!sclp.has_gpere) { 3807 rc = -EINVAL; 3808 goto out; 3809 } 3810 3811 if (dbg->control & KVM_GUESTDBG_ENABLE) { 3812 vcpu->guest_debug = dbg->control; 3813 /* enforce guest PER */ 3814 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P); 3815 3816 if (dbg->control & KVM_GUESTDBG_USE_HW_BP) 3817 rc = kvm_s390_import_bp_data(vcpu, dbg); 3818 } else { 3819 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P); 3820 vcpu->arch.guestdbg.last_bp = 0; 3821 } 3822 3823 if (rc) { 3824 vcpu->guest_debug = 0; 3825 kvm_s390_clear_bp_data(vcpu); 3826 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P); 3827 } 3828 3829 out: 3830 vcpu_put(vcpu); 3831 return rc; 3832 } 3833 3834 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, 3835 struct kvm_mp_state *mp_state) 3836 { 3837 int ret; 3838 3839 vcpu_load(vcpu); 3840 3841 /* CHECK_STOP and LOAD are not supported yet */ 3842 ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED : 3843 KVM_MP_STATE_OPERATING; 3844 3845 vcpu_put(vcpu); 3846 return ret; 3847 } 3848 3849 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, 3850 struct kvm_mp_state *mp_state) 3851 { 3852 int rc = 0; 3853 3854 vcpu_load(vcpu); 3855 3856 /* user space knows about this interface - let it control the state */ 3857 kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm); 3858 3859 switch (mp_state->mp_state) { 3860 case KVM_MP_STATE_STOPPED: 3861 rc = kvm_s390_vcpu_stop(vcpu); 3862 break; 3863 case KVM_MP_STATE_OPERATING: 3864 rc = kvm_s390_vcpu_start(vcpu); 3865 break; 3866 case KVM_MP_STATE_LOAD: 3867 if (!kvm_s390_pv_cpu_is_protected(vcpu)) { 3868 rc = -ENXIO; 3869 break; 3870 } 3871 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD); 3872 break; 3873 case KVM_MP_STATE_CHECK_STOP: 3874 fallthrough; /* CHECK_STOP and LOAD are not supported yet */ 3875 default: 3876 rc = -ENXIO; 3877 } 3878 3879 vcpu_put(vcpu); 3880 return rc; 3881 } 3882 3883 static bool ibs_enabled(struct kvm_vcpu *vcpu) 3884 { 3885 return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS); 3886 } 3887 3888 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) 3889 { 3890 retry: 3891 kvm_s390_vcpu_request_handled(vcpu); 3892 if (!kvm_request_pending(vcpu)) 3893 return 0; 3894 /* 3895 * If the guest prefix changed, re-arm the ipte notifier for the 3896 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock. 3897 * This ensures that the ipte instruction for this request has 3898 * already finished. We might race against a second unmapper that 3899 * wants to set the blocking bit. Lets just retry the request loop. 
3900 */ 3901 if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) { 3902 int rc; 3903 rc = gmap_mprotect_notify(vcpu->arch.gmap, 3904 kvm_s390_get_prefix(vcpu), 3905 PAGE_SIZE * 2, PROT_WRITE); 3906 if (rc) { 3907 kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu); 3908 return rc; 3909 } 3910 goto retry; 3911 } 3912 3913 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) { 3914 vcpu->arch.sie_block->ihcpu = 0xffff; 3915 goto retry; 3916 } 3917 3918 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) { 3919 if (!ibs_enabled(vcpu)) { 3920 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1); 3921 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS); 3922 } 3923 goto retry; 3924 } 3925 3926 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) { 3927 if (ibs_enabled(vcpu)) { 3928 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0); 3929 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS); 3930 } 3931 goto retry; 3932 } 3933 3934 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) { 3935 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC; 3936 goto retry; 3937 } 3938 3939 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) { 3940 /* 3941 * Disable CMM virtualization; we will emulate the ESSA 3942 * instruction manually, in order to provide additional 3943 * functionalities needed for live migration. 3944 */ 3945 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA; 3946 goto retry; 3947 } 3948 3949 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) { 3950 /* 3951 * Re-enable CMM virtualization if CMMA is available and 3952 * CMM has been used. 3953 */ 3954 if ((vcpu->kvm->arch.use_cmma) && 3955 (vcpu->kvm->mm->context.uses_cmm)) 3956 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA; 3957 goto retry; 3958 } 3959 3960 /* nothing to do, just clear the request */ 3961 kvm_clear_request(KVM_REQ_UNHALT, vcpu); 3962 /* we left the vsie handler, nothing to do, just clear the request */ 3963 kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu); 3964 3965 return 0; 3966 } 3967 3968 static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) 3969 { 3970 struct kvm_vcpu *vcpu; 3971 union tod_clock clk; 3972 unsigned long i; 3973 3974 preempt_disable(); 3975 3976 store_tod_clock_ext(&clk); 3977 3978 kvm->arch.epoch = gtod->tod - clk.tod; 3979 kvm->arch.epdx = 0; 3980 if (test_kvm_facility(kvm, 139)) { 3981 kvm->arch.epdx = gtod->epoch_idx - clk.ei; 3982 if (kvm->arch.epoch > gtod->tod) 3983 kvm->arch.epdx -= 1; 3984 } 3985 3986 kvm_s390_vcpu_block_all(kvm); 3987 kvm_for_each_vcpu(i, vcpu, kvm) { 3988 vcpu->arch.sie_block->epoch = kvm->arch.epoch; 3989 vcpu->arch.sie_block->epdx = kvm->arch.epdx; 3990 } 3991 3992 kvm_s390_vcpu_unblock_all(kvm); 3993 preempt_enable(); 3994 } 3995 3996 void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) 3997 { 3998 mutex_lock(&kvm->lock); 3999 __kvm_s390_set_tod_clock(kvm, gtod); 4000 mutex_unlock(&kvm->lock); 4001 } 4002 4003 int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) 4004 { 4005 if (!mutex_trylock(&kvm->lock)) 4006 return 0; 4007 __kvm_s390_set_tod_clock(kvm, gtod); 4008 mutex_unlock(&kvm->lock); 4009 return 1; 4010 } 4011 4012 /** 4013 * kvm_arch_fault_in_page - fault-in guest page if necessary 4014 * @vcpu: The corresponding virtual cpu 4015 * @gpa: Guest physical address 4016 * @writable: Whether the page should be writable or not 4017 * 4018 * Make sure that a guest page has been faulted-in on the host. 4019 * 4020 * Return: Zero on success, negative error code otherwise. 
4021 */ 4022 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable) 4023 { 4024 return gmap_fault(vcpu->arch.gmap, gpa, 4025 writable ? FAULT_FLAG_WRITE : 0); 4026 } 4027 4028 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token, 4029 unsigned long token) 4030 { 4031 struct kvm_s390_interrupt inti; 4032 struct kvm_s390_irq irq; 4033 4034 if (start_token) { 4035 irq.u.ext.ext_params2 = token; 4036 irq.type = KVM_S390_INT_PFAULT_INIT; 4037 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq)); 4038 } else { 4039 inti.type = KVM_S390_INT_PFAULT_DONE; 4040 inti.parm64 = token; 4041 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti)); 4042 } 4043 } 4044 4045 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, 4046 struct kvm_async_pf *work) 4047 { 4048 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token); 4049 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token); 4050 4051 return true; 4052 } 4053 4054 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, 4055 struct kvm_async_pf *work) 4056 { 4057 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token); 4058 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token); 4059 } 4060 4061 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, 4062 struct kvm_async_pf *work) 4063 { 4064 /* s390 will always inject the page directly */ 4065 } 4066 4067 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu) 4068 { 4069 /* 4070 * s390 will always inject the page directly, 4071 * but we still want check_async_completion to cleanup 4072 */ 4073 return true; 4074 } 4075 4076 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu) 4077 { 4078 hva_t hva; 4079 struct kvm_arch_async_pf arch; 4080 4081 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 4082 return false; 4083 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) != 4084 vcpu->arch.pfault_compare) 4085 return false; 4086 if (psw_extint_disabled(vcpu)) 4087 return false; 4088 if (kvm_s390_vcpu_has_irq(vcpu, 0)) 4089 return false; 4090 if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK)) 4091 return false; 4092 if (!vcpu->arch.gmap->pfault_enabled) 4093 return false; 4094 4095 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr)); 4096 hva += current->thread.gmap_addr & ~PAGE_MASK; 4097 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8)) 4098 return false; 4099 4100 return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch); 4101 } 4102 4103 static int vcpu_pre_run(struct kvm_vcpu *vcpu) 4104 { 4105 int rc, cpuflags; 4106 4107 /* 4108 * On s390 notifications for arriving pages will be delivered directly 4109 * to the guest but the house keeping for completed pfaults is 4110 * handled outside the worker. 
4111 */ 4112 kvm_check_async_pf_completion(vcpu); 4113 4114 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14]; 4115 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15]; 4116 4117 if (need_resched()) 4118 schedule(); 4119 4120 if (!kvm_is_ucontrol(vcpu->kvm)) { 4121 rc = kvm_s390_deliver_pending_interrupts(vcpu); 4122 if (rc) 4123 return rc; 4124 } 4125 4126 rc = kvm_s390_handle_requests(vcpu); 4127 if (rc) 4128 return rc; 4129 4130 if (guestdbg_enabled(vcpu)) { 4131 kvm_s390_backup_guest_per_regs(vcpu); 4132 kvm_s390_patch_guest_per_regs(vcpu); 4133 } 4134 4135 clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask); 4136 4137 vcpu->arch.sie_block->icptcode = 0; 4138 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags); 4139 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags); 4140 trace_kvm_s390_sie_enter(vcpu, cpuflags); 4141 4142 return 0; 4143 } 4144 4145 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) 4146 { 4147 struct kvm_s390_pgm_info pgm_info = { 4148 .code = PGM_ADDRESSING, 4149 }; 4150 u8 opcode, ilen; 4151 int rc; 4152 4153 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); 4154 trace_kvm_s390_sie_fault(vcpu); 4155 4156 /* 4157 * We want to inject an addressing exception, which is defined as a 4158 * suppressing or terminating exception. However, since we came here 4159 * by a DAT access exception, the PSW still points to the faulting 4160 * instruction since DAT exceptions are nullifying. So we've got 4161 * to look up the current opcode to get the length of the instruction 4162 * to be able to forward the PSW. 4163 */ 4164 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1); 4165 ilen = insn_length(opcode); 4166 if (rc < 0) { 4167 return rc; 4168 } else if (rc) { 4169 /* Instruction-Fetching Exceptions - we can't detect the ilen. 4170 * Forward by arbitrary ilc, injection will take care of 4171 * nullification if necessary. 
4172 */ 4173 pgm_info = vcpu->arch.pgm; 4174 ilen = 4; 4175 } 4176 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID; 4177 kvm_s390_forward_psw(vcpu, ilen); 4178 return kvm_s390_inject_prog_irq(vcpu, &pgm_info); 4179 } 4180 4181 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) 4182 { 4183 struct mcck_volatile_info *mcck_info; 4184 struct sie_page *sie_page; 4185 4186 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", 4187 vcpu->arch.sie_block->icptcode); 4188 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); 4189 4190 if (guestdbg_enabled(vcpu)) 4191 kvm_s390_restore_guest_per_regs(vcpu); 4192 4193 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14; 4194 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15; 4195 4196 if (exit_reason == -EINTR) { 4197 VCPU_EVENT(vcpu, 3, "%s", "machine check"); 4198 sie_page = container_of(vcpu->arch.sie_block, 4199 struct sie_page, sie_block); 4200 mcck_info = &sie_page->mcck_info; 4201 kvm_s390_reinject_machine_check(vcpu, mcck_info); 4202 return 0; 4203 } 4204 4205 if (vcpu->arch.sie_block->icptcode > 0) { 4206 int rc = kvm_handle_sie_intercept(vcpu); 4207 4208 if (rc != -EOPNOTSUPP) 4209 return rc; 4210 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC; 4211 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; 4212 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; 4213 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; 4214 return -EREMOTE; 4215 } else if (exit_reason != -EFAULT) { 4216 vcpu->stat.exit_null++; 4217 return 0; 4218 } else if (kvm_is_ucontrol(vcpu->kvm)) { 4219 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; 4220 vcpu->run->s390_ucontrol.trans_exc_code = 4221 current->thread.gmap_addr; 4222 vcpu->run->s390_ucontrol.pgm_code = 0x10; 4223 return -EREMOTE; 4224 } else if (current->thread.gmap_pfault) { 4225 trace_kvm_s390_major_guest_pfault(vcpu); 4226 current->thread.gmap_pfault = 0; 4227 if (kvm_arch_setup_async_pf(vcpu)) 4228 return 0; 4229 vcpu->stat.pfault_sync++; 4230 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1); 4231 } 4232 return vcpu_post_run_fault_in_sie(vcpu); 4233 } 4234 4235 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK) 4236 static int __vcpu_run(struct kvm_vcpu *vcpu) 4237 { 4238 int rc, exit_reason; 4239 struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block; 4240 4241 /* 4242 * We try to hold kvm->srcu during most of vcpu_run (except when run- 4243 * ning the guest), so that memslots (and other stuff) are protected 4244 */ 4245 kvm_vcpu_srcu_read_lock(vcpu); 4246 4247 do { 4248 rc = vcpu_pre_run(vcpu); 4249 if (rc) 4250 break; 4251 4252 kvm_vcpu_srcu_read_unlock(vcpu); 4253 /* 4254 * As PF_VCPU will be used in fault handler, between 4255 * guest_enter and guest_exit should be no uaccess. 4256 */ 4257 local_irq_disable(); 4258 guest_enter_irqoff(); 4259 __disable_cpu_timer_accounting(vcpu); 4260 local_irq_enable(); 4261 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4262 memcpy(sie_page->pv_grregs, 4263 vcpu->run->s.regs.gprs, 4264 sizeof(sie_page->pv_grregs)); 4265 } 4266 if (test_cpu_flag(CIF_FPU)) 4267 load_fpu_regs(); 4268 exit_reason = sie64a(vcpu->arch.sie_block, 4269 vcpu->run->s.regs.gprs); 4270 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4271 memcpy(vcpu->run->s.regs.gprs, 4272 sie_page->pv_grregs, 4273 sizeof(sie_page->pv_grregs)); 4274 /* 4275 * We're not allowed to inject interrupts on intercepts 4276 * that leave the guest state in an "in-between" state 4277 * where the next SIE entry will do a continuation. 
4278 * Fence interrupts in our "internal" PSW. 4279 */ 4280 if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR || 4281 vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) { 4282 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK; 4283 } 4284 } 4285 local_irq_disable(); 4286 __enable_cpu_timer_accounting(vcpu); 4287 guest_exit_irqoff(); 4288 local_irq_enable(); 4289 kvm_vcpu_srcu_read_lock(vcpu); 4290 4291 rc = vcpu_post_run(vcpu, exit_reason); 4292 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc); 4293 4294 kvm_vcpu_srcu_read_unlock(vcpu); 4295 return rc; 4296 } 4297 4298 static void sync_regs_fmt2(struct kvm_vcpu *vcpu) 4299 { 4300 struct kvm_run *kvm_run = vcpu->run; 4301 struct runtime_instr_cb *riccb; 4302 struct gs_cb *gscb; 4303 4304 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb; 4305 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb; 4306 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; 4307 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; 4308 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) { 4309 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr; 4310 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp; 4311 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea; 4312 } 4313 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) { 4314 vcpu->arch.pfault_token = kvm_run->s.regs.pft; 4315 vcpu->arch.pfault_select = kvm_run->s.regs.pfs; 4316 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc; 4317 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 4318 kvm_clear_async_pf_completion_queue(vcpu); 4319 } 4320 if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) { 4321 vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318; 4322 vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc; 4323 VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc); 4324 } 4325 /* 4326 * If userspace sets the riccb (e.g. after migration) to a valid state, 4327 * we should enable RI here instead of doing the lazy enablement. 4328 */ 4329 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) && 4330 test_kvm_facility(vcpu->kvm, 64) && 4331 riccb->v && 4332 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) { 4333 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)"); 4334 vcpu->arch.sie_block->ecb3 |= ECB3_RI; 4335 } 4336 /* 4337 * If userspace sets the gscb (e.g. after migration) to non-zero, 4338 * we should enable GS here instead of doing the lazy enablement. 4339 */ 4340 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) && 4341 test_kvm_facility(vcpu->kvm, 133) && 4342 gscb->gssm && 4343 !vcpu->arch.gs_enabled) { 4344 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)"); 4345 vcpu->arch.sie_block->ecb |= ECB_GS; 4346 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; 4347 vcpu->arch.gs_enabled = 1; 4348 } 4349 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) && 4350 test_kvm_facility(vcpu->kvm, 82)) { 4351 vcpu->arch.sie_block->fpf &= ~FPF_BPBC; 4352 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? 
FPF_BPBC : 0; 4353 } 4354 if (MACHINE_HAS_GS) { 4355 preempt_disable(); 4356 __ctl_set_bit(2, 4); 4357 if (current->thread.gs_cb) { 4358 vcpu->arch.host_gscb = current->thread.gs_cb; 4359 save_gs_cb(vcpu->arch.host_gscb); 4360 } 4361 if (vcpu->arch.gs_enabled) { 4362 current->thread.gs_cb = (struct gs_cb *) 4363 &vcpu->run->s.regs.gscb; 4364 restore_gs_cb(current->thread.gs_cb); 4365 } 4366 preempt_enable(); 4367 } 4368 /* SIE will load etoken directly from SDNX and therefore kvm_run */ 4369 } 4370 4371 static void sync_regs(struct kvm_vcpu *vcpu) 4372 { 4373 struct kvm_run *kvm_run = vcpu->run; 4374 4375 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) 4376 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); 4377 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) { 4378 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128); 4379 /* some control register changes require a tlb flush */ 4380 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 4381 } 4382 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) { 4383 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm); 4384 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc; 4385 } 4386 save_access_regs(vcpu->arch.host_acrs); 4387 restore_access_regs(vcpu->run->s.regs.acrs); 4388 /* save host (userspace) fprs/vrs */ 4389 save_fpu_regs(); 4390 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc; 4391 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs; 4392 if (MACHINE_HAS_VX) 4393 current->thread.fpu.regs = vcpu->run->s.regs.vrs; 4394 else 4395 current->thread.fpu.regs = vcpu->run->s.regs.fprs; 4396 current->thread.fpu.fpc = vcpu->run->s.regs.fpc; 4397 if (test_fp_ctl(current->thread.fpu.fpc)) 4398 /* User space provided an invalid FPC, let's clear it */ 4399 current->thread.fpu.fpc = 0; 4400 4401 /* Sync fmt2 only data */ 4402 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) { 4403 sync_regs_fmt2(vcpu); 4404 } else { 4405 /* 4406 * In several places we have to modify our internal view to 4407 * not do things that are disallowed by the ultravisor. For 4408 * example we must not inject interrupts after specific exits 4409 * (e.g. 112 prefix page not secure). We do this by turning 4410 * off the machine check, external and I/O interrupt bits 4411 * of our PSW copy. To avoid getting validity intercepts, we 4412 * do only accept the condition code from userspace. 
4413 */ 4414 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC; 4415 vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask & 4416 PSW_MASK_CC; 4417 } 4418 4419 kvm_run->kvm_dirty_regs = 0; 4420 } 4421 4422 static void store_regs_fmt2(struct kvm_vcpu *vcpu) 4423 { 4424 struct kvm_run *kvm_run = vcpu->run; 4425 4426 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr; 4427 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp; 4428 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea; 4429 kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC; 4430 kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val; 4431 if (MACHINE_HAS_GS) { 4432 preempt_disable(); 4433 __ctl_set_bit(2, 4); 4434 if (vcpu->arch.gs_enabled) 4435 save_gs_cb(current->thread.gs_cb); 4436 current->thread.gs_cb = vcpu->arch.host_gscb; 4437 restore_gs_cb(vcpu->arch.host_gscb); 4438 if (!vcpu->arch.host_gscb) 4439 __ctl_clear_bit(2, 4); 4440 vcpu->arch.host_gscb = NULL; 4441 preempt_enable(); 4442 } 4443 /* SIE will save etoken directly into SDNX and therefore kvm_run */ 4444 } 4445 4446 static void store_regs(struct kvm_vcpu *vcpu) 4447 { 4448 struct kvm_run *kvm_run = vcpu->run; 4449 4450 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask; 4451 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; 4452 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu); 4453 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128); 4454 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu); 4455 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc; 4456 kvm_run->s.regs.pft = vcpu->arch.pfault_token; 4457 kvm_run->s.regs.pfs = vcpu->arch.pfault_select; 4458 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare; 4459 save_access_regs(vcpu->run->s.regs.acrs); 4460 restore_access_regs(vcpu->arch.host_acrs); 4461 /* Save guest register state */ 4462 save_fpu_regs(); 4463 vcpu->run->s.regs.fpc = current->thread.fpu.fpc; 4464 /* Restore will be done lazily at return */ 4465 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc; 4466 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs; 4467 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) 4468 store_regs_fmt2(vcpu); 4469 } 4470 4471 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) 4472 { 4473 struct kvm_run *kvm_run = vcpu->run; 4474 int rc; 4475 4476 if (kvm_run->immediate_exit) 4477 return -EINTR; 4478 4479 if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS || 4480 kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS) 4481 return -EINVAL; 4482 4483 vcpu_load(vcpu); 4484 4485 if (guestdbg_exit_pending(vcpu)) { 4486 kvm_s390_prepare_debug_exit(vcpu); 4487 rc = 0; 4488 goto out; 4489 } 4490 4491 kvm_sigset_activate(vcpu); 4492 4493 /* 4494 * no need to check the return value of vcpu_start as it can only have 4495 * an error for protvirt, but protvirt means user cpu state 4496 */ 4497 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) { 4498 kvm_s390_vcpu_start(vcpu); 4499 } else if (is_vcpu_stopped(vcpu)) { 4500 pr_err_ratelimited("can't run stopped vcpu %d\n", 4501 vcpu->vcpu_id); 4502 rc = -EINVAL; 4503 goto out; 4504 } 4505 4506 sync_regs(vcpu); 4507 enable_cpu_timer_accounting(vcpu); 4508 4509 might_fault(); 4510 rc = __vcpu_run(vcpu); 4511 4512 if (signal_pending(current) && !rc) { 4513 kvm_run->exit_reason = KVM_EXIT_INTR; 4514 rc = -EINTR; 4515 } 4516 4517 if (guestdbg_exit_pending(vcpu) && !rc) { 4518 kvm_s390_prepare_debug_exit(vcpu); 4519 rc = 0; 4520 } 4521 4522 if (rc == -EREMOTE) { 4523 /* userspace support is needed, kvm_run has been prepared */ 4524 rc = 0; 4525 } 4526 4527 
disable_cpu_timer_accounting(vcpu); 4528 store_regs(vcpu); 4529 4530 kvm_sigset_deactivate(vcpu); 4531 4532 vcpu->stat.exit_userspace++; 4533 out: 4534 vcpu_put(vcpu); 4535 return rc; 4536 } 4537 4538 /* 4539 * store status at address 4540 * we have two special cases: 4541 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit 4542 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix 4543 */ 4544 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa) 4545 { 4546 unsigned char archmode = 1; 4547 freg_t fprs[NUM_FPRS]; 4548 unsigned int px; 4549 u64 clkcomp, cputm; 4550 int rc; 4551 4552 px = kvm_s390_get_prefix(vcpu); 4553 if (gpa == KVM_S390_STORE_STATUS_NOADDR) { 4554 if (write_guest_abs(vcpu, 163, &archmode, 1)) 4555 return -EFAULT; 4556 gpa = 0; 4557 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) { 4558 if (write_guest_real(vcpu, 163, &archmode, 1)) 4559 return -EFAULT; 4560 gpa = px; 4561 } else 4562 gpa -= __LC_FPREGS_SAVE_AREA; 4563 4564 /* manually convert vector registers if necessary */ 4565 if (MACHINE_HAS_VX) { 4566 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs); 4567 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, 4568 fprs, 128); 4569 } else { 4570 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, 4571 vcpu->run->s.regs.fprs, 128); 4572 } 4573 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA, 4574 vcpu->run->s.regs.gprs, 128); 4575 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA, 4576 &vcpu->arch.sie_block->gpsw, 16); 4577 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA, 4578 &px, 4); 4579 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA, 4580 &vcpu->run->s.regs.fpc, 4); 4581 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA, 4582 &vcpu->arch.sie_block->todpr, 4); 4583 cputm = kvm_s390_get_cpu_timer(vcpu); 4584 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA, 4585 &cputm, 8); 4586 clkcomp = vcpu->arch.sie_block->ckc >> 8; 4587 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA, 4588 &clkcomp, 8); 4589 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA, 4590 &vcpu->run->s.regs.acrs, 64); 4591 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA, 4592 &vcpu->arch.sie_block->gcr, 128); 4593 return rc ? -EFAULT : 0; 4594 } 4595 4596 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) 4597 { 4598 /* 4599 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy 4600 switch in the run ioctl.
Let's update our copies before we save 4601 * it into the save area 4602 */ 4603 save_fpu_regs(); 4604 vcpu->run->s.regs.fpc = current->thread.fpu.fpc; 4605 save_access_regs(vcpu->run->s.regs.acrs); 4606 4607 return kvm_s390_store_status_unloaded(vcpu, addr); 4608 } 4609 4610 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu) 4611 { 4612 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu); 4613 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu); 4614 } 4615 4616 static void __disable_ibs_on_all_vcpus(struct kvm *kvm) 4617 { 4618 unsigned long i; 4619 struct kvm_vcpu *vcpu; 4620 4621 kvm_for_each_vcpu(i, vcpu, kvm) { 4622 __disable_ibs_on_vcpu(vcpu); 4623 } 4624 } 4625 4626 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu) 4627 { 4628 if (!sclp.has_ibs) 4629 return; 4630 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu); 4631 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu); 4632 } 4633 4634 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu) 4635 { 4636 int i, online_vcpus, r = 0, started_vcpus = 0; 4637 4638 if (!is_vcpu_stopped(vcpu)) 4639 return 0; 4640 4641 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1); 4642 /* Only one cpu at a time may enter/leave the STOPPED state. */ 4643 spin_lock(&vcpu->kvm->arch.start_stop_lock); 4644 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); 4645 4646 /* Let's tell the UV that we want to change into the operating state */ 4647 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4648 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR); 4649 if (r) { 4650 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 4651 return r; 4652 } 4653 } 4654 4655 for (i = 0; i < online_vcpus; i++) { 4656 if (!is_vcpu_stopped(kvm_get_vcpu(vcpu->kvm, i))) 4657 started_vcpus++; 4658 } 4659 4660 if (started_vcpus == 0) { 4661 /* we're the only active VCPU -> speed it up */ 4662 __enable_ibs_on_vcpu(vcpu); 4663 } else if (started_vcpus == 1) { 4664 /* 4665 * As we are starting a second VCPU, we have to disable 4666 * the IBS facility on all VCPUs to remove potentially 4667 * outstanding ENABLE requests. 4668 */ 4669 __disable_ibs_on_all_vcpus(vcpu->kvm); 4670 } 4671 4672 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED); 4673 /* 4674 * The real PSW might have changed due to a RESTART interpreted by the 4675 * ultravisor. We block all interrupts and let the next sie exit 4676 * refresh our view. 4677 */ 4678 if (kvm_s390_pv_cpu_is_protected(vcpu)) 4679 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK; 4680 /* 4681 * Another VCPU might have used IBS while we were offline. 4682 * Let's play safe and flush the VCPU at startup. 4683 */ 4684 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 4685 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 4686 return 0; 4687 } 4688 4689 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu) 4690 { 4691 int i, online_vcpus, r = 0, started_vcpus = 0; 4692 struct kvm_vcpu *started_vcpu = NULL; 4693 4694 if (is_vcpu_stopped(vcpu)) 4695 return 0; 4696 4697 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0); 4698 /* Only one cpu at a time may enter/leave the STOPPED state. 
*/ 4699 spin_lock(&vcpu->kvm->arch.start_stop_lock); 4700 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); 4701 4702 /* Let's tell the UV that we want to change into the stopped state */ 4703 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4704 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP); 4705 if (r) { 4706 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 4707 return r; 4708 } 4709 } 4710 4711 /* 4712 * Set the VCPU to STOPPED and THEN clear the interrupt flag, 4713 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders 4714 * have been fully processed. This will ensure that the VCPU 4715 * is kept BUSY if another VCPU is inquiring with SIGP SENSE. 4716 */ 4717 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED); 4718 kvm_s390_clear_stop_irq(vcpu); 4719 4720 __disable_ibs_on_vcpu(vcpu); 4721 4722 for (i = 0; i < online_vcpus; i++) { 4723 struct kvm_vcpu *tmp = kvm_get_vcpu(vcpu->kvm, i); 4724 4725 if (!is_vcpu_stopped(tmp)) { 4726 started_vcpus++; 4727 started_vcpu = tmp; 4728 } 4729 } 4730 4731 if (started_vcpus == 1) { 4732 /* 4733 * As we only have one VCPU left, we want to enable the 4734 * IBS facility for that VCPU to speed it up. 4735 */ 4736 __enable_ibs_on_vcpu(started_vcpu); 4737 } 4738 4739 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 4740 return 0; 4741 } 4742 4743 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, 4744 struct kvm_enable_cap *cap) 4745 { 4746 int r; 4747 4748 if (cap->flags) 4749 return -EINVAL; 4750 4751 switch (cap->cap) { 4752 case KVM_CAP_S390_CSS_SUPPORT: 4753 if (!vcpu->kvm->arch.css_support) { 4754 vcpu->kvm->arch.css_support = 1; 4755 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support"); 4756 trace_kvm_s390_enable_css(vcpu->kvm); 4757 } 4758 r = 0; 4759 break; 4760 default: 4761 r = -EINVAL; 4762 break; 4763 } 4764 return r; 4765 } 4766 4767 static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu, 4768 struct kvm_s390_mem_op *mop) 4769 { 4770 void __user *uaddr = (void __user *)mop->buf; 4771 int r = 0; 4772 4773 if (mop->flags || !mop->size) 4774 return -EINVAL; 4775 if (mop->size + mop->sida_offset < mop->size) 4776 return -EINVAL; 4777 if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block)) 4778 return -E2BIG; 4779 if (!kvm_s390_pv_cpu_is_protected(vcpu)) 4780 return -EINVAL; 4781 4782 switch (mop->op) { 4783 case KVM_S390_MEMOP_SIDA_READ: 4784 if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) + 4785 mop->sida_offset), mop->size)) 4786 r = -EFAULT; 4787 4788 break; 4789 case KVM_S390_MEMOP_SIDA_WRITE: 4790 if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) + 4791 mop->sida_offset), uaddr, mop->size)) 4792 r = -EFAULT; 4793 break; 4794 } 4795 return r; 4796 } 4797 4798 static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu, 4799 struct kvm_s390_mem_op *mop) 4800 { 4801 void __user *uaddr = (void __user *)mop->buf; 4802 void *tmpbuf = NULL; 4803 int r = 0; 4804 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION 4805 | KVM_S390_MEMOP_F_CHECK_ONLY 4806 | KVM_S390_MEMOP_F_SKEY_PROTECTION; 4807 4808 if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size) 4809 return -EINVAL; 4810 if (mop->size > MEM_OP_MAX_SIZE) 4811 return -E2BIG; 4812 if (kvm_s390_pv_cpu_is_protected(vcpu)) 4813 return -EINVAL; 4814 if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) { 4815 if (access_key_invalid(mop->key)) 4816 return -EINVAL; 4817 } else { 4818 mop->key = 0; 4819 } 4820 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) { 4821 tmpbuf = vmalloc(mop->size); 4822 if (!tmpbuf) 4823 return 
-ENOMEM; 4824 } 4825 4826 switch (mop->op) { 4827 case KVM_S390_MEMOP_LOGICAL_READ: 4828 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { 4829 r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, 4830 GACC_FETCH, mop->key); 4831 break; 4832 } 4833 r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf, 4834 mop->size, mop->key); 4835 if (r == 0) { 4836 if (copy_to_user(uaddr, tmpbuf, mop->size)) 4837 r = -EFAULT; 4838 } 4839 break; 4840 case KVM_S390_MEMOP_LOGICAL_WRITE: 4841 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { 4842 r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, 4843 GACC_STORE, mop->key); 4844 break; 4845 } 4846 if (copy_from_user(tmpbuf, uaddr, mop->size)) { 4847 r = -EFAULT; 4848 break; 4849 } 4850 r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf, 4851 mop->size, mop->key); 4852 break; 4853 } 4854 4855 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0) 4856 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); 4857 4858 vfree(tmpbuf); 4859 return r; 4860 } 4861 4862 static long kvm_s390_vcpu_memsida_op(struct kvm_vcpu *vcpu, 4863 struct kvm_s390_mem_op *mop) 4864 { 4865 int r, srcu_idx; 4866 4867 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 4868 4869 switch (mop->op) { 4870 case KVM_S390_MEMOP_LOGICAL_READ: 4871 case KVM_S390_MEMOP_LOGICAL_WRITE: 4872 r = kvm_s390_vcpu_mem_op(vcpu, mop); 4873 break; 4874 case KVM_S390_MEMOP_SIDA_READ: 4875 case KVM_S390_MEMOP_SIDA_WRITE: 4876 /* we are locked against sida going away by the vcpu->mutex */ 4877 r = kvm_s390_vcpu_sida_op(vcpu, mop); 4878 break; 4879 default: 4880 r = -EINVAL; 4881 } 4882 4883 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); 4884 return r; 4885 } 4886 4887 long kvm_arch_vcpu_async_ioctl(struct file *filp, 4888 unsigned int ioctl, unsigned long arg) 4889 { 4890 struct kvm_vcpu *vcpu = filp->private_data; 4891 void __user *argp = (void __user *)arg; 4892 4893 switch (ioctl) { 4894 case KVM_S390_IRQ: { 4895 struct kvm_s390_irq s390irq; 4896 4897 if (copy_from_user(&s390irq, argp, sizeof(s390irq))) 4898 return -EFAULT; 4899 return kvm_s390_inject_vcpu(vcpu, &s390irq); 4900 } 4901 case KVM_S390_INTERRUPT: { 4902 struct kvm_s390_interrupt s390int; 4903 struct kvm_s390_irq s390irq = {}; 4904 4905 if (copy_from_user(&s390int, argp, sizeof(s390int))) 4906 return -EFAULT; 4907 if (s390int_to_s390irq(&s390int, &s390irq)) 4908 return -EINVAL; 4909 return kvm_s390_inject_vcpu(vcpu, &s390irq); 4910 } 4911 } 4912 return -ENOIOCTLCMD; 4913 } 4914 4915 long kvm_arch_vcpu_ioctl(struct file *filp, 4916 unsigned int ioctl, unsigned long arg) 4917 { 4918 struct kvm_vcpu *vcpu = filp->private_data; 4919 void __user *argp = (void __user *)arg; 4920 int idx; 4921 long r; 4922 u16 rc, rrc; 4923 4924 vcpu_load(vcpu); 4925 4926 switch (ioctl) { 4927 case KVM_S390_STORE_STATUS: 4928 idx = srcu_read_lock(&vcpu->kvm->srcu); 4929 r = kvm_s390_store_status_unloaded(vcpu, arg); 4930 srcu_read_unlock(&vcpu->kvm->srcu, idx); 4931 break; 4932 case KVM_S390_SET_INITIAL_PSW: { 4933 psw_t psw; 4934 4935 r = -EFAULT; 4936 if (copy_from_user(&psw, argp, sizeof(psw))) 4937 break; 4938 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw); 4939 break; 4940 } 4941 case KVM_S390_CLEAR_RESET: 4942 r = 0; 4943 kvm_arch_vcpu_ioctl_clear_reset(vcpu); 4944 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4945 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), 4946 UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc); 4947 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x", 4948 rc, rrc); 4949 } 4950 break; 4951 case KVM_S390_INITIAL_RESET: 
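/* For the reset ioctls, KVM resets its own view of the vcpu first; for protected guests the Ultravisor is then asked to perform the matching reset on the secure CPU. */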
4952 r = 0; 4953 kvm_arch_vcpu_ioctl_initial_reset(vcpu); 4954 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4955 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), 4956 UVC_CMD_CPU_RESET_INITIAL, 4957 &rc, &rrc); 4958 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x", 4959 rc, rrc); 4960 } 4961 break; 4962 case KVM_S390_NORMAL_RESET: 4963 r = 0; 4964 kvm_arch_vcpu_ioctl_normal_reset(vcpu); 4965 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4966 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), 4967 UVC_CMD_CPU_RESET, &rc, &rrc); 4968 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x", 4969 rc, rrc); 4970 } 4971 break; 4972 case KVM_SET_ONE_REG: 4973 case KVM_GET_ONE_REG: { 4974 struct kvm_one_reg reg; 4975 r = -EINVAL; 4976 if (kvm_s390_pv_cpu_is_protected(vcpu)) 4977 break; 4978 r = -EFAULT; 4979 if (copy_from_user(&reg, argp, sizeof(reg))) 4980 break; 4981 if (ioctl == KVM_SET_ONE_REG) 4982 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg); 4983 else 4984 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg); 4985 break; 4986 } 4987 #ifdef CONFIG_KVM_S390_UCONTROL 4988 case KVM_S390_UCAS_MAP: { 4989 struct kvm_s390_ucas_mapping ucasmap; 4990 4991 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) { 4992 r = -EFAULT; 4993 break; 4994 } 4995 4996 if (!kvm_is_ucontrol(vcpu->kvm)) { 4997 r = -EINVAL; 4998 break; 4999 } 5000 5001 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr, 5002 ucasmap.vcpu_addr, ucasmap.length); 5003 break; 5004 } 5005 case KVM_S390_UCAS_UNMAP: { 5006 struct kvm_s390_ucas_mapping ucasmap; 5007 5008 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) { 5009 r = -EFAULT; 5010 break; 5011 } 5012 5013 if (!kvm_is_ucontrol(vcpu->kvm)) { 5014 r = -EINVAL; 5015 break; 5016 } 5017 5018 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr, 5019 ucasmap.length); 5020 break; 5021 } 5022 #endif 5023 case KVM_S390_VCPU_FAULT: { 5024 r = gmap_fault(vcpu->arch.gmap, arg, 0); 5025 break; 5026 } 5027 case KVM_ENABLE_CAP: 5028 { 5029 struct kvm_enable_cap cap; 5030 r = -EFAULT; 5031 if (copy_from_user(&cap, argp, sizeof(cap))) 5032 break; 5033 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); 5034 break; 5035 } 5036 case KVM_S390_MEM_OP: { 5037 struct kvm_s390_mem_op mem_op; 5038 5039 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0) 5040 r = kvm_s390_vcpu_memsida_op(vcpu, &mem_op); 5041 else 5042 r = -EFAULT; 5043 break; 5044 } 5045 case KVM_S390_SET_IRQ_STATE: { 5046 struct kvm_s390_irq_state irq_state; 5047 5048 r = -EFAULT; 5049 if (copy_from_user(&irq_state, argp, sizeof(irq_state))) 5050 break; 5051 if (irq_state.len > VCPU_IRQS_MAX_BUF || 5052 irq_state.len == 0 || 5053 irq_state.len % sizeof(struct kvm_s390_irq) > 0) { 5054 r = -EINVAL; 5055 break; 5056 } 5057 /* do not use irq_state.flags, it will break old QEMUs */ 5058 r = kvm_s390_set_irq_state(vcpu, 5059 (void __user *) irq_state.buf, 5060 irq_state.len); 5061 break; 5062 } 5063 case KVM_S390_GET_IRQ_STATE: { 5064 struct kvm_s390_irq_state irq_state; 5065 5066 r = -EFAULT; 5067 if (copy_from_user(&irq_state, argp, sizeof(irq_state))) 5068 break; 5069 if (irq_state.len == 0) { 5070 r = -EINVAL; 5071 break; 5072 } 5073 /* do not use irq_state.flags, it will break old QEMUs */ 5074 r = kvm_s390_get_irq_state(vcpu, 5075 (__u8 __user *) irq_state.buf, 5076 irq_state.len); 5077 break; 5078 } 5079 default: 5080 r = -ENOTTY; 5081 } 5082 5083 vcpu_put(vcpu); 5084 return r; 5085 } 5086 5087 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) 5088 { 5089 #ifdef CONFIG_KVM_S390_UCONTROL 5090
if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET) 5091 && (kvm_is_ucontrol(vcpu->kvm))) { 5092 vmf->page = virt_to_page(vcpu->arch.sie_block); 5093 get_page(vmf->page); 5094 return 0; 5095 } 5096 #endif 5097 return VM_FAULT_SIGBUS; 5098 } 5099 5100 /* Section: memory related */ 5101 int kvm_arch_prepare_memory_region(struct kvm *kvm, 5102 const struct kvm_memory_slot *old, 5103 struct kvm_memory_slot *new, 5104 enum kvm_mr_change change) 5105 { 5106 gpa_t size; 5107 5108 /* When we are protected, we should not change the memory slots */ 5109 if (kvm_s390_pv_get_handle(kvm)) 5110 return -EINVAL; 5111 5112 if (change == KVM_MR_DELETE || change == KVM_MR_FLAGS_ONLY) 5113 return 0; 5114 5115 /* A few sanity checks. We can have memory slots which have to be 5116 located/ended at a segment boundary (1MB). The memory in userland is 5117 ok to be fragmented into various different vmas. It is okay to mmap() 5118 and munmap() stuff in this slot after doing this call at any time */ 5119 5120 if (new->userspace_addr & 0xffffful) 5121 return -EINVAL; 5122 5123 size = new->npages * PAGE_SIZE; 5124 if (size & 0xffffful) 5125 return -EINVAL; 5126 5127 if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit) 5128 return -EINVAL; 5129 5130 return 0; 5131 } 5132 5133 void kvm_arch_commit_memory_region(struct kvm *kvm, 5134 struct kvm_memory_slot *old, 5135 const struct kvm_memory_slot *new, 5136 enum kvm_mr_change change) 5137 { 5138 int rc = 0; 5139 5140 switch (change) { 5141 case KVM_MR_DELETE: 5142 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE, 5143 old->npages * PAGE_SIZE); 5144 break; 5145 case KVM_MR_MOVE: 5146 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE, 5147 old->npages * PAGE_SIZE); 5148 if (rc) 5149 break; 5150 fallthrough; 5151 case KVM_MR_CREATE: 5152 rc = gmap_map_segment(kvm->arch.gmap, new->userspace_addr, 5153 new->base_gfn * PAGE_SIZE, 5154 new->npages * PAGE_SIZE); 5155 break; 5156 case KVM_MR_FLAGS_ONLY: 5157 break; 5158 default: 5159 WARN(1, "Unknown KVM MR CHANGE: %d\n", change); 5160 } 5161 if (rc) 5162 pr_warn("failed to commit memory region\n"); 5163 return; 5164 } 5165 5166 static inline unsigned long nonhyp_mask(int i) 5167 { 5168 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30; 5169 5170 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4); 5171 } 5172 5173 static int __init kvm_s390_init(void) 5174 { 5175 int i; 5176 5177 if (!sclp.has_sief2) { 5178 pr_info("SIE is not available\n"); 5179 return -ENODEV; 5180 } 5181 5182 if (nested && hpage) { 5183 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n"); 5184 return -EINVAL; 5185 } 5186 5187 for (i = 0; i < 16; i++) 5188 kvm_s390_fac_base[i] |= 5189 stfle_fac_list[i] & nonhyp_mask(i); 5190 5191 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); 5192 } 5193 5194 static void __exit kvm_s390_exit(void) 5195 { 5196 kvm_exit(); 5197 } 5198 5199 module_init(kvm_s390_init); 5200 module_exit(kvm_s390_exit); 5201 5202 /* 5203 * Enable autoloading of the kvm module. 5204 * Note that we add the module alias here instead of virt/kvm/kvm_main.c 5205 * since x86 takes a different approach. 5206 */ 5207 #include <linux/miscdevice.h> 5208 MODULE_ALIAS_MISCDEV(KVM_MINOR); 5209 MODULE_ALIAS("devname:kvm"); 5210