// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2020
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>
#include <linux/pgtable.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include <asm/ap.h>
#include <asm/uv.h>
#include <asm/fpu/api.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
	KVM_GENERIC_VM_STATS(),
	STATS_DESC_COUNTER(VM, inject_io),
	STATS_DESC_COUNTER(VM, inject_float_mchk),
	STATS_DESC_COUNTER(VM, inject_pfault_done),
	STATS_DESC_COUNTER(VM, inject_service_signal),
	STATS_DESC_COUNTER(VM, inject_virtio)
};

const struct kvm_stats_header kvm_vm_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vm_stats_desc),
};
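/*
 * Layout note: the offsets above describe the binary stats blob that
 * userspace reads (the header comes first, then the id string of name_size
 * bytes, then the descriptor array, then the data values), so each offset
 * is simply the cumulative size of the parts that precede it.
 */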
const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
	KVM_GENERIC_VCPU_STATS(),
	STATS_DESC_COUNTER(VCPU, exit_userspace),
	STATS_DESC_COUNTER(VCPU, exit_null),
	STATS_DESC_COUNTER(VCPU, exit_external_request),
	STATS_DESC_COUNTER(VCPU, exit_io_request),
	STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
	STATS_DESC_COUNTER(VCPU, exit_stop_request),
	STATS_DESC_COUNTER(VCPU, exit_validity),
	STATS_DESC_COUNTER(VCPU, exit_instruction),
	STATS_DESC_COUNTER(VCPU, exit_pei),
	STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
	STATS_DESC_COUNTER(VCPU, instruction_lctl),
	STATS_DESC_COUNTER(VCPU, instruction_lctlg),
	STATS_DESC_COUNTER(VCPU, instruction_stctl),
	STATS_DESC_COUNTER(VCPU, instruction_stctg),
	STATS_DESC_COUNTER(VCPU, exit_program_interruption),
	STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
	STATS_DESC_COUNTER(VCPU, exit_operation_exception),
	STATS_DESC_COUNTER(VCPU, deliver_ckc),
	STATS_DESC_COUNTER(VCPU, deliver_cputm),
	STATS_DESC_COUNTER(VCPU, deliver_external_call),
	STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
	STATS_DESC_COUNTER(VCPU, deliver_service_signal),
	STATS_DESC_COUNTER(VCPU, deliver_virtio),
	STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
	STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
	STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
	STATS_DESC_COUNTER(VCPU, deliver_program),
	STATS_DESC_COUNTER(VCPU, deliver_io),
	STATS_DESC_COUNTER(VCPU, deliver_machine_check),
	STATS_DESC_COUNTER(VCPU, exit_wait_state),
	STATS_DESC_COUNTER(VCPU, inject_ckc),
	STATS_DESC_COUNTER(VCPU, inject_cputm),
	STATS_DESC_COUNTER(VCPU, inject_external_call),
	STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
	STATS_DESC_COUNTER(VCPU, inject_mchk),
	STATS_DESC_COUNTER(VCPU, inject_pfault_init),
	STATS_DESC_COUNTER(VCPU, inject_program),
	STATS_DESC_COUNTER(VCPU, inject_restart),
	STATS_DESC_COUNTER(VCPU, inject_set_prefix),
	STATS_DESC_COUNTER(VCPU, inject_stop_signal),
	STATS_DESC_COUNTER(VCPU, instruction_epsw),
	STATS_DESC_COUNTER(VCPU, instruction_gs),
	STATS_DESC_COUNTER(VCPU, instruction_io_other),
	STATS_DESC_COUNTER(VCPU, instruction_lpsw),
	STATS_DESC_COUNTER(VCPU, instruction_lpswe),
	STATS_DESC_COUNTER(VCPU, instruction_pfmf),
	STATS_DESC_COUNTER(VCPU, instruction_ptff),
	STATS_DESC_COUNTER(VCPU, instruction_sck),
	STATS_DESC_COUNTER(VCPU, instruction_sckpf),
	STATS_DESC_COUNTER(VCPU, instruction_stidp),
	STATS_DESC_COUNTER(VCPU, instruction_spx),
	STATS_DESC_COUNTER(VCPU, instruction_stpx),
	STATS_DESC_COUNTER(VCPU, instruction_stap),
	STATS_DESC_COUNTER(VCPU, instruction_iske),
	STATS_DESC_COUNTER(VCPU, instruction_ri),
	STATS_DESC_COUNTER(VCPU, instruction_rrbe),
	STATS_DESC_COUNTER(VCPU, instruction_sske),
	STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
	STATS_DESC_COUNTER(VCPU, instruction_stsi),
	STATS_DESC_COUNTER(VCPU, instruction_stfl),
	STATS_DESC_COUNTER(VCPU, instruction_tb),
	STATS_DESC_COUNTER(VCPU, instruction_tpi),
	STATS_DESC_COUNTER(VCPU, instruction_tprot),
	STATS_DESC_COUNTER(VCPU, instruction_tsch),
	STATS_DESC_COUNTER(VCPU, instruction_sie),
	STATS_DESC_COUNTER(VCPU, instruction_essa),
	STATS_DESC_COUNTER(VCPU, instruction_sthyi),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
	STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
	STATS_DESC_COUNTER(VCPU, diag_9c_forward),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
	STATS_DESC_COUNTER(VCPU, pfault_sync)
};

const struct kvm_stats_header kvm_vcpu_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vcpu_stats_desc),
};

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* allow 1m huge page guest backing, if !nested */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");

/* maximum percentage of steal time for polling. >100 is treated like 100 */
static u8 halt_poll_max_steal = 10;
module_param(halt_poll_max_steal, byte, 0644);
MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");

/* if set to true, the GISA will be initialized and used if available */
static bool use_gisa = true;
module_param(use_gisa, bool, 0644);
MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");

/* maximum diag9c forwarding per second */
unsigned int diag9c_forwarding_hz;
module_param(diag9c_forwarding_hz, uint, 0644);
MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");

/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, this requires changes to code, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };

static unsigned long kvm_s390_fac_size(void)
{
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
		     sizeof(stfle_fac_list));

	return SIZE_INTERNAL;
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
debug_info_t *kvm_s390_dbf_uv;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

int kvm_arch_check_processor_compat(void *opaque)
{
	return 0;
}

/* forward declarations */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
static int sca_switch_to_extended(struct kvm *kvm);

static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
	u8 delta_idx = 0;

	/*
	 * The TOD jumps by delta, we have to compensate this by adding
	 * -delta to the epoch.
	 */
	delta = -delta;

	/* sign-extension - we're adding to signed values below */
	if ((s64)delta < 0)
		delta_idx = -1;

	scb->epoch += delta;
	if (scb->ecd & ECD_MEF) {
		scb->epdx += delta_idx;
		if (scb->epoch < delta)
			scb->epdx += 1;
	}
}
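/*
 * Note on the arithmetic above: with the multiple-epoch facility (ECD_MEF),
 * epoch and epdx together form a 128-bit signed offset that is added to the
 * host TOD to form the guest TOD. Adding the 64-bit value -delta is therefore
 * done by hand: delta_idx propagates the sign of the addend into the high
 * word, and the unsigned compare of the updated epoch against delta detects
 * a carry out of the low 64 bits, which then bumps epdx by one.
 */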
/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	unsigned long i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm_for_each_vcpu(i, vcpu, kvm) {
			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
			if (i == 0) {
				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
			}
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				kvm_clock_sync_scb(vcpu->arch.vsie_block,
						   *delta);
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void *opaque)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

static inline int plo_test_bit(unsigned char nr)
{
	unsigned long function = (unsigned long)nr | 0x100;
	int cc;

	asm volatile(
		"	lgr	0,%[function]\n"
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: [function] "d" (function)
		: "cc", "0");
	return cc == 0;
}

static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
{
	asm volatile(
		"	lghi	0,0\n"
		"	lgr	1,%[query]\n"
		/* Parameter registers are ignored */
		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
		:
		: [query] "d" ((unsigned long)query), [opc] "i" (opcode)
		: "cc", "memory", "0", "1");
}

#define INSN_SORTL 0xb938
#define INSN_DFLTCC 0xb939

static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);

	if (test_facility(155)) /* MSA9 */
		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kdsa);

	if (test_facility(150)) /* SORTL */
		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);

	if (test_facility(151)) /* DFLTCC */
		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}
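/*
 * The masks collected above (kvm_s390_available_cpu_feat and
 * kvm_s390_available_subfunc) are what the CPU-model attribute handlers
 * further down report back to userspace as host capabilities, e.g. via
 * KVM_S390_VM_CPU_MACHINE_FEAT and KVM_S390_VM_CPU_MACHINE_SUBFUNC.
 */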
int kvm_arch_init(void *opaque)
{
	int rc = -ENOMEM;

	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf_uv)
		goto out;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
		goto out;

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
	if (rc) {
		pr_err("A FLIC registration call failed with rc=%d\n", rc);
		goto out;
	}

	rc = kvm_s390_gib_init(GAL_ISC);
	if (rc)
		goto out;

	return 0;

out:
	kvm_arch_exit();
	return rc;
}

void kvm_arch_exit(void)
{
	kvm_s390_gib_destroy();
	debug_unregister(kvm_s390_dbf);
	debug_unregister(kvm_s390_dbf_uv);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
	case KVM_CAP_S390_AIS_MIGRATION:
	case KVM_CAP_S390_VCPU_RESETS:
	case KVM_CAP_SET_GUEST_DEBUG:
	case KVM_CAP_S390_DIAG318:
		r = 1;
		break;
	case KVM_CAP_SET_GUEST_DEBUG2:
		r = KVM_GUESTDBG_VALID_MASK;
		break;
	case KVM_CAP_S390_HPAGE_1M:
		r = 0;
		if (hpage && !kvm_is_ucontrol(kvm))
			r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
	case KVM_CAP_MAX_VCPU_ID:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		if (ext == KVM_CAP_NR_VCPUS)
			r = min_t(unsigned int, num_online_cpus(), r);
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	case KVM_CAP_S390_BPB:
		r = test_facility(82);
		break;
	case KVM_CAP_S390_PROTECTED:
		r = is_prot_virt_host();
		break;
	default:
		r = 0;
	}
	return r;
}

void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
	int i;
	gfn_t cur_gfn, last_gfn;
	unsigned long gaddr, vmaddr;
	struct gmap *gmap = kvm->arch.gmap;
	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);

	/* Loop over all guest segments */
	cur_gfn = memslot->base_gfn;
	last_gfn = memslot->base_gfn + memslot->npages;
	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
		gaddr = gfn_to_gpa(cur_gfn);
		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
		if (kvm_is_error_hva(vmaddr))
			continue;

		bitmap_zero(bitmap, _PAGE_ENTRIES);
		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
		for (i = 0; i < _PAGE_ENTRIES; i++) {
			if (test_bit(i, bitmap))
				mark_page_dirty(kvm, cur_gfn + i);
		}

		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}
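/*
 * Note: each loop iteration above covers one guest segment, i.e. one page
 * table worth (_PAGE_ENTRIES) of 4k pages, harvesting the dirty bits that
 * the gmap tracked at segment (pmd) granularity before folding them into
 * the generic per-memslot dirty bitmap via mark_page_dirty().
 */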
/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memory_slot *memslot;
	int is_dirty;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}

static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned long i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}

int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
			if (test_facility(148)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 148);
				set_kvm_facility(kvm->arch.model.fac_list, 148);
			}
			if (test_facility(152)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 152);
				set_kvm_facility(kvm->arch.model.fac_list, 152);
			}
			if (test_facility(192)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 192);
				set_kvm_facility(kvm->arch.model.fac_list, 192);
			}
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_AIS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else {
			set_kvm_facility(kvm->arch.model.fac_mask, 72);
			set_kvm_facility(kvm->arch.model.fac_list, 72);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_GS:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(133)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 133);
			set_kvm_facility(kvm->arch.model.fac_list, 133);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_HPAGE_1M:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			r = -EBUSY;
		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
			r = -EINVAL;
		else {
			r = 0;
			mmap_write_lock(kvm->mm);
			kvm->mm->context.allow_gmap_hpage_1m = 1;
			mmap_write_unlock(kvm->mm);
			/*
			 * We might have to create fake 4k page
			 * tables. To avoid that the hardware works on
			 * stale PGSTEs, we emulate these instructions.
			 */
			kvm->arch.use_skf = 0;
			kvm->arch.use_pfmfi = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;
	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			ret = -EBUSY;
		else if (kvm->mm->context.allow_gmap_hpage_1m)
			ret = -EINVAL;
		else {
			kvm->arch.use_cmma = 1;
			/* Not compatible with cmma. */
			kvm->arch.use_pfmfi = 0;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
{
	struct kvm_vcpu *vcpu;
	unsigned long i;

	kvm_s390_vcpu_block_all(kvm);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		/* recreate the shadow crycb by leaving the VSIE handler */
		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
	}

	kvm_s390_vcpu_unblock_all(kvm);
}

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
		       sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
		       sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 1;
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 0;
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_s390_vcpu_crypto_reset_all(kvm);
	mutex_unlock(&kvm->lock);
	return 0;
}

static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
	unsigned long cx;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}

/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_memory_slot *ms;
	struct kvm_memslots *slots;
	unsigned long ram_pages = 0;
	int bkt;

	/* migration mode already enabled */
	if (kvm->arch.migration_mode)
		return 0;
	slots = kvm_memslots(kvm);
	if (!slots || kvm_memslots_empty(slots))
		return -EINVAL;

	if (!kvm->arch.use_cmma) {
		kvm->arch.migration_mode = 1;
		return 0;
	}
	/* mark all the pages in active slots as dirty */
	kvm_for_each_memslot(ms, bkt, slots) {
		if (!ms->dirty_bitmap)
			return -EINVAL;
		/*
		 * The second half of the bitmap is only used on x86,
		 * and would be wasted otherwise, so we put it to good
		 * use here to keep track of the state of the storage
		 * attributes.
		 */
		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
		ram_pages += ms->npages;
	}
	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
	kvm->arch.migration_mode = 1;
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	return 0;
}
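/*
 * Note: when CMMA is in use, starting migration mode marks every page of
 * every slot as dirty from the storage-attribute point of view (the second
 * bitmap half above) and remembers the total in cmma_dirty_pages, so that
 * the CMMA migration log code can later report how much state is still left
 * to transfer; without CMMA only the migration_mode flag is toggled.
 */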
/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	/* migration mode already disabled */
	if (!kvm->arch.migration_mode)
		return 0;
	kvm->arch.migration_mode = 0;
	if (kvm->arch.use_cmma)
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
	return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int res = -ENXIO;

	mutex_lock(&kvm->slots_lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		res = kvm_s390_vm_start_migration(kvm);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->slots_lock);

	return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = kvm->arch.migration_mode;

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
		return -EINVAL;
	kvm_s390_set_tod_clock(kvm, &gtod);

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);

	return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod = { 0 };

	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
			   sizeof(gtod.tod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, &gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_get_tod_clock(struct kvm *kvm,
				   struct kvm_s390_vm_tod_clock *gtod)
{
	union tod_clock clk;

	preempt_disable();

	store_tod_clock_ext(&clk);

	gtod->tod = clk.tod + kvm->arch.epoch;
	gtod->epoch_idx = 0;
	if (test_kvm_facility(kvm, 139)) {
		gtod->epoch_idx = clk.ei + kvm->arch.epdx;
		if (gtod->tod < clk.tod)
			gtod->epoch_idx += 1;
	}

	preempt_enable();
}
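/*
 * In other words, the guest view of the clock is the host TOD plus
 * kvm->arch.epoch, and with the multiple-epoch facility (139) the epoch
 * index is adjusted as well, including the carry when the 64-bit TOD
 * addition wraps around.
 */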
static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));
	kvm_s390_get_tod_clock(kvm, &gtod);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);
	return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}
	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	mutex_unlock(&kvm->lock);
	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}
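/*
 * Note: like kvm_s390_set_processor() above and the subfunction setter
 * below, changing the guest CPU model is only allowed while
 * kvm->created_vcpus is still zero, i.e. userspace has to configure the
 * model before it creates the first vCPU.
 */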
static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}

	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
		mutex_unlock(&kvm->lock);
		return -EFAULT;
	}
	mutex_unlock(&kvm->lock);

	VM_EVENT(kvm, 3, "SET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
	VM_EVENT(kvm, 3, "SET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
	VM_EVENT(kvm, 3, "SET: guest KM subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
	VM_EVENT(kvm, 3, "SET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
	VM_EVENT(kvm, 3, "SET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
	VM_EVENT(kvm, 3, "SET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
	VM_EVENT(kvm, 3, "SET: guest PCC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
	VM_EVENT(kvm, 3, "SET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
	VM_EVENT(kvm, 3, "SET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
	VM_EVENT(kvm, 3, "SET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);

	return 0;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_list[0],
		 kvm->arch.model.fac_list[1],
		 kvm->arch.model.fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
	       sizeof(stfle_fac_list));
	VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_mask[0],
		 mach->fac_mask[1],
		 mach->fac_mask[2]);
	VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_list[0],
		 mach->fac_list[1],
		 mach->fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}

static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}

static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
			 sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "GET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
	VM_EVENT(kvm, 3, "GET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
	VM_EVENT(kvm, 3, "GET: guest KM subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
	VM_EVENT(kvm, 3, "GET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
	VM_EVENT(kvm, 3, "GET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
	VM_EVENT(kvm, 3, "GET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
	VM_EVENT(kvm, 3, "GET: guest PCC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
	VM_EVENT(kvm, 3, "GET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
	VM_EVENT(kvm, 3, "GET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
	VM_EVENT(kvm, 3, "GET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);

	return 0;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
			 sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "GET: host PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
	VM_EVENT(kvm, 3, "GET: host PTFF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
	VM_EVENT(kvm, 3, "GET: host KMAC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
	VM_EVENT(kvm, 3, "GET: host KMC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
	VM_EVENT(kvm, 3, "GET: host KM subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
	VM_EVENT(kvm, 3, "GET: host KIMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
	VM_EVENT(kvm, 3, "GET: host KLMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
	VM_EVENT(kvm, 3, "GET: host PCKMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
	VM_EVENT(kvm, 3, "GET: host KMCTR subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
	VM_EVENT(kvm, 3, "GET: host KMF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
	VM_EVENT(kvm, 3, "GET: host KMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
	VM_EVENT(kvm, 3, "GET: host PCC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
	VM_EVENT(kvm, 3, "GET: host PPNO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
	VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
	VM_EVENT(kvm, 3, "GET: host KDSA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
	VM_EVENT(kvm, 3, "GET: host SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
	VM_EVENT(kvm, 3, "GET: host DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);

	return 0;
}

static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_set_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_get_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
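/*
 * kvm_s390_vm_set_attr(), kvm_s390_vm_get_attr() above and
 * kvm_s390_vm_has_attr() below implement the VM device-attribute interface
 * (typically reached via the KVM_{SET,GET,HAS}_DEVICE_ATTR vm ioctls):
 * userspace selects an attribute group (memory control, TOD, CPU model,
 * crypto, migration) plus an attribute within that group, and KVM dispatches
 * to the matching handler.
 */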
kvm_device_attr *attr) 1752 { 1753 int ret; 1754 1755 switch (attr->group) { 1756 case KVM_S390_VM_MEM_CTRL: 1757 switch (attr->attr) { 1758 case KVM_S390_VM_MEM_ENABLE_CMMA: 1759 case KVM_S390_VM_MEM_CLR_CMMA: 1760 ret = sclp.has_cmma ? 0 : -ENXIO; 1761 break; 1762 case KVM_S390_VM_MEM_LIMIT_SIZE: 1763 ret = 0; 1764 break; 1765 default: 1766 ret = -ENXIO; 1767 break; 1768 } 1769 break; 1770 case KVM_S390_VM_TOD: 1771 switch (attr->attr) { 1772 case KVM_S390_VM_TOD_LOW: 1773 case KVM_S390_VM_TOD_HIGH: 1774 ret = 0; 1775 break; 1776 default: 1777 ret = -ENXIO; 1778 break; 1779 } 1780 break; 1781 case KVM_S390_VM_CPU_MODEL: 1782 switch (attr->attr) { 1783 case KVM_S390_VM_CPU_PROCESSOR: 1784 case KVM_S390_VM_CPU_MACHINE: 1785 case KVM_S390_VM_CPU_PROCESSOR_FEAT: 1786 case KVM_S390_VM_CPU_MACHINE_FEAT: 1787 case KVM_S390_VM_CPU_MACHINE_SUBFUNC: 1788 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: 1789 ret = 0; 1790 break; 1791 default: 1792 ret = -ENXIO; 1793 break; 1794 } 1795 break; 1796 case KVM_S390_VM_CRYPTO: 1797 switch (attr->attr) { 1798 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW: 1799 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW: 1800 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW: 1801 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW: 1802 ret = 0; 1803 break; 1804 case KVM_S390_VM_CRYPTO_ENABLE_APIE: 1805 case KVM_S390_VM_CRYPTO_DISABLE_APIE: 1806 ret = ap_instructions_available() ? 0 : -ENXIO; 1807 break; 1808 default: 1809 ret = -ENXIO; 1810 break; 1811 } 1812 break; 1813 case KVM_S390_VM_MIGRATION: 1814 ret = 0; 1815 break; 1816 default: 1817 ret = -ENXIO; 1818 break; 1819 } 1820 1821 return ret; 1822 } 1823 1824 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) 1825 { 1826 uint8_t *keys; 1827 uint64_t hva; 1828 int srcu_idx, i, r = 0; 1829 1830 if (args->flags != 0) 1831 return -EINVAL; 1832 1833 /* Is this guest using storage keys? 
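	 * If it never has, there are no keys to transfer and the ioctl
	 * returns KVM_S390_GET_SKEYS_NONE, so userspace can skip
	 * storage-key migration for this guest entirely.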
*/ 1834 if (!mm_uses_skeys(current->mm)) 1835 return KVM_S390_GET_SKEYS_NONE; 1836 1837 /* Enforce sane limit on memory allocation */ 1838 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX) 1839 return -EINVAL; 1840 1841 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT); 1842 if (!keys) 1843 return -ENOMEM; 1844 1845 mmap_read_lock(current->mm); 1846 srcu_idx = srcu_read_lock(&kvm->srcu); 1847 for (i = 0; i < args->count; i++) { 1848 hva = gfn_to_hva(kvm, args->start_gfn + i); 1849 if (kvm_is_error_hva(hva)) { 1850 r = -EFAULT; 1851 break; 1852 } 1853 1854 r = get_guest_storage_key(current->mm, hva, &keys[i]); 1855 if (r) 1856 break; 1857 } 1858 srcu_read_unlock(&kvm->srcu, srcu_idx); 1859 mmap_read_unlock(current->mm); 1860 1861 if (!r) { 1862 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys, 1863 sizeof(uint8_t) * args->count); 1864 if (r) 1865 r = -EFAULT; 1866 } 1867 1868 kvfree(keys); 1869 return r; 1870 } 1871 1872 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) 1873 { 1874 uint8_t *keys; 1875 uint64_t hva; 1876 int srcu_idx, i, r = 0; 1877 bool unlocked; 1878 1879 if (args->flags != 0) 1880 return -EINVAL; 1881 1882 /* Enforce sane limit on memory allocation */ 1883 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX) 1884 return -EINVAL; 1885 1886 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT); 1887 if (!keys) 1888 return -ENOMEM; 1889 1890 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr, 1891 sizeof(uint8_t) * args->count); 1892 if (r) { 1893 r = -EFAULT; 1894 goto out; 1895 } 1896 1897 /* Enable storage key handling for the guest */ 1898 r = s390_enable_skey(); 1899 if (r) 1900 goto out; 1901 1902 i = 0; 1903 mmap_read_lock(current->mm); 1904 srcu_idx = srcu_read_lock(&kvm->srcu); 1905 while (i < args->count) { 1906 unlocked = false; 1907 hva = gfn_to_hva(kvm, args->start_gfn + i); 1908 if (kvm_is_error_hva(hva)) { 1909 r = -EFAULT; 1910 break; 1911 } 1912 1913 /* Lowest order bit is reserved */ 1914 if (keys[i] & 0x01) { 1915 r = -EINVAL; 1916 break; 1917 } 1918 1919 r = set_guest_storage_key(current->mm, hva, keys[i], 0); 1920 if (r) { 1921 r = fixup_user_fault(current->mm, hva, 1922 FAULT_FLAG_WRITE, &unlocked); 1923 if (r) 1924 break; 1925 } 1926 if (!r) 1927 i++; 1928 } 1929 srcu_read_unlock(&kvm->srcu, srcu_idx); 1930 mmap_read_unlock(current->mm); 1931 out: 1932 kvfree(keys); 1933 return r; 1934 } 1935 1936 /* 1937 * Base address and length must be sent at the start of each block, therefore 1938 * it's cheaper to send some clean data, as long as it's less than the size of 1939 * two longs. 1940 */ 1941 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *)) 1942 /* for consistency */ 1943 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX) 1944 1945 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args, 1946 u8 *res, unsigned long bufsize) 1947 { 1948 unsigned long pgstev, hva, cur_gfn = args->start_gfn; 1949 1950 args->count = 0; 1951 while (args->count < bufsize) { 1952 hva = gfn_to_hva(kvm, cur_gfn); 1953 /* 1954 * We return an error if the first value was invalid, but we 1955 * return successfully if at least one value was copied. 1956 */ 1957 if (kvm_is_error_hva(hva)) 1958 return args->count ? 
0 : -EFAULT; 1959 if (get_pgste(kvm->mm, hva, &pgstev) < 0) 1960 pgstev = 0; 1961 res[args->count++] = (pgstev >> 24) & 0x43; 1962 cur_gfn++; 1963 } 1964 1965 return 0; 1966 } 1967 1968 static struct kvm_memory_slot *gfn_to_memslot_approx(struct kvm_memslots *slots, 1969 gfn_t gfn) 1970 { 1971 return ____gfn_to_memslot(slots, gfn, true); 1972 } 1973 1974 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots, 1975 unsigned long cur_gfn) 1976 { 1977 struct kvm_memory_slot *ms = gfn_to_memslot_approx(slots, cur_gfn); 1978 unsigned long ofs = cur_gfn - ms->base_gfn; 1979 struct rb_node *mnode = &ms->gfn_node[slots->node_idx]; 1980 1981 if (ms->base_gfn + ms->npages <= cur_gfn) { 1982 mnode = rb_next(mnode); 1983 /* If we are above the highest slot, wrap around */ 1984 if (!mnode) 1985 mnode = rb_first(&slots->gfn_tree); 1986 1987 ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]); 1988 ofs = 0; 1989 } 1990 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs); 1991 while (ofs >= ms->npages && (mnode = rb_next(mnode))) { 1992 ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]); 1993 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0); 1994 } 1995 return ms->base_gfn + ofs; 1996 } 1997 1998 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args, 1999 u8 *res, unsigned long bufsize) 2000 { 2001 unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev; 2002 struct kvm_memslots *slots = kvm_memslots(kvm); 2003 struct kvm_memory_slot *ms; 2004 2005 if (unlikely(kvm_memslots_empty(slots))) 2006 return 0; 2007 2008 cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn); 2009 ms = gfn_to_memslot(kvm, cur_gfn); 2010 args->count = 0; 2011 args->start_gfn = cur_gfn; 2012 if (!ms) 2013 return 0; 2014 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1); 2015 mem_end = kvm_s390_get_gfn_end(slots); 2016 2017 while (args->count < bufsize) { 2018 hva = gfn_to_hva(kvm, cur_gfn); 2019 if (kvm_is_error_hva(hva)) 2020 return 0; 2021 /* Decrement only if we actually flipped the bit to 0 */ 2022 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms))) 2023 atomic64_dec(&kvm->arch.cmma_dirty_pages); 2024 if (get_pgste(kvm->mm, hva, &pgstev) < 0) 2025 pgstev = 0; 2026 /* Save the value */ 2027 res[args->count++] = (pgstev >> 24) & 0x43; 2028 /* If the next bit is too far away, stop. */ 2029 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE) 2030 return 0; 2031 /* If we reached the previous "next", find the next one */ 2032 if (cur_gfn == next_gfn) 2033 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1); 2034 /* Reached the end of memory or of the buffer, stop */ 2035 if ((next_gfn >= mem_end) || 2036 (next_gfn - args->start_gfn >= bufsize)) 2037 return 0; 2038 cur_gfn++; 2039 /* Reached the end of the current memslot, take the next one. */ 2040 if (cur_gfn - ms->base_gfn >= ms->npages) { 2041 ms = gfn_to_memslot(kvm, cur_gfn); 2042 if (!ms) 2043 return 0; 2044 } 2045 } 2046 return 0; 2047 } 2048 2049 /* 2050 * This function searches for the next page with dirty CMMA attributes, and 2051 * saves the attributes in the buffer up to either the end of the buffer or 2052 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found; 2053 * no trailing clean bytes are saved. 2054 * In case no dirty bits were found, or if CMMA was not enabled or used, the 2055 * output buffer will indicate 0 as length. 
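 *
 * A minimal userspace sketch (illustrative only; buf/buf_len/vm_fd are
 * placeholders and error handling is omitted) of driving this through the
 * KVM_S390_GET_CMMA_BITS vm ioctl:
 *
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count = buf_len,	// bytes available at .values
 *		.flags = 0,		// or KVM_S390_CMMA_PEEK to peek
 *		.values = (__u64)(unsigned long)buf,
 *	};
 *	ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log);
 *
 * On return .start_gfn points at the first returned value, .count holds the
 * number of values actually written and .remaining the number of dirty
 * pages still left to fetch (when in migration mode).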
2056 */ 2057 static int kvm_s390_get_cmma_bits(struct kvm *kvm, 2058 struct kvm_s390_cmma_log *args) 2059 { 2060 unsigned long bufsize; 2061 int srcu_idx, peek, ret; 2062 u8 *values; 2063 2064 if (!kvm->arch.use_cmma) 2065 return -ENXIO; 2066 /* Invalid/unsupported flags were specified */ 2067 if (args->flags & ~KVM_S390_CMMA_PEEK) 2068 return -EINVAL; 2069 /* Migration mode query, and we are not doing a migration */ 2070 peek = !!(args->flags & KVM_S390_CMMA_PEEK); 2071 if (!peek && !kvm->arch.migration_mode) 2072 return -EINVAL; 2073 /* CMMA is disabled or was not used, or the buffer has length zero */ 2074 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX); 2075 if (!bufsize || !kvm->mm->context.uses_cmm) { 2076 memset(args, 0, sizeof(*args)); 2077 return 0; 2078 } 2079 /* We are not peeking, and there are no dirty pages */ 2080 if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) { 2081 memset(args, 0, sizeof(*args)); 2082 return 0; 2083 } 2084 2085 values = vmalloc(bufsize); 2086 if (!values) 2087 return -ENOMEM; 2088 2089 mmap_read_lock(kvm->mm); 2090 srcu_idx = srcu_read_lock(&kvm->srcu); 2091 if (peek) 2092 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize); 2093 else 2094 ret = kvm_s390_get_cmma(kvm, args, values, bufsize); 2095 srcu_read_unlock(&kvm->srcu, srcu_idx); 2096 mmap_read_unlock(kvm->mm); 2097 2098 if (kvm->arch.migration_mode) 2099 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages); 2100 else 2101 args->remaining = 0; 2102 2103 if (copy_to_user((void __user *)args->values, values, args->count)) 2104 ret = -EFAULT; 2105 2106 vfree(values); 2107 return ret; 2108 } 2109 2110 /* 2111 * This function sets the CMMA attributes for the given pages. If the input 2112 * buffer has zero length, no action is taken, otherwise the attributes are 2113 * set and the mm->context.uses_cmm flag is set. 
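 *
 * A matching userspace sketch (illustrative only; gfn/n/buf/vm_fd are
 * placeholders) for the KVM_S390_SET_CMMA_BITS vm ioctl, e.g. on the
 * destination side of a migration:
 *
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = gfn,
 *		.count = n,		// one value byte per page
 *		.flags = 0,		// no flags are defined for setting
 *		.mask = ~0ULL,		// which pgste bits may be updated
 *		.values = (__u64)(unsigned long)buf,
 *	};
 *	ioctl(vm_fd, KVM_S390_SET_CMMA_BITS, &log);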
2114 */ 2115 static int kvm_s390_set_cmma_bits(struct kvm *kvm, 2116 const struct kvm_s390_cmma_log *args) 2117 { 2118 unsigned long hva, mask, pgstev, i; 2119 uint8_t *bits; 2120 int srcu_idx, r = 0; 2121 2122 mask = args->mask; 2123 2124 if (!kvm->arch.use_cmma) 2125 return -ENXIO; 2126 /* invalid/unsupported flags */ 2127 if (args->flags != 0) 2128 return -EINVAL; 2129 /* Enforce sane limit on memory allocation */ 2130 if (args->count > KVM_S390_CMMA_SIZE_MAX) 2131 return -EINVAL; 2132 /* Nothing to do */ 2133 if (args->count == 0) 2134 return 0; 2135 2136 bits = vmalloc(array_size(sizeof(*bits), args->count)); 2137 if (!bits) 2138 return -ENOMEM; 2139 2140 r = copy_from_user(bits, (void __user *)args->values, args->count); 2141 if (r) { 2142 r = -EFAULT; 2143 goto out; 2144 } 2145 2146 mmap_read_lock(kvm->mm); 2147 srcu_idx = srcu_read_lock(&kvm->srcu); 2148 for (i = 0; i < args->count; i++) { 2149 hva = gfn_to_hva(kvm, args->start_gfn + i); 2150 if (kvm_is_error_hva(hva)) { 2151 r = -EFAULT; 2152 break; 2153 } 2154 2155 pgstev = bits[i]; 2156 pgstev = pgstev << 24; 2157 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT; 2158 set_pgste_bits(kvm->mm, hva, mask, pgstev); 2159 } 2160 srcu_read_unlock(&kvm->srcu, srcu_idx); 2161 mmap_read_unlock(kvm->mm); 2162 2163 if (!kvm->mm->context.uses_cmm) { 2164 mmap_write_lock(kvm->mm); 2165 kvm->mm->context.uses_cmm = 1; 2166 mmap_write_unlock(kvm->mm); 2167 } 2168 out: 2169 vfree(bits); 2170 return r; 2171 } 2172 2173 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp) 2174 { 2175 struct kvm_vcpu *vcpu; 2176 u16 rc, rrc; 2177 int ret = 0; 2178 unsigned long i; 2179 2180 /* 2181 * We ignore failures and try to destroy as many CPUs as possible. 2182 * At the same time we must not free the assigned resources when 2183 * this fails, as the ultravisor has still access to that memory. 2184 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak 2185 * behind. 2186 * We want to return the first failure rc and rrc, though. 2187 */ 2188 kvm_for_each_vcpu(i, vcpu, kvm) { 2189 mutex_lock(&vcpu->mutex); 2190 if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) { 2191 *rcp = rc; 2192 *rrcp = rrc; 2193 ret = -EIO; 2194 } 2195 mutex_unlock(&vcpu->mutex); 2196 } 2197 return ret; 2198 } 2199 2200 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc) 2201 { 2202 unsigned long i; 2203 int r = 0; 2204 u16 dummy; 2205 2206 struct kvm_vcpu *vcpu; 2207 2208 kvm_for_each_vcpu(i, vcpu, kvm) { 2209 mutex_lock(&vcpu->mutex); 2210 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc); 2211 mutex_unlock(&vcpu->mutex); 2212 if (r) 2213 break; 2214 } 2215 if (r) 2216 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy); 2217 return r; 2218 } 2219 2220 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) 2221 { 2222 int r = 0; 2223 u16 dummy; 2224 void __user *argp = (void __user *)cmd->data; 2225 2226 switch (cmd->cmd) { 2227 case KVM_PV_ENABLE: { 2228 r = -EINVAL; 2229 if (kvm_s390_pv_is_protected(kvm)) 2230 break; 2231 2232 /* 2233 * FMT 4 SIE needs esca. 
As we never switch back to bsca from 2234 * esca, we need no cleanup in the error cases below 2235 */ 2236 r = sca_switch_to_extended(kvm); 2237 if (r) 2238 break; 2239 2240 mmap_write_lock(current->mm); 2241 r = gmap_mark_unmergeable(); 2242 mmap_write_unlock(current->mm); 2243 if (r) 2244 break; 2245 2246 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc); 2247 if (r) 2248 break; 2249 2250 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc); 2251 if (r) 2252 kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy); 2253 2254 /* we need to block service interrupts from now on */ 2255 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs); 2256 break; 2257 } 2258 case KVM_PV_DISABLE: { 2259 r = -EINVAL; 2260 if (!kvm_s390_pv_is_protected(kvm)) 2261 break; 2262 2263 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc); 2264 /* 2265 * If a CPU could not be destroyed, destroy VM will also fail. 2266 * There is no point in trying to destroy it. Instead return 2267 * the rc and rrc from the first CPU that failed destroying. 2268 */ 2269 if (r) 2270 break; 2271 r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc); 2272 2273 /* no need to block service interrupts any more */ 2274 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs); 2275 break; 2276 } 2277 case KVM_PV_SET_SEC_PARMS: { 2278 struct kvm_s390_pv_sec_parm parms = {}; 2279 void *hdr; 2280 2281 r = -EINVAL; 2282 if (!kvm_s390_pv_is_protected(kvm)) 2283 break; 2284 2285 r = -EFAULT; 2286 if (copy_from_user(&parms, argp, sizeof(parms))) 2287 break; 2288 2289 /* Currently restricted to 8KB */ 2290 r = -EINVAL; 2291 if (parms.length > PAGE_SIZE * 2) 2292 break; 2293 2294 r = -ENOMEM; 2295 hdr = vmalloc(parms.length); 2296 if (!hdr) 2297 break; 2298 2299 r = -EFAULT; 2300 if (!copy_from_user(hdr, (void __user *)parms.origin, 2301 parms.length)) 2302 r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length, 2303 &cmd->rc, &cmd->rrc); 2304 2305 vfree(hdr); 2306 break; 2307 } 2308 case KVM_PV_UNPACK: { 2309 struct kvm_s390_pv_unp unp = {}; 2310 2311 r = -EINVAL; 2312 if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm)) 2313 break; 2314 2315 r = -EFAULT; 2316 if (copy_from_user(&unp, argp, sizeof(unp))) 2317 break; 2318 2319 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak, 2320 &cmd->rc, &cmd->rrc); 2321 break; 2322 } 2323 case KVM_PV_VERIFY: { 2324 r = -EINVAL; 2325 if (!kvm_s390_pv_is_protected(kvm)) 2326 break; 2327 2328 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), 2329 UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc); 2330 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc, 2331 cmd->rrc); 2332 break; 2333 } 2334 case KVM_PV_PREP_RESET: { 2335 r = -EINVAL; 2336 if (!kvm_s390_pv_is_protected(kvm)) 2337 break; 2338 2339 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), 2340 UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc); 2341 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x", 2342 cmd->rc, cmd->rrc); 2343 break; 2344 } 2345 case KVM_PV_UNSHARE_ALL: { 2346 r = -EINVAL; 2347 if (!kvm_s390_pv_is_protected(kvm)) 2348 break; 2349 2350 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), 2351 UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc); 2352 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x", 2353 cmd->rc, cmd->rrc); 2354 break; 2355 } 2356 default: 2357 r = -ENOTTY; 2358 } 2359 return r; 2360 } 2361 2362 long kvm_arch_vm_ioctl(struct file *filp, 2363 unsigned int ioctl, unsigned long arg) 2364 { 2365 struct kvm *kvm = filp->private_data; 2366 void __user *argp = (void __user *)arg; 2367 struct kvm_device_attr attr; 2368 
int r; 2369 2370 switch (ioctl) { 2371 case KVM_S390_INTERRUPT: { 2372 struct kvm_s390_interrupt s390int; 2373 2374 r = -EFAULT; 2375 if (copy_from_user(&s390int, argp, sizeof(s390int))) 2376 break; 2377 r = kvm_s390_inject_vm(kvm, &s390int); 2378 break; 2379 } 2380 case KVM_CREATE_IRQCHIP: { 2381 struct kvm_irq_routing_entry routing; 2382 2383 r = -EINVAL; 2384 if (kvm->arch.use_irqchip) { 2385 /* Set up dummy routing. */ 2386 memset(&routing, 0, sizeof(routing)); 2387 r = kvm_set_irq_routing(kvm, &routing, 0, 0); 2388 } 2389 break; 2390 } 2391 case KVM_SET_DEVICE_ATTR: { 2392 r = -EFAULT; 2393 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 2394 break; 2395 r = kvm_s390_vm_set_attr(kvm, &attr); 2396 break; 2397 } 2398 case KVM_GET_DEVICE_ATTR: { 2399 r = -EFAULT; 2400 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 2401 break; 2402 r = kvm_s390_vm_get_attr(kvm, &attr); 2403 break; 2404 } 2405 case KVM_HAS_DEVICE_ATTR: { 2406 r = -EFAULT; 2407 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 2408 break; 2409 r = kvm_s390_vm_has_attr(kvm, &attr); 2410 break; 2411 } 2412 case KVM_S390_GET_SKEYS: { 2413 struct kvm_s390_skeys args; 2414 2415 r = -EFAULT; 2416 if (copy_from_user(&args, argp, 2417 sizeof(struct kvm_s390_skeys))) 2418 break; 2419 r = kvm_s390_get_skeys(kvm, &args); 2420 break; 2421 } 2422 case KVM_S390_SET_SKEYS: { 2423 struct kvm_s390_skeys args; 2424 2425 r = -EFAULT; 2426 if (copy_from_user(&args, argp, 2427 sizeof(struct kvm_s390_skeys))) 2428 break; 2429 r = kvm_s390_set_skeys(kvm, &args); 2430 break; 2431 } 2432 case KVM_S390_GET_CMMA_BITS: { 2433 struct kvm_s390_cmma_log args; 2434 2435 r = -EFAULT; 2436 if (copy_from_user(&args, argp, sizeof(args))) 2437 break; 2438 mutex_lock(&kvm->slots_lock); 2439 r = kvm_s390_get_cmma_bits(kvm, &args); 2440 mutex_unlock(&kvm->slots_lock); 2441 if (!r) { 2442 r = copy_to_user(argp, &args, sizeof(args)); 2443 if (r) 2444 r = -EFAULT; 2445 } 2446 break; 2447 } 2448 case KVM_S390_SET_CMMA_BITS: { 2449 struct kvm_s390_cmma_log args; 2450 2451 r = -EFAULT; 2452 if (copy_from_user(&args, argp, sizeof(args))) 2453 break; 2454 mutex_lock(&kvm->slots_lock); 2455 r = kvm_s390_set_cmma_bits(kvm, &args); 2456 mutex_unlock(&kvm->slots_lock); 2457 break; 2458 } 2459 case KVM_S390_PV_COMMAND: { 2460 struct kvm_pv_cmd args; 2461 2462 /* protvirt means user cpu state */ 2463 kvm_s390_set_user_cpu_state_ctrl(kvm); 2464 r = 0; 2465 if (!is_prot_virt_host()) { 2466 r = -EINVAL; 2467 break; 2468 } 2469 if (copy_from_user(&args, argp, sizeof(args))) { 2470 r = -EFAULT; 2471 break; 2472 } 2473 if (args.flags) { 2474 r = -EINVAL; 2475 break; 2476 } 2477 mutex_lock(&kvm->lock); 2478 r = kvm_s390_handle_pv(kvm, &args); 2479 mutex_unlock(&kvm->lock); 2480 if (copy_to_user(argp, &args, sizeof(args))) { 2481 r = -EFAULT; 2482 break; 2483 } 2484 break; 2485 } 2486 default: 2487 r = -ENOTTY; 2488 } 2489 2490 return r; 2491 } 2492 2493 static int kvm_s390_apxa_installed(void) 2494 { 2495 struct ap_config_info info; 2496 2497 if (ap_instructions_available()) { 2498 if (ap_qci(&info) == 0) 2499 return info.apxa; 2500 } 2501 2502 return 0; 2503 } 2504 2505 /* 2506 * The format of the crypto control block (CRYCB) is specified in the 3 low 2507 * order bits of the CRYCB designation (CRYCBD) field as follows: 2508 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the 2509 * AP extended addressing (APXA) facility are installed. 2510 * Format 1: The APXA facility is not installed but the MSAX3 facility is. 
2511 * Format 2: Both the APXA and MSAX3 facilities are installed 2512 */ 2513 static void kvm_s390_set_crycb_format(struct kvm *kvm) 2514 { 2515 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb; 2516 2517 /* Clear the CRYCB format bits - i.e., set format 0 by default */ 2518 kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK); 2519 2520 /* Check whether MSAX3 is installed */ 2521 if (!test_kvm_facility(kvm, 76)) 2522 return; 2523 2524 if (kvm_s390_apxa_installed()) 2525 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2; 2526 else 2527 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1; 2528 } 2529 2530 /* 2531 * kvm_arch_crypto_set_masks 2532 * 2533 * @kvm: pointer to the target guest's KVM struct containing the crypto masks 2534 * to be set. 2535 * @apm: the mask identifying the accessible AP adapters 2536 * @aqm: the mask identifying the accessible AP domains 2537 * @adm: the mask identifying the accessible AP control domains 2538 * 2539 * Set the masks that identify the adapters, domains and control domains to 2540 * which the KVM guest is granted access. 2541 * 2542 * Note: The kvm->lock mutex must be locked by the caller before invoking this 2543 * function. 2544 */ 2545 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm, 2546 unsigned long *aqm, unsigned long *adm) 2547 { 2548 struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb; 2549 2550 kvm_s390_vcpu_block_all(kvm); 2551 2552 switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) { 2553 case CRYCB_FORMAT2: /* APCB1 use 256 bits */ 2554 memcpy(crycb->apcb1.apm, apm, 32); 2555 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx", 2556 apm[0], apm[1], apm[2], apm[3]); 2557 memcpy(crycb->apcb1.aqm, aqm, 32); 2558 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx", 2559 aqm[0], aqm[1], aqm[2], aqm[3]); 2560 memcpy(crycb->apcb1.adm, adm, 32); 2561 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx", 2562 adm[0], adm[1], adm[2], adm[3]); 2563 break; 2564 case CRYCB_FORMAT1: 2565 case CRYCB_FORMAT0: /* Fall through both use APCB0 */ 2566 memcpy(crycb->apcb0.apm, apm, 8); 2567 memcpy(crycb->apcb0.aqm, aqm, 2); 2568 memcpy(crycb->apcb0.adm, adm, 2); 2569 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x", 2570 apm[0], *((unsigned short *)aqm), 2571 *((unsigned short *)adm)); 2572 break; 2573 default: /* Can not happen */ 2574 break; 2575 } 2576 2577 /* recreate the shadow crycb for each vcpu */ 2578 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART); 2579 kvm_s390_vcpu_unblock_all(kvm); 2580 } 2581 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks); 2582 2583 /* 2584 * kvm_arch_crypto_clear_masks 2585 * 2586 * @kvm: pointer to the target guest's KVM struct containing the crypto masks 2587 * to be cleared. 2588 * 2589 * Clear the masks that identify the adapters, domains and control domains to 2590 * which the KVM guest is granted access. 2591 * 2592 * Note: The kvm->lock mutex must be locked by the caller before invoking this 2593 * function. 
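 *
 * An illustrative caller sketch (not taken from an in-tree user) showing
 * the locking this function expects:
 *
 *	mutex_lock(&kvm->lock);
 *	kvm_arch_crypto_clear_masks(kvm);
 *	mutex_unlock(&kvm->lock);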
2594 */ 2595 void kvm_arch_crypto_clear_masks(struct kvm *kvm) 2596 { 2597 kvm_s390_vcpu_block_all(kvm); 2598 2599 memset(&kvm->arch.crypto.crycb->apcb0, 0, 2600 sizeof(kvm->arch.crypto.crycb->apcb0)); 2601 memset(&kvm->arch.crypto.crycb->apcb1, 0, 2602 sizeof(kvm->arch.crypto.crycb->apcb1)); 2603 2604 VM_EVENT(kvm, 3, "%s", "CLR CRYCB:"); 2605 /* recreate the shadow crycb for each vcpu */ 2606 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART); 2607 kvm_s390_vcpu_unblock_all(kvm); 2608 } 2609 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks); 2610 2611 static u64 kvm_s390_get_initial_cpuid(void) 2612 { 2613 struct cpuid cpuid; 2614 2615 get_cpu_id(&cpuid); 2616 cpuid.version = 0xff; 2617 return *((u64 *) &cpuid); 2618 } 2619 2620 static void kvm_s390_crypto_init(struct kvm *kvm) 2621 { 2622 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb; 2623 kvm_s390_set_crycb_format(kvm); 2624 init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem); 2625 2626 if (!test_kvm_facility(kvm, 76)) 2627 return; 2628 2629 /* Enable AES/DEA protected key functions by default */ 2630 kvm->arch.crypto.aes_kw = 1; 2631 kvm->arch.crypto.dea_kw = 1; 2632 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 2633 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); 2634 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 2635 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); 2636 } 2637 2638 static void sca_dispose(struct kvm *kvm) 2639 { 2640 if (kvm->arch.use_esca) 2641 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block)); 2642 else 2643 free_page((unsigned long)(kvm->arch.sca)); 2644 kvm->arch.sca = NULL; 2645 } 2646 2647 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) 2648 { 2649 gfp_t alloc_flags = GFP_KERNEL_ACCOUNT; 2650 int i, rc; 2651 char debug_name[16]; 2652 static unsigned long sca_offset; 2653 2654 rc = -EINVAL; 2655 #ifdef CONFIG_KVM_S390_UCONTROL 2656 if (type & ~KVM_VM_S390_UCONTROL) 2657 goto out_err; 2658 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN))) 2659 goto out_err; 2660 #else 2661 if (type) 2662 goto out_err; 2663 #endif 2664 2665 rc = s390_enable_sie(); 2666 if (rc) 2667 goto out_err; 2668 2669 rc = -ENOMEM; 2670 2671 if (!sclp.has_64bscao) 2672 alloc_flags |= GFP_DMA; 2673 rwlock_init(&kvm->arch.sca_lock); 2674 /* start with basic SCA */ 2675 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags); 2676 if (!kvm->arch.sca) 2677 goto out_err; 2678 mutex_lock(&kvm_lock); 2679 sca_offset += 16; 2680 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE) 2681 sca_offset = 0; 2682 kvm->arch.sca = (struct bsca_block *) 2683 ((char *) kvm->arch.sca + sca_offset); 2684 mutex_unlock(&kvm_lock); 2685 2686 sprintf(debug_name, "kvm-%u", current->pid); 2687 2688 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long)); 2689 if (!kvm->arch.dbf) 2690 goto out_err; 2691 2692 BUILD_BUG_ON(sizeof(struct sie_page2) != 4096); 2693 kvm->arch.sie_page2 = 2694 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA); 2695 if (!kvm->arch.sie_page2) 2696 goto out_err; 2697 2698 kvm->arch.sie_page2->kvm = kvm; 2699 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list; 2700 2701 for (i = 0; i < kvm_s390_fac_size(); i++) { 2702 kvm->arch.model.fac_mask[i] = stfle_fac_list[i] & 2703 (kvm_s390_fac_base[i] | 2704 kvm_s390_fac_ext[i]); 2705 kvm->arch.model.fac_list[i] = stfle_fac_list[i] & 2706 kvm_s390_fac_base[i]; 2707 } 2708 kvm->arch.model.subfuncs = kvm_s390_available_subfunc; 2709 2710 /* we are always in czam mode - 
even on pre z14 machines */ 2711 set_kvm_facility(kvm->arch.model.fac_mask, 138); 2712 set_kvm_facility(kvm->arch.model.fac_list, 138); 2713 /* we emulate STHYI in kvm */ 2714 set_kvm_facility(kvm->arch.model.fac_mask, 74); 2715 set_kvm_facility(kvm->arch.model.fac_list, 74); 2716 if (MACHINE_HAS_TLB_GUEST) { 2717 set_kvm_facility(kvm->arch.model.fac_mask, 147); 2718 set_kvm_facility(kvm->arch.model.fac_list, 147); 2719 } 2720 2721 if (css_general_characteristics.aiv && test_facility(65)) 2722 set_kvm_facility(kvm->arch.model.fac_mask, 65); 2723 2724 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid(); 2725 kvm->arch.model.ibc = sclp.ibc & 0x0fff; 2726 2727 kvm_s390_crypto_init(kvm); 2728 2729 mutex_init(&kvm->arch.float_int.ais_lock); 2730 spin_lock_init(&kvm->arch.float_int.lock); 2731 for (i = 0; i < FIRQ_LIST_COUNT; i++) 2732 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]); 2733 init_waitqueue_head(&kvm->arch.ipte_wq); 2734 mutex_init(&kvm->arch.ipte_mutex); 2735 2736 debug_register_view(kvm->arch.dbf, &debug_sprintf_view); 2737 VM_EVENT(kvm, 3, "vm created with type %lu", type); 2738 2739 if (type & KVM_VM_S390_UCONTROL) { 2740 kvm->arch.gmap = NULL; 2741 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT; 2742 } else { 2743 if (sclp.hamax == U64_MAX) 2744 kvm->arch.mem_limit = TASK_SIZE_MAX; 2745 else 2746 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX, 2747 sclp.hamax + 1); 2748 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1); 2749 if (!kvm->arch.gmap) 2750 goto out_err; 2751 kvm->arch.gmap->private = kvm; 2752 kvm->arch.gmap->pfault_enabled = 0; 2753 } 2754 2755 kvm->arch.use_pfmfi = sclp.has_pfmfi; 2756 kvm->arch.use_skf = sclp.has_skey; 2757 spin_lock_init(&kvm->arch.start_stop_lock); 2758 kvm_s390_vsie_init(kvm); 2759 if (use_gisa) 2760 kvm_s390_gisa_init(kvm); 2761 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid); 2762 2763 return 0; 2764 out_err: 2765 free_page((unsigned long)kvm->arch.sie_page2); 2766 debug_unregister(kvm->arch.dbf); 2767 sca_dispose(kvm); 2768 KVM_EVENT(3, "creation of vm failed: %d", rc); 2769 return rc; 2770 } 2771 2772 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) 2773 { 2774 u16 rc, rrc; 2775 2776 VCPU_EVENT(vcpu, 3, "%s", "free cpu"); 2777 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id); 2778 kvm_s390_clear_local_irqs(vcpu); 2779 kvm_clear_async_pf_completion_queue(vcpu); 2780 if (!kvm_is_ucontrol(vcpu->kvm)) 2781 sca_del_vcpu(vcpu); 2782 2783 if (kvm_is_ucontrol(vcpu->kvm)) 2784 gmap_remove(vcpu->arch.gmap); 2785 2786 if (vcpu->kvm->arch.use_cmma) 2787 kvm_s390_vcpu_unsetup_cmma(vcpu); 2788 /* We can not hold the vcpu mutex here, we are already dying */ 2789 if (kvm_s390_pv_cpu_get_handle(vcpu)) 2790 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc); 2791 free_page((unsigned long)(vcpu->arch.sie_block)); 2792 } 2793 2794 void kvm_arch_destroy_vm(struct kvm *kvm) 2795 { 2796 u16 rc, rrc; 2797 2798 kvm_destroy_vcpus(kvm); 2799 sca_dispose(kvm); 2800 kvm_s390_gisa_destroy(kvm); 2801 /* 2802 * We are already at the end of life and kvm->lock is not taken. 2803 * This is ok as the file descriptor is closed by now and nobody 2804 * can mess with the pv state. To avoid lockdep_assert_held from 2805 * complaining we do not use kvm_s390_pv_is_protected. 
2806 */ 2807 if (kvm_s390_pv_get_handle(kvm)) 2808 kvm_s390_pv_deinit_vm(kvm, &rc, &rrc); 2809 debug_unregister(kvm->arch.dbf); 2810 free_page((unsigned long)kvm->arch.sie_page2); 2811 if (!kvm_is_ucontrol(kvm)) 2812 gmap_remove(kvm->arch.gmap); 2813 kvm_s390_destroy_adapters(kvm); 2814 kvm_s390_clear_float_irqs(kvm); 2815 kvm_s390_vsie_destroy(kvm); 2816 KVM_EVENT(3, "vm 0x%pK destroyed", kvm); 2817 } 2818 2819 /* Section: vcpu related */ 2820 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu) 2821 { 2822 vcpu->arch.gmap = gmap_create(current->mm, -1UL); 2823 if (!vcpu->arch.gmap) 2824 return -ENOMEM; 2825 vcpu->arch.gmap->private = vcpu->kvm; 2826 2827 return 0; 2828 } 2829 2830 static void sca_del_vcpu(struct kvm_vcpu *vcpu) 2831 { 2832 if (!kvm_s390_use_sca_entries()) 2833 return; 2834 read_lock(&vcpu->kvm->arch.sca_lock); 2835 if (vcpu->kvm->arch.use_esca) { 2836 struct esca_block *sca = vcpu->kvm->arch.sca; 2837 2838 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); 2839 sca->cpu[vcpu->vcpu_id].sda = 0; 2840 } else { 2841 struct bsca_block *sca = vcpu->kvm->arch.sca; 2842 2843 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); 2844 sca->cpu[vcpu->vcpu_id].sda = 0; 2845 } 2846 read_unlock(&vcpu->kvm->arch.sca_lock); 2847 } 2848 2849 static void sca_add_vcpu(struct kvm_vcpu *vcpu) 2850 { 2851 if (!kvm_s390_use_sca_entries()) { 2852 struct bsca_block *sca = vcpu->kvm->arch.sca; 2853 2854 /* we still need the basic sca for the ipte control */ 2855 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2856 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; 2857 return; 2858 } 2859 read_lock(&vcpu->kvm->arch.sca_lock); 2860 if (vcpu->kvm->arch.use_esca) { 2861 struct esca_block *sca = vcpu->kvm->arch.sca; 2862 2863 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; 2864 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2865 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU; 2866 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; 2867 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); 2868 } else { 2869 struct bsca_block *sca = vcpu->kvm->arch.sca; 2870 2871 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; 2872 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2873 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; 2874 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); 2875 } 2876 read_unlock(&vcpu->kvm->arch.sca_lock); 2877 } 2878 2879 /* Basic SCA to Extended SCA data copy routines */ 2880 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s) 2881 { 2882 d->sda = s->sda; 2883 d->sigp_ctrl.c = s->sigp_ctrl.c; 2884 d->sigp_ctrl.scn = s->sigp_ctrl.scn; 2885 } 2886 2887 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s) 2888 { 2889 int i; 2890 2891 d->ipte_control = s->ipte_control; 2892 d->mcn[0] = s->mcn; 2893 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++) 2894 sca_copy_entry(&d->cpu[i], &s->cpu[i]); 2895 } 2896 2897 static int sca_switch_to_extended(struct kvm *kvm) 2898 { 2899 struct bsca_block *old_sca = kvm->arch.sca; 2900 struct esca_block *new_sca; 2901 struct kvm_vcpu *vcpu; 2902 unsigned long vcpu_idx; 2903 u32 scaol, scaoh; 2904 2905 if (kvm->arch.use_esca) 2906 return 0; 2907 2908 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO); 2909 if (!new_sca) 2910 return -ENOMEM; 2911 2912 scaoh = (u32)((u64)(new_sca) >> 32); 2913 scaol = (u32)(u64)(new_sca) & ~0x3fU; 2914 2915 kvm_s390_vcpu_block_all(kvm); 2916 write_lock(&kvm->arch.sca_lock); 
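	/*
	 * All vCPUs are blocked and the SCA lock is held for writing, so no
	 * CPU can run SIE or look at the old basic SCA while its entries are
	 * copied and the vCPUs are repointed to the new extended SCA below.
	 */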
2917 2918 sca_copy_b_to_e(new_sca, old_sca); 2919 2920 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) { 2921 vcpu->arch.sie_block->scaoh = scaoh; 2922 vcpu->arch.sie_block->scaol = scaol; 2923 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; 2924 } 2925 kvm->arch.sca = new_sca; 2926 kvm->arch.use_esca = 1; 2927 2928 write_unlock(&kvm->arch.sca_lock); 2929 kvm_s390_vcpu_unblock_all(kvm); 2930 2931 free_page((unsigned long)old_sca); 2932 2933 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)", 2934 old_sca, kvm->arch.sca); 2935 return 0; 2936 } 2937 2938 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id) 2939 { 2940 int rc; 2941 2942 if (!kvm_s390_use_sca_entries()) { 2943 if (id < KVM_MAX_VCPUS) 2944 return true; 2945 return false; 2946 } 2947 if (id < KVM_S390_BSCA_CPU_SLOTS) 2948 return true; 2949 if (!sclp.has_esca || !sclp.has_64bscao) 2950 return false; 2951 2952 mutex_lock(&kvm->lock); 2953 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm); 2954 mutex_unlock(&kvm->lock); 2955 2956 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS; 2957 } 2958 2959 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 2960 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2961 { 2962 WARN_ON_ONCE(vcpu->arch.cputm_start != 0); 2963 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 2964 vcpu->arch.cputm_start = get_tod_clock_fast(); 2965 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 2966 } 2967 2968 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 2969 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2970 { 2971 WARN_ON_ONCE(vcpu->arch.cputm_start == 0); 2972 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 2973 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start; 2974 vcpu->arch.cputm_start = 0; 2975 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 2976 } 2977 2978 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 2979 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2980 { 2981 WARN_ON_ONCE(vcpu->arch.cputm_enabled); 2982 vcpu->arch.cputm_enabled = true; 2983 __start_cpu_timer_accounting(vcpu); 2984 } 2985 2986 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 2987 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2988 { 2989 WARN_ON_ONCE(!vcpu->arch.cputm_enabled); 2990 __stop_cpu_timer_accounting(vcpu); 2991 vcpu->arch.cputm_enabled = false; 2992 } 2993 2994 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2995 { 2996 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 2997 __enable_cpu_timer_accounting(vcpu); 2998 preempt_enable(); 2999 } 3000 3001 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3002 { 3003 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3004 __disable_cpu_timer_accounting(vcpu); 3005 preempt_enable(); 3006 } 3007 3008 /* set the cpu timer - may only be called from the VCPU thread itself */ 3009 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm) 3010 { 3011 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3012 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 3013 if (vcpu->arch.cputm_enabled) 3014 vcpu->arch.cputm_start = get_tod_clock_fast(); 3015 vcpu->arch.sie_block->cputm = cputm; 3016 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 3017 preempt_enable(); 3018 } 3019 3020 /* update and get the cpu timer - can also be called from other VCPU threads */ 3021 __u64 
kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu) 3022 { 3023 unsigned int seq; 3024 __u64 value; 3025 3026 if (unlikely(!vcpu->arch.cputm_enabled)) 3027 return vcpu->arch.sie_block->cputm; 3028 3029 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3030 do { 3031 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount); 3032 /* 3033 * If the writer would ever execute a read in the critical 3034 * section, e.g. in irq context, we have a deadlock. 3035 */ 3036 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu); 3037 value = vcpu->arch.sie_block->cputm; 3038 /* if cputm_start is 0, accounting is being started/stopped */ 3039 if (likely(vcpu->arch.cputm_start)) 3040 value -= get_tod_clock_fast() - vcpu->arch.cputm_start; 3041 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1)); 3042 preempt_enable(); 3043 return value; 3044 } 3045 3046 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 3047 { 3048 3049 gmap_enable(vcpu->arch.enabled_gmap); 3050 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING); 3051 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) 3052 __start_cpu_timer_accounting(vcpu); 3053 vcpu->cpu = cpu; 3054 } 3055 3056 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 3057 { 3058 vcpu->cpu = -1; 3059 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) 3060 __stop_cpu_timer_accounting(vcpu); 3061 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING); 3062 vcpu->arch.enabled_gmap = gmap_get_enabled(); 3063 gmap_disable(vcpu->arch.enabled_gmap); 3064 3065 } 3066 3067 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) 3068 { 3069 mutex_lock(&vcpu->kvm->lock); 3070 preempt_disable(); 3071 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch; 3072 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx; 3073 preempt_enable(); 3074 mutex_unlock(&vcpu->kvm->lock); 3075 if (!kvm_is_ucontrol(vcpu->kvm)) { 3076 vcpu->arch.gmap = vcpu->kvm->arch.gmap; 3077 sca_add_vcpu(vcpu); 3078 } 3079 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0) 3080 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC; 3081 /* make vcpu_load load the right gmap on the first trigger */ 3082 vcpu->arch.enabled_gmap = vcpu->arch.gmap; 3083 } 3084 3085 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr) 3086 { 3087 if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) && 3088 test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo)) 3089 return true; 3090 return false; 3091 } 3092 3093 static bool kvm_has_pckmo_ecc(struct kvm *kvm) 3094 { 3095 /* At least one ECC subfunction must be present */ 3096 return kvm_has_pckmo_subfunc(kvm, 32) || 3097 kvm_has_pckmo_subfunc(kvm, 33) || 3098 kvm_has_pckmo_subfunc(kvm, 34) || 3099 kvm_has_pckmo_subfunc(kvm, 40) || 3100 kvm_has_pckmo_subfunc(kvm, 41); 3101 3102 } 3103 3104 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu) 3105 { 3106 /* 3107 * If the AP instructions are not being interpreted and the MSAX3 3108 * facility is not configured for the guest, there is nothing to set up. 
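 * Otherwise the VM-wide CRYCB designation is (re)loaded into the SIE block
 * and the APIE/AES/DEA/ECC control bits are rebuilt below from the current
 * per-VM crypto settings.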
3109 */ 3110 if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76)) 3111 return; 3112 3113 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd; 3114 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA); 3115 vcpu->arch.sie_block->eca &= ~ECA_APIE; 3116 vcpu->arch.sie_block->ecd &= ~ECD_ECC; 3117 3118 if (vcpu->kvm->arch.crypto.apie) 3119 vcpu->arch.sie_block->eca |= ECA_APIE; 3120 3121 /* Set up protected key support */ 3122 if (vcpu->kvm->arch.crypto.aes_kw) { 3123 vcpu->arch.sie_block->ecb3 |= ECB3_AES; 3124 /* ecc is also wrapped with AES key */ 3125 if (kvm_has_pckmo_ecc(vcpu->kvm)) 3126 vcpu->arch.sie_block->ecd |= ECD_ECC; 3127 } 3128 3129 if (vcpu->kvm->arch.crypto.dea_kw) 3130 vcpu->arch.sie_block->ecb3 |= ECB3_DEA; 3131 } 3132 3133 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu) 3134 { 3135 free_page(vcpu->arch.sie_block->cbrlo); 3136 vcpu->arch.sie_block->cbrlo = 0; 3137 } 3138 3139 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu) 3140 { 3141 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT); 3142 if (!vcpu->arch.sie_block->cbrlo) 3143 return -ENOMEM; 3144 return 0; 3145 } 3146 3147 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu) 3148 { 3149 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model; 3150 3151 vcpu->arch.sie_block->ibc = model->ibc; 3152 if (test_kvm_facility(vcpu->kvm, 7)) 3153 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list; 3154 } 3155 3156 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu) 3157 { 3158 int rc = 0; 3159 u16 uvrc, uvrrc; 3160 3161 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | 3162 CPUSTAT_SM | 3163 CPUSTAT_STOPPED); 3164 3165 if (test_kvm_facility(vcpu->kvm, 78)) 3166 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2); 3167 else if (test_kvm_facility(vcpu->kvm, 8)) 3168 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED); 3169 3170 kvm_s390_vcpu_setup_model(vcpu); 3171 3172 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */ 3173 if (MACHINE_HAS_ESOP) 3174 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT; 3175 if (test_kvm_facility(vcpu->kvm, 9)) 3176 vcpu->arch.sie_block->ecb |= ECB_SRSI; 3177 if (test_kvm_facility(vcpu->kvm, 73)) 3178 vcpu->arch.sie_block->ecb |= ECB_TE; 3179 if (!kvm_is_ucontrol(vcpu->kvm)) 3180 vcpu->arch.sie_block->ecb |= ECB_SPECI; 3181 3182 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi) 3183 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI; 3184 if (test_kvm_facility(vcpu->kvm, 130)) 3185 vcpu->arch.sie_block->ecb2 |= ECB2_IEP; 3186 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI; 3187 if (sclp.has_cei) 3188 vcpu->arch.sie_block->eca |= ECA_CEI; 3189 if (sclp.has_ib) 3190 vcpu->arch.sie_block->eca |= ECA_IB; 3191 if (sclp.has_siif) 3192 vcpu->arch.sie_block->eca |= ECA_SII; 3193 if (sclp.has_sigpif) 3194 vcpu->arch.sie_block->eca |= ECA_SIGPI; 3195 if (test_kvm_facility(vcpu->kvm, 129)) { 3196 vcpu->arch.sie_block->eca |= ECA_VX; 3197 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; 3198 } 3199 if (test_kvm_facility(vcpu->kvm, 139)) 3200 vcpu->arch.sie_block->ecd |= ECD_MEF; 3201 if (test_kvm_facility(vcpu->kvm, 156)) 3202 vcpu->arch.sie_block->ecd |= ECD_ETOKENF; 3203 if (vcpu->arch.sie_block->gd) { 3204 vcpu->arch.sie_block->eca |= ECA_AIV; 3205 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u", 3206 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id); 3207 } 3208 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx) 3209 | SDNXC; 3210 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb; 
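	/*
	 * With the keyless-subset facility the guest starts without storage
	 * keys and only gets them on first use; without it, the storage key
	 * instructions are intercepted so that key handling can still be
	 * enabled lazily.
	 */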
3211 3212 if (sclp.has_kss) 3213 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS); 3214 else 3215 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; 3216 3217 if (vcpu->kvm->arch.use_cmma) { 3218 rc = kvm_s390_vcpu_setup_cmma(vcpu); 3219 if (rc) 3220 return rc; 3221 } 3222 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 3223 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; 3224 3225 vcpu->arch.sie_block->hpid = HPID_KVM; 3226 3227 kvm_s390_vcpu_crypto_setup(vcpu); 3228 3229 mutex_lock(&vcpu->kvm->lock); 3230 if (kvm_s390_pv_is_protected(vcpu->kvm)) { 3231 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc); 3232 if (rc) 3233 kvm_s390_vcpu_unsetup_cmma(vcpu); 3234 } 3235 mutex_unlock(&vcpu->kvm->lock); 3236 3237 return rc; 3238 } 3239 3240 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) 3241 { 3242 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id)) 3243 return -EINVAL; 3244 return 0; 3245 } 3246 3247 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) 3248 { 3249 struct sie_page *sie_page; 3250 int rc; 3251 3252 BUILD_BUG_ON(sizeof(struct sie_page) != 4096); 3253 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT); 3254 if (!sie_page) 3255 return -ENOMEM; 3256 3257 vcpu->arch.sie_block = &sie_page->sie_block; 3258 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb; 3259 3260 /* the real guest size will always be smaller than msl */ 3261 vcpu->arch.sie_block->mso = 0; 3262 vcpu->arch.sie_block->msl = sclp.hamax; 3263 3264 vcpu->arch.sie_block->icpua = vcpu->vcpu_id; 3265 spin_lock_init(&vcpu->arch.local_int.lock); 3266 vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin; 3267 if (vcpu->arch.sie_block->gd && sclp.has_gisaf) 3268 vcpu->arch.sie_block->gd |= GISA_FORMAT1; 3269 seqcount_init(&vcpu->arch.cputm_seqcount); 3270 3271 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 3272 kvm_clear_async_pf_completion_queue(vcpu); 3273 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX | 3274 KVM_SYNC_GPRS | 3275 KVM_SYNC_ACRS | 3276 KVM_SYNC_CRS | 3277 KVM_SYNC_ARCH0 | 3278 KVM_SYNC_PFAULT | 3279 KVM_SYNC_DIAG318; 3280 kvm_s390_set_prefix(vcpu, 0); 3281 if (test_kvm_facility(vcpu->kvm, 64)) 3282 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; 3283 if (test_kvm_facility(vcpu->kvm, 82)) 3284 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC; 3285 if (test_kvm_facility(vcpu->kvm, 133)) 3286 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB; 3287 if (test_kvm_facility(vcpu->kvm, 156)) 3288 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN; 3289 /* fprs can be synchronized via vrs, even if the guest has no vx. With 3290 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format. 
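 * Architecturally fpr N overlays the leftmost 64 bits of vr N for
 * N = 0..15, which is what the convert_fp_to_vx()/convert_vx_to_fp()
 * helpers used by the FPU ioctls in this file rely on.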
3291 */ 3292 if (MACHINE_HAS_VX) 3293 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS; 3294 else 3295 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS; 3296 3297 if (kvm_is_ucontrol(vcpu->kvm)) { 3298 rc = __kvm_ucontrol_vcpu_init(vcpu); 3299 if (rc) 3300 goto out_free_sie_block; 3301 } 3302 3303 VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", 3304 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block); 3305 trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block); 3306 3307 rc = kvm_s390_vcpu_setup(vcpu); 3308 if (rc) 3309 goto out_ucontrol_uninit; 3310 return 0; 3311 3312 out_ucontrol_uninit: 3313 if (kvm_is_ucontrol(vcpu->kvm)) 3314 gmap_remove(vcpu->arch.gmap); 3315 out_free_sie_block: 3316 free_page((unsigned long)(vcpu->arch.sie_block)); 3317 return rc; 3318 } 3319 3320 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 3321 { 3322 clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask); 3323 return kvm_s390_vcpu_has_irq(vcpu, 0); 3324 } 3325 3326 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) 3327 { 3328 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE); 3329 } 3330 3331 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu) 3332 { 3333 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); 3334 exit_sie(vcpu); 3335 } 3336 3337 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu) 3338 { 3339 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); 3340 } 3341 3342 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu) 3343 { 3344 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20); 3345 exit_sie(vcpu); 3346 } 3347 3348 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu) 3349 { 3350 return atomic_read(&vcpu->arch.sie_block->prog20) & 3351 (PROG_BLOCK_SIE | PROG_REQUEST); 3352 } 3353 3354 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu) 3355 { 3356 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20); 3357 } 3358 3359 /* 3360 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running. 3361 * If the CPU is not running (e.g. waiting as idle) the function will 3362 * return immediately. 
*/ 3363 void exit_sie(struct kvm_vcpu *vcpu) 3364 { 3365 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT); 3366 kvm_s390_vsie_kick(vcpu); 3367 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE) 3368 cpu_relax(); 3369 } 3370 3371 /* Kick a guest cpu out of SIE to process a request synchronously */ 3372 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu) 3373 { 3374 kvm_make_request(req, vcpu); 3375 kvm_s390_vcpu_request(vcpu); 3376 } 3377 3378 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, 3379 unsigned long end) 3380 { 3381 struct kvm *kvm = gmap->private; 3382 struct kvm_vcpu *vcpu; 3383 unsigned long prefix; 3384 unsigned long i; 3385 3386 if (gmap_is_shadow(gmap)) 3387 return; 3388 if (start >= 1UL << 31) 3389 /* We are only interested in prefix pages */ 3390 return; 3391 kvm_for_each_vcpu(i, vcpu, kvm) { 3392 /* match against both prefix pages */ 3393 prefix = kvm_s390_get_prefix(vcpu); 3394 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) { 3395 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx", 3396 start, end); 3397 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu); 3398 } 3399 } 3400 } 3401 3402 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) 3403 { 3404 /* do not poll with more than halt_poll_max_steal percent of steal time */ 3405 if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >= 3406 READ_ONCE(halt_poll_max_steal)) { 3407 vcpu->stat.halt_no_poll_steal++; 3408 return true; 3409 } 3410 return false; 3411 } 3412 3413 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) 3414 { 3415 /* kvm common code refers to this, but never calls it */ 3416 BUG(); 3417 return 0; 3418 } 3419 3420 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, 3421 struct kvm_one_reg *reg) 3422 { 3423 int r = -EINVAL; 3424 3425 switch (reg->id) { 3426 case KVM_REG_S390_TODPR: 3427 r = put_user(vcpu->arch.sie_block->todpr, 3428 (u32 __user *)reg->addr); 3429 break; 3430 case KVM_REG_S390_EPOCHDIFF: 3431 r = put_user(vcpu->arch.sie_block->epoch, 3432 (u64 __user *)reg->addr); 3433 break; 3434 case KVM_REG_S390_CPU_TIMER: 3435 r = put_user(kvm_s390_get_cpu_timer(vcpu), 3436 (u64 __user *)reg->addr); 3437 break; 3438 case KVM_REG_S390_CLOCK_COMP: 3439 r = put_user(vcpu->arch.sie_block->ckc, 3440 (u64 __user *)reg->addr); 3441 break; 3442 case KVM_REG_S390_PFTOKEN: 3443 r = put_user(vcpu->arch.pfault_token, 3444 (u64 __user *)reg->addr); 3445 break; 3446 case KVM_REG_S390_PFCOMPARE: 3447 r = put_user(vcpu->arch.pfault_compare, 3448 (u64 __user *)reg->addr); 3449 break; 3450 case KVM_REG_S390_PFSELECT: 3451 r = put_user(vcpu->arch.pfault_select, 3452 (u64 __user *)reg->addr); 3453 break; 3454 case KVM_REG_S390_PP: 3455 r = put_user(vcpu->arch.sie_block->pp, 3456 (u64 __user *)reg->addr); 3457 break; 3458 case KVM_REG_S390_GBEA: 3459 r = put_user(vcpu->arch.sie_block->gbea, 3460 (u64 __user *)reg->addr); 3461 break; 3462 default: 3463 break; 3464 } 3465 3466 return r; 3467 } 3468 3469 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, 3470 struct kvm_one_reg *reg) 3471 { 3472 int r = -EINVAL; 3473 __u64 val; 3474 3475 switch (reg->id) { 3476 case KVM_REG_S390_TODPR: 3477 r = get_user(vcpu->arch.sie_block->todpr, 3478 (u32 __user *)reg->addr); 3479 break; 3480 case KVM_REG_S390_EPOCHDIFF: 3481 r = get_user(vcpu->arch.sie_block->epoch, 3482 (u64 __user *)reg->addr); 3483 break; 3484 case KVM_REG_S390_CPU_TIMER: 3485 r = get_user(val, (u64 __user *)reg->addr); 3486 if (!r) 3487 kvm_s390_set_cpu_timer(vcpu, val); 3488 break; 3489 case 
KVM_REG_S390_CLOCK_COMP: 3490 r = get_user(vcpu->arch.sie_block->ckc, 3491 (u64 __user *)reg->addr); 3492 break; 3493 case KVM_REG_S390_PFTOKEN: 3494 r = get_user(vcpu->arch.pfault_token, 3495 (u64 __user *)reg->addr); 3496 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 3497 kvm_clear_async_pf_completion_queue(vcpu); 3498 break; 3499 case KVM_REG_S390_PFCOMPARE: 3500 r = get_user(vcpu->arch.pfault_compare, 3501 (u64 __user *)reg->addr); 3502 break; 3503 case KVM_REG_S390_PFSELECT: 3504 r = get_user(vcpu->arch.pfault_select, 3505 (u64 __user *)reg->addr); 3506 break; 3507 case KVM_REG_S390_PP: 3508 r = get_user(vcpu->arch.sie_block->pp, 3509 (u64 __user *)reg->addr); 3510 break; 3511 case KVM_REG_S390_GBEA: 3512 r = get_user(vcpu->arch.sie_block->gbea, 3513 (u64 __user *)reg->addr); 3514 break; 3515 default: 3516 break; 3517 } 3518 3519 return r; 3520 } 3521 3522 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu) 3523 { 3524 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI; 3525 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 3526 memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb)); 3527 3528 kvm_clear_async_pf_completion_queue(vcpu); 3529 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) 3530 kvm_s390_vcpu_stop(vcpu); 3531 kvm_s390_clear_local_irqs(vcpu); 3532 } 3533 3534 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) 3535 { 3536 /* Initial reset is a superset of the normal reset */ 3537 kvm_arch_vcpu_ioctl_normal_reset(vcpu); 3538 3539 /* 3540 * This equals initial cpu reset in pop, but we don't switch to ESA. 3541 * We do not only reset the internal data, but also ... 3542 */ 3543 vcpu->arch.sie_block->gpsw.mask = 0; 3544 vcpu->arch.sie_block->gpsw.addr = 0; 3545 kvm_s390_set_prefix(vcpu, 0); 3546 kvm_s390_set_cpu_timer(vcpu, 0); 3547 vcpu->arch.sie_block->ckc = 0; 3548 memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr)); 3549 vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK; 3550 vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK; 3551 3552 /* ... the data in sync regs */ 3553 memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs)); 3554 vcpu->run->s.regs.ckc = 0; 3555 vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK; 3556 vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK; 3557 vcpu->run->psw_addr = 0; 3558 vcpu->run->psw_mask = 0; 3559 vcpu->run->s.regs.todpr = 0; 3560 vcpu->run->s.regs.cputm = 0; 3561 vcpu->run->s.regs.ckc = 0; 3562 vcpu->run->s.regs.pp = 0; 3563 vcpu->run->s.regs.gbea = 1; 3564 vcpu->run->s.regs.fpc = 0; 3565 /* 3566 * Do not reset these registers in the protected case, as some of 3567 * them are overlayed and they are not accessible in this case 3568 * anyway. 
3569 */ 3570 if (!kvm_s390_pv_cpu_is_protected(vcpu)) { 3571 vcpu->arch.sie_block->gbea = 1; 3572 vcpu->arch.sie_block->pp = 0; 3573 vcpu->arch.sie_block->fpf &= ~FPF_BPBC; 3574 vcpu->arch.sie_block->todpr = 0; 3575 } 3576 } 3577 3578 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu) 3579 { 3580 struct kvm_sync_regs *regs = &vcpu->run->s.regs; 3581 3582 /* Clear reset is a superset of the initial reset */ 3583 kvm_arch_vcpu_ioctl_initial_reset(vcpu); 3584 3585 memset(&regs->gprs, 0, sizeof(regs->gprs)); 3586 memset(&regs->vrs, 0, sizeof(regs->vrs)); 3587 memset(&regs->acrs, 0, sizeof(regs->acrs)); 3588 memset(&regs->gscb, 0, sizeof(regs->gscb)); 3589 3590 regs->etoken = 0; 3591 regs->etoken_extension = 0; 3592 } 3593 3594 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 3595 { 3596 vcpu_load(vcpu); 3597 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs)); 3598 vcpu_put(vcpu); 3599 return 0; 3600 } 3601 3602 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 3603 { 3604 vcpu_load(vcpu); 3605 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs)); 3606 vcpu_put(vcpu); 3607 return 0; 3608 } 3609 3610 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 3611 struct kvm_sregs *sregs) 3612 { 3613 vcpu_load(vcpu); 3614 3615 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs)); 3616 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs)); 3617 3618 vcpu_put(vcpu); 3619 return 0; 3620 } 3621 3622 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, 3623 struct kvm_sregs *sregs) 3624 { 3625 vcpu_load(vcpu); 3626 3627 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs)); 3628 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs)); 3629 3630 vcpu_put(vcpu); 3631 return 0; 3632 } 3633 3634 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 3635 { 3636 int ret = 0; 3637 3638 vcpu_load(vcpu); 3639 3640 if (test_fp_ctl(fpu->fpc)) { 3641 ret = -EINVAL; 3642 goto out; 3643 } 3644 vcpu->run->s.regs.fpc = fpu->fpc; 3645 if (MACHINE_HAS_VX) 3646 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs, 3647 (freg_t *) fpu->fprs); 3648 else 3649 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs)); 3650 3651 out: 3652 vcpu_put(vcpu); 3653 return ret; 3654 } 3655 3656 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 3657 { 3658 vcpu_load(vcpu); 3659 3660 /* make sure we have the latest values */ 3661 save_fpu_regs(); 3662 if (MACHINE_HAS_VX) 3663 convert_vx_to_fp((freg_t *) fpu->fprs, 3664 (__vector128 *) vcpu->run->s.regs.vrs); 3665 else 3666 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs)); 3667 fpu->fpc = vcpu->run->s.regs.fpc; 3668 3669 vcpu_put(vcpu); 3670 return 0; 3671 } 3672 3673 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw) 3674 { 3675 int rc = 0; 3676 3677 if (!is_vcpu_stopped(vcpu)) 3678 rc = -EBUSY; 3679 else { 3680 vcpu->run->psw_mask = psw.mask; 3681 vcpu->run->psw_addr = psw.addr; 3682 } 3683 return rc; 3684 } 3685 3686 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, 3687 struct kvm_translation *tr) 3688 { 3689 return -EINVAL; /* not implemented yet */ 3690 } 3691 3692 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \ 3693 KVM_GUESTDBG_USE_HW_BP | \ 3694 KVM_GUESTDBG_ENABLE) 3695 3696 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, 3697 struct kvm_guest_debug *dbg) 3698 { 3699 int rc = 0; 3700 3701 vcpu_load(vcpu); 3702 
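	/*
	 * Start from a clean debug state; the requested settings are only
	 * re-applied further down once the control flags have been validated.
	 */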
3703 vcpu->guest_debug = 0; 3704 kvm_s390_clear_bp_data(vcpu); 3705 3706 if (dbg->control & ~VALID_GUESTDBG_FLAGS) { 3707 rc = -EINVAL; 3708 goto out; 3709 } 3710 if (!sclp.has_gpere) { 3711 rc = -EINVAL; 3712 goto out; 3713 } 3714 3715 if (dbg->control & KVM_GUESTDBG_ENABLE) { 3716 vcpu->guest_debug = dbg->control; 3717 /* enforce guest PER */ 3718 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P); 3719 3720 if (dbg->control & KVM_GUESTDBG_USE_HW_BP) 3721 rc = kvm_s390_import_bp_data(vcpu, dbg); 3722 } else { 3723 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P); 3724 vcpu->arch.guestdbg.last_bp = 0; 3725 } 3726 3727 if (rc) { 3728 vcpu->guest_debug = 0; 3729 kvm_s390_clear_bp_data(vcpu); 3730 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P); 3731 } 3732 3733 out: 3734 vcpu_put(vcpu); 3735 return rc; 3736 } 3737 3738 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, 3739 struct kvm_mp_state *mp_state) 3740 { 3741 int ret; 3742 3743 vcpu_load(vcpu); 3744 3745 /* CHECK_STOP and LOAD are not supported yet */ 3746 ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED : 3747 KVM_MP_STATE_OPERATING; 3748 3749 vcpu_put(vcpu); 3750 return ret; 3751 } 3752 3753 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, 3754 struct kvm_mp_state *mp_state) 3755 { 3756 int rc = 0; 3757 3758 vcpu_load(vcpu); 3759 3760 /* user space knows about this interface - let it control the state */ 3761 kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm); 3762 3763 switch (mp_state->mp_state) { 3764 case KVM_MP_STATE_STOPPED: 3765 rc = kvm_s390_vcpu_stop(vcpu); 3766 break; 3767 case KVM_MP_STATE_OPERATING: 3768 rc = kvm_s390_vcpu_start(vcpu); 3769 break; 3770 case KVM_MP_STATE_LOAD: 3771 if (!kvm_s390_pv_cpu_is_protected(vcpu)) { 3772 rc = -ENXIO; 3773 break; 3774 } 3775 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD); 3776 break; 3777 case KVM_MP_STATE_CHECK_STOP: 3778 fallthrough; /* CHECK_STOP and LOAD are not supported yet */ 3779 default: 3780 rc = -ENXIO; 3781 } 3782 3783 vcpu_put(vcpu); 3784 return rc; 3785 } 3786 3787 static bool ibs_enabled(struct kvm_vcpu *vcpu) 3788 { 3789 return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS); 3790 } 3791 3792 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) 3793 { 3794 retry: 3795 kvm_s390_vcpu_request_handled(vcpu); 3796 if (!kvm_request_pending(vcpu)) 3797 return 0; 3798 /* 3799 * We use MMU_RELOAD just to re-arm the ipte notifier for the 3800 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock. 3801 * This ensures that the ipte instruction for this request has 3802 * already finished. We might race against a second unmapper that 3803 * wants to set the blocking bit. Lets just retry the request loop. 
3804 */ 3805 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) { 3806 int rc; 3807 rc = gmap_mprotect_notify(vcpu->arch.gmap, 3808 kvm_s390_get_prefix(vcpu), 3809 PAGE_SIZE * 2, PROT_WRITE); 3810 if (rc) { 3811 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); 3812 return rc; 3813 } 3814 goto retry; 3815 } 3816 3817 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) { 3818 vcpu->arch.sie_block->ihcpu = 0xffff; 3819 goto retry; 3820 } 3821 3822 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) { 3823 if (!ibs_enabled(vcpu)) { 3824 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1); 3825 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS); 3826 } 3827 goto retry; 3828 } 3829 3830 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) { 3831 if (ibs_enabled(vcpu)) { 3832 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0); 3833 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS); 3834 } 3835 goto retry; 3836 } 3837 3838 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) { 3839 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC; 3840 goto retry; 3841 } 3842 3843 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) { 3844 /* 3845 * Disable CMM virtualization; we will emulate the ESSA 3846 * instruction manually, in order to provide additional 3847 * functionalities needed for live migration. 3848 */ 3849 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA; 3850 goto retry; 3851 } 3852 3853 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) { 3854 /* 3855 * Re-enable CMM virtualization if CMMA is available and 3856 * CMM has been used. 3857 */ 3858 if ((vcpu->kvm->arch.use_cmma) && 3859 (vcpu->kvm->mm->context.uses_cmm)) 3860 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA; 3861 goto retry; 3862 } 3863 3864 /* nothing to do, just clear the request */ 3865 kvm_clear_request(KVM_REQ_UNHALT, vcpu); 3866 /* we left the vsie handler, nothing to do, just clear the request */ 3867 kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu); 3868 3869 return 0; 3870 } 3871 3872 void kvm_s390_set_tod_clock(struct kvm *kvm, 3873 const struct kvm_s390_vm_tod_clock *gtod) 3874 { 3875 struct kvm_vcpu *vcpu; 3876 union tod_clock clk; 3877 unsigned long i; 3878 3879 mutex_lock(&kvm->lock); 3880 preempt_disable(); 3881 3882 store_tod_clock_ext(&clk); 3883 3884 kvm->arch.epoch = gtod->tod - clk.tod; 3885 kvm->arch.epdx = 0; 3886 if (test_kvm_facility(kvm, 139)) { 3887 kvm->arch.epdx = gtod->epoch_idx - clk.ei; 3888 if (kvm->arch.epoch > gtod->tod) 3889 kvm->arch.epdx -= 1; 3890 } 3891 3892 kvm_s390_vcpu_block_all(kvm); 3893 kvm_for_each_vcpu(i, vcpu, kvm) { 3894 vcpu->arch.sie_block->epoch = kvm->arch.epoch; 3895 vcpu->arch.sie_block->epdx = kvm->arch.epdx; 3896 } 3897 3898 kvm_s390_vcpu_unblock_all(kvm); 3899 preempt_enable(); 3900 mutex_unlock(&kvm->lock); 3901 } 3902 3903 /** 3904 * kvm_arch_fault_in_page - fault-in guest page if necessary 3905 * @vcpu: The corresponding virtual cpu 3906 * @gpa: Guest physical address 3907 * @writable: Whether the page should be writable or not 3908 * 3909 * Make sure that a guest page has been faulted-in on the host. 3910 * 3911 * Return: Zero on success, negative error code otherwise. 3912 */ 3913 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable) 3914 { 3915 return gmap_fault(vcpu->arch.gmap, gpa, 3916 writable ? 
FAULT_FLAG_WRITE : 0); 3917 } 3918 3919 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token, 3920 unsigned long token) 3921 { 3922 struct kvm_s390_interrupt inti; 3923 struct kvm_s390_irq irq; 3924 3925 if (start_token) { 3926 irq.u.ext.ext_params2 = token; 3927 irq.type = KVM_S390_INT_PFAULT_INIT; 3928 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq)); 3929 } else { 3930 inti.type = KVM_S390_INT_PFAULT_DONE; 3931 inti.parm64 = token; 3932 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti)); 3933 } 3934 } 3935 3936 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, 3937 struct kvm_async_pf *work) 3938 { 3939 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token); 3940 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token); 3941 3942 return true; 3943 } 3944 3945 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, 3946 struct kvm_async_pf *work) 3947 { 3948 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token); 3949 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token); 3950 } 3951 3952 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, 3953 struct kvm_async_pf *work) 3954 { 3955 /* s390 will always inject the page directly */ 3956 } 3957 3958 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu) 3959 { 3960 /* 3961 * s390 will always inject the page directly, 3962 * but we still want check_async_completion to cleanup 3963 */ 3964 return true; 3965 } 3966 3967 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu) 3968 { 3969 hva_t hva; 3970 struct kvm_arch_async_pf arch; 3971 3972 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 3973 return false; 3974 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) != 3975 vcpu->arch.pfault_compare) 3976 return false; 3977 if (psw_extint_disabled(vcpu)) 3978 return false; 3979 if (kvm_s390_vcpu_has_irq(vcpu, 0)) 3980 return false; 3981 if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK)) 3982 return false; 3983 if (!vcpu->arch.gmap->pfault_enabled) 3984 return false; 3985 3986 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr)); 3987 hva += current->thread.gmap_addr & ~PAGE_MASK; 3988 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8)) 3989 return false; 3990 3991 return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch); 3992 } 3993 3994 static int vcpu_pre_run(struct kvm_vcpu *vcpu) 3995 { 3996 int rc, cpuflags; 3997 3998 /* 3999 * On s390 notifications for arriving pages will be delivered directly 4000 * to the guest but the house keeping for completed pfaults is 4001 * handled outside the worker. 
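 * Concretely, kvm_check_async_pf_completion() below dequeues the
 * finished requests and injects the matching PFAULT DONE interrupts
 * via kvm_arch_async_page_present().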
4002 */ 4003 kvm_check_async_pf_completion(vcpu); 4004 4005 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14]; 4006 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15]; 4007 4008 if (need_resched()) 4009 schedule(); 4010 4011 if (!kvm_is_ucontrol(vcpu->kvm)) { 4012 rc = kvm_s390_deliver_pending_interrupts(vcpu); 4013 if (rc) 4014 return rc; 4015 } 4016 4017 rc = kvm_s390_handle_requests(vcpu); 4018 if (rc) 4019 return rc; 4020 4021 if (guestdbg_enabled(vcpu)) { 4022 kvm_s390_backup_guest_per_regs(vcpu); 4023 kvm_s390_patch_guest_per_regs(vcpu); 4024 } 4025 4026 clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask); 4027 4028 vcpu->arch.sie_block->icptcode = 0; 4029 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags); 4030 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags); 4031 trace_kvm_s390_sie_enter(vcpu, cpuflags); 4032 4033 return 0; 4034 } 4035 4036 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) 4037 { 4038 struct kvm_s390_pgm_info pgm_info = { 4039 .code = PGM_ADDRESSING, 4040 }; 4041 u8 opcode, ilen; 4042 int rc; 4043 4044 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); 4045 trace_kvm_s390_sie_fault(vcpu); 4046 4047 /* 4048 * We want to inject an addressing exception, which is defined as a 4049 * suppressing or terminating exception. However, since we came here 4050 * by a DAT access exception, the PSW still points to the faulting 4051 * instruction since DAT exceptions are nullifying. So we've got 4052 * to look up the current opcode to get the length of the instruction 4053 * to be able to forward the PSW. 4054 */ 4055 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1); 4056 ilen = insn_length(opcode); 4057 if (rc < 0) { 4058 return rc; 4059 } else if (rc) { 4060 /* Instruction-Fetching Exceptions - we can't detect the ilen. 4061 * Forward by arbitrary ilc, injection will take care of 4062 * nullification if necessary. 
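 * (Valid s390 instruction lengths are 2, 4 and 6 bytes; the arbitrary
 * ilen of 4 chosen below is the middle of that range.)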
4063 */ 4064 pgm_info = vcpu->arch.pgm; 4065 ilen = 4; 4066 } 4067 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID; 4068 kvm_s390_forward_psw(vcpu, ilen); 4069 return kvm_s390_inject_prog_irq(vcpu, &pgm_info); 4070 } 4071 4072 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) 4073 { 4074 struct mcck_volatile_info *mcck_info; 4075 struct sie_page *sie_page; 4076 4077 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", 4078 vcpu->arch.sie_block->icptcode); 4079 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); 4080 4081 if (guestdbg_enabled(vcpu)) 4082 kvm_s390_restore_guest_per_regs(vcpu); 4083 4084 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14; 4085 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15; 4086 4087 if (exit_reason == -EINTR) { 4088 VCPU_EVENT(vcpu, 3, "%s", "machine check"); 4089 sie_page = container_of(vcpu->arch.sie_block, 4090 struct sie_page, sie_block); 4091 mcck_info = &sie_page->mcck_info; 4092 kvm_s390_reinject_machine_check(vcpu, mcck_info); 4093 return 0; 4094 } 4095 4096 if (vcpu->arch.sie_block->icptcode > 0) { 4097 int rc = kvm_handle_sie_intercept(vcpu); 4098 4099 if (rc != -EOPNOTSUPP) 4100 return rc; 4101 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC; 4102 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; 4103 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; 4104 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; 4105 return -EREMOTE; 4106 } else if (exit_reason != -EFAULT) { 4107 vcpu->stat.exit_null++; 4108 return 0; 4109 } else if (kvm_is_ucontrol(vcpu->kvm)) { 4110 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; 4111 vcpu->run->s390_ucontrol.trans_exc_code = 4112 current->thread.gmap_addr; 4113 vcpu->run->s390_ucontrol.pgm_code = 0x10; 4114 return -EREMOTE; 4115 } else if (current->thread.gmap_pfault) { 4116 trace_kvm_s390_major_guest_pfault(vcpu); 4117 current->thread.gmap_pfault = 0; 4118 if (kvm_arch_setup_async_pf(vcpu)) 4119 return 0; 4120 vcpu->stat.pfault_sync++; 4121 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1); 4122 } 4123 return vcpu_post_run_fault_in_sie(vcpu); 4124 } 4125 4126 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK) 4127 static int __vcpu_run(struct kvm_vcpu *vcpu) 4128 { 4129 int rc, exit_reason; 4130 struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block; 4131 4132 /* 4133 * We try to hold kvm->srcu during most of vcpu_run (except when run- 4134 * ning the guest), so that memslots (and other stuff) are protected 4135 */ 4136 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 4137 4138 do { 4139 rc = vcpu_pre_run(vcpu); 4140 if (rc) 4141 break; 4142 4143 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 4144 /* 4145 * As PF_VCPU will be used in fault handler, between 4146 * guest_enter and guest_exit should be no uaccess. 
4147 */ 4148 local_irq_disable(); 4149 guest_enter_irqoff(); 4150 __disable_cpu_timer_accounting(vcpu); 4151 local_irq_enable(); 4152 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4153 memcpy(sie_page->pv_grregs, 4154 vcpu->run->s.regs.gprs, 4155 sizeof(sie_page->pv_grregs)); 4156 } 4157 if (test_cpu_flag(CIF_FPU)) 4158 load_fpu_regs(); 4159 exit_reason = sie64a(vcpu->arch.sie_block, 4160 vcpu->run->s.regs.gprs); 4161 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4162 memcpy(vcpu->run->s.regs.gprs, 4163 sie_page->pv_grregs, 4164 sizeof(sie_page->pv_grregs)); 4165 /* 4166 * We're not allowed to inject interrupts on intercepts 4167 * that leave the guest state in an "in-between" state 4168 * where the next SIE entry will do a continuation. 4169 * Fence interrupts in our "internal" PSW. 4170 */ 4171 if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR || 4172 vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) { 4173 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK; 4174 } 4175 } 4176 local_irq_disable(); 4177 __enable_cpu_timer_accounting(vcpu); 4178 guest_exit_irqoff(); 4179 local_irq_enable(); 4180 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 4181 4182 rc = vcpu_post_run(vcpu, exit_reason); 4183 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc); 4184 4185 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 4186 return rc; 4187 } 4188 4189 static void sync_regs_fmt2(struct kvm_vcpu *vcpu) 4190 { 4191 struct kvm_run *kvm_run = vcpu->run; 4192 struct runtime_instr_cb *riccb; 4193 struct gs_cb *gscb; 4194 4195 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb; 4196 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb; 4197 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; 4198 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; 4199 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) { 4200 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr; 4201 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp; 4202 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea; 4203 } 4204 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) { 4205 vcpu->arch.pfault_token = kvm_run->s.regs.pft; 4206 vcpu->arch.pfault_select = kvm_run->s.regs.pfs; 4207 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc; 4208 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 4209 kvm_clear_async_pf_completion_queue(vcpu); 4210 } 4211 if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) { 4212 vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318; 4213 vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc; 4214 VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc); 4215 } 4216 /* 4217 * If userspace sets the riccb (e.g. after migration) to a valid state, 4218 * we should enable RI here instead of doing the lazy enablement. 4219 */ 4220 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) && 4221 test_kvm_facility(vcpu->kvm, 64) && 4222 riccb->v && 4223 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) { 4224 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)"); 4225 vcpu->arch.sie_block->ecb3 |= ECB3_RI; 4226 } 4227 /* 4228 * If userspace sets the gscb (e.g. after migration) to non-zero, 4229 * we should enable GS here instead of doing the lazy enablement. 
4230 */ 4231 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) && 4232 test_kvm_facility(vcpu->kvm, 133) && 4233 gscb->gssm && 4234 !vcpu->arch.gs_enabled) { 4235 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)"); 4236 vcpu->arch.sie_block->ecb |= ECB_GS; 4237 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; 4238 vcpu->arch.gs_enabled = 1; 4239 } 4240 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) && 4241 test_kvm_facility(vcpu->kvm, 82)) { 4242 vcpu->arch.sie_block->fpf &= ~FPF_BPBC; 4243 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0; 4244 } 4245 if (MACHINE_HAS_GS) { 4246 preempt_disable(); 4247 __ctl_set_bit(2, 4); 4248 if (current->thread.gs_cb) { 4249 vcpu->arch.host_gscb = current->thread.gs_cb; 4250 save_gs_cb(vcpu->arch.host_gscb); 4251 } 4252 if (vcpu->arch.gs_enabled) { 4253 current->thread.gs_cb = (struct gs_cb *) 4254 &vcpu->run->s.regs.gscb; 4255 restore_gs_cb(current->thread.gs_cb); 4256 } 4257 preempt_enable(); 4258 } 4259 /* SIE will load etoken directly from SDNX and therefore kvm_run */ 4260 } 4261 4262 static void sync_regs(struct kvm_vcpu *vcpu) 4263 { 4264 struct kvm_run *kvm_run = vcpu->run; 4265 4266 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) 4267 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); 4268 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) { 4269 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128); 4270 /* some control register changes require a tlb flush */ 4271 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 4272 } 4273 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) { 4274 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm); 4275 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc; 4276 } 4277 save_access_regs(vcpu->arch.host_acrs); 4278 restore_access_regs(vcpu->run->s.regs.acrs); 4279 /* save host (userspace) fprs/vrs */ 4280 save_fpu_regs(); 4281 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc; 4282 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs; 4283 if (MACHINE_HAS_VX) 4284 current->thread.fpu.regs = vcpu->run->s.regs.vrs; 4285 else 4286 current->thread.fpu.regs = vcpu->run->s.regs.fprs; 4287 current->thread.fpu.fpc = vcpu->run->s.regs.fpc; 4288 if (test_fp_ctl(current->thread.fpu.fpc)) 4289 /* User space provided an invalid FPC, let's clear it */ 4290 current->thread.fpu.fpc = 0; 4291 4292 /* Sync fmt2 only data */ 4293 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) { 4294 sync_regs_fmt2(vcpu); 4295 } else { 4296 /* 4297 * In several places we have to modify our internal view to 4298 * not do things that are disallowed by the ultravisor. For 4299 * example we must not inject interrupts after specific exits 4300 * (e.g. 112 prefix page not secure). We do this by turning 4301 * off the machine check, external and I/O interrupt bits 4302 * of our PSW copy. To avoid getting validity intercepts, we 4303 * do only accept the condition code from userspace. 
4304 */ 4305 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC; 4306 vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask & 4307 PSW_MASK_CC; 4308 } 4309 4310 kvm_run->kvm_dirty_regs = 0; 4311 } 4312 4313 static void store_regs_fmt2(struct kvm_vcpu *vcpu) 4314 { 4315 struct kvm_run *kvm_run = vcpu->run; 4316 4317 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr; 4318 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp; 4319 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea; 4320 kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC; 4321 kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val; 4322 if (MACHINE_HAS_GS) { 4323 preempt_disable(); 4324 __ctl_set_bit(2, 4); 4325 if (vcpu->arch.gs_enabled) 4326 save_gs_cb(current->thread.gs_cb); 4327 current->thread.gs_cb = vcpu->arch.host_gscb; 4328 restore_gs_cb(vcpu->arch.host_gscb); 4329 if (!vcpu->arch.host_gscb) 4330 __ctl_clear_bit(2, 4); 4331 vcpu->arch.host_gscb = NULL; 4332 preempt_enable(); 4333 } 4334 /* SIE will save etoken directly into SDNX and therefore kvm_run */ 4335 } 4336 4337 static void store_regs(struct kvm_vcpu *vcpu) 4338 { 4339 struct kvm_run *kvm_run = vcpu->run; 4340 4341 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask; 4342 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; 4343 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu); 4344 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128); 4345 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu); 4346 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc; 4347 kvm_run->s.regs.pft = vcpu->arch.pfault_token; 4348 kvm_run->s.regs.pfs = vcpu->arch.pfault_select; 4349 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare; 4350 save_access_regs(vcpu->run->s.regs.acrs); 4351 restore_access_regs(vcpu->arch.host_acrs); 4352 /* Save guest register state */ 4353 save_fpu_regs(); 4354 vcpu->run->s.regs.fpc = current->thread.fpu.fpc; 4355 /* Restore will be done lazily at return */ 4356 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc; 4357 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs; 4358 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) 4359 store_regs_fmt2(vcpu); 4360 } 4361 4362 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) 4363 { 4364 struct kvm_run *kvm_run = vcpu->run; 4365 int rc; 4366 4367 if (kvm_run->immediate_exit) 4368 return -EINTR; 4369 4370 if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS || 4371 kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS) 4372 return -EINVAL; 4373 4374 vcpu_load(vcpu); 4375 4376 if (guestdbg_exit_pending(vcpu)) { 4377 kvm_s390_prepare_debug_exit(vcpu); 4378 rc = 0; 4379 goto out; 4380 } 4381 4382 kvm_sigset_activate(vcpu); 4383 4384 /* 4385 * no need to check the return value of vcpu_start as it can only have 4386 * an error for protvirt, but protvirt means user cpu state 4387 */ 4388 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) { 4389 kvm_s390_vcpu_start(vcpu); 4390 } else if (is_vcpu_stopped(vcpu)) { 4391 pr_err_ratelimited("can't run stopped vcpu %d\n", 4392 vcpu->vcpu_id); 4393 rc = -EINVAL; 4394 goto out; 4395 } 4396 4397 sync_regs(vcpu); 4398 enable_cpu_timer_accounting(vcpu); 4399 4400 might_fault(); 4401 rc = __vcpu_run(vcpu); 4402 4403 if (signal_pending(current) && !rc) { 4404 kvm_run->exit_reason = KVM_EXIT_INTR; 4405 rc = -EINTR; 4406 } 4407 4408 if (guestdbg_exit_pending(vcpu) && !rc) { 4409 kvm_s390_prepare_debug_exit(vcpu); 4410 rc = 0; 4411 } 4412 4413 if (rc == -EREMOTE) { 4414 /* userspace support is needed, kvm_run has been prepared */ 4415 rc = 0; 4416 } 4417 4418 
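/*
 * Hedged illustration (not part of the original source; vcpu_fd, run and
 * handle_sie_intercept() are placeholders): -EREMOTE above follows the
 * usual KVM convention of "return to userspace, kvm_run is prepared",
 * so a minimal userspace loop over this ioctl looks roughly like
 *
 *	struct kvm_run *run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
 *				   MAP_SHARED, vcpu_fd, 0);
 *	for (;;) {
 *		if (ioctl(vcpu_fd, KVM_RUN, 0) < 0 && errno != EINTR)
 *			break;
 *		if (run->exit_reason == KVM_EXIT_S390_SIEIC)
 *			handle_sie_intercept(run->s390_sieic.icptcode,
 *					     run->s390_sieic.ipa,
 *					     run->s390_sieic.ipb);
 *	}
 *
 * with mmap_size obtained via KVM_GET_VCPU_MMAP_SIZE on /dev/kvm.
 */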
disable_cpu_timer_accounting(vcpu); 4419 store_regs(vcpu); 4420 4421 kvm_sigset_deactivate(vcpu); 4422 4423 vcpu->stat.exit_userspace++; 4424 out: 4425 vcpu_put(vcpu); 4426 return rc; 4427 } 4428 4429 /* 4430 * store status at address 4431 * we have two special cases: 4432 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit 4433 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix 4434 */ 4435 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa) 4436 { 4437 unsigned char archmode = 1; 4438 freg_t fprs[NUM_FPRS]; 4439 unsigned int px; 4440 u64 clkcomp, cputm; 4441 int rc; 4442 4443 px = kvm_s390_get_prefix(vcpu); 4444 if (gpa == KVM_S390_STORE_STATUS_NOADDR) { 4445 if (write_guest_abs(vcpu, 163, &archmode, 1)) 4446 return -EFAULT; 4447 gpa = 0; 4448 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) { 4449 if (write_guest_real(vcpu, 163, &archmode, 1)) 4450 return -EFAULT; 4451 gpa = px; 4452 } else 4453 gpa -= __LC_FPREGS_SAVE_AREA; 4454 4455 /* manually convert vector registers if necessary */ 4456 if (MACHINE_HAS_VX) { 4457 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs); 4458 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, 4459 fprs, 128); 4460 } else { 4461 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, 4462 vcpu->run->s.regs.fprs, 128); 4463 } 4464 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA, 4465 vcpu->run->s.regs.gprs, 128); 4466 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA, 4467 &vcpu->arch.sie_block->gpsw, 16); 4468 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA, 4469 &px, 4); 4470 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA, 4471 &vcpu->run->s.regs.fpc, 4); 4472 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA, 4473 &vcpu->arch.sie_block->todpr, 4); 4474 cputm = kvm_s390_get_cpu_timer(vcpu); 4475 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA, 4476 &cputm, 8); 4477 clkcomp = vcpu->arch.sie_block->ckc >> 8; 4478 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA, 4479 &clkcomp, 8); 4480 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA, 4481 &vcpu->run->s.regs.acrs, 64); 4482 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA, 4483 &vcpu->arch.sie_block->gcr, 128); 4484 return rc ? -EFAULT : 0; 4485 } 4486 4487 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) 4488 { 4489 /* 4490 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy 4491 * switch in the run ioctl.
Let's update our copies before we save 4492 * it into the save area 4493 */ 4494 save_fpu_regs(); 4495 vcpu->run->s.regs.fpc = current->thread.fpu.fpc; 4496 save_access_regs(vcpu->run->s.regs.acrs); 4497 4498 return kvm_s390_store_status_unloaded(vcpu, addr); 4499 } 4500 4501 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu) 4502 { 4503 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu); 4504 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu); 4505 } 4506 4507 static void __disable_ibs_on_all_vcpus(struct kvm *kvm) 4508 { 4509 unsigned long i; 4510 struct kvm_vcpu *vcpu; 4511 4512 kvm_for_each_vcpu(i, vcpu, kvm) { 4513 __disable_ibs_on_vcpu(vcpu); 4514 } 4515 } 4516 4517 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu) 4518 { 4519 if (!sclp.has_ibs) 4520 return; 4521 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu); 4522 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu); 4523 } 4524 4525 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu) 4526 { 4527 int i, online_vcpus, r = 0, started_vcpus = 0; 4528 4529 if (!is_vcpu_stopped(vcpu)) 4530 return 0; 4531 4532 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1); 4533 /* Only one cpu at a time may enter/leave the STOPPED state. */ 4534 spin_lock(&vcpu->kvm->arch.start_stop_lock); 4535 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); 4536 4537 /* Let's tell the UV that we want to change into the operating state */ 4538 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4539 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR); 4540 if (r) { 4541 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 4542 return r; 4543 } 4544 } 4545 4546 for (i = 0; i < online_vcpus; i++) { 4547 if (!is_vcpu_stopped(kvm_get_vcpu(vcpu->kvm, i))) 4548 started_vcpus++; 4549 } 4550 4551 if (started_vcpus == 0) { 4552 /* we're the only active VCPU -> speed it up */ 4553 __enable_ibs_on_vcpu(vcpu); 4554 } else if (started_vcpus == 1) { 4555 /* 4556 * As we are starting a second VCPU, we have to disable 4557 * the IBS facility on all VCPUs to remove potentially 4558 * outstanding ENABLE requests. 4559 */ 4560 __disable_ibs_on_all_vcpus(vcpu->kvm); 4561 } 4562 4563 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED); 4564 /* 4565 * The real PSW might have changed due to a RESTART interpreted by the 4566 * ultravisor. We block all interrupts and let the next sie exit 4567 * refresh our view. 4568 */ 4569 if (kvm_s390_pv_cpu_is_protected(vcpu)) 4570 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK; 4571 /* 4572 * Another VCPU might have used IBS while we were offline. 4573 * Let's play safe and flush the VCPU at startup. 4574 */ 4575 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 4576 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 4577 return 0; 4578 } 4579 4580 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu) 4581 { 4582 int i, online_vcpus, r = 0, started_vcpus = 0; 4583 struct kvm_vcpu *started_vcpu = NULL; 4584 4585 if (is_vcpu_stopped(vcpu)) 4586 return 0; 4587 4588 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0); 4589 /* Only one cpu at a time may enter/leave the STOPPED state. 
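 * The start_stop_lock taken below also keeps the started_vcpus
 * accounting used for the IBS handling further down consistent.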
*/ 4590 spin_lock(&vcpu->kvm->arch.start_stop_lock); 4591 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); 4592 4593 /* Let's tell the UV that we want to change into the stopped state */ 4594 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4595 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP); 4596 if (r) { 4597 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 4598 return r; 4599 } 4600 } 4601 4602 /* 4603 * Set the VCPU to STOPPED and THEN clear the interrupt flag, 4604 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders 4605 * have been fully processed. This will ensure that the VCPU 4606 * is kept BUSY if another VCPU is inquiring with SIGP SENSE. 4607 */ 4608 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED); 4609 kvm_s390_clear_stop_irq(vcpu); 4610 4611 __disable_ibs_on_vcpu(vcpu); 4612 4613 for (i = 0; i < online_vcpus; i++) { 4614 struct kvm_vcpu *tmp = kvm_get_vcpu(vcpu->kvm, i); 4615 4616 if (!is_vcpu_stopped(tmp)) { 4617 started_vcpus++; 4618 started_vcpu = tmp; 4619 } 4620 } 4621 4622 if (started_vcpus == 1) { 4623 /* 4624 * As we only have one VCPU left, we want to enable the 4625 * IBS facility for that VCPU to speed it up. 4626 */ 4627 __enable_ibs_on_vcpu(started_vcpu); 4628 } 4629 4630 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 4631 return 0; 4632 } 4633 4634 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, 4635 struct kvm_enable_cap *cap) 4636 { 4637 int r; 4638 4639 if (cap->flags) 4640 return -EINVAL; 4641 4642 switch (cap->cap) { 4643 case KVM_CAP_S390_CSS_SUPPORT: 4644 if (!vcpu->kvm->arch.css_support) { 4645 vcpu->kvm->arch.css_support = 1; 4646 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support"); 4647 trace_kvm_s390_enable_css(vcpu->kvm); 4648 } 4649 r = 0; 4650 break; 4651 default: 4652 r = -EINVAL; 4653 break; 4654 } 4655 return r; 4656 } 4657 4658 static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu, 4659 struct kvm_s390_mem_op *mop) 4660 { 4661 void __user *uaddr = (void __user *)mop->buf; 4662 int r = 0; 4663 4664 if (mop->flags || !mop->size) 4665 return -EINVAL; 4666 if (mop->size + mop->sida_offset < mop->size) 4667 return -EINVAL; 4668 if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block)) 4669 return -E2BIG; 4670 4671 switch (mop->op) { 4672 case KVM_S390_MEMOP_SIDA_READ: 4673 if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) + 4674 mop->sida_offset), mop->size)) 4675 r = -EFAULT; 4676 4677 break; 4678 case KVM_S390_MEMOP_SIDA_WRITE: 4679 if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) + 4680 mop->sida_offset), uaddr, mop->size)) 4681 r = -EFAULT; 4682 break; 4683 } 4684 return r; 4685 } 4686 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, 4687 struct kvm_s390_mem_op *mop) 4688 { 4689 void __user *uaddr = (void __user *)mop->buf; 4690 void *tmpbuf = NULL; 4691 int r = 0; 4692 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION 4693 | KVM_S390_MEMOP_F_CHECK_ONLY; 4694 4695 if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size) 4696 return -EINVAL; 4697 4698 if (mop->size > MEM_OP_MAX_SIZE) 4699 return -E2BIG; 4700 4701 if (kvm_s390_pv_cpu_is_protected(vcpu)) 4702 return -EINVAL; 4703 4704 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) { 4705 tmpbuf = vmalloc(mop->size); 4706 if (!tmpbuf) 4707 return -ENOMEM; 4708 } 4709 4710 switch (mop->op) { 4711 case KVM_S390_MEMOP_LOGICAL_READ: 4712 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { 4713 r = check_gva_range(vcpu, mop->gaddr, mop->ar, 4714 mop->size, GACC_FETCH); 4715 break; 4716 } 4717 r = 
read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size); 4718 if (r == 0) { 4719 if (copy_to_user(uaddr, tmpbuf, mop->size)) 4720 r = -EFAULT; 4721 } 4722 break; 4723 case KVM_S390_MEMOP_LOGICAL_WRITE: 4724 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { 4725 r = check_gva_range(vcpu, mop->gaddr, mop->ar, 4726 mop->size, GACC_STORE); 4727 break; 4728 } 4729 if (copy_from_user(tmpbuf, uaddr, mop->size)) { 4730 r = -EFAULT; 4731 break; 4732 } 4733 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size); 4734 break; 4735 } 4736 4737 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0) 4738 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); 4739 4740 vfree(tmpbuf); 4741 return r; 4742 } 4743 4744 static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu, 4745 struct kvm_s390_mem_op *mop) 4746 { 4747 int r, srcu_idx; 4748 4749 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 4750 4751 switch (mop->op) { 4752 case KVM_S390_MEMOP_LOGICAL_READ: 4753 case KVM_S390_MEMOP_LOGICAL_WRITE: 4754 r = kvm_s390_guest_mem_op(vcpu, mop); 4755 break; 4756 case KVM_S390_MEMOP_SIDA_READ: 4757 case KVM_S390_MEMOP_SIDA_WRITE: 4758 /* we are locked against sida going away by the vcpu->mutex */ 4759 r = kvm_s390_guest_sida_op(vcpu, mop); 4760 break; 4761 default: 4762 r = -EINVAL; 4763 } 4764 4765 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); 4766 return r; 4767 } 4768 4769 long kvm_arch_vcpu_async_ioctl(struct file *filp, 4770 unsigned int ioctl, unsigned long arg) 4771 { 4772 struct kvm_vcpu *vcpu = filp->private_data; 4773 void __user *argp = (void __user *)arg; 4774 4775 switch (ioctl) { 4776 case KVM_S390_IRQ: { 4777 struct kvm_s390_irq s390irq; 4778 4779 if (copy_from_user(&s390irq, argp, sizeof(s390irq))) 4780 return -EFAULT; 4781 return kvm_s390_inject_vcpu(vcpu, &s390irq); 4782 } 4783 case KVM_S390_INTERRUPT: { 4784 struct kvm_s390_interrupt s390int; 4785 struct kvm_s390_irq s390irq = {}; 4786 4787 if (copy_from_user(&s390int, argp, sizeof(s390int))) 4788 return -EFAULT; 4789 if (s390int_to_s390irq(&s390int, &s390irq)) 4790 return -EINVAL; 4791 return kvm_s390_inject_vcpu(vcpu, &s390irq); 4792 } 4793 } 4794 return -ENOIOCTLCMD; 4795 } 4796 4797 long kvm_arch_vcpu_ioctl(struct file *filp, 4798 unsigned int ioctl, unsigned long arg) 4799 { 4800 struct kvm_vcpu *vcpu = filp->private_data; 4801 void __user *argp = (void __user *)arg; 4802 int idx; 4803 long r; 4804 u16 rc, rrc; 4805 4806 vcpu_load(vcpu); 4807 4808 switch (ioctl) { 4809 case KVM_S390_STORE_STATUS: 4810 idx = srcu_read_lock(&vcpu->kvm->srcu); 4811 r = kvm_s390_store_status_unloaded(vcpu, arg); 4812 srcu_read_unlock(&vcpu->kvm->srcu, idx); 4813 break; 4814 case KVM_S390_SET_INITIAL_PSW: { 4815 psw_t psw; 4816 4817 r = -EFAULT; 4818 if (copy_from_user(&psw, argp, sizeof(psw))) 4819 break; 4820 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw); 4821 break; 4822 } 4823 case KVM_S390_CLEAR_RESET: 4824 r = 0; 4825 kvm_arch_vcpu_ioctl_clear_reset(vcpu); 4826 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4827 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), 4828 UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc); 4829 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x", 4830 rc, rrc); 4831 } 4832 break; 4833 case KVM_S390_INITIAL_RESET: 4834 r = 0; 4835 kvm_arch_vcpu_ioctl_initial_reset(vcpu); 4836 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4837 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), 4838 UVC_CMD_CPU_RESET_INITIAL, 4839 &rc, &rrc); 4840 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x", 4841 rc, rrc); 4842 } 4843 
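/*
 * Illustrative note (sketch, not from this file; vcpu_fd is a
 * placeholder): userspace triggers the three reset flavours with plain
 * vcpu ioctls, e.g.
 *
 *	ioctl(vcpu_fd, KVM_S390_NORMAL_RESET, 0);
 *	ioctl(vcpu_fd, KVM_S390_INITIAL_RESET, 0);
 *	ioctl(vcpu_fd, KVM_S390_CLEAR_RESET, 0);
 *
 * each level being a superset of the previous one, mirroring the
 * kvm_arch_vcpu_ioctl_*_reset() helpers above.
 */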
break; 4844 case KVM_S390_NORMAL_RESET: 4845 r = 0; 4846 kvm_arch_vcpu_ioctl_normal_reset(vcpu); 4847 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4848 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), 4849 UVC_CMD_CPU_RESET, &rc, &rrc); 4850 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x", 4851 rc, rrc); 4852 } 4853 break; 4854 case KVM_SET_ONE_REG: 4855 case KVM_GET_ONE_REG: { 4856 struct kvm_one_reg reg; 4857 r = -EINVAL; 4858 if (kvm_s390_pv_cpu_is_protected(vcpu)) 4859 break; 4860 r = -EFAULT; 4861 if (copy_from_user(&reg, argp, sizeof(reg))) 4862 break; 4863 if (ioctl == KVM_SET_ONE_REG) 4864 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg); 4865 else 4866 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg); 4867 break; 4868 } 4869 #ifdef CONFIG_KVM_S390_UCONTROL 4870 case KVM_S390_UCAS_MAP: { 4871 struct kvm_s390_ucas_mapping ucasmap; 4872 4873 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) { 4874 r = -EFAULT; 4875 break; 4876 } 4877 4878 if (!kvm_is_ucontrol(vcpu->kvm)) { 4879 r = -EINVAL; 4880 break; 4881 } 4882 4883 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr, 4884 ucasmap.vcpu_addr, ucasmap.length); 4885 break; 4886 } 4887 case KVM_S390_UCAS_UNMAP: { 4888 struct kvm_s390_ucas_mapping ucasmap; 4889 4890 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) { 4891 r = -EFAULT; 4892 break; 4893 } 4894 4895 if (!kvm_is_ucontrol(vcpu->kvm)) { 4896 r = -EINVAL; 4897 break; 4898 } 4899 4900 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr, 4901 ucasmap.length); 4902 break; 4903 } 4904 #endif 4905 case KVM_S390_VCPU_FAULT: { 4906 r = gmap_fault(vcpu->arch.gmap, arg, 0); 4907 break; 4908 } 4909 case KVM_ENABLE_CAP: 4910 { 4911 struct kvm_enable_cap cap; 4912 r = -EFAULT; 4913 if (copy_from_user(&cap, argp, sizeof(cap))) 4914 break; 4915 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); 4916 break; 4917 } 4918 case KVM_S390_MEM_OP: { 4919 struct kvm_s390_mem_op mem_op; 4920 4921 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0) 4922 r = kvm_s390_guest_memsida_op(vcpu, &mem_op); 4923 else 4924 r = -EFAULT; 4925 break; 4926 } 4927 case KVM_S390_SET_IRQ_STATE: { 4928 struct kvm_s390_irq_state irq_state; 4929 4930 r = -EFAULT; 4931 if (copy_from_user(&irq_state, argp, sizeof(irq_state))) 4932 break; 4933 if (irq_state.len > VCPU_IRQS_MAX_BUF || 4934 irq_state.len == 0 || 4935 irq_state.len % sizeof(struct kvm_s390_irq) > 0) { 4936 r = -EINVAL; 4937 break; 4938 } 4939 /* do not use irq_state.flags, it will break old QEMUs */ 4940 r = kvm_s390_set_irq_state(vcpu, 4941 (void __user *) irq_state.buf, 4942 irq_state.len); 4943 break; 4944 } 4945 case KVM_S390_GET_IRQ_STATE: { 4946 struct kvm_s390_irq_state irq_state; 4947 4948 r = -EFAULT; 4949 if (copy_from_user(&irq_state, argp, sizeof(irq_state))) 4950 break; 4951 if (irq_state.len == 0) { 4952 r = -EINVAL; 4953 break; 4954 } 4955 /* do not use irq_state.flags, it will break old QEMUs */ 4956 r = kvm_s390_get_irq_state(vcpu, 4957 (__u8 __user *) irq_state.buf, 4958 irq_state.len); 4959 break; 4960 } 4961 default: 4962 r = -ENOTTY; 4963 } 4964 4965 vcpu_put(vcpu); 4966 return r; 4967 } 4968 4969 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) 4970 { 4971 #ifdef CONFIG_KVM_S390_UCONTROL 4972 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET) 4973 && (kvm_is_ucontrol(vcpu->kvm))) { 4974 vmf->page = virt_to_page(vcpu->arch.sie_block); 4975 get_page(vmf->page); 4976 return 0; 4977 } 4978 #endif 4979 return VM_FAULT_SIGBUS; 4980 } 4981 4982 /* Section: memory related */ 4983 int
kvm_arch_prepare_memory_region(struct kvm *kvm, 4984 const struct kvm_memory_slot *old, 4985 struct kvm_memory_slot *new, 4986 enum kvm_mr_change change) 4987 { 4988 gpa_t size; 4989 4990 /* When we are protected, we should not change the memory slots */ 4991 if (kvm_s390_pv_get_handle(kvm)) 4992 return -EINVAL; 4993 4994 if (change == KVM_MR_DELETE || change == KVM_MR_FLAGS_ONLY) 4995 return 0; 4996 4997 /* A few sanity checks. We can have memory slots which have to be 4998 located/ended at a segment boundary (1MB). The memory in userland is 4999 ok to be fragmented into various different vmas. It is okay to mmap() 5000 and munmap() stuff in this slot after doing this call at any time */ 5001 5002 if (new->userspace_addr & 0xffffful) 5003 return -EINVAL; 5004 5005 size = new->npages * PAGE_SIZE; 5006 if (size & 0xffffful) 5007 return -EINVAL; 5008 5009 if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit) 5010 return -EINVAL; 5011 5012 return 0; 5013 } 5014 5015 void kvm_arch_commit_memory_region(struct kvm *kvm, 5016 struct kvm_memory_slot *old, 5017 const struct kvm_memory_slot *new, 5018 enum kvm_mr_change change) 5019 { 5020 int rc = 0; 5021 5022 switch (change) { 5023 case KVM_MR_DELETE: 5024 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE, 5025 old->npages * PAGE_SIZE); 5026 break; 5027 case KVM_MR_MOVE: 5028 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE, 5029 old->npages * PAGE_SIZE); 5030 if (rc) 5031 break; 5032 fallthrough; 5033 case KVM_MR_CREATE: 5034 rc = gmap_map_segment(kvm->arch.gmap, new->userspace_addr, 5035 new->base_gfn * PAGE_SIZE, 5036 new->npages * PAGE_SIZE); 5037 break; 5038 case KVM_MR_FLAGS_ONLY: 5039 break; 5040 default: 5041 WARN(1, "Unknown KVM MR CHANGE: %d\n", change); 5042 } 5043 if (rc) 5044 pr_warn("failed to commit memory region\n"); 5045 return; 5046 } 5047 5048 static inline unsigned long nonhyp_mask(int i) 5049 { 5050 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30; 5051 5052 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4); 5053 } 5054 5055 static int __init kvm_s390_init(void) 5056 { 5057 int i; 5058 5059 if (!sclp.has_sief2) { 5060 pr_info("SIE is not available\n"); 5061 return -ENODEV; 5062 } 5063 5064 if (nested && hpage) { 5065 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n"); 5066 return -EINVAL; 5067 } 5068 5069 for (i = 0; i < 16; i++) 5070 kvm_s390_fac_base[i] |= 5071 stfle_fac_list[i] & nonhyp_mask(i); 5072 5073 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); 5074 } 5075 5076 static void __exit kvm_s390_exit(void) 5077 { 5078 kvm_exit(); 5079 } 5080 5081 module_init(kvm_s390_init); 5082 module_exit(kvm_s390_exit); 5083 5084 /* 5085 * Enable autoloading of the kvm module. 5086 * Note that we add the module alias here instead of virt/kvm/kvm_main.c 5087 * since x86 takes a different approach. 5088 */ 5089 #include <linux/miscdevice.h> 5090 MODULE_ALIAS_MISCDEV(KVM_MINOR); 5091 MODULE_ALIAS("devname:kvm"); 5092
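/*
 * Usage sketch (illustrative only, not part of the original file; the
 * fd names are placeholders): with this module loaded, a guest is set
 * up through the generic /dev/kvm interface, e.g.
 *
 *	int kvm_fd  = open("/dev/kvm", O_RDWR);
 *	int vm_fd   = ioctl(kvm_fd, KVM_CREATE_VM, 0);
 *	int vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);
 *
 * followed by KVM_SET_USER_MEMORY_REGION on the VM fd and the KVM_RUN
 * loop outlined above in kvm_arch_vcpu_ioctl_run().
 */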