// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2020
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>
#include <linux/pgtable.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include <asm/ap.h>
#include <asm/uv.h>
#include <asm/fpu/api.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
	KVM_GENERIC_VM_STATS(),
	STATS_DESC_COUNTER(VM, inject_io),
	STATS_DESC_COUNTER(VM, inject_float_mchk),
	STATS_DESC_COUNTER(VM, inject_pfault_done),
	STATS_DESC_COUNTER(VM, inject_service_signal),
	STATS_DESC_COUNTER(VM, inject_virtio)
};
static_assert(ARRAY_SIZE(kvm_vm_stats_desc) ==
		sizeof(struct kvm_vm_stat) / sizeof(u64));

const struct kvm_stats_header kvm_vm_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vm_stats_desc),
};

const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
	KVM_GENERIC_VCPU_STATS(),
	STATS_DESC_COUNTER(VCPU, exit_userspace),
	STATS_DESC_COUNTER(VCPU, exit_null),
	STATS_DESC_COUNTER(VCPU, exit_external_request),
	STATS_DESC_COUNTER(VCPU, exit_io_request),
	STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
	STATS_DESC_COUNTER(VCPU, exit_stop_request),
	STATS_DESC_COUNTER(VCPU, exit_validity),
	STATS_DESC_COUNTER(VCPU, exit_instruction),
	STATS_DESC_COUNTER(VCPU, exit_pei),
	STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
	STATS_DESC_COUNTER(VCPU, instruction_lctl),
	STATS_DESC_COUNTER(VCPU, instruction_lctlg),
	STATS_DESC_COUNTER(VCPU, instruction_stctl),
	STATS_DESC_COUNTER(VCPU, instruction_stctg),
	STATS_DESC_COUNTER(VCPU, exit_program_interruption),
	STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
	STATS_DESC_COUNTER(VCPU, exit_operation_exception),
	STATS_DESC_COUNTER(VCPU, deliver_ckc),
	STATS_DESC_COUNTER(VCPU, deliver_cputm),
	STATS_DESC_COUNTER(VCPU, deliver_external_call),
	STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
	STATS_DESC_COUNTER(VCPU, deliver_service_signal),
	STATS_DESC_COUNTER(VCPU, deliver_virtio),
	STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
	STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
	STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
	STATS_DESC_COUNTER(VCPU, deliver_program),
	STATS_DESC_COUNTER(VCPU, deliver_io),
	STATS_DESC_COUNTER(VCPU, deliver_machine_check),
	STATS_DESC_COUNTER(VCPU, exit_wait_state),
	STATS_DESC_COUNTER(VCPU, inject_ckc),
	STATS_DESC_COUNTER(VCPU, inject_cputm),
	STATS_DESC_COUNTER(VCPU, inject_external_call),
	STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
	STATS_DESC_COUNTER(VCPU, inject_mchk),
	STATS_DESC_COUNTER(VCPU, inject_pfault_init),
	STATS_DESC_COUNTER(VCPU, inject_program),
	STATS_DESC_COUNTER(VCPU, inject_restart),
	STATS_DESC_COUNTER(VCPU, inject_set_prefix),
	STATS_DESC_COUNTER(VCPU, inject_stop_signal),
	STATS_DESC_COUNTER(VCPU, instruction_epsw),
	STATS_DESC_COUNTER(VCPU, instruction_gs),
	STATS_DESC_COUNTER(VCPU, instruction_io_other),
	STATS_DESC_COUNTER(VCPU, instruction_lpsw),
	STATS_DESC_COUNTER(VCPU, instruction_lpswe),
	STATS_DESC_COUNTER(VCPU, instruction_pfmf),
	STATS_DESC_COUNTER(VCPU, instruction_ptff),
	STATS_DESC_COUNTER(VCPU, instruction_sck),
	STATS_DESC_COUNTER(VCPU, instruction_sckpf),
	STATS_DESC_COUNTER(VCPU, instruction_stidp),
	STATS_DESC_COUNTER(VCPU, instruction_spx),
	STATS_DESC_COUNTER(VCPU, instruction_stpx),
	STATS_DESC_COUNTER(VCPU, instruction_stap),
	STATS_DESC_COUNTER(VCPU, instruction_iske),
	STATS_DESC_COUNTER(VCPU, instruction_ri),
	STATS_DESC_COUNTER(VCPU, instruction_rrbe),
	STATS_DESC_COUNTER(VCPU, instruction_sske),
	STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
	STATS_DESC_COUNTER(VCPU, instruction_stsi),
	STATS_DESC_COUNTER(VCPU, instruction_stfl),
	STATS_DESC_COUNTER(VCPU, instruction_tb),
	STATS_DESC_COUNTER(VCPU, instruction_tpi),
	STATS_DESC_COUNTER(VCPU, instruction_tprot),
	STATS_DESC_COUNTER(VCPU, instruction_tsch),
	STATS_DESC_COUNTER(VCPU, instruction_sie),
	STATS_DESC_COUNTER(VCPU, instruction_essa),
	STATS_DESC_COUNTER(VCPU, instruction_sthyi),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
	STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
	STATS_DESC_COUNTER(VCPU, diag_9c_forward),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
	STATS_DESC_COUNTER(VCPU, pfault_sync)
};
static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) ==
		sizeof(struct kvm_vcpu_stat) / sizeof(u64));

const struct kvm_stats_header kvm_vcpu_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vcpu_stats_desc),
};

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* allow 1m huge page guest backing, if !nested */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");

/* maximum percentage of steal time for polling.  >100 is treated like 100 */
static u8 halt_poll_max_steal = 10;
module_param(halt_poll_max_steal, byte, 0644);
MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");

/* if set to true, the GISA will be initialized and used if available */
static bool use_gisa = true;
module_param(use_gisa, bool, 0644);
MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");

/* maximum diag9c forwarding per second */
unsigned int diag9c_forwarding_hz;
module_param(diag9c_forwarding_hz, uint, 0644);
MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");

/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, this requires changes to code, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };

static unsigned long kvm_s390_fac_size(void)
{
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
		     sizeof(stfle_fac_list));

	return SIZE_INTERNAL;
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
debug_info_t *kvm_s390_dbf_uv;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

int kvm_arch_check_processor_compat(void *opaque)
{
	return 0;
}

/* forward declarations */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
static int sca_switch_to_extended(struct kvm *kvm);

static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
	u8 delta_idx = 0;

	/*
	 * The TOD jumps by delta, we have to compensate this by adding
	 * -delta to the epoch.
	 */
	delta = -delta;

	/* sign-extension - we're adding to signed values below */
	if ((s64)delta < 0)
		delta_idx = -1;

	scb->epoch += delta;
	if (scb->ecd & ECD_MEF) {
		scb->epdx += delta_idx;
		if (scb->epoch < delta)
			scb->epdx += 1;
	}
}

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm_for_each_vcpu(i, vcpu, kvm) {
			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
			if (i == 0) {
				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
			}
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				kvm_clock_sync_scb(vcpu->arch.vsie_block,
						   *delta);
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void *opaque)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

static inline int plo_test_bit(unsigned char nr)
{
	unsigned long function = (unsigned long)nr | 0x100;
	int cc;

	asm volatile(
		"	lgr	0,%[function]\n"
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: [function] "d" (function)
		: "cc", "0");
	return cc == 0;
}

static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
{
	asm volatile(
		"	lghi	0,0\n"
		"	lgr	1,%[query]\n"
		/* Parameter registers are ignored */
		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
		:
		: [query] "d" ((unsigned long)query), [opc] "i" (opcode)
		: "cc", "memory", "0", "1");
}

#define INSN_SORTL 0xb938
#define INSN_DFLTCC 0xb939

static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);

	if (test_facility(155)) /* MSA9 */
		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kdsa);

	if (test_facility(150)) /* SORTL */
		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);

	if (test_facility(151)) /* DFLTCC */
		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}

int kvm_arch_init(void *opaque)
{
	int rc = -ENOMEM;

	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf_uv)
		goto out;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
		goto out;

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
	if (rc) {
		pr_err("A FLIC registration call failed with rc=%d\n", rc);
		goto out;
	}

	rc = kvm_s390_gib_init(GAL_ISC);
	if (rc)
		goto out;

	return 0;

out:
	kvm_arch_exit();
	return rc;
}

void kvm_arch_exit(void)
{
	kvm_s390_gib_destroy();
	debug_unregister(kvm_s390_dbf);
	debug_unregister(kvm_s390_dbf_uv);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
	case KVM_CAP_S390_AIS_MIGRATION:
	case KVM_CAP_S390_VCPU_RESETS:
	case KVM_CAP_SET_GUEST_DEBUG:
	case KVM_CAP_S390_DIAG318:
		r = 1;
		break;
	case KVM_CAP_SET_GUEST_DEBUG2:
		r = KVM_GUESTDBG_VALID_MASK;
		break;
	case KVM_CAP_S390_HPAGE_1M:
		r = 0;
		if (hpage && !kvm_is_ucontrol(kvm))
			r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
	case KVM_CAP_MAX_VCPU_ID:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	case KVM_CAP_S390_BPB:
		r = test_facility(82);
		break;
	case KVM_CAP_S390_PROTECTED:
		r = is_prot_virt_host();
		break;
	default:
		r = 0;
	}
	return r;
}

void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
	int i;
	gfn_t cur_gfn, last_gfn;
	unsigned long gaddr, vmaddr;
	struct gmap *gmap = kvm->arch.gmap;
	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);

	/* Loop over all guest segments */
	cur_gfn = memslot->base_gfn;
	last_gfn = memslot->base_gfn + memslot->npages;
	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
		gaddr = gfn_to_gpa(cur_gfn);
		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
		if (kvm_is_error_hva(vmaddr))
			continue;

		bitmap_zero(bitmap, _PAGE_ENTRIES);
		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
		for (i = 0; i < _PAGE_ENTRIES; i++) {
			if (test_bit(i, bitmap))
				mark_page_dirty(kvm, cur_gfn + i);
		}

		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}

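/*
 * For reference, a minimal userspace sketch (an assumption for illustration,
 * not part of this file) of how the dirty log filled in by
 * kvm_arch_sync_dirty_log() is consumed: the VMM issues the KVM_GET_DIRTY_LOG
 * ioctl on the VM fd with a caller-allocated bitmap of one bit per page of
 * the memslot, e.g.
 *
 *	struct kvm_dirty_log log = {
 *		.slot = slot_id,		// memslot to query (hypothetical id)
 *		.dirty_bitmap = bitmap_buf,	// caller-allocated buffer
 *	};
 *	if (ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log) < 0)
 *		perror("KVM_GET_DIRTY_LOG");
 *
 * kvm_vm_ioctl_get_dirty_log() below services that ioctl for s390.
 */
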
/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memory_slot *memslot;
	int is_dirty;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}

static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}

int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
			if (test_facility(148)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 148);
				set_kvm_facility(kvm->arch.model.fac_list, 148);
			}
			if (test_facility(152)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 152);
				set_kvm_facility(kvm->arch.model.fac_list, 152);
			}
			if (test_facility(192)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 192);
				set_kvm_facility(kvm->arch.model.fac_list, 192);
			}
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_AIS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else {
			set_kvm_facility(kvm->arch.model.fac_mask, 72);
			set_kvm_facility(kvm->arch.model.fac_list, 72);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
"(not available)" : "(success)"); 773 break; 774 case KVM_CAP_S390_GS: 775 r = -EINVAL; 776 mutex_lock(&kvm->lock); 777 if (kvm->created_vcpus) { 778 r = -EBUSY; 779 } else if (test_facility(133)) { 780 set_kvm_facility(kvm->arch.model.fac_mask, 133); 781 set_kvm_facility(kvm->arch.model.fac_list, 133); 782 r = 0; 783 } 784 mutex_unlock(&kvm->lock); 785 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s", 786 r ? "(not available)" : "(success)"); 787 break; 788 case KVM_CAP_S390_HPAGE_1M: 789 mutex_lock(&kvm->lock); 790 if (kvm->created_vcpus) 791 r = -EBUSY; 792 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm)) 793 r = -EINVAL; 794 else { 795 r = 0; 796 mmap_write_lock(kvm->mm); 797 kvm->mm->context.allow_gmap_hpage_1m = 1; 798 mmap_write_unlock(kvm->mm); 799 /* 800 * We might have to create fake 4k page 801 * tables. To avoid that the hardware works on 802 * stale PGSTEs, we emulate these instructions. 803 */ 804 kvm->arch.use_skf = 0; 805 kvm->arch.use_pfmfi = 0; 806 } 807 mutex_unlock(&kvm->lock); 808 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s", 809 r ? "(not available)" : "(success)"); 810 break; 811 case KVM_CAP_S390_USER_STSI: 812 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI"); 813 kvm->arch.user_stsi = 1; 814 r = 0; 815 break; 816 case KVM_CAP_S390_USER_INSTR0: 817 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0"); 818 kvm->arch.user_instr0 = 1; 819 icpt_operexc_on_all_vcpus(kvm); 820 r = 0; 821 break; 822 default: 823 r = -EINVAL; 824 break; 825 } 826 return r; 827 } 828 829 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) 830 { 831 int ret; 832 833 switch (attr->attr) { 834 case KVM_S390_VM_MEM_LIMIT_SIZE: 835 ret = 0; 836 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes", 837 kvm->arch.mem_limit); 838 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr)) 839 ret = -EFAULT; 840 break; 841 default: 842 ret = -ENXIO; 843 break; 844 } 845 return ret; 846 } 847 848 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) 849 { 850 int ret; 851 unsigned int idx; 852 switch (attr->attr) { 853 case KVM_S390_VM_MEM_ENABLE_CMMA: 854 ret = -ENXIO; 855 if (!sclp.has_cmma) 856 break; 857 858 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support"); 859 mutex_lock(&kvm->lock); 860 if (kvm->created_vcpus) 861 ret = -EBUSY; 862 else if (kvm->mm->context.allow_gmap_hpage_1m) 863 ret = -EINVAL; 864 else { 865 kvm->arch.use_cmma = 1; 866 /* Not compatible with cmma. 
			kvm->arch.use_pfmfi = 0;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
{
	struct kvm_vcpu *vcpu;
	int i;

	kvm_s390_vcpu_block_all(kvm);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		/* recreate the shadow crycb by leaving the VSIE handler */
		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
	}

	kvm_s390_vcpu_unblock_all(kvm);
}

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
		       sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
		       sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 1;
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 0;
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_s390_vcpu_crypto_reset_all(kvm);
	mutex_unlock(&kvm->lock);
	return 0;
}

static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
	int cx;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}

/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_memory_slot *ms;
	struct kvm_memslots *slots;
	unsigned long ram_pages = 0;
	int slotnr;

	/* migration mode already enabled */
	if (kvm->arch.migration_mode)
		return 0;
	slots = kvm_memslots(kvm);
	if (!slots || !slots->used_slots)
		return -EINVAL;

	if (!kvm->arch.use_cmma) {
		kvm->arch.migration_mode = 1;
		return 0;
	}
	/* mark all the pages in active slots as dirty */
	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
		ms = slots->memslots + slotnr;
		if (!ms->dirty_bitmap)
			return -EINVAL;
		/*
		 * The second half of the bitmap is only used on x86,
		 * and would be wasted otherwise, so we put it to good
		 * use here to keep track of the state of the storage
		 * attributes.
		 */
		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
		ram_pages += ms->npages;
	}
	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
	kvm->arch.migration_mode = 1;
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	return 0;
}

/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	/* migration mode already disabled */
	if (!kvm->arch.migration_mode)
		return 0;
	kvm->arch.migration_mode = 0;
	if (kvm->arch.use_cmma)
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
	return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int res = -ENXIO;

	mutex_lock(&kvm->slots_lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		res = kvm_s390_vm_start_migration(kvm);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->slots_lock);

	return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = kvm->arch.migration_mode;

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
		return -EINVAL;
	kvm_s390_set_tod_clock(kvm, &gtod);

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		gtod.epoch_idx, gtod.tod);

	return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod = { 0 };

	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
			   sizeof(gtod.tod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, &gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_get_tod_clock(struct kvm *kvm,
				   struct kvm_s390_vm_tod_clock *gtod)
{
	union tod_clock clk;

	preempt_disable();

	store_tod_clock_ext(&clk);

	gtod->tod = clk.tod + kvm->arch.epoch;
	gtod->epoch_idx = 0;
	if (test_kvm_facility(kvm, 139)) {
		gtod->epoch_idx = clk.ei + kvm->arch.epdx;
		if (gtod->tod < clk.tod)
			gtod->epoch_idx += 1;
	}

	preempt_enable();
}

static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));
	kvm_s390_get_tod_clock(kvm, &gtod);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);
	return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}
	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	mutex_unlock(&kvm->lock);
	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}

	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
		mutex_unlock(&kvm->lock);
		return -EFAULT;
	}
	mutex_unlock(&kvm->lock);

	VM_EVENT(kvm, 3, "SET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
	VM_EVENT(kvm, 3, "SET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
	VM_EVENT(kvm, 3, "SET: guest KM subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
	VM_EVENT(kvm, 3, "SET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
	VM_EVENT(kvm, 3, "SET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
	VM_EVENT(kvm, 3, "SET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
	VM_EVENT(kvm, 3, "SET: guest PCC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
	VM_EVENT(kvm, 3, "SET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
	VM_EVENT(kvm, 3, "SET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
	VM_EVENT(kvm, 3, "SET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1412 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0], 1413 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1], 1414 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2], 1415 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]); 1416 VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1417 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0], 1418 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1], 1419 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2], 1420 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]); 1421 1422 return 0; 1423 } 1424 1425 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) 1426 { 1427 int ret = -ENXIO; 1428 1429 switch (attr->attr) { 1430 case KVM_S390_VM_CPU_PROCESSOR: 1431 ret = kvm_s390_set_processor(kvm, attr); 1432 break; 1433 case KVM_S390_VM_CPU_PROCESSOR_FEAT: 1434 ret = kvm_s390_set_processor_feat(kvm, attr); 1435 break; 1436 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: 1437 ret = kvm_s390_set_processor_subfunc(kvm, attr); 1438 break; 1439 } 1440 return ret; 1441 } 1442 1443 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr) 1444 { 1445 struct kvm_s390_vm_cpu_processor *proc; 1446 int ret = 0; 1447 1448 proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT); 1449 if (!proc) { 1450 ret = -ENOMEM; 1451 goto out; 1452 } 1453 proc->cpuid = kvm->arch.model.cpuid; 1454 proc->ibc = kvm->arch.model.ibc; 1455 memcpy(&proc->fac_list, kvm->arch.model.fac_list, 1456 S390_ARCH_FAC_LIST_SIZE_BYTE); 1457 VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx", 1458 kvm->arch.model.ibc, 1459 kvm->arch.model.cpuid); 1460 VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx", 1461 kvm->arch.model.fac_list[0], 1462 kvm->arch.model.fac_list[1], 1463 kvm->arch.model.fac_list[2]); 1464 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc))) 1465 ret = -EFAULT; 1466 kfree(proc); 1467 out: 1468 return ret; 1469 } 1470 1471 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr) 1472 { 1473 struct kvm_s390_vm_cpu_machine *mach; 1474 int ret = 0; 1475 1476 mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT); 1477 if (!mach) { 1478 ret = -ENOMEM; 1479 goto out; 1480 } 1481 get_cpu_id((struct cpuid *) &mach->cpuid); 1482 mach->ibc = sclp.ibc; 1483 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask, 1484 S390_ARCH_FAC_LIST_SIZE_BYTE); 1485 memcpy((unsigned long *)&mach->fac_list, stfle_fac_list, 1486 sizeof(stfle_fac_list)); 1487 VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx", 1488 kvm->arch.model.ibc, 1489 kvm->arch.model.cpuid); 1490 VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx", 1491 mach->fac_mask[0], 1492 mach->fac_mask[1], 1493 mach->fac_mask[2]); 1494 VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx", 1495 mach->fac_list[0], 1496 mach->fac_list[1], 1497 mach->fac_list[2]); 1498 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach))) 1499 ret = -EFAULT; 1500 kfree(mach); 1501 out: 1502 return ret; 1503 } 1504 1505 static int kvm_s390_get_processor_feat(struct kvm *kvm, 1506 struct kvm_device_attr *attr) 1507 { 1508 struct kvm_s390_vm_cpu_feat data; 1509 1510 bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat, 1511 KVM_S390_VM_CPU_FEAT_NR_BITS); 1512 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data))) 1513 return -EFAULT; 1514 VM_EVENT(kvm, 3, "GET: guest feat: 
0x%16.16llx.0x%16.16llx.0x%16.16llx", 1515 data.feat[0], 1516 data.feat[1], 1517 data.feat[2]); 1518 return 0; 1519 } 1520 1521 static int kvm_s390_get_machine_feat(struct kvm *kvm, 1522 struct kvm_device_attr *attr) 1523 { 1524 struct kvm_s390_vm_cpu_feat data; 1525 1526 bitmap_copy((unsigned long *) data.feat, 1527 kvm_s390_available_cpu_feat, 1528 KVM_S390_VM_CPU_FEAT_NR_BITS); 1529 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data))) 1530 return -EFAULT; 1531 VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx", 1532 data.feat[0], 1533 data.feat[1], 1534 data.feat[2]); 1535 return 0; 1536 } 1537 1538 static int kvm_s390_get_processor_subfunc(struct kvm *kvm, 1539 struct kvm_device_attr *attr) 1540 { 1541 if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs, 1542 sizeof(struct kvm_s390_vm_cpu_subfunc))) 1543 return -EFAULT; 1544 1545 VM_EVENT(kvm, 3, "GET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1546 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0], 1547 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1], 1548 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2], 1549 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]); 1550 VM_EVENT(kvm, 3, "GET: guest PTFF subfunc 0x%16.16lx.%16.16lx", 1551 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0], 1552 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]); 1553 VM_EVENT(kvm, 3, "GET: guest KMAC subfunc 0x%16.16lx.%16.16lx", 1554 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0], 1555 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]); 1556 VM_EVENT(kvm, 3, "GET: guest KMC subfunc 0x%16.16lx.%16.16lx", 1557 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0], 1558 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]); 1559 VM_EVENT(kvm, 3, "GET: guest KM subfunc 0x%16.16lx.%16.16lx", 1560 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0], 1561 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]); 1562 VM_EVENT(kvm, 3, "GET: guest KIMD subfunc 0x%16.16lx.%16.16lx", 1563 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0], 1564 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]); 1565 VM_EVENT(kvm, 3, "GET: guest KLMD subfunc 0x%16.16lx.%16.16lx", 1566 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0], 1567 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]); 1568 VM_EVENT(kvm, 3, "GET: guest PCKMO subfunc 0x%16.16lx.%16.16lx", 1569 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0], 1570 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]); 1571 VM_EVENT(kvm, 3, "GET: guest KMCTR subfunc 0x%16.16lx.%16.16lx", 1572 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0], 1573 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]); 1574 VM_EVENT(kvm, 3, "GET: guest KMF subfunc 0x%16.16lx.%16.16lx", 1575 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0], 1576 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]); 1577 VM_EVENT(kvm, 3, "GET: guest KMO subfunc 0x%16.16lx.%16.16lx", 1578 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0], 1579 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]); 1580 VM_EVENT(kvm, 3, "GET: guest PCC subfunc 0x%16.16lx.%16.16lx", 1581 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0], 1582 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]); 1583 VM_EVENT(kvm, 3, "GET: guest PPNO subfunc 0x%16.16lx.%16.16lx", 1584 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0], 1585 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]); 1586 VM_EVENT(kvm, 3, "GET: guest KMA subfunc 0x%16.16lx.%16.16lx", 
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
	VM_EVENT(kvm, 3, "GET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
	VM_EVENT(kvm, 3, "GET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);

	return 0;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
			 sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "GET: host PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
	VM_EVENT(kvm, 3, "GET: host PTFF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
	VM_EVENT(kvm, 3, "GET: host KMAC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
	VM_EVENT(kvm, 3, "GET: host KMC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
	VM_EVENT(kvm, 3, "GET: host KM subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
	VM_EVENT(kvm, 3, "GET: host KIMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
	VM_EVENT(kvm, 3, "GET: host KLMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
	VM_EVENT(kvm, 3, "GET: host PCKMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
	VM_EVENT(kvm, 3, "GET: host KMCTR subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
	VM_EVENT(kvm, 3, "GET: host KMF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
	VM_EVENT(kvm, 3, "GET: host KMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
	VM_EVENT(kvm, 3, "GET: host PCC subfunc 0x%16.16lx.%16.16lx",
subfunc 0x%16.16lx.%16.16lx", 1649 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0], 1650 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]); 1651 VM_EVENT(kvm, 3, "GET: host PPNO subfunc 0x%16.16lx.%16.16lx", 1652 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0], 1653 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]); 1654 VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx", 1655 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0], 1656 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]); 1657 VM_EVENT(kvm, 3, "GET: host KDSA subfunc 0x%16.16lx.%16.16lx", 1658 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0], 1659 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]); 1660 VM_EVENT(kvm, 3, "GET: host SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1661 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0], 1662 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1], 1663 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2], 1664 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]); 1665 VM_EVENT(kvm, 3, "GET: host DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1666 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0], 1667 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1], 1668 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2], 1669 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]); 1670 1671 return 0; 1672 } 1673 1674 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) 1675 { 1676 int ret = -ENXIO; 1677 1678 switch (attr->attr) { 1679 case KVM_S390_VM_CPU_PROCESSOR: 1680 ret = kvm_s390_get_processor(kvm, attr); 1681 break; 1682 case KVM_S390_VM_CPU_MACHINE: 1683 ret = kvm_s390_get_machine(kvm, attr); 1684 break; 1685 case KVM_S390_VM_CPU_PROCESSOR_FEAT: 1686 ret = kvm_s390_get_processor_feat(kvm, attr); 1687 break; 1688 case KVM_S390_VM_CPU_MACHINE_FEAT: 1689 ret = kvm_s390_get_machine_feat(kvm, attr); 1690 break; 1691 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: 1692 ret = kvm_s390_get_processor_subfunc(kvm, attr); 1693 break; 1694 case KVM_S390_VM_CPU_MACHINE_SUBFUNC: 1695 ret = kvm_s390_get_machine_subfunc(kvm, attr); 1696 break; 1697 } 1698 return ret; 1699 } 1700 1701 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr) 1702 { 1703 int ret; 1704 1705 switch (attr->group) { 1706 case KVM_S390_VM_MEM_CTRL: 1707 ret = kvm_s390_set_mem_control(kvm, attr); 1708 break; 1709 case KVM_S390_VM_TOD: 1710 ret = kvm_s390_set_tod(kvm, attr); 1711 break; 1712 case KVM_S390_VM_CPU_MODEL: 1713 ret = kvm_s390_set_cpu_model(kvm, attr); 1714 break; 1715 case KVM_S390_VM_CRYPTO: 1716 ret = kvm_s390_vm_set_crypto(kvm, attr); 1717 break; 1718 case KVM_S390_VM_MIGRATION: 1719 ret = kvm_s390_vm_set_migration(kvm, attr); 1720 break; 1721 default: 1722 ret = -ENXIO; 1723 break; 1724 } 1725 1726 return ret; 1727 } 1728 1729 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr) 1730 { 1731 int ret; 1732 1733 switch (attr->group) { 1734 case KVM_S390_VM_MEM_CTRL: 1735 ret = kvm_s390_get_mem_control(kvm, attr); 1736 break; 1737 case KVM_S390_VM_TOD: 1738 ret = kvm_s390_get_tod(kvm, attr); 1739 break; 1740 case KVM_S390_VM_CPU_MODEL: 1741 ret = kvm_s390_get_cpu_model(kvm, attr); 1742 break; 1743 case KVM_S390_VM_MIGRATION: 1744 ret = kvm_s390_vm_get_migration(kvm, attr); 1745 break; 1746 default: 1747 ret = -ENXIO; 1748 break; 1749 } 1750 1751 return ret; 1752 } 1753 1754 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct 
kvm_device_attr *attr) 1755 { 1756 int ret; 1757 1758 switch (attr->group) { 1759 case KVM_S390_VM_MEM_CTRL: 1760 switch (attr->attr) { 1761 case KVM_S390_VM_MEM_ENABLE_CMMA: 1762 case KVM_S390_VM_MEM_CLR_CMMA: 1763 ret = sclp.has_cmma ? 0 : -ENXIO; 1764 break; 1765 case KVM_S390_VM_MEM_LIMIT_SIZE: 1766 ret = 0; 1767 break; 1768 default: 1769 ret = -ENXIO; 1770 break; 1771 } 1772 break; 1773 case KVM_S390_VM_TOD: 1774 switch (attr->attr) { 1775 case KVM_S390_VM_TOD_LOW: 1776 case KVM_S390_VM_TOD_HIGH: 1777 ret = 0; 1778 break; 1779 default: 1780 ret = -ENXIO; 1781 break; 1782 } 1783 break; 1784 case KVM_S390_VM_CPU_MODEL: 1785 switch (attr->attr) { 1786 case KVM_S390_VM_CPU_PROCESSOR: 1787 case KVM_S390_VM_CPU_MACHINE: 1788 case KVM_S390_VM_CPU_PROCESSOR_FEAT: 1789 case KVM_S390_VM_CPU_MACHINE_FEAT: 1790 case KVM_S390_VM_CPU_MACHINE_SUBFUNC: 1791 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: 1792 ret = 0; 1793 break; 1794 default: 1795 ret = -ENXIO; 1796 break; 1797 } 1798 break; 1799 case KVM_S390_VM_CRYPTO: 1800 switch (attr->attr) { 1801 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW: 1802 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW: 1803 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW: 1804 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW: 1805 ret = 0; 1806 break; 1807 case KVM_S390_VM_CRYPTO_ENABLE_APIE: 1808 case KVM_S390_VM_CRYPTO_DISABLE_APIE: 1809 ret = ap_instructions_available() ? 0 : -ENXIO; 1810 break; 1811 default: 1812 ret = -ENXIO; 1813 break; 1814 } 1815 break; 1816 case KVM_S390_VM_MIGRATION: 1817 ret = 0; 1818 break; 1819 default: 1820 ret = -ENXIO; 1821 break; 1822 } 1823 1824 return ret; 1825 } 1826 1827 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) 1828 { 1829 uint8_t *keys; 1830 uint64_t hva; 1831 int srcu_idx, i, r = 0; 1832 1833 if (args->flags != 0) 1834 return -EINVAL; 1835 1836 /* Is this guest using storage keys? 
*/ 1837 if (!mm_uses_skeys(current->mm)) 1838 return KVM_S390_GET_SKEYS_NONE; 1839 1840 /* Enforce sane limit on memory allocation */ 1841 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX) 1842 return -EINVAL; 1843 1844 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT); 1845 if (!keys) 1846 return -ENOMEM; 1847 1848 mmap_read_lock(current->mm); 1849 srcu_idx = srcu_read_lock(&kvm->srcu); 1850 for (i = 0; i < args->count; i++) { 1851 hva = gfn_to_hva(kvm, args->start_gfn + i); 1852 if (kvm_is_error_hva(hva)) { 1853 r = -EFAULT; 1854 break; 1855 } 1856 1857 r = get_guest_storage_key(current->mm, hva, &keys[i]); 1858 if (r) 1859 break; 1860 } 1861 srcu_read_unlock(&kvm->srcu, srcu_idx); 1862 mmap_read_unlock(current->mm); 1863 1864 if (!r) { 1865 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys, 1866 sizeof(uint8_t) * args->count); 1867 if (r) 1868 r = -EFAULT; 1869 } 1870 1871 kvfree(keys); 1872 return r; 1873 } 1874 1875 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) 1876 { 1877 uint8_t *keys; 1878 uint64_t hva; 1879 int srcu_idx, i, r = 0; 1880 bool unlocked; 1881 1882 if (args->flags != 0) 1883 return -EINVAL; 1884 1885 /* Enforce sane limit on memory allocation */ 1886 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX) 1887 return -EINVAL; 1888 1889 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT); 1890 if (!keys) 1891 return -ENOMEM; 1892 1893 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr, 1894 sizeof(uint8_t) * args->count); 1895 if (r) { 1896 r = -EFAULT; 1897 goto out; 1898 } 1899 1900 /* Enable storage key handling for the guest */ 1901 r = s390_enable_skey(); 1902 if (r) 1903 goto out; 1904 1905 i = 0; 1906 mmap_read_lock(current->mm); 1907 srcu_idx = srcu_read_lock(&kvm->srcu); 1908 while (i < args->count) { 1909 unlocked = false; 1910 hva = gfn_to_hva(kvm, args->start_gfn + i); 1911 if (kvm_is_error_hva(hva)) { 1912 r = -EFAULT; 1913 break; 1914 } 1915 1916 /* Lowest order bit is reserved */ 1917 if (keys[i] & 0x01) { 1918 r = -EINVAL; 1919 break; 1920 } 1921 1922 r = set_guest_storage_key(current->mm, hva, keys[i], 0); 1923 if (r) { 1924 r = fixup_user_fault(current->mm, hva, 1925 FAULT_FLAG_WRITE, &unlocked); 1926 if (r) 1927 break; 1928 } 1929 if (!r) 1930 i++; 1931 } 1932 srcu_read_unlock(&kvm->srcu, srcu_idx); 1933 mmap_read_unlock(current->mm); 1934 out: 1935 kvfree(keys); 1936 return r; 1937 } 1938 1939 /* 1940 * Base address and length must be sent at the start of each block, therefore 1941 * it's cheaper to send some clean data, as long as it's less than the size of 1942 * two longs. 1943 */ 1944 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *)) 1945 /* for consistency */ 1946 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX) 1947 1948 /* 1949 * Similar to gfn_to_memslot, but returns the index of a memslot also when the 1950 * address falls in a hole. In that case the index of one of the memslots 1951 * bordering the hole is returned. 
1952 */ 1953 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn) 1954 { 1955 int start = 0, end = slots->used_slots; 1956 int slot = atomic_read(&slots->lru_slot); 1957 struct kvm_memory_slot *memslots = slots->memslots; 1958 1959 if (gfn >= memslots[slot].base_gfn && 1960 gfn < memslots[slot].base_gfn + memslots[slot].npages) 1961 return slot; 1962 1963 while (start < end) { 1964 slot = start + (end - start) / 2; 1965 1966 if (gfn >= memslots[slot].base_gfn) 1967 end = slot; 1968 else 1969 start = slot + 1; 1970 } 1971 1972 if (start >= slots->used_slots) 1973 return slots->used_slots - 1; 1974 1975 if (gfn >= memslots[start].base_gfn && 1976 gfn < memslots[start].base_gfn + memslots[start].npages) { 1977 atomic_set(&slots->lru_slot, start); 1978 } 1979 1980 return start; 1981 } 1982 1983 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args, 1984 u8 *res, unsigned long bufsize) 1985 { 1986 unsigned long pgstev, hva, cur_gfn = args->start_gfn; 1987 1988 args->count = 0; 1989 while (args->count < bufsize) { 1990 hva = gfn_to_hva(kvm, cur_gfn); 1991 /* 1992 * We return an error if the first value was invalid, but we 1993 * return successfully if at least one value was copied. 1994 */ 1995 if (kvm_is_error_hva(hva)) 1996 return args->count ? 0 : -EFAULT; 1997 if (get_pgste(kvm->mm, hva, &pgstev) < 0) 1998 pgstev = 0; 1999 res[args->count++] = (pgstev >> 24) & 0x43; 2000 cur_gfn++; 2001 } 2002 2003 return 0; 2004 } 2005 2006 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots, 2007 unsigned long cur_gfn) 2008 { 2009 int slotidx = gfn_to_memslot_approx(slots, cur_gfn); 2010 struct kvm_memory_slot *ms = slots->memslots + slotidx; 2011 unsigned long ofs = cur_gfn - ms->base_gfn; 2012 2013 if (ms->base_gfn + ms->npages <= cur_gfn) { 2014 slotidx--; 2015 /* If we are above the highest slot, wrap around */ 2016 if (slotidx < 0) 2017 slotidx = slots->used_slots - 1; 2018 2019 ms = slots->memslots + slotidx; 2020 ofs = 0; 2021 } 2022 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs); 2023 while ((slotidx > 0) && (ofs >= ms->npages)) { 2024 slotidx--; 2025 ms = slots->memslots + slotidx; 2026 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0); 2027 } 2028 return ms->base_gfn + ofs; 2029 } 2030 2031 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args, 2032 u8 *res, unsigned long bufsize) 2033 { 2034 unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev; 2035 struct kvm_memslots *slots = kvm_memslots(kvm); 2036 struct kvm_memory_slot *ms; 2037 2038 if (unlikely(!slots->used_slots)) 2039 return 0; 2040 2041 cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn); 2042 ms = gfn_to_memslot(kvm, cur_gfn); 2043 args->count = 0; 2044 args->start_gfn = cur_gfn; 2045 if (!ms) 2046 return 0; 2047 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1); 2048 mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages; 2049 2050 while (args->count < bufsize) { 2051 hva = gfn_to_hva(kvm, cur_gfn); 2052 if (kvm_is_error_hva(hva)) 2053 return 0; 2054 /* Decrement only if we actually flipped the bit to 0 */ 2055 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms))) 2056 atomic64_dec(&kvm->arch.cmma_dirty_pages); 2057 if (get_pgste(kvm->mm, hva, &pgstev) < 0) 2058 pgstev = 0; 2059 /* Save the value */ 2060 res[args->count++] = (pgstev >> 24) & 0x43; 2061 /* If the next bit is too far away, stop. 
*/ 2062 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE) 2063 return 0; 2064 /* If we reached the previous "next", find the next one */ 2065 if (cur_gfn == next_gfn) 2066 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1); 2067 /* Reached the end of memory or of the buffer, stop */ 2068 if ((next_gfn >= mem_end) || 2069 (next_gfn - args->start_gfn >= bufsize)) 2070 return 0; 2071 cur_gfn++; 2072 /* Reached the end of the current memslot, take the next one. */ 2073 if (cur_gfn - ms->base_gfn >= ms->npages) { 2074 ms = gfn_to_memslot(kvm, cur_gfn); 2075 if (!ms) 2076 return 0; 2077 } 2078 } 2079 return 0; 2080 } 2081 2082 /* 2083 * This function searches for the next page with dirty CMMA attributes, and 2084 * saves the attributes in the buffer up to either the end of the buffer or 2085 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found; 2086 * no trailing clean bytes are saved. 2087 * In case no dirty bits were found, or if CMMA was not enabled or used, the 2088 * output buffer will indicate 0 as length. 2089 */ 2090 static int kvm_s390_get_cmma_bits(struct kvm *kvm, 2091 struct kvm_s390_cmma_log *args) 2092 { 2093 unsigned long bufsize; 2094 int srcu_idx, peek, ret; 2095 u8 *values; 2096 2097 if (!kvm->arch.use_cmma) 2098 return -ENXIO; 2099 /* Invalid/unsupported flags were specified */ 2100 if (args->flags & ~KVM_S390_CMMA_PEEK) 2101 return -EINVAL; 2102 /* Migration mode query, and we are not doing a migration */ 2103 peek = !!(args->flags & KVM_S390_CMMA_PEEK); 2104 if (!peek && !kvm->arch.migration_mode) 2105 return -EINVAL; 2106 /* CMMA is disabled or was not used, or the buffer has length zero */ 2107 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX); 2108 if (!bufsize || !kvm->mm->context.uses_cmm) { 2109 memset(args, 0, sizeof(*args)); 2110 return 0; 2111 } 2112 /* We are not peeking, and there are no dirty pages */ 2113 if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) { 2114 memset(args, 0, sizeof(*args)); 2115 return 0; 2116 } 2117 2118 values = vmalloc(bufsize); 2119 if (!values) 2120 return -ENOMEM; 2121 2122 mmap_read_lock(kvm->mm); 2123 srcu_idx = srcu_read_lock(&kvm->srcu); 2124 if (peek) 2125 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize); 2126 else 2127 ret = kvm_s390_get_cmma(kvm, args, values, bufsize); 2128 srcu_read_unlock(&kvm->srcu, srcu_idx); 2129 mmap_read_unlock(kvm->mm); 2130 2131 if (kvm->arch.migration_mode) 2132 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages); 2133 else 2134 args->remaining = 0; 2135 2136 if (copy_to_user((void __user *)args->values, values, args->count)) 2137 ret = -EFAULT; 2138 2139 vfree(values); 2140 return ret; 2141 } 2142 2143 /* 2144 * This function sets the CMMA attributes for the given pages. If the input 2145 * buffer has zero length, no action is taken, otherwise the attributes are 2146 * set and the mm->context.uses_cmm flag is set. 
2147 */ 2148 static int kvm_s390_set_cmma_bits(struct kvm *kvm, 2149 const struct kvm_s390_cmma_log *args) 2150 { 2151 unsigned long hva, mask, pgstev, i; 2152 uint8_t *bits; 2153 int srcu_idx, r = 0; 2154 2155 mask = args->mask; 2156 2157 if (!kvm->arch.use_cmma) 2158 return -ENXIO; 2159 /* invalid/unsupported flags */ 2160 if (args->flags != 0) 2161 return -EINVAL; 2162 /* Enforce sane limit on memory allocation */ 2163 if (args->count > KVM_S390_CMMA_SIZE_MAX) 2164 return -EINVAL; 2165 /* Nothing to do */ 2166 if (args->count == 0) 2167 return 0; 2168 2169 bits = vmalloc(array_size(sizeof(*bits), args->count)); 2170 if (!bits) 2171 return -ENOMEM; 2172 2173 r = copy_from_user(bits, (void __user *)args->values, args->count); 2174 if (r) { 2175 r = -EFAULT; 2176 goto out; 2177 } 2178 2179 mmap_read_lock(kvm->mm); 2180 srcu_idx = srcu_read_lock(&kvm->srcu); 2181 for (i = 0; i < args->count; i++) { 2182 hva = gfn_to_hva(kvm, args->start_gfn + i); 2183 if (kvm_is_error_hva(hva)) { 2184 r = -EFAULT; 2185 break; 2186 } 2187 2188 pgstev = bits[i]; 2189 pgstev = pgstev << 24; 2190 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT; 2191 set_pgste_bits(kvm->mm, hva, mask, pgstev); 2192 } 2193 srcu_read_unlock(&kvm->srcu, srcu_idx); 2194 mmap_read_unlock(kvm->mm); 2195 2196 if (!kvm->mm->context.uses_cmm) { 2197 mmap_write_lock(kvm->mm); 2198 kvm->mm->context.uses_cmm = 1; 2199 mmap_write_unlock(kvm->mm); 2200 } 2201 out: 2202 vfree(bits); 2203 return r; 2204 } 2205 2206 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp) 2207 { 2208 struct kvm_vcpu *vcpu; 2209 u16 rc, rrc; 2210 int ret = 0; 2211 int i; 2212 2213 /* 2214 * We ignore failures and try to destroy as many CPUs as possible. 2215 * At the same time we must not free the assigned resources when 2216 * this fails, as the ultravisor still has access to that memory. 2217 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak 2218 * behind. 2219 * We want to return the first failure rc and rrc, though. 2220 */ 2221 kvm_for_each_vcpu(i, vcpu, kvm) { 2222 mutex_lock(&vcpu->mutex); 2223 if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) { 2224 *rcp = rc; 2225 *rrcp = rrc; 2226 ret = -EIO; 2227 } 2228 mutex_unlock(&vcpu->mutex); 2229 } 2230 return ret; 2231 } 2232 2233 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc) 2234 { 2235 int i, r = 0; 2236 u16 dummy; 2237 2238 struct kvm_vcpu *vcpu; 2239 2240 kvm_for_each_vcpu(i, vcpu, kvm) { 2241 mutex_lock(&vcpu->mutex); 2242 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc); 2243 mutex_unlock(&vcpu->mutex); 2244 if (r) 2245 break; 2246 } 2247 if (r) 2248 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy); 2249 return r; 2250 } 2251 2252 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) 2253 { 2254 int r = 0; 2255 u16 dummy; 2256 void __user *argp = (void __user *)cmd->data; 2257 2258 switch (cmd->cmd) { 2259 case KVM_PV_ENABLE: { 2260 r = -EINVAL; 2261 if (kvm_s390_pv_is_protected(kvm)) 2262 break; 2263 2264 /* 2265 * FMT 4 SIE needs esca.
As we never switch back to bsca from 2266 * esca, we need no cleanup in the error cases below 2267 */ 2268 r = sca_switch_to_extended(kvm); 2269 if (r) 2270 break; 2271 2272 mmap_write_lock(current->mm); 2273 r = gmap_mark_unmergeable(); 2274 mmap_write_unlock(current->mm); 2275 if (r) 2276 break; 2277 2278 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc); 2279 if (r) 2280 break; 2281 2282 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc); 2283 if (r) 2284 kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy); 2285 2286 /* we need to block service interrupts from now on */ 2287 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs); 2288 break; 2289 } 2290 case KVM_PV_DISABLE: { 2291 r = -EINVAL; 2292 if (!kvm_s390_pv_is_protected(kvm)) 2293 break; 2294 2295 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc); 2296 /* 2297 * If a CPU could not be destroyed, destroy VM will also fail. 2298 * There is no point in trying to destroy it. Instead return 2299 * the rc and rrc from the first CPU that failed destroying. 2300 */ 2301 if (r) 2302 break; 2303 r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc); 2304 2305 /* no need to block service interrupts any more */ 2306 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs); 2307 break; 2308 } 2309 case KVM_PV_SET_SEC_PARMS: { 2310 struct kvm_s390_pv_sec_parm parms = {}; 2311 void *hdr; 2312 2313 r = -EINVAL; 2314 if (!kvm_s390_pv_is_protected(kvm)) 2315 break; 2316 2317 r = -EFAULT; 2318 if (copy_from_user(&parms, argp, sizeof(parms))) 2319 break; 2320 2321 /* Currently restricted to 8KB */ 2322 r = -EINVAL; 2323 if (parms.length > PAGE_SIZE * 2) 2324 break; 2325 2326 r = -ENOMEM; 2327 hdr = vmalloc(parms.length); 2328 if (!hdr) 2329 break; 2330 2331 r = -EFAULT; 2332 if (!copy_from_user(hdr, (void __user *)parms.origin, 2333 parms.length)) 2334 r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length, 2335 &cmd->rc, &cmd->rrc); 2336 2337 vfree(hdr); 2338 break; 2339 } 2340 case KVM_PV_UNPACK: { 2341 struct kvm_s390_pv_unp unp = {}; 2342 2343 r = -EINVAL; 2344 if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm)) 2345 break; 2346 2347 r = -EFAULT; 2348 if (copy_from_user(&unp, argp, sizeof(unp))) 2349 break; 2350 2351 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak, 2352 &cmd->rc, &cmd->rrc); 2353 break; 2354 } 2355 case KVM_PV_VERIFY: { 2356 r = -EINVAL; 2357 if (!kvm_s390_pv_is_protected(kvm)) 2358 break; 2359 2360 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), 2361 UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc); 2362 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc, 2363 cmd->rrc); 2364 break; 2365 } 2366 case KVM_PV_PREP_RESET: { 2367 r = -EINVAL; 2368 if (!kvm_s390_pv_is_protected(kvm)) 2369 break; 2370 2371 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), 2372 UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc); 2373 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x", 2374 cmd->rc, cmd->rrc); 2375 break; 2376 } 2377 case KVM_PV_UNSHARE_ALL: { 2378 r = -EINVAL; 2379 if (!kvm_s390_pv_is_protected(kvm)) 2380 break; 2381 2382 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), 2383 UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc); 2384 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x", 2385 cmd->rc, cmd->rrc); 2386 break; 2387 } 2388 default: 2389 r = -ENOTTY; 2390 } 2391 return r; 2392 } 2393 2394 long kvm_arch_vm_ioctl(struct file *filp, 2395 unsigned int ioctl, unsigned long arg) 2396 { 2397 struct kvm *kvm = filp->private_data; 2398 void __user *argp = (void __user *)arg; 2399 struct kvm_device_attr attr; 2400 
int r; 2401 2402 switch (ioctl) { 2403 case KVM_S390_INTERRUPT: { 2404 struct kvm_s390_interrupt s390int; 2405 2406 r = -EFAULT; 2407 if (copy_from_user(&s390int, argp, sizeof(s390int))) 2408 break; 2409 r = kvm_s390_inject_vm(kvm, &s390int); 2410 break; 2411 } 2412 case KVM_CREATE_IRQCHIP: { 2413 struct kvm_irq_routing_entry routing; 2414 2415 r = -EINVAL; 2416 if (kvm->arch.use_irqchip) { 2417 /* Set up dummy routing. */ 2418 memset(&routing, 0, sizeof(routing)); 2419 r = kvm_set_irq_routing(kvm, &routing, 0, 0); 2420 } 2421 break; 2422 } 2423 case KVM_SET_DEVICE_ATTR: { 2424 r = -EFAULT; 2425 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 2426 break; 2427 r = kvm_s390_vm_set_attr(kvm, &attr); 2428 break; 2429 } 2430 case KVM_GET_DEVICE_ATTR: { 2431 r = -EFAULT; 2432 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 2433 break; 2434 r = kvm_s390_vm_get_attr(kvm, &attr); 2435 break; 2436 } 2437 case KVM_HAS_DEVICE_ATTR: { 2438 r = -EFAULT; 2439 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 2440 break; 2441 r = kvm_s390_vm_has_attr(kvm, &attr); 2442 break; 2443 } 2444 case KVM_S390_GET_SKEYS: { 2445 struct kvm_s390_skeys args; 2446 2447 r = -EFAULT; 2448 if (copy_from_user(&args, argp, 2449 sizeof(struct kvm_s390_skeys))) 2450 break; 2451 r = kvm_s390_get_skeys(kvm, &args); 2452 break; 2453 } 2454 case KVM_S390_SET_SKEYS: { 2455 struct kvm_s390_skeys args; 2456 2457 r = -EFAULT; 2458 if (copy_from_user(&args, argp, 2459 sizeof(struct kvm_s390_skeys))) 2460 break; 2461 r = kvm_s390_set_skeys(kvm, &args); 2462 break; 2463 } 2464 case KVM_S390_GET_CMMA_BITS: { 2465 struct kvm_s390_cmma_log args; 2466 2467 r = -EFAULT; 2468 if (copy_from_user(&args, argp, sizeof(args))) 2469 break; 2470 mutex_lock(&kvm->slots_lock); 2471 r = kvm_s390_get_cmma_bits(kvm, &args); 2472 mutex_unlock(&kvm->slots_lock); 2473 if (!r) { 2474 r = copy_to_user(argp, &args, sizeof(args)); 2475 if (r) 2476 r = -EFAULT; 2477 } 2478 break; 2479 } 2480 case KVM_S390_SET_CMMA_BITS: { 2481 struct kvm_s390_cmma_log args; 2482 2483 r = -EFAULT; 2484 if (copy_from_user(&args, argp, sizeof(args))) 2485 break; 2486 mutex_lock(&kvm->slots_lock); 2487 r = kvm_s390_set_cmma_bits(kvm, &args); 2488 mutex_unlock(&kvm->slots_lock); 2489 break; 2490 } 2491 case KVM_S390_PV_COMMAND: { 2492 struct kvm_pv_cmd args; 2493 2494 /* protvirt means user sigp */ 2495 kvm->arch.user_cpu_state_ctrl = 1; 2496 r = 0; 2497 if (!is_prot_virt_host()) { 2498 r = -EINVAL; 2499 break; 2500 } 2501 if (copy_from_user(&args, argp, sizeof(args))) { 2502 r = -EFAULT; 2503 break; 2504 } 2505 if (args.flags) { 2506 r = -EINVAL; 2507 break; 2508 } 2509 mutex_lock(&kvm->lock); 2510 r = kvm_s390_handle_pv(kvm, &args); 2511 mutex_unlock(&kvm->lock); 2512 if (copy_to_user(argp, &args, sizeof(args))) { 2513 r = -EFAULT; 2514 break; 2515 } 2516 break; 2517 } 2518 default: 2519 r = -ENOTTY; 2520 } 2521 2522 return r; 2523 } 2524 2525 static int kvm_s390_apxa_installed(void) 2526 { 2527 struct ap_config_info info; 2528 2529 if (ap_instructions_available()) { 2530 if (ap_qci(&info) == 0) 2531 return info.apxa; 2532 } 2533 2534 return 0; 2535 } 2536 2537 /* 2538 * The format of the crypto control block (CRYCB) is specified in the 3 low 2539 * order bits of the CRYCB designation (CRYCBD) field as follows: 2540 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the 2541 * AP extended addressing (APXA) facility are installed. 2542 * Format 1: The APXA facility is not installed but the MSAX3 facility is. 
2543 * Format 2: Both the APXA and MSAX3 facilities are installed 2544 */ 2545 static void kvm_s390_set_crycb_format(struct kvm *kvm) 2546 { 2547 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb; 2548 2549 /* Clear the CRYCB format bits - i.e., set format 0 by default */ 2550 kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK); 2551 2552 /* Check whether MSAX3 is installed */ 2553 if (!test_kvm_facility(kvm, 76)) 2554 return; 2555 2556 if (kvm_s390_apxa_installed()) 2557 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2; 2558 else 2559 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1; 2560 } 2561 2562 /* 2563 * kvm_arch_crypto_set_masks 2564 * 2565 * @kvm: pointer to the target guest's KVM struct containing the crypto masks 2566 * to be set. 2567 * @apm: the mask identifying the accessible AP adapters 2568 * @aqm: the mask identifying the accessible AP domains 2569 * @adm: the mask identifying the accessible AP control domains 2570 * 2571 * Set the masks that identify the adapters, domains and control domains to 2572 * which the KVM guest is granted access. 2573 * 2574 * Note: The kvm->lock mutex must be locked by the caller before invoking this 2575 * function. 2576 */ 2577 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm, 2578 unsigned long *aqm, unsigned long *adm) 2579 { 2580 struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb; 2581 2582 kvm_s390_vcpu_block_all(kvm); 2583 2584 switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) { 2585 case CRYCB_FORMAT2: /* APCB1 use 256 bits */ 2586 memcpy(crycb->apcb1.apm, apm, 32); 2587 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx", 2588 apm[0], apm[1], apm[2], apm[3]); 2589 memcpy(crycb->apcb1.aqm, aqm, 32); 2590 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx", 2591 aqm[0], aqm[1], aqm[2], aqm[3]); 2592 memcpy(crycb->apcb1.adm, adm, 32); 2593 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx", 2594 adm[0], adm[1], adm[2], adm[3]); 2595 break; 2596 case CRYCB_FORMAT1: 2597 case CRYCB_FORMAT0: /* Fall through both use APCB0 */ 2598 memcpy(crycb->apcb0.apm, apm, 8); 2599 memcpy(crycb->apcb0.aqm, aqm, 2); 2600 memcpy(crycb->apcb0.adm, adm, 2); 2601 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x", 2602 apm[0], *((unsigned short *)aqm), 2603 *((unsigned short *)adm)); 2604 break; 2605 default: /* Can not happen */ 2606 break; 2607 } 2608 2609 /* recreate the shadow crycb for each vcpu */ 2610 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART); 2611 kvm_s390_vcpu_unblock_all(kvm); 2612 } 2613 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks); 2614 2615 /* 2616 * kvm_arch_crypto_clear_masks 2617 * 2618 * @kvm: pointer to the target guest's KVM struct containing the crypto masks 2619 * to be cleared. 2620 * 2621 * Clear the masks that identify the adapters, domains and control domains to 2622 * which the KVM guest is granted access. 2623 * 2624 * Note: The kvm->lock mutex must be locked by the caller before invoking this 2625 * function. 
2626 */ 2627 void kvm_arch_crypto_clear_masks(struct kvm *kvm) 2628 { 2629 kvm_s390_vcpu_block_all(kvm); 2630 2631 memset(&kvm->arch.crypto.crycb->apcb0, 0, 2632 sizeof(kvm->arch.crypto.crycb->apcb0)); 2633 memset(&kvm->arch.crypto.crycb->apcb1, 0, 2634 sizeof(kvm->arch.crypto.crycb->apcb1)); 2635 2636 VM_EVENT(kvm, 3, "%s", "CLR CRYCB:"); 2637 /* recreate the shadow crycb for each vcpu */ 2638 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART); 2639 kvm_s390_vcpu_unblock_all(kvm); 2640 } 2641 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks); 2642 2643 static u64 kvm_s390_get_initial_cpuid(void) 2644 { 2645 struct cpuid cpuid; 2646 2647 get_cpu_id(&cpuid); 2648 cpuid.version = 0xff; 2649 return *((u64 *) &cpuid); 2650 } 2651 2652 static void kvm_s390_crypto_init(struct kvm *kvm) 2653 { 2654 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb; 2655 kvm_s390_set_crycb_format(kvm); 2656 init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem); 2657 2658 if (!test_kvm_facility(kvm, 76)) 2659 return; 2660 2661 /* Enable AES/DEA protected key functions by default */ 2662 kvm->arch.crypto.aes_kw = 1; 2663 kvm->arch.crypto.dea_kw = 1; 2664 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 2665 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); 2666 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 2667 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); 2668 } 2669 2670 static void sca_dispose(struct kvm *kvm) 2671 { 2672 if (kvm->arch.use_esca) 2673 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block)); 2674 else 2675 free_page((unsigned long)(kvm->arch.sca)); 2676 kvm->arch.sca = NULL; 2677 } 2678 2679 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) 2680 { 2681 gfp_t alloc_flags = GFP_KERNEL_ACCOUNT; 2682 int i, rc; 2683 char debug_name[16]; 2684 static unsigned long sca_offset; 2685 2686 rc = -EINVAL; 2687 #ifdef CONFIG_KVM_S390_UCONTROL 2688 if (type & ~KVM_VM_S390_UCONTROL) 2689 goto out_err; 2690 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN))) 2691 goto out_err; 2692 #else 2693 if (type) 2694 goto out_err; 2695 #endif 2696 2697 rc = s390_enable_sie(); 2698 if (rc) 2699 goto out_err; 2700 2701 rc = -ENOMEM; 2702 2703 if (!sclp.has_64bscao) 2704 alloc_flags |= GFP_DMA; 2705 rwlock_init(&kvm->arch.sca_lock); 2706 /* start with basic SCA */ 2707 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags); 2708 if (!kvm->arch.sca) 2709 goto out_err; 2710 mutex_lock(&kvm_lock); 2711 sca_offset += 16; 2712 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE) 2713 sca_offset = 0; 2714 kvm->arch.sca = (struct bsca_block *) 2715 ((char *) kvm->arch.sca + sca_offset); 2716 mutex_unlock(&kvm_lock); 2717 2718 sprintf(debug_name, "kvm-%u", current->pid); 2719 2720 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long)); 2721 if (!kvm->arch.dbf) 2722 goto out_err; 2723 2724 BUILD_BUG_ON(sizeof(struct sie_page2) != 4096); 2725 kvm->arch.sie_page2 = 2726 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA); 2727 if (!kvm->arch.sie_page2) 2728 goto out_err; 2729 2730 kvm->arch.sie_page2->kvm = kvm; 2731 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list; 2732 2733 for (i = 0; i < kvm_s390_fac_size(); i++) { 2734 kvm->arch.model.fac_mask[i] = stfle_fac_list[i] & 2735 (kvm_s390_fac_base[i] | 2736 kvm_s390_fac_ext[i]); 2737 kvm->arch.model.fac_list[i] = stfle_fac_list[i] & 2738 kvm_s390_fac_base[i]; 2739 } 2740 kvm->arch.model.subfuncs = kvm_s390_available_subfunc; 2741 2742 /* we are always in czam mode - 
even on pre z14 machines */ 2743 set_kvm_facility(kvm->arch.model.fac_mask, 138); 2744 set_kvm_facility(kvm->arch.model.fac_list, 138); 2745 /* we emulate STHYI in kvm */ 2746 set_kvm_facility(kvm->arch.model.fac_mask, 74); 2747 set_kvm_facility(kvm->arch.model.fac_list, 74); 2748 if (MACHINE_HAS_TLB_GUEST) { 2749 set_kvm_facility(kvm->arch.model.fac_mask, 147); 2750 set_kvm_facility(kvm->arch.model.fac_list, 147); 2751 } 2752 2753 if (css_general_characteristics.aiv && test_facility(65)) 2754 set_kvm_facility(kvm->arch.model.fac_mask, 65); 2755 2756 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid(); 2757 kvm->arch.model.ibc = sclp.ibc & 0x0fff; 2758 2759 kvm_s390_crypto_init(kvm); 2760 2761 mutex_init(&kvm->arch.float_int.ais_lock); 2762 spin_lock_init(&kvm->arch.float_int.lock); 2763 for (i = 0; i < FIRQ_LIST_COUNT; i++) 2764 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]); 2765 init_waitqueue_head(&kvm->arch.ipte_wq); 2766 mutex_init(&kvm->arch.ipte_mutex); 2767 2768 debug_register_view(kvm->arch.dbf, &debug_sprintf_view); 2769 VM_EVENT(kvm, 3, "vm created with type %lu", type); 2770 2771 if (type & KVM_VM_S390_UCONTROL) { 2772 kvm->arch.gmap = NULL; 2773 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT; 2774 } else { 2775 if (sclp.hamax == U64_MAX) 2776 kvm->arch.mem_limit = TASK_SIZE_MAX; 2777 else 2778 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX, 2779 sclp.hamax + 1); 2780 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1); 2781 if (!kvm->arch.gmap) 2782 goto out_err; 2783 kvm->arch.gmap->private = kvm; 2784 kvm->arch.gmap->pfault_enabled = 0; 2785 } 2786 2787 kvm->arch.use_pfmfi = sclp.has_pfmfi; 2788 kvm->arch.use_skf = sclp.has_skey; 2789 spin_lock_init(&kvm->arch.start_stop_lock); 2790 kvm_s390_vsie_init(kvm); 2791 if (use_gisa) 2792 kvm_s390_gisa_init(kvm); 2793 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid); 2794 2795 return 0; 2796 out_err: 2797 free_page((unsigned long)kvm->arch.sie_page2); 2798 debug_unregister(kvm->arch.dbf); 2799 sca_dispose(kvm); 2800 KVM_EVENT(3, "creation of vm failed: %d", rc); 2801 return rc; 2802 } 2803 2804 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) 2805 { 2806 u16 rc, rrc; 2807 2808 VCPU_EVENT(vcpu, 3, "%s", "free cpu"); 2809 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id); 2810 kvm_s390_clear_local_irqs(vcpu); 2811 kvm_clear_async_pf_completion_queue(vcpu); 2812 if (!kvm_is_ucontrol(vcpu->kvm)) 2813 sca_del_vcpu(vcpu); 2814 2815 if (kvm_is_ucontrol(vcpu->kvm)) 2816 gmap_remove(vcpu->arch.gmap); 2817 2818 if (vcpu->kvm->arch.use_cmma) 2819 kvm_s390_vcpu_unsetup_cmma(vcpu); 2820 /* We can not hold the vcpu mutex here, we are already dying */ 2821 if (kvm_s390_pv_cpu_get_handle(vcpu)) 2822 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc); 2823 free_page((unsigned long)(vcpu->arch.sie_block)); 2824 } 2825 2826 static void kvm_free_vcpus(struct kvm *kvm) 2827 { 2828 unsigned int i; 2829 struct kvm_vcpu *vcpu; 2830 2831 kvm_for_each_vcpu(i, vcpu, kvm) 2832 kvm_vcpu_destroy(vcpu); 2833 2834 mutex_lock(&kvm->lock); 2835 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) 2836 kvm->vcpus[i] = NULL; 2837 2838 atomic_set(&kvm->online_vcpus, 0); 2839 mutex_unlock(&kvm->lock); 2840 } 2841 2842 void kvm_arch_destroy_vm(struct kvm *kvm) 2843 { 2844 u16 rc, rrc; 2845 2846 kvm_free_vcpus(kvm); 2847 sca_dispose(kvm); 2848 kvm_s390_gisa_destroy(kvm); 2849 /* 2850 * We are already at the end of life and kvm->lock is not taken. 2851 * This is ok as the file descriptor is closed by now and nobody 2852 * can mess with the pv state. 
To avoid lockdep_assert_held from 2853 * complaining we do not use kvm_s390_pv_is_protected. 2854 */ 2855 if (kvm_s390_pv_get_handle(kvm)) 2856 kvm_s390_pv_deinit_vm(kvm, &rc, &rrc); 2857 debug_unregister(kvm->arch.dbf); 2858 free_page((unsigned long)kvm->arch.sie_page2); 2859 if (!kvm_is_ucontrol(kvm)) 2860 gmap_remove(kvm->arch.gmap); 2861 kvm_s390_destroy_adapters(kvm); 2862 kvm_s390_clear_float_irqs(kvm); 2863 kvm_s390_vsie_destroy(kvm); 2864 KVM_EVENT(3, "vm 0x%pK destroyed", kvm); 2865 } 2866 2867 /* Section: vcpu related */ 2868 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu) 2869 { 2870 vcpu->arch.gmap = gmap_create(current->mm, -1UL); 2871 if (!vcpu->arch.gmap) 2872 return -ENOMEM; 2873 vcpu->arch.gmap->private = vcpu->kvm; 2874 2875 return 0; 2876 } 2877 2878 static void sca_del_vcpu(struct kvm_vcpu *vcpu) 2879 { 2880 if (!kvm_s390_use_sca_entries()) 2881 return; 2882 read_lock(&vcpu->kvm->arch.sca_lock); 2883 if (vcpu->kvm->arch.use_esca) { 2884 struct esca_block *sca = vcpu->kvm->arch.sca; 2885 2886 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); 2887 sca->cpu[vcpu->vcpu_id].sda = 0; 2888 } else { 2889 struct bsca_block *sca = vcpu->kvm->arch.sca; 2890 2891 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); 2892 sca->cpu[vcpu->vcpu_id].sda = 0; 2893 } 2894 read_unlock(&vcpu->kvm->arch.sca_lock); 2895 } 2896 2897 static void sca_add_vcpu(struct kvm_vcpu *vcpu) 2898 { 2899 if (!kvm_s390_use_sca_entries()) { 2900 struct bsca_block *sca = vcpu->kvm->arch.sca; 2901 2902 /* we still need the basic sca for the ipte control */ 2903 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2904 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; 2905 return; 2906 } 2907 read_lock(&vcpu->kvm->arch.sca_lock); 2908 if (vcpu->kvm->arch.use_esca) { 2909 struct esca_block *sca = vcpu->kvm->arch.sca; 2910 2911 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; 2912 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2913 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU; 2914 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; 2915 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); 2916 } else { 2917 struct bsca_block *sca = vcpu->kvm->arch.sca; 2918 2919 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; 2920 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2921 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; 2922 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); 2923 } 2924 read_unlock(&vcpu->kvm->arch.sca_lock); 2925 } 2926 2927 /* Basic SCA to Extended SCA data copy routines */ 2928 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s) 2929 { 2930 d->sda = s->sda; 2931 d->sigp_ctrl.c = s->sigp_ctrl.c; 2932 d->sigp_ctrl.scn = s->sigp_ctrl.scn; 2933 } 2934 2935 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s) 2936 { 2937 int i; 2938 2939 d->ipte_control = s->ipte_control; 2940 d->mcn[0] = s->mcn; 2941 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++) 2942 sca_copy_entry(&d->cpu[i], &s->cpu[i]); 2943 } 2944 2945 static int sca_switch_to_extended(struct kvm *kvm) 2946 { 2947 struct bsca_block *old_sca = kvm->arch.sca; 2948 struct esca_block *new_sca; 2949 struct kvm_vcpu *vcpu; 2950 unsigned int vcpu_idx; 2951 u32 scaol, scaoh; 2952 2953 if (kvm->arch.use_esca) 2954 return 0; 2955 2956 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO); 2957 if (!new_sca) 2958 return -ENOMEM; 2959 2960 scaoh = (u32)((u64)(new_sca) >> 32); 2961 scaol = (u32)(u64)(new_sca) 
& ~0x3fU; 2962 2963 kvm_s390_vcpu_block_all(kvm); 2964 write_lock(&kvm->arch.sca_lock); 2965 2966 sca_copy_b_to_e(new_sca, old_sca); 2967 2968 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) { 2969 vcpu->arch.sie_block->scaoh = scaoh; 2970 vcpu->arch.sie_block->scaol = scaol; 2971 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; 2972 } 2973 kvm->arch.sca = new_sca; 2974 kvm->arch.use_esca = 1; 2975 2976 write_unlock(&kvm->arch.sca_lock); 2977 kvm_s390_vcpu_unblock_all(kvm); 2978 2979 free_page((unsigned long)old_sca); 2980 2981 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)", 2982 old_sca, kvm->arch.sca); 2983 return 0; 2984 } 2985 2986 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id) 2987 { 2988 int rc; 2989 2990 if (!kvm_s390_use_sca_entries()) { 2991 if (id < KVM_MAX_VCPUS) 2992 return true; 2993 return false; 2994 } 2995 if (id < KVM_S390_BSCA_CPU_SLOTS) 2996 return true; 2997 if (!sclp.has_esca || !sclp.has_64bscao) 2998 return false; 2999 3000 mutex_lock(&kvm->lock); 3001 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm); 3002 mutex_unlock(&kvm->lock); 3003 3004 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS; 3005 } 3006 3007 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 3008 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3009 { 3010 WARN_ON_ONCE(vcpu->arch.cputm_start != 0); 3011 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 3012 vcpu->arch.cputm_start = get_tod_clock_fast(); 3013 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 3014 } 3015 3016 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 3017 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3018 { 3019 WARN_ON_ONCE(vcpu->arch.cputm_start == 0); 3020 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 3021 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start; 3022 vcpu->arch.cputm_start = 0; 3023 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 3024 } 3025 3026 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 3027 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3028 { 3029 WARN_ON_ONCE(vcpu->arch.cputm_enabled); 3030 vcpu->arch.cputm_enabled = true; 3031 __start_cpu_timer_accounting(vcpu); 3032 } 3033 3034 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 3035 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3036 { 3037 WARN_ON_ONCE(!vcpu->arch.cputm_enabled); 3038 __stop_cpu_timer_accounting(vcpu); 3039 vcpu->arch.cputm_enabled = false; 3040 } 3041 3042 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3043 { 3044 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3045 __enable_cpu_timer_accounting(vcpu); 3046 preempt_enable(); 3047 } 3048 3049 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3050 { 3051 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3052 __disable_cpu_timer_accounting(vcpu); 3053 preempt_enable(); 3054 } 3055 3056 /* set the cpu timer - may only be called from the VCPU thread itself */ 3057 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm) 3058 { 3059 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3060 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 3061 if (vcpu->arch.cputm_enabled) 3062 vcpu->arch.cputm_start = get_tod_clock_fast(); 3063 vcpu->arch.sie_block->cputm = cputm; 3064 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 3065 preempt_enable(); 3066 } 3067 3068 /* update and get the cpu 
timer - can also be called from other VCPU threads */ 3069 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu) 3070 { 3071 unsigned int seq; 3072 __u64 value; 3073 3074 if (unlikely(!vcpu->arch.cputm_enabled)) 3075 return vcpu->arch.sie_block->cputm; 3076 3077 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3078 do { 3079 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount); 3080 /* 3081 * If the writer would ever execute a read in the critical 3082 * section, e.g. in irq context, we have a deadlock. 3083 */ 3084 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu); 3085 value = vcpu->arch.sie_block->cputm; 3086 /* if cputm_start is 0, accounting is being started/stopped */ 3087 if (likely(vcpu->arch.cputm_start)) 3088 value -= get_tod_clock_fast() - vcpu->arch.cputm_start; 3089 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1)); 3090 preempt_enable(); 3091 return value; 3092 } 3093 3094 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 3095 { 3096 3097 gmap_enable(vcpu->arch.enabled_gmap); 3098 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING); 3099 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) 3100 __start_cpu_timer_accounting(vcpu); 3101 vcpu->cpu = cpu; 3102 } 3103 3104 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 3105 { 3106 vcpu->cpu = -1; 3107 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) 3108 __stop_cpu_timer_accounting(vcpu); 3109 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING); 3110 vcpu->arch.enabled_gmap = gmap_get_enabled(); 3111 gmap_disable(vcpu->arch.enabled_gmap); 3112 3113 } 3114 3115 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) 3116 { 3117 mutex_lock(&vcpu->kvm->lock); 3118 preempt_disable(); 3119 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch; 3120 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx; 3121 preempt_enable(); 3122 mutex_unlock(&vcpu->kvm->lock); 3123 if (!kvm_is_ucontrol(vcpu->kvm)) { 3124 vcpu->arch.gmap = vcpu->kvm->arch.gmap; 3125 sca_add_vcpu(vcpu); 3126 } 3127 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0) 3128 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC; 3129 /* make vcpu_load load the right gmap on the first trigger */ 3130 vcpu->arch.enabled_gmap = vcpu->arch.gmap; 3131 } 3132 3133 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr) 3134 { 3135 if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) && 3136 test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo)) 3137 return true; 3138 return false; 3139 } 3140 3141 static bool kvm_has_pckmo_ecc(struct kvm *kvm) 3142 { 3143 /* At least one ECC subfunction must be present */ 3144 return kvm_has_pckmo_subfunc(kvm, 32) || 3145 kvm_has_pckmo_subfunc(kvm, 33) || 3146 kvm_has_pckmo_subfunc(kvm, 34) || 3147 kvm_has_pckmo_subfunc(kvm, 40) || 3148 kvm_has_pckmo_subfunc(kvm, 41); 3149 3150 } 3151 3152 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu) 3153 { 3154 /* 3155 * If the AP instructions are not being interpreted and the MSAX3 3156 * facility is not configured for the guest, there is nothing to set up. 
3157 */ 3158 if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76)) 3159 return; 3160 3161 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd; 3162 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA); 3163 vcpu->arch.sie_block->eca &= ~ECA_APIE; 3164 vcpu->arch.sie_block->ecd &= ~ECD_ECC; 3165 3166 if (vcpu->kvm->arch.crypto.apie) 3167 vcpu->arch.sie_block->eca |= ECA_APIE; 3168 3169 /* Set up protected key support */ 3170 if (vcpu->kvm->arch.crypto.aes_kw) { 3171 vcpu->arch.sie_block->ecb3 |= ECB3_AES; 3172 /* ecc is also wrapped with AES key */ 3173 if (kvm_has_pckmo_ecc(vcpu->kvm)) 3174 vcpu->arch.sie_block->ecd |= ECD_ECC; 3175 } 3176 3177 if (vcpu->kvm->arch.crypto.dea_kw) 3178 vcpu->arch.sie_block->ecb3 |= ECB3_DEA; 3179 } 3180 3181 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu) 3182 { 3183 free_page(vcpu->arch.sie_block->cbrlo); 3184 vcpu->arch.sie_block->cbrlo = 0; 3185 } 3186 3187 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu) 3188 { 3189 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT); 3190 if (!vcpu->arch.sie_block->cbrlo) 3191 return -ENOMEM; 3192 return 0; 3193 } 3194 3195 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu) 3196 { 3197 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model; 3198 3199 vcpu->arch.sie_block->ibc = model->ibc; 3200 if (test_kvm_facility(vcpu->kvm, 7)) 3201 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list; 3202 } 3203 3204 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu) 3205 { 3206 int rc = 0; 3207 u16 uvrc, uvrrc; 3208 3209 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | 3210 CPUSTAT_SM | 3211 CPUSTAT_STOPPED); 3212 3213 if (test_kvm_facility(vcpu->kvm, 78)) 3214 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2); 3215 else if (test_kvm_facility(vcpu->kvm, 8)) 3216 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED); 3217 3218 kvm_s390_vcpu_setup_model(vcpu); 3219 3220 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */ 3221 if (MACHINE_HAS_ESOP) 3222 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT; 3223 if (test_kvm_facility(vcpu->kvm, 9)) 3224 vcpu->arch.sie_block->ecb |= ECB_SRSI; 3225 if (test_kvm_facility(vcpu->kvm, 73)) 3226 vcpu->arch.sie_block->ecb |= ECB_TE; 3227 3228 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi) 3229 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI; 3230 if (test_kvm_facility(vcpu->kvm, 130)) 3231 vcpu->arch.sie_block->ecb2 |= ECB2_IEP; 3232 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI; 3233 if (sclp.has_cei) 3234 vcpu->arch.sie_block->eca |= ECA_CEI; 3235 if (sclp.has_ib) 3236 vcpu->arch.sie_block->eca |= ECA_IB; 3237 if (sclp.has_siif) 3238 vcpu->arch.sie_block->eca |= ECA_SII; 3239 if (sclp.has_sigpif) 3240 vcpu->arch.sie_block->eca |= ECA_SIGPI; 3241 if (test_kvm_facility(vcpu->kvm, 129)) { 3242 vcpu->arch.sie_block->eca |= ECA_VX; 3243 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; 3244 } 3245 if (test_kvm_facility(vcpu->kvm, 139)) 3246 vcpu->arch.sie_block->ecd |= ECD_MEF; 3247 if (test_kvm_facility(vcpu->kvm, 156)) 3248 vcpu->arch.sie_block->ecd |= ECD_ETOKENF; 3249 if (vcpu->arch.sie_block->gd) { 3250 vcpu->arch.sie_block->eca |= ECA_AIV; 3251 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u", 3252 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id); 3253 } 3254 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx) 3255 | SDNXC; 3256 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb; 3257 3258 if (sclp.has_kss) 3259 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS); 3260 else 
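/* no keyless subset (KSS) facility: intercept the storage key instructions (ISKE, SSKE, RRBE) so KVM can handle them itself */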
3261 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; 3262 3263 if (vcpu->kvm->arch.use_cmma) { 3264 rc = kvm_s390_vcpu_setup_cmma(vcpu); 3265 if (rc) 3266 return rc; 3267 } 3268 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 3269 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; 3270 3271 vcpu->arch.sie_block->hpid = HPID_KVM; 3272 3273 kvm_s390_vcpu_crypto_setup(vcpu); 3274 3275 mutex_lock(&vcpu->kvm->lock); 3276 if (kvm_s390_pv_is_protected(vcpu->kvm)) { 3277 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc); 3278 if (rc) 3279 kvm_s390_vcpu_unsetup_cmma(vcpu); 3280 } 3281 mutex_unlock(&vcpu->kvm->lock); 3282 3283 return rc; 3284 } 3285 3286 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) 3287 { 3288 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id)) 3289 return -EINVAL; 3290 return 0; 3291 } 3292 3293 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) 3294 { 3295 struct sie_page *sie_page; 3296 int rc; 3297 3298 BUILD_BUG_ON(sizeof(struct sie_page) != 4096); 3299 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT); 3300 if (!sie_page) 3301 return -ENOMEM; 3302 3303 vcpu->arch.sie_block = &sie_page->sie_block; 3304 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb; 3305 3306 /* the real guest size will always be smaller than msl */ 3307 vcpu->arch.sie_block->mso = 0; 3308 vcpu->arch.sie_block->msl = sclp.hamax; 3309 3310 vcpu->arch.sie_block->icpua = vcpu->vcpu_id; 3311 spin_lock_init(&vcpu->arch.local_int.lock); 3312 vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin; 3313 if (vcpu->arch.sie_block->gd && sclp.has_gisaf) 3314 vcpu->arch.sie_block->gd |= GISA_FORMAT1; 3315 seqcount_init(&vcpu->arch.cputm_seqcount); 3316 3317 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 3318 kvm_clear_async_pf_completion_queue(vcpu); 3319 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX | 3320 KVM_SYNC_GPRS | 3321 KVM_SYNC_ACRS | 3322 KVM_SYNC_CRS | 3323 KVM_SYNC_ARCH0 | 3324 KVM_SYNC_PFAULT | 3325 KVM_SYNC_DIAG318; 3326 kvm_s390_set_prefix(vcpu, 0); 3327 if (test_kvm_facility(vcpu->kvm, 64)) 3328 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; 3329 if (test_kvm_facility(vcpu->kvm, 82)) 3330 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC; 3331 if (test_kvm_facility(vcpu->kvm, 133)) 3332 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB; 3333 if (test_kvm_facility(vcpu->kvm, 156)) 3334 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN; 3335 /* fprs can be synchronized via vrs, even if the guest has no vx. With 3336 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format. 
3337 */ 3338 if (MACHINE_HAS_VX) 3339 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS; 3340 else 3341 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS; 3342 3343 if (kvm_is_ucontrol(vcpu->kvm)) { 3344 rc = __kvm_ucontrol_vcpu_init(vcpu); 3345 if (rc) 3346 goto out_free_sie_block; 3347 } 3348 3349 VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", 3350 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block); 3351 trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block); 3352 3353 rc = kvm_s390_vcpu_setup(vcpu); 3354 if (rc) 3355 goto out_ucontrol_uninit; 3356 return 0; 3357 3358 out_ucontrol_uninit: 3359 if (kvm_is_ucontrol(vcpu->kvm)) 3360 gmap_remove(vcpu->arch.gmap); 3361 out_free_sie_block: 3362 free_page((unsigned long)(vcpu->arch.sie_block)); 3363 return rc; 3364 } 3365 3366 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 3367 { 3368 return kvm_s390_vcpu_has_irq(vcpu, 0); 3369 } 3370 3371 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) 3372 { 3373 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE); 3374 } 3375 3376 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu) 3377 { 3378 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); 3379 exit_sie(vcpu); 3380 } 3381 3382 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu) 3383 { 3384 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); 3385 } 3386 3387 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu) 3388 { 3389 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20); 3390 exit_sie(vcpu); 3391 } 3392 3393 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu) 3394 { 3395 return atomic_read(&vcpu->arch.sie_block->prog20) & 3396 (PROG_BLOCK_SIE | PROG_REQUEST); 3397 } 3398 3399 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu) 3400 { 3401 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20); 3402 } 3403 3404 /* 3405 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running. 3406 * If the CPU is not running (e.g. waiting as idle) the function will 3407 * return immediately. 
*/ 3408 void exit_sie(struct kvm_vcpu *vcpu) 3409 { 3410 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT); 3411 kvm_s390_vsie_kick(vcpu); 3412 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE) 3413 cpu_relax(); 3414 } 3415 3416 /* Kick a guest cpu out of SIE to process a request synchronously */ 3417 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu) 3418 { 3419 kvm_make_request(req, vcpu); 3420 kvm_s390_vcpu_request(vcpu); 3421 } 3422 3423 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, 3424 unsigned long end) 3425 { 3426 struct kvm *kvm = gmap->private; 3427 struct kvm_vcpu *vcpu; 3428 unsigned long prefix; 3429 int i; 3430 3431 if (gmap_is_shadow(gmap)) 3432 return; 3433 if (start >= 1UL << 31) 3434 /* We are only interested in prefix pages */ 3435 return; 3436 kvm_for_each_vcpu(i, vcpu, kvm) { 3437 /* match against both prefix pages */ 3438 prefix = kvm_s390_get_prefix(vcpu); 3439 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) { 3440 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx", 3441 start, end); 3442 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu); 3443 } 3444 } 3445 } 3446 3447 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) 3448 { 3449 /* do not poll with more than halt_poll_max_steal percent of steal time */ 3450 if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >= 3451 halt_poll_max_steal) { 3452 vcpu->stat.halt_no_poll_steal++; 3453 return true; 3454 } 3455 return false; 3456 } 3457 3458 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) 3459 { 3460 /* kvm common code refers to this, but never calls it */ 3461 BUG(); 3462 return 0; 3463 } 3464 3465 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, 3466 struct kvm_one_reg *reg) 3467 { 3468 int r = -EINVAL; 3469 3470 switch (reg->id) { 3471 case KVM_REG_S390_TODPR: 3472 r = put_user(vcpu->arch.sie_block->todpr, 3473 (u32 __user *)reg->addr); 3474 break; 3475 case KVM_REG_S390_EPOCHDIFF: 3476 r = put_user(vcpu->arch.sie_block->epoch, 3477 (u64 __user *)reg->addr); 3478 break; 3479 case KVM_REG_S390_CPU_TIMER: 3480 r = put_user(kvm_s390_get_cpu_timer(vcpu), 3481 (u64 __user *)reg->addr); 3482 break; 3483 case KVM_REG_S390_CLOCK_COMP: 3484 r = put_user(vcpu->arch.sie_block->ckc, 3485 (u64 __user *)reg->addr); 3486 break; 3487 case KVM_REG_S390_PFTOKEN: 3488 r = put_user(vcpu->arch.pfault_token, 3489 (u64 __user *)reg->addr); 3490 break; 3491 case KVM_REG_S390_PFCOMPARE: 3492 r = put_user(vcpu->arch.pfault_compare, 3493 (u64 __user *)reg->addr); 3494 break; 3495 case KVM_REG_S390_PFSELECT: 3496 r = put_user(vcpu->arch.pfault_select, 3497 (u64 __user *)reg->addr); 3498 break; 3499 case KVM_REG_S390_PP: 3500 r = put_user(vcpu->arch.sie_block->pp, 3501 (u64 __user *)reg->addr); 3502 break; 3503 case KVM_REG_S390_GBEA: 3504 r = put_user(vcpu->arch.sie_block->gbea, 3505 (u64 __user *)reg->addr); 3506 break; 3507 default: 3508 break; 3509 } 3510 3511 return r; 3512 } 3513 3514 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, 3515 struct kvm_one_reg *reg) 3516 { 3517 int r = -EINVAL; 3518 __u64 val; 3519 3520 switch (reg->id) { 3521 case KVM_REG_S390_TODPR: 3522 r = get_user(vcpu->arch.sie_block->todpr, 3523 (u32 __user *)reg->addr); 3524 break; 3525 case KVM_REG_S390_EPOCHDIFF: 3526 r = get_user(vcpu->arch.sie_block->epoch, 3527 (u64 __user *)reg->addr); 3528 break; 3529 case KVM_REG_S390_CPU_TIMER: 3530 r = get_user(val, (u64 __user *)reg->addr); 3531 if (!r) 3532 kvm_s390_set_cpu_timer(vcpu, val); 3533 break; 3534 case KVM_REG_S390_CLOCK_COMP: 3535 r = 
get_user(vcpu->arch.sie_block->ckc, 3536 (u64 __user *)reg->addr); 3537 break; 3538 case KVM_REG_S390_PFTOKEN: 3539 r = get_user(vcpu->arch.pfault_token, 3540 (u64 __user *)reg->addr); 3541 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 3542 kvm_clear_async_pf_completion_queue(vcpu); 3543 break; 3544 case KVM_REG_S390_PFCOMPARE: 3545 r = get_user(vcpu->arch.pfault_compare, 3546 (u64 __user *)reg->addr); 3547 break; 3548 case KVM_REG_S390_PFSELECT: 3549 r = get_user(vcpu->arch.pfault_select, 3550 (u64 __user *)reg->addr); 3551 break; 3552 case KVM_REG_S390_PP: 3553 r = get_user(vcpu->arch.sie_block->pp, 3554 (u64 __user *)reg->addr); 3555 break; 3556 case KVM_REG_S390_GBEA: 3557 r = get_user(vcpu->arch.sie_block->gbea, 3558 (u64 __user *)reg->addr); 3559 break; 3560 default: 3561 break; 3562 } 3563 3564 return r; 3565 } 3566 3567 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu) 3568 { 3569 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI; 3570 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 3571 memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb)); 3572 3573 kvm_clear_async_pf_completion_queue(vcpu); 3574 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) 3575 kvm_s390_vcpu_stop(vcpu); 3576 kvm_s390_clear_local_irqs(vcpu); 3577 } 3578 3579 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) 3580 { 3581 /* Initial reset is a superset of the normal reset */ 3582 kvm_arch_vcpu_ioctl_normal_reset(vcpu); 3583 3584 /* 3585 * This equals initial cpu reset in pop, but we don't switch to ESA. 3586 * We not only reset the internal data, but also ... 3587 */ 3588 vcpu->arch.sie_block->gpsw.mask = 0; 3589 vcpu->arch.sie_block->gpsw.addr = 0; 3590 kvm_s390_set_prefix(vcpu, 0); 3591 kvm_s390_set_cpu_timer(vcpu, 0); 3592 vcpu->arch.sie_block->ckc = 0; 3593 memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr)); 3594 vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK; 3595 vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK; 3596 3597 /* ... the data in sync regs */ 3598 memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs)); 3599 vcpu->run->s.regs.ckc = 0; 3600 vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK; 3601 vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK; 3602 vcpu->run->psw_addr = 0; 3603 vcpu->run->psw_mask = 0; 3604 vcpu->run->s.regs.todpr = 0; 3605 vcpu->run->s.regs.cputm = 0; 3606 vcpu->run->s.regs.ckc = 0; 3607 vcpu->run->s.regs.pp = 0; 3608 vcpu->run->s.regs.gbea = 1; 3609 vcpu->run->s.regs.fpc = 0; 3610 /* 3611 * Do not reset these registers in the protected case, as some of 3612 * them are overlaid and they are not accessible in this case 3613 * anyway.
3614 */ 3615 if (!kvm_s390_pv_cpu_is_protected(vcpu)) { 3616 vcpu->arch.sie_block->gbea = 1; 3617 vcpu->arch.sie_block->pp = 0; 3618 vcpu->arch.sie_block->fpf &= ~FPF_BPBC; 3619 vcpu->arch.sie_block->todpr = 0; 3620 } 3621 } 3622 3623 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu) 3624 { 3625 struct kvm_sync_regs *regs = &vcpu->run->s.regs; 3626 3627 /* Clear reset is a superset of the initial reset */ 3628 kvm_arch_vcpu_ioctl_initial_reset(vcpu); 3629 3630 memset(&regs->gprs, 0, sizeof(regs->gprs)); 3631 memset(&regs->vrs, 0, sizeof(regs->vrs)); 3632 memset(&regs->acrs, 0, sizeof(regs->acrs)); 3633 memset(&regs->gscb, 0, sizeof(regs->gscb)); 3634 3635 regs->etoken = 0; 3636 regs->etoken_extension = 0; 3637 } 3638 3639 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 3640 { 3641 vcpu_load(vcpu); 3642 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs)); 3643 vcpu_put(vcpu); 3644 return 0; 3645 } 3646 3647 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 3648 { 3649 vcpu_load(vcpu); 3650 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs)); 3651 vcpu_put(vcpu); 3652 return 0; 3653 } 3654 3655 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 3656 struct kvm_sregs *sregs) 3657 { 3658 vcpu_load(vcpu); 3659 3660 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs)); 3661 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs)); 3662 3663 vcpu_put(vcpu); 3664 return 0; 3665 } 3666 3667 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, 3668 struct kvm_sregs *sregs) 3669 { 3670 vcpu_load(vcpu); 3671 3672 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs)); 3673 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs)); 3674 3675 vcpu_put(vcpu); 3676 return 0; 3677 } 3678 3679 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 3680 { 3681 int ret = 0; 3682 3683 vcpu_load(vcpu); 3684 3685 if (test_fp_ctl(fpu->fpc)) { 3686 ret = -EINVAL; 3687 goto out; 3688 } 3689 vcpu->run->s.regs.fpc = fpu->fpc; 3690 if (MACHINE_HAS_VX) 3691 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs, 3692 (freg_t *) fpu->fprs); 3693 else 3694 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs)); 3695 3696 out: 3697 vcpu_put(vcpu); 3698 return ret; 3699 } 3700 3701 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 3702 { 3703 vcpu_load(vcpu); 3704 3705 /* make sure we have the latest values */ 3706 save_fpu_regs(); 3707 if (MACHINE_HAS_VX) 3708 convert_vx_to_fp((freg_t *) fpu->fprs, 3709 (__vector128 *) vcpu->run->s.regs.vrs); 3710 else 3711 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs)); 3712 fpu->fpc = vcpu->run->s.regs.fpc; 3713 3714 vcpu_put(vcpu); 3715 return 0; 3716 } 3717 3718 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw) 3719 { 3720 int rc = 0; 3721 3722 if (!is_vcpu_stopped(vcpu)) 3723 rc = -EBUSY; 3724 else { 3725 vcpu->run->psw_mask = psw.mask; 3726 vcpu->run->psw_addr = psw.addr; 3727 } 3728 return rc; 3729 } 3730 3731 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, 3732 struct kvm_translation *tr) 3733 { 3734 return -EINVAL; /* not implemented yet */ 3735 } 3736 3737 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \ 3738 KVM_GUESTDBG_USE_HW_BP | \ 3739 KVM_GUESTDBG_ENABLE) 3740 3741 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, 3742 struct kvm_guest_debug *dbg) 3743 { 3744 int rc = 0; 3745 3746 vcpu_load(vcpu); 3747
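/* start from a clean debug configuration before validating the new control flags */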
3748 vcpu->guest_debug = 0; 3749 kvm_s390_clear_bp_data(vcpu); 3750 3751 if (dbg->control & ~VALID_GUESTDBG_FLAGS) { 3752 rc = -EINVAL; 3753 goto out; 3754 } 3755 if (!sclp.has_gpere) { 3756 rc = -EINVAL; 3757 goto out; 3758 } 3759 3760 if (dbg->control & KVM_GUESTDBG_ENABLE) { 3761 vcpu->guest_debug = dbg->control; 3762 /* enforce guest PER */ 3763 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P); 3764 3765 if (dbg->control & KVM_GUESTDBG_USE_HW_BP) 3766 rc = kvm_s390_import_bp_data(vcpu, dbg); 3767 } else { 3768 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P); 3769 vcpu->arch.guestdbg.last_bp = 0; 3770 } 3771 3772 if (rc) { 3773 vcpu->guest_debug = 0; 3774 kvm_s390_clear_bp_data(vcpu); 3775 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P); 3776 } 3777 3778 out: 3779 vcpu_put(vcpu); 3780 return rc; 3781 } 3782 3783 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, 3784 struct kvm_mp_state *mp_state) 3785 { 3786 int ret; 3787 3788 vcpu_load(vcpu); 3789 3790 /* CHECK_STOP and LOAD are not supported yet */ 3791 ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED : 3792 KVM_MP_STATE_OPERATING; 3793 3794 vcpu_put(vcpu); 3795 return ret; 3796 } 3797 3798 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, 3799 struct kvm_mp_state *mp_state) 3800 { 3801 int rc = 0; 3802 3803 vcpu_load(vcpu); 3804 3805 /* user space knows about this interface - let it control the state */ 3806 vcpu->kvm->arch.user_cpu_state_ctrl = 1; 3807 3808 switch (mp_state->mp_state) { 3809 case KVM_MP_STATE_STOPPED: 3810 rc = kvm_s390_vcpu_stop(vcpu); 3811 break; 3812 case KVM_MP_STATE_OPERATING: 3813 rc = kvm_s390_vcpu_start(vcpu); 3814 break; 3815 case KVM_MP_STATE_LOAD: 3816 if (!kvm_s390_pv_cpu_is_protected(vcpu)) { 3817 rc = -ENXIO; 3818 break; 3819 } 3820 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD); 3821 break; 3822 case KVM_MP_STATE_CHECK_STOP: 3823 fallthrough; /* CHECK_STOP and LOAD are not supported yet */ 3824 default: 3825 rc = -ENXIO; 3826 } 3827 3828 vcpu_put(vcpu); 3829 return rc; 3830 } 3831 3832 static bool ibs_enabled(struct kvm_vcpu *vcpu) 3833 { 3834 return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS); 3835 } 3836 3837 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) 3838 { 3839 retry: 3840 kvm_s390_vcpu_request_handled(vcpu); 3841 if (!kvm_request_pending(vcpu)) 3842 return 0; 3843 /* 3844 * We use MMU_RELOAD just to re-arm the ipte notifier for the 3845 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock. 3846 * This ensures that the ipte instruction for this request has 3847 * already finished. We might race against a second unmapper that 3848 * wants to set the blocking bit. Lets just retry the request loop. 
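	 * gmap_mprotect_notify() below re-arms the notifier on the two
	 * prefix pages (PAGE_SIZE * 2, i.e. the complete lowcore).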
3849 */ 3850 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) { 3851 int rc; 3852 rc = gmap_mprotect_notify(vcpu->arch.gmap, 3853 kvm_s390_get_prefix(vcpu), 3854 PAGE_SIZE * 2, PROT_WRITE); 3855 if (rc) { 3856 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); 3857 return rc; 3858 } 3859 goto retry; 3860 } 3861 3862 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) { 3863 vcpu->arch.sie_block->ihcpu = 0xffff; 3864 goto retry; 3865 } 3866 3867 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) { 3868 if (!ibs_enabled(vcpu)) { 3869 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1); 3870 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS); 3871 } 3872 goto retry; 3873 } 3874 3875 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) { 3876 if (ibs_enabled(vcpu)) { 3877 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0); 3878 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS); 3879 } 3880 goto retry; 3881 } 3882 3883 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) { 3884 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC; 3885 goto retry; 3886 } 3887 3888 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) { 3889 /* 3890 * Disable CMM virtualization; we will emulate the ESSA 3891 * instruction manually, in order to provide additional 3892 * functionalities needed for live migration. 3893 */ 3894 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA; 3895 goto retry; 3896 } 3897 3898 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) { 3899 /* 3900 * Re-enable CMM virtualization if CMMA is available and 3901 * CMM has been used. 3902 */ 3903 if ((vcpu->kvm->arch.use_cmma) && 3904 (vcpu->kvm->mm->context.uses_cmm)) 3905 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA; 3906 goto retry; 3907 } 3908 3909 /* nothing to do, just clear the request */ 3910 kvm_clear_request(KVM_REQ_UNHALT, vcpu); 3911 /* we left the vsie handler, nothing to do, just clear the request */ 3912 kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu); 3913 3914 return 0; 3915 } 3916 3917 void kvm_s390_set_tod_clock(struct kvm *kvm, 3918 const struct kvm_s390_vm_tod_clock *gtod) 3919 { 3920 struct kvm_vcpu *vcpu; 3921 union tod_clock clk; 3922 int i; 3923 3924 mutex_lock(&kvm->lock); 3925 preempt_disable(); 3926 3927 store_tod_clock_ext(&clk); 3928 3929 kvm->arch.epoch = gtod->tod - clk.tod; 3930 kvm->arch.epdx = 0; 3931 if (test_kvm_facility(kvm, 139)) { 3932 kvm->arch.epdx = gtod->epoch_idx - clk.ei; 3933 if (kvm->arch.epoch > gtod->tod) 3934 kvm->arch.epdx -= 1; 3935 } 3936 3937 kvm_s390_vcpu_block_all(kvm); 3938 kvm_for_each_vcpu(i, vcpu, kvm) { 3939 vcpu->arch.sie_block->epoch = kvm->arch.epoch; 3940 vcpu->arch.sie_block->epdx = kvm->arch.epdx; 3941 } 3942 3943 kvm_s390_vcpu_unblock_all(kvm); 3944 preempt_enable(); 3945 mutex_unlock(&kvm->lock); 3946 } 3947 3948 /** 3949 * kvm_arch_fault_in_page - fault-in guest page if necessary 3950 * @vcpu: The corresponding virtual cpu 3951 * @gpa: Guest physical address 3952 * @writable: Whether the page should be writable or not 3953 * 3954 * Make sure that a guest page has been faulted-in on the host. 3955 * 3956 * Return: Zero on success, negative error code otherwise. 3957 */ 3958 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable) 3959 { 3960 return gmap_fault(vcpu->arch.gmap, gpa, 3961 writable ? 
FAULT_FLAG_WRITE : 0); 3962 } 3963 3964 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token, 3965 unsigned long token) 3966 { 3967 struct kvm_s390_interrupt inti; 3968 struct kvm_s390_irq irq; 3969 3970 if (start_token) { 3971 irq.u.ext.ext_params2 = token; 3972 irq.type = KVM_S390_INT_PFAULT_INIT; 3973 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq)); 3974 } else { 3975 inti.type = KVM_S390_INT_PFAULT_DONE; 3976 inti.parm64 = token; 3977 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti)); 3978 } 3979 } 3980 3981 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, 3982 struct kvm_async_pf *work) 3983 { 3984 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token); 3985 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token); 3986 3987 return true; 3988 } 3989 3990 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, 3991 struct kvm_async_pf *work) 3992 { 3993 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token); 3994 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token); 3995 } 3996 3997 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, 3998 struct kvm_async_pf *work) 3999 { 4000 /* s390 will always inject the page directly */ 4001 } 4002 4003 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu) 4004 { 4005 /* 4006 * s390 will always inject the page directly, 4007 * but we still want check_async_completion to cleanup 4008 */ 4009 return true; 4010 } 4011 4012 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu) 4013 { 4014 hva_t hva; 4015 struct kvm_arch_async_pf arch; 4016 4017 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 4018 return false; 4019 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) != 4020 vcpu->arch.pfault_compare) 4021 return false; 4022 if (psw_extint_disabled(vcpu)) 4023 return false; 4024 if (kvm_s390_vcpu_has_irq(vcpu, 0)) 4025 return false; 4026 if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK)) 4027 return false; 4028 if (!vcpu->arch.gmap->pfault_enabled) 4029 return false; 4030 4031 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr)); 4032 hva += current->thread.gmap_addr & ~PAGE_MASK; 4033 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8)) 4034 return false; 4035 4036 return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch); 4037 } 4038 4039 static int vcpu_pre_run(struct kvm_vcpu *vcpu) 4040 { 4041 int rc, cpuflags; 4042 4043 /* 4044 * On s390 notifications for arriving pages will be delivered directly 4045 * to the guest but the house keeping for completed pfaults is 4046 * handled outside the worker. 
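	 * kvm_check_async_pf_completion() therefore drains the completion
	 * queue from vcpu context before we re-enter SIE.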
4047 */ 4048 kvm_check_async_pf_completion(vcpu); 4049 4050 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14]; 4051 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15]; 4052 4053 if (need_resched()) 4054 schedule(); 4055 4056 if (!kvm_is_ucontrol(vcpu->kvm)) { 4057 rc = kvm_s390_deliver_pending_interrupts(vcpu); 4058 if (rc) 4059 return rc; 4060 } 4061 4062 rc = kvm_s390_handle_requests(vcpu); 4063 if (rc) 4064 return rc; 4065 4066 if (guestdbg_enabled(vcpu)) { 4067 kvm_s390_backup_guest_per_regs(vcpu); 4068 kvm_s390_patch_guest_per_regs(vcpu); 4069 } 4070 4071 clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask); 4072 4073 vcpu->arch.sie_block->icptcode = 0; 4074 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags); 4075 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags); 4076 trace_kvm_s390_sie_enter(vcpu, cpuflags); 4077 4078 return 0; 4079 } 4080 4081 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) 4082 { 4083 struct kvm_s390_pgm_info pgm_info = { 4084 .code = PGM_ADDRESSING, 4085 }; 4086 u8 opcode, ilen; 4087 int rc; 4088 4089 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); 4090 trace_kvm_s390_sie_fault(vcpu); 4091 4092 /* 4093 * We want to inject an addressing exception, which is defined as a 4094 * suppressing or terminating exception. However, since we came here 4095 * by a DAT access exception, the PSW still points to the faulting 4096 * instruction since DAT exceptions are nullifying. So we've got 4097 * to look up the current opcode to get the length of the instruction 4098 * to be able to forward the PSW. 4099 */ 4100 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1); 4101 ilen = insn_length(opcode); 4102 if (rc < 0) { 4103 return rc; 4104 } else if (rc) { 4105 /* Instruction-Fetching Exceptions - we can't detect the ilen. 4106 * Forward by arbitrary ilc, injection will take care of 4107 * nullification if necessary. 
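		 * The program interruption data already collected in
		 * vcpu->arch.pgm is reused for the injection in this case.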
4108 */ 4109 pgm_info = vcpu->arch.pgm; 4110 ilen = 4; 4111 } 4112 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID; 4113 kvm_s390_forward_psw(vcpu, ilen); 4114 return kvm_s390_inject_prog_irq(vcpu, &pgm_info); 4115 } 4116 4117 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) 4118 { 4119 struct mcck_volatile_info *mcck_info; 4120 struct sie_page *sie_page; 4121 4122 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", 4123 vcpu->arch.sie_block->icptcode); 4124 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); 4125 4126 if (guestdbg_enabled(vcpu)) 4127 kvm_s390_restore_guest_per_regs(vcpu); 4128 4129 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14; 4130 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15; 4131 4132 if (exit_reason == -EINTR) { 4133 VCPU_EVENT(vcpu, 3, "%s", "machine check"); 4134 sie_page = container_of(vcpu->arch.sie_block, 4135 struct sie_page, sie_block); 4136 mcck_info = &sie_page->mcck_info; 4137 kvm_s390_reinject_machine_check(vcpu, mcck_info); 4138 return 0; 4139 } 4140 4141 if (vcpu->arch.sie_block->icptcode > 0) { 4142 int rc = kvm_handle_sie_intercept(vcpu); 4143 4144 if (rc != -EOPNOTSUPP) 4145 return rc; 4146 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC; 4147 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; 4148 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; 4149 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; 4150 return -EREMOTE; 4151 } else if (exit_reason != -EFAULT) { 4152 vcpu->stat.exit_null++; 4153 return 0; 4154 } else if (kvm_is_ucontrol(vcpu->kvm)) { 4155 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; 4156 vcpu->run->s390_ucontrol.trans_exc_code = 4157 current->thread.gmap_addr; 4158 vcpu->run->s390_ucontrol.pgm_code = 0x10; 4159 return -EREMOTE; 4160 } else if (current->thread.gmap_pfault) { 4161 trace_kvm_s390_major_guest_pfault(vcpu); 4162 current->thread.gmap_pfault = 0; 4163 if (kvm_arch_setup_async_pf(vcpu)) 4164 return 0; 4165 vcpu->stat.pfault_sync++; 4166 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1); 4167 } 4168 return vcpu_post_run_fault_in_sie(vcpu); 4169 } 4170 4171 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK) 4172 static int __vcpu_run(struct kvm_vcpu *vcpu) 4173 { 4174 int rc, exit_reason; 4175 struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block; 4176 4177 /* 4178 * We try to hold kvm->srcu during most of vcpu_run (except when run- 4179 * ning the guest), so that memslots (and other stuff) are protected 4180 */ 4181 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 4182 4183 do { 4184 rc = vcpu_pre_run(vcpu); 4185 if (rc) 4186 break; 4187 4188 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 4189 /* 4190 * As PF_VCPU will be used in fault handler, between 4191 * guest_enter and guest_exit should be no uaccess. 
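		 * In other words, no user space accesses are allowed between
		 * guest_enter_irqoff() and guest_exit_irqoff() below.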
4192 */ 4193 local_irq_disable(); 4194 guest_enter_irqoff(); 4195 __disable_cpu_timer_accounting(vcpu); 4196 local_irq_enable(); 4197 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4198 memcpy(sie_page->pv_grregs, 4199 vcpu->run->s.regs.gprs, 4200 sizeof(sie_page->pv_grregs)); 4201 } 4202 if (test_cpu_flag(CIF_FPU)) 4203 load_fpu_regs(); 4204 exit_reason = sie64a(vcpu->arch.sie_block, 4205 vcpu->run->s.regs.gprs); 4206 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4207 memcpy(vcpu->run->s.regs.gprs, 4208 sie_page->pv_grregs, 4209 sizeof(sie_page->pv_grregs)); 4210 /* 4211 * We're not allowed to inject interrupts on intercepts 4212 * that leave the guest state in an "in-between" state 4213 * where the next SIE entry will do a continuation. 4214 * Fence interrupts in our "internal" PSW. 4215 */ 4216 if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR || 4217 vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) { 4218 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK; 4219 } 4220 } 4221 local_irq_disable(); 4222 __enable_cpu_timer_accounting(vcpu); 4223 guest_exit_irqoff(); 4224 local_irq_enable(); 4225 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 4226 4227 rc = vcpu_post_run(vcpu, exit_reason); 4228 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc); 4229 4230 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 4231 return rc; 4232 } 4233 4234 static void sync_regs_fmt2(struct kvm_vcpu *vcpu) 4235 { 4236 struct kvm_run *kvm_run = vcpu->run; 4237 struct runtime_instr_cb *riccb; 4238 struct gs_cb *gscb; 4239 4240 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb; 4241 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb; 4242 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; 4243 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; 4244 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) { 4245 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr; 4246 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp; 4247 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea; 4248 } 4249 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) { 4250 vcpu->arch.pfault_token = kvm_run->s.regs.pft; 4251 vcpu->arch.pfault_select = kvm_run->s.regs.pfs; 4252 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc; 4253 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 4254 kvm_clear_async_pf_completion_queue(vcpu); 4255 } 4256 if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) { 4257 vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318; 4258 vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc; 4259 } 4260 /* 4261 * If userspace sets the riccb (e.g. after migration) to a valid state, 4262 * we should enable RI here instead of doing the lazy enablement. 4263 */ 4264 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) && 4265 test_kvm_facility(vcpu->kvm, 64) && 4266 riccb->v && 4267 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) { 4268 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)"); 4269 vcpu->arch.sie_block->ecb3 |= ECB3_RI; 4270 } 4271 /* 4272 * If userspace sets the gscb (e.g. after migration) to non-zero, 4273 * we should enable GS here instead of doing the lazy enablement. 
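	 * This mirrors the riccb handling above.  Illustrative sketch of the
	 * userspace side (not taken from this file; saved_gscb is the VMM's
	 * own copy):
	 *	memcpy(run->s.regs.gscb, saved_gscb, sizeof(run->s.regs.gscb));
	 *	run->kvm_dirty_regs |= KVM_SYNC_GSCB;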
4274 */ 4275 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) && 4276 test_kvm_facility(vcpu->kvm, 133) && 4277 gscb->gssm && 4278 !vcpu->arch.gs_enabled) { 4279 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)"); 4280 vcpu->arch.sie_block->ecb |= ECB_GS; 4281 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; 4282 vcpu->arch.gs_enabled = 1; 4283 } 4284 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) && 4285 test_kvm_facility(vcpu->kvm, 82)) { 4286 vcpu->arch.sie_block->fpf &= ~FPF_BPBC; 4287 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0; 4288 } 4289 if (MACHINE_HAS_GS) { 4290 preempt_disable(); 4291 __ctl_set_bit(2, 4); 4292 if (current->thread.gs_cb) { 4293 vcpu->arch.host_gscb = current->thread.gs_cb; 4294 save_gs_cb(vcpu->arch.host_gscb); 4295 } 4296 if (vcpu->arch.gs_enabled) { 4297 current->thread.gs_cb = (struct gs_cb *) 4298 &vcpu->run->s.regs.gscb; 4299 restore_gs_cb(current->thread.gs_cb); 4300 } 4301 preempt_enable(); 4302 } 4303 /* SIE will load etoken directly from SDNX and therefore kvm_run */ 4304 } 4305 4306 static void sync_regs(struct kvm_vcpu *vcpu) 4307 { 4308 struct kvm_run *kvm_run = vcpu->run; 4309 4310 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) 4311 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); 4312 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) { 4313 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128); 4314 /* some control register changes require a tlb flush */ 4315 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 4316 } 4317 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) { 4318 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm); 4319 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc; 4320 } 4321 save_access_regs(vcpu->arch.host_acrs); 4322 restore_access_regs(vcpu->run->s.regs.acrs); 4323 /* save host (userspace) fprs/vrs */ 4324 save_fpu_regs(); 4325 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc; 4326 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs; 4327 if (MACHINE_HAS_VX) 4328 current->thread.fpu.regs = vcpu->run->s.regs.vrs; 4329 else 4330 current->thread.fpu.regs = vcpu->run->s.regs.fprs; 4331 current->thread.fpu.fpc = vcpu->run->s.regs.fpc; 4332 if (test_fp_ctl(current->thread.fpu.fpc)) 4333 /* User space provided an invalid FPC, let's clear it */ 4334 current->thread.fpu.fpc = 0; 4335 4336 /* Sync fmt2 only data */ 4337 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) { 4338 sync_regs_fmt2(vcpu); 4339 } else { 4340 /* 4341 * In several places we have to modify our internal view to 4342 * not do things that are disallowed by the ultravisor. For 4343 * example we must not inject interrupts after specific exits 4344 * (e.g. 112 prefix page not secure). We do this by turning 4345 * off the machine check, external and I/O interrupt bits 4346 * of our PSW copy. To avoid getting validity intercepts, we 4347 * do only accept the condition code from userspace. 
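		 * All other PSW mask bits are kept from our internal copy.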
4348 */ 4349 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC; 4350 vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask & 4351 PSW_MASK_CC; 4352 } 4353 4354 kvm_run->kvm_dirty_regs = 0; 4355 } 4356 4357 static void store_regs_fmt2(struct kvm_vcpu *vcpu) 4358 { 4359 struct kvm_run *kvm_run = vcpu->run; 4360 4361 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr; 4362 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp; 4363 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea; 4364 kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC; 4365 kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val; 4366 if (MACHINE_HAS_GS) { 4367 preempt_disable(); 4368 __ctl_set_bit(2, 4); 4369 if (vcpu->arch.gs_enabled) 4370 save_gs_cb(current->thread.gs_cb); 4371 current->thread.gs_cb = vcpu->arch.host_gscb; 4372 restore_gs_cb(vcpu->arch.host_gscb); 4373 if (!vcpu->arch.host_gscb) 4374 __ctl_clear_bit(2, 4); 4375 vcpu->arch.host_gscb = NULL; 4376 preempt_enable(); 4377 } 4378 /* SIE will save etoken directly into SDNX and therefore kvm_run */ 4379 } 4380 4381 static void store_regs(struct kvm_vcpu *vcpu) 4382 { 4383 struct kvm_run *kvm_run = vcpu->run; 4384 4385 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask; 4386 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; 4387 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu); 4388 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128); 4389 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu); 4390 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc; 4391 kvm_run->s.regs.pft = vcpu->arch.pfault_token; 4392 kvm_run->s.regs.pfs = vcpu->arch.pfault_select; 4393 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare; 4394 save_access_regs(vcpu->run->s.regs.acrs); 4395 restore_access_regs(vcpu->arch.host_acrs); 4396 /* Save guest register state */ 4397 save_fpu_regs(); 4398 vcpu->run->s.regs.fpc = current->thread.fpu.fpc; 4399 /* Restore will be done lazily at return */ 4400 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc; 4401 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs; 4402 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) 4403 store_regs_fmt2(vcpu); 4404 } 4405 4406 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) 4407 { 4408 struct kvm_run *kvm_run = vcpu->run; 4409 int rc; 4410 4411 if (kvm_run->immediate_exit) 4412 return -EINTR; 4413 4414 if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS || 4415 kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS) 4416 return -EINVAL; 4417 4418 vcpu_load(vcpu); 4419 4420 if (guestdbg_exit_pending(vcpu)) { 4421 kvm_s390_prepare_debug_exit(vcpu); 4422 rc = 0; 4423 goto out; 4424 } 4425 4426 kvm_sigset_activate(vcpu); 4427 4428 /* 4429 * no need to check the return value of vcpu_start as it can only have 4430 * an error for protvirt, but protvirt means user cpu state 4431 */ 4432 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) { 4433 kvm_s390_vcpu_start(vcpu); 4434 } else if (is_vcpu_stopped(vcpu)) { 4435 pr_err_ratelimited("can't run stopped vcpu %d\n", 4436 vcpu->vcpu_id); 4437 rc = -EINVAL; 4438 goto out; 4439 } 4440 4441 sync_regs(vcpu); 4442 enable_cpu_timer_accounting(vcpu); 4443 4444 might_fault(); 4445 rc = __vcpu_run(vcpu); 4446 4447 if (signal_pending(current) && !rc) { 4448 kvm_run->exit_reason = KVM_EXIT_INTR; 4449 rc = -EINTR; 4450 } 4451 4452 if (guestdbg_exit_pending(vcpu) && !rc) { 4453 kvm_s390_prepare_debug_exit(vcpu); 4454 rc = 0; 4455 } 4456 4457 if (rc == -EREMOTE) { 4458 /* userspace support is needed, kvm_run has been prepared */ 4459 rc = 0; 4460 } 4461 4462 
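	/*
	 * Sync the guest state back into kvm_run before returning to
	 * userspace.  Illustrative sketch (not taken from this file) of how
	 * a VMM typically drives this path, assuming a vcpu fd and the mmap
	 * size obtained via KVM_GET_VCPU_MMAP_SIZE:
	 *
	 *	struct kvm_run *run = mmap(NULL, mmap_size,
	 *				   PROT_READ | PROT_WRITE, MAP_SHARED,
	 *				   vcpu_fd, 0);
	 *	while (!ioctl(vcpu_fd, KVM_RUN, 0))
	 *		handle_exit(run);
	 *
	 * where handle_exit() (hypothetical) inspects run->exit_reason,
	 * e.g. KVM_EXIT_S390_SIEIC for the -EREMOTE case handled above.
	 */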
disable_cpu_timer_accounting(vcpu); 4463 store_regs(vcpu); 4464 4465 kvm_sigset_deactivate(vcpu); 4466 4467 vcpu->stat.exit_userspace++; 4468 out: 4469 vcpu_put(vcpu); 4470 return rc; 4471 } 4472 4473 /* 4474 * store status at address 4475 * we use have two special cases: 4476 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit 4477 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix 4478 */ 4479 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa) 4480 { 4481 unsigned char archmode = 1; 4482 freg_t fprs[NUM_FPRS]; 4483 unsigned int px; 4484 u64 clkcomp, cputm; 4485 int rc; 4486 4487 px = kvm_s390_get_prefix(vcpu); 4488 if (gpa == KVM_S390_STORE_STATUS_NOADDR) { 4489 if (write_guest_abs(vcpu, 163, &archmode, 1)) 4490 return -EFAULT; 4491 gpa = 0; 4492 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) { 4493 if (write_guest_real(vcpu, 163, &archmode, 1)) 4494 return -EFAULT; 4495 gpa = px; 4496 } else 4497 gpa -= __LC_FPREGS_SAVE_AREA; 4498 4499 /* manually convert vector registers if necessary */ 4500 if (MACHINE_HAS_VX) { 4501 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs); 4502 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, 4503 fprs, 128); 4504 } else { 4505 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, 4506 vcpu->run->s.regs.fprs, 128); 4507 } 4508 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA, 4509 vcpu->run->s.regs.gprs, 128); 4510 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA, 4511 &vcpu->arch.sie_block->gpsw, 16); 4512 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA, 4513 &px, 4); 4514 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA, 4515 &vcpu->run->s.regs.fpc, 4); 4516 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA, 4517 &vcpu->arch.sie_block->todpr, 4); 4518 cputm = kvm_s390_get_cpu_timer(vcpu); 4519 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA, 4520 &cputm, 8); 4521 clkcomp = vcpu->arch.sie_block->ckc >> 8; 4522 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA, 4523 &clkcomp, 8); 4524 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA, 4525 &vcpu->run->s.regs.acrs, 64); 4526 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA, 4527 &vcpu->arch.sie_block->gcr, 128); 4528 return rc ? -EFAULT : 0; 4529 } 4530 4531 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) 4532 { 4533 /* 4534 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy 4535 * switch in the run ioctl. 
Let's update our copies before we save 4536 * it into the save area 4537 */ 4538 save_fpu_regs(); 4539 vcpu->run->s.regs.fpc = current->thread.fpu.fpc; 4540 save_access_regs(vcpu->run->s.regs.acrs); 4541 4542 return kvm_s390_store_status_unloaded(vcpu, addr); 4543 } 4544 4545 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu) 4546 { 4547 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu); 4548 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu); 4549 } 4550 4551 static void __disable_ibs_on_all_vcpus(struct kvm *kvm) 4552 { 4553 unsigned int i; 4554 struct kvm_vcpu *vcpu; 4555 4556 kvm_for_each_vcpu(i, vcpu, kvm) { 4557 __disable_ibs_on_vcpu(vcpu); 4558 } 4559 } 4560 4561 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu) 4562 { 4563 if (!sclp.has_ibs) 4564 return; 4565 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu); 4566 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu); 4567 } 4568 4569 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu) 4570 { 4571 int i, online_vcpus, r = 0, started_vcpus = 0; 4572 4573 if (!is_vcpu_stopped(vcpu)) 4574 return 0; 4575 4576 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1); 4577 /* Only one cpu at a time may enter/leave the STOPPED state. */ 4578 spin_lock(&vcpu->kvm->arch.start_stop_lock); 4579 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); 4580 4581 /* Let's tell the UV that we want to change into the operating state */ 4582 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4583 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR); 4584 if (r) { 4585 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 4586 return r; 4587 } 4588 } 4589 4590 for (i = 0; i < online_vcpus; i++) { 4591 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) 4592 started_vcpus++; 4593 } 4594 4595 if (started_vcpus == 0) { 4596 /* we're the only active VCPU -> speed it up */ 4597 __enable_ibs_on_vcpu(vcpu); 4598 } else if (started_vcpus == 1) { 4599 /* 4600 * As we are starting a second VCPU, we have to disable 4601 * the IBS facility on all VCPUs to remove potentially 4602 * outstanding ENABLE requests. 4603 */ 4604 __disable_ibs_on_all_vcpus(vcpu->kvm); 4605 } 4606 4607 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED); 4608 /* 4609 * The real PSW might have changed due to a RESTART interpreted by the 4610 * ultravisor. We block all interrupts and let the next sie exit 4611 * refresh our view. 4612 */ 4613 if (kvm_s390_pv_cpu_is_protected(vcpu)) 4614 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK; 4615 /* 4616 * Another VCPU might have used IBS while we were offline. 4617 * Let's play safe and flush the VCPU at startup. 4618 */ 4619 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 4620 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 4621 return 0; 4622 } 4623 4624 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu) 4625 { 4626 int i, online_vcpus, r = 0, started_vcpus = 0; 4627 struct kvm_vcpu *started_vcpu = NULL; 4628 4629 if (is_vcpu_stopped(vcpu)) 4630 return 0; 4631 4632 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0); 4633 /* Only one cpu at a time may enter/leave the STOPPED state. 
*/ 4634 spin_lock(&vcpu->kvm->arch.start_stop_lock); 4635 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); 4636 4637 /* Let's tell the UV that we want to change into the stopped state */ 4638 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4639 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP); 4640 if (r) { 4641 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 4642 return r; 4643 } 4644 } 4645 4646 /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */ 4647 kvm_s390_clear_stop_irq(vcpu); 4648 4649 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED); 4650 __disable_ibs_on_vcpu(vcpu); 4651 4652 for (i = 0; i < online_vcpus; i++) { 4653 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) { 4654 started_vcpus++; 4655 started_vcpu = vcpu->kvm->vcpus[i]; 4656 } 4657 } 4658 4659 if (started_vcpus == 1) { 4660 /* 4661 * As we only have one VCPU left, we want to enable the 4662 * IBS facility for that VCPU to speed it up. 4663 */ 4664 __enable_ibs_on_vcpu(started_vcpu); 4665 } 4666 4667 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 4668 return 0; 4669 } 4670 4671 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, 4672 struct kvm_enable_cap *cap) 4673 { 4674 int r; 4675 4676 if (cap->flags) 4677 return -EINVAL; 4678 4679 switch (cap->cap) { 4680 case KVM_CAP_S390_CSS_SUPPORT: 4681 if (!vcpu->kvm->arch.css_support) { 4682 vcpu->kvm->arch.css_support = 1; 4683 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support"); 4684 trace_kvm_s390_enable_css(vcpu->kvm); 4685 } 4686 r = 0; 4687 break; 4688 default: 4689 r = -EINVAL; 4690 break; 4691 } 4692 return r; 4693 } 4694 4695 static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu, 4696 struct kvm_s390_mem_op *mop) 4697 { 4698 void __user *uaddr = (void __user *)mop->buf; 4699 int r = 0; 4700 4701 if (mop->flags || !mop->size) 4702 return -EINVAL; 4703 if (mop->size + mop->sida_offset < mop->size) 4704 return -EINVAL; 4705 if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block)) 4706 return -E2BIG; 4707 4708 switch (mop->op) { 4709 case KVM_S390_MEMOP_SIDA_READ: 4710 if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) + 4711 mop->sida_offset), mop->size)) 4712 r = -EFAULT; 4713 4714 break; 4715 case KVM_S390_MEMOP_SIDA_WRITE: 4716 if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) + 4717 mop->sida_offset), uaddr, mop->size)) 4718 r = -EFAULT; 4719 break; 4720 } 4721 return r; 4722 } 4723 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, 4724 struct kvm_s390_mem_op *mop) 4725 { 4726 void __user *uaddr = (void __user *)mop->buf; 4727 void *tmpbuf = NULL; 4728 int r = 0; 4729 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION 4730 | KVM_S390_MEMOP_F_CHECK_ONLY; 4731 4732 if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size) 4733 return -EINVAL; 4734 4735 if (mop->size > MEM_OP_MAX_SIZE) 4736 return -E2BIG; 4737 4738 if (kvm_s390_pv_cpu_is_protected(vcpu)) 4739 return -EINVAL; 4740 4741 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) { 4742 tmpbuf = vmalloc(mop->size); 4743 if (!tmpbuf) 4744 return -ENOMEM; 4745 } 4746 4747 switch (mop->op) { 4748 case KVM_S390_MEMOP_LOGICAL_READ: 4749 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { 4750 r = check_gva_range(vcpu, mop->gaddr, mop->ar, 4751 mop->size, GACC_FETCH); 4752 break; 4753 } 4754 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size); 4755 if (r == 0) { 4756 if (copy_to_user(uaddr, tmpbuf, mop->size)) 4757 r = -EFAULT; 4758 } 4759 break; 4760 case KVM_S390_MEMOP_LOGICAL_WRITE: 4761 if (mop->flags & 
KVM_S390_MEMOP_F_CHECK_ONLY) { 4762 r = check_gva_range(vcpu, mop->gaddr, mop->ar, 4763 mop->size, GACC_STORE); 4764 break; 4765 } 4766 if (copy_from_user(tmpbuf, uaddr, mop->size)) { 4767 r = -EFAULT; 4768 break; 4769 } 4770 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size); 4771 break; 4772 } 4773 4774 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0) 4775 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); 4776 4777 vfree(tmpbuf); 4778 return r; 4779 } 4780 4781 static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu, 4782 struct kvm_s390_mem_op *mop) 4783 { 4784 int r, srcu_idx; 4785 4786 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 4787 4788 switch (mop->op) { 4789 case KVM_S390_MEMOP_LOGICAL_READ: 4790 case KVM_S390_MEMOP_LOGICAL_WRITE: 4791 r = kvm_s390_guest_mem_op(vcpu, mop); 4792 break; 4793 case KVM_S390_MEMOP_SIDA_READ: 4794 case KVM_S390_MEMOP_SIDA_WRITE: 4795 /* we are locked against sida going away by the vcpu->mutex */ 4796 r = kvm_s390_guest_sida_op(vcpu, mop); 4797 break; 4798 default: 4799 r = -EINVAL; 4800 } 4801 4802 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); 4803 return r; 4804 } 4805 4806 long kvm_arch_vcpu_async_ioctl(struct file *filp, 4807 unsigned int ioctl, unsigned long arg) 4808 { 4809 struct kvm_vcpu *vcpu = filp->private_data; 4810 void __user *argp = (void __user *)arg; 4811 4812 switch (ioctl) { 4813 case KVM_S390_IRQ: { 4814 struct kvm_s390_irq s390irq; 4815 4816 if (copy_from_user(&s390irq, argp, sizeof(s390irq))) 4817 return -EFAULT; 4818 return kvm_s390_inject_vcpu(vcpu, &s390irq); 4819 } 4820 case KVM_S390_INTERRUPT: { 4821 struct kvm_s390_interrupt s390int; 4822 struct kvm_s390_irq s390irq = {}; 4823 4824 if (copy_from_user(&s390int, argp, sizeof(s390int))) 4825 return -EFAULT; 4826 if (s390int_to_s390irq(&s390int, &s390irq)) 4827 return -EINVAL; 4828 return kvm_s390_inject_vcpu(vcpu, &s390irq); 4829 } 4830 } 4831 return -ENOIOCTLCMD; 4832 } 4833 4834 long kvm_arch_vcpu_ioctl(struct file *filp, 4835 unsigned int ioctl, unsigned long arg) 4836 { 4837 struct kvm_vcpu *vcpu = filp->private_data; 4838 void __user *argp = (void __user *)arg; 4839 int idx; 4840 long r; 4841 u16 rc, rrc; 4842 4843 vcpu_load(vcpu); 4844 4845 switch (ioctl) { 4846 case KVM_S390_STORE_STATUS: 4847 idx = srcu_read_lock(&vcpu->kvm->srcu); 4848 r = kvm_s390_store_status_unloaded(vcpu, arg); 4849 srcu_read_unlock(&vcpu->kvm->srcu, idx); 4850 break; 4851 case KVM_S390_SET_INITIAL_PSW: { 4852 psw_t psw; 4853 4854 r = -EFAULT; 4855 if (copy_from_user(&psw, argp, sizeof(psw))) 4856 break; 4857 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw); 4858 break; 4859 } 4860 case KVM_S390_CLEAR_RESET: 4861 r = 0; 4862 kvm_arch_vcpu_ioctl_clear_reset(vcpu); 4863 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4864 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), 4865 UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc); 4866 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x", 4867 rc, rrc); 4868 } 4869 break; 4870 case KVM_S390_INITIAL_RESET: 4871 r = 0; 4872 kvm_arch_vcpu_ioctl_initial_reset(vcpu); 4873 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4874 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), 4875 UVC_CMD_CPU_RESET_INITIAL, 4876 &rc, &rrc); 4877 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x", 4878 rc, rrc); 4879 } 4880 break; 4881 case KVM_S390_NORMAL_RESET: 4882 r = 0; 4883 kvm_arch_vcpu_ioctl_normal_reset(vcpu); 4884 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4885 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), 4886 UVC_CMD_CPU_RESET, 
					  &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;
		r = -EINVAL;
		if (kvm_s390_pv_cpu_is_protected(vcpu))
			break;
		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}

	vcpu_put(vcpu);
	return r;
}

vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks.
We can have memory slots which have to be 5026 located/ended at a segment boundary (1MB). The memory in userland is 5027 ok to be fragmented into various different vmas. It is okay to mmap() 5028 and munmap() stuff in this slot after doing this call at any time */ 5029 5030 if (mem->userspace_addr & 0xffffful) 5031 return -EINVAL; 5032 5033 if (mem->memory_size & 0xffffful) 5034 return -EINVAL; 5035 5036 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit) 5037 return -EINVAL; 5038 5039 /* When we are protected, we should not change the memory slots */ 5040 if (kvm_s390_pv_get_handle(kvm)) 5041 return -EINVAL; 5042 return 0; 5043 } 5044 5045 void kvm_arch_commit_memory_region(struct kvm *kvm, 5046 const struct kvm_userspace_memory_region *mem, 5047 struct kvm_memory_slot *old, 5048 const struct kvm_memory_slot *new, 5049 enum kvm_mr_change change) 5050 { 5051 int rc = 0; 5052 5053 switch (change) { 5054 case KVM_MR_DELETE: 5055 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE, 5056 old->npages * PAGE_SIZE); 5057 break; 5058 case KVM_MR_MOVE: 5059 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE, 5060 old->npages * PAGE_SIZE); 5061 if (rc) 5062 break; 5063 fallthrough; 5064 case KVM_MR_CREATE: 5065 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr, 5066 mem->guest_phys_addr, mem->memory_size); 5067 break; 5068 case KVM_MR_FLAGS_ONLY: 5069 break; 5070 default: 5071 WARN(1, "Unknown KVM MR CHANGE: %d\n", change); 5072 } 5073 if (rc) 5074 pr_warn("failed to commit memory region\n"); 5075 return; 5076 } 5077 5078 static inline unsigned long nonhyp_mask(int i) 5079 { 5080 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30; 5081 5082 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4); 5083 } 5084 5085 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) 5086 { 5087 vcpu->valid_wakeup = false; 5088 } 5089 5090 static int __init kvm_s390_init(void) 5091 { 5092 int i; 5093 5094 if (!sclp.has_sief2) { 5095 pr_info("SIE is not available\n"); 5096 return -ENODEV; 5097 } 5098 5099 if (nested && hpage) { 5100 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n"); 5101 return -EINVAL; 5102 } 5103 5104 for (i = 0; i < 16; i++) 5105 kvm_s390_fac_base[i] |= 5106 stfle_fac_list[i] & nonhyp_mask(i); 5107 5108 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); 5109 } 5110 5111 static void __exit kvm_s390_exit(void) 5112 { 5113 kvm_exit(); 5114 } 5115 5116 module_init(kvm_s390_init); 5117 module_exit(kvm_s390_exit); 5118 5119 /* 5120 * Enable autoloading of the kvm module. 5121 * Note that we add the module alias here instead of virt/kvm/kvm_main.c 5122 * since x86 takes a different approach. 5123 */ 5124 #include <linux/miscdevice.h> 5125 MODULE_ALIAS_MISCDEV(KVM_MINOR); 5126 MODULE_ALIAS("devname:kvm"); 5127