// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2020
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>
#include <linux/pgtable.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include <asm/ap.h>
#include <asm/uv.h>
#include <asm/fpu/api.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
	KVM_GENERIC_VM_STATS(),
	STATS_DESC_COUNTER(VM, inject_io),
	STATS_DESC_COUNTER(VM, inject_float_mchk),
	STATS_DESC_COUNTER(VM, inject_pfault_done),
	STATS_DESC_COUNTER(VM, inject_service_signal),
	STATS_DESC_COUNTER(VM, inject_virtio)
};

const struct kvm_stats_header kvm_vm_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vm_stats_desc),
};

const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
	KVM_GENERIC_VCPU_STATS(),
	STATS_DESC_COUNTER(VCPU, exit_userspace),
	STATS_DESC_COUNTER(VCPU, exit_null),
	STATS_DESC_COUNTER(VCPU, exit_external_request),
	STATS_DESC_COUNTER(VCPU, exit_io_request),
	STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
	STATS_DESC_COUNTER(VCPU, exit_stop_request),
	STATS_DESC_COUNTER(VCPU, exit_validity),
	STATS_DESC_COUNTER(VCPU, exit_instruction),
	STATS_DESC_COUNTER(VCPU, exit_pei),
	STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
	STATS_DESC_COUNTER(VCPU, instruction_lctl),
	STATS_DESC_COUNTER(VCPU, instruction_lctlg),
	STATS_DESC_COUNTER(VCPU, instruction_stctl),
	STATS_DESC_COUNTER(VCPU, instruction_stctg),
	STATS_DESC_COUNTER(VCPU, exit_program_interruption),
	STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
	STATS_DESC_COUNTER(VCPU, exit_operation_exception),
	STATS_DESC_COUNTER(VCPU, deliver_ckc),
	STATS_DESC_COUNTER(VCPU, deliver_cputm),
	STATS_DESC_COUNTER(VCPU, deliver_external_call),
	STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
	STATS_DESC_COUNTER(VCPU, deliver_service_signal),
	STATS_DESC_COUNTER(VCPU, deliver_virtio),
	STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
	STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
	STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
	STATS_DESC_COUNTER(VCPU, deliver_program),
	STATS_DESC_COUNTER(VCPU, deliver_io),
	STATS_DESC_COUNTER(VCPU, deliver_machine_check),
	STATS_DESC_COUNTER(VCPU, exit_wait_state),
	STATS_DESC_COUNTER(VCPU, inject_ckc),
	STATS_DESC_COUNTER(VCPU, inject_cputm),
	STATS_DESC_COUNTER(VCPU, inject_external_call),
	STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
	STATS_DESC_COUNTER(VCPU, inject_mchk),
	STATS_DESC_COUNTER(VCPU, inject_pfault_init),
	STATS_DESC_COUNTER(VCPU, inject_program),
	STATS_DESC_COUNTER(VCPU, inject_restart),
	STATS_DESC_COUNTER(VCPU, inject_set_prefix),
	STATS_DESC_COUNTER(VCPU, inject_stop_signal),
	STATS_DESC_COUNTER(VCPU, instruction_epsw),
	STATS_DESC_COUNTER(VCPU, instruction_gs),
	STATS_DESC_COUNTER(VCPU, instruction_io_other),
	STATS_DESC_COUNTER(VCPU, instruction_lpsw),
	STATS_DESC_COUNTER(VCPU, instruction_lpswe),
	STATS_DESC_COUNTER(VCPU, instruction_pfmf),
	STATS_DESC_COUNTER(VCPU, instruction_ptff),
	STATS_DESC_COUNTER(VCPU, instruction_sck),
	STATS_DESC_COUNTER(VCPU, instruction_sckpf),
	STATS_DESC_COUNTER(VCPU, instruction_stidp),
	STATS_DESC_COUNTER(VCPU, instruction_spx),
	STATS_DESC_COUNTER(VCPU, instruction_stpx),
	STATS_DESC_COUNTER(VCPU, instruction_stap),
	STATS_DESC_COUNTER(VCPU, instruction_iske),
	STATS_DESC_COUNTER(VCPU, instruction_ri),
	STATS_DESC_COUNTER(VCPU, instruction_rrbe),
	STATS_DESC_COUNTER(VCPU, instruction_sske),
	STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
	STATS_DESC_COUNTER(VCPU, instruction_stsi),
	STATS_DESC_COUNTER(VCPU, instruction_stfl),
	STATS_DESC_COUNTER(VCPU, instruction_tb),
	STATS_DESC_COUNTER(VCPU, instruction_tpi),
	STATS_DESC_COUNTER(VCPU, instruction_tprot),
	STATS_DESC_COUNTER(VCPU, instruction_tsch),
	STATS_DESC_COUNTER(VCPU, instruction_sie),
	STATS_DESC_COUNTER(VCPU, instruction_essa),
	STATS_DESC_COUNTER(VCPU, instruction_sthyi),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
	STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
	STATS_DESC_COUNTER(VCPU, diag_9c_forward),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
	STATS_DESC_COUNTER(VCPU, pfault_sync)
};

const struct kvm_stats_header kvm_vcpu_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vcpu_stats_desc),
};

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* allow 1m huge page guest backing, if !nested */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");

/* maximum percentage of steal time for polling. >100 is treated like 100 */
static u8 halt_poll_max_steal = 10;
module_param(halt_poll_max_steal, byte, 0644);
MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");

/* if set to true, the GISA will be initialized and used if available */
static bool use_gisa = true;
module_param(use_gisa, bool, 0644);
MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");

/* maximum diag9c forwarding per second */
unsigned int diag9c_forwarding_hz;
module_param(diag9c_forwarding_hz, uint, 0644);
MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");

/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, this requires changes to code, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };

static unsigned long kvm_s390_fac_size(void)
{
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
		     sizeof(stfle_fac_list));

	return SIZE_INTERNAL;
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
debug_info_t *kvm_s390_dbf_uv;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

int kvm_arch_check_processor_compat(void *opaque)
{
	return 0;
}

/* forward declarations */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
static int sca_switch_to_extended(struct kvm *kvm);

static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
	u8 delta_idx = 0;

	/*
	 * The TOD jumps by delta, we have to compensate this by adding
	 * -delta to the epoch.
	 */
	delta = -delta;

	/* sign-extension - we're adding to signed values below */
	if ((s64)delta < 0)
		delta_idx = -1;

	scb->epoch += delta;
	if (scb->ecd & ECD_MEF) {
		scb->epdx += delta_idx;
		if (scb->epoch < delta)
			scb->epdx += 1;
	}
}
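
/*
 * Worked example for the compensation above (illustrative values only): if
 * the TOD jumps forward by delta = 0x10, we add -0x10 (0xfffffffffffffff0)
 * to the epoch. For an old epoch of 0x8 the 64-bit addition yields
 * 0xfffffffffffffff8 without producing a carry, delta_idx = -1 is added to
 * the epoch extension, and the carry check (new epoch < delta) does not
 * fire; the net effect is the borrow of a widened subtraction split across
 * epdx:epoch.
 */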
/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	unsigned long i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm_for_each_vcpu(i, vcpu, kvm) {
			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
			if (i == 0) {
				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
			}
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				kvm_clock_sync_scb(vcpu->arch.vsie_block,
						   *delta);
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void *opaque)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

static inline int plo_test_bit(unsigned char nr)
{
	unsigned long function = (unsigned long)nr | 0x100;
	int cc;

	asm volatile(
		" lgr 0,%[function]\n"
		/* Parameter registers are ignored for "test bit" */
		" plo 0,0,0,0(0)\n"
		" ipm %0\n"
		" srl %0,28\n"
		: "=d" (cc)
		: [function] "d" (function)
		: "cc", "0");
	return cc == 0;
}

static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
{
	asm volatile(
		" lghi 0,0\n"
		" lgr 1,%[query]\n"
		/* Parameter registers are ignored */
		" .insn rrf,%[opc] << 16,2,4,6,0\n"
		:
		: [query] "d" ((unsigned long)query), [opc] "i" (opcode)
		: "cc", "memory", "0", "1");
}

#define INSN_SORTL 0xb938
#define INSN_DFLTCC 0xb939

static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);

	if (test_facility(155)) /* MSA9 */
		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kdsa);

	if (test_facility(150)) /* SORTL */
		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);

	if (test_facility(151)) /* DFLTCC */
		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}

int kvm_arch_init(void *opaque)
{
	int rc = -ENOMEM;

	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf_uv)
		goto out;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
		goto out;

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
	if (rc) {
		pr_err("A FLIC registration call failed with rc=%d\n", rc);
		goto out;
	}

	rc = kvm_s390_gib_init(GAL_ISC);
	if (rc)
		goto out;

	return 0;

out:
	kvm_arch_exit();
	return rc;
}

void kvm_arch_exit(void)
{
	kvm_s390_gib_destroy();
	debug_unregister(kvm_s390_dbf);
	debug_unregister(kvm_s390_dbf_uv);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
	case KVM_CAP_S390_AIS_MIGRATION:
	case KVM_CAP_S390_VCPU_RESETS:
	case KVM_CAP_SET_GUEST_DEBUG:
	case KVM_CAP_S390_DIAG318:
	case KVM_CAP_S390_MEM_OP_EXTENSION:
		r = 1;
		break;
	case KVM_CAP_SET_GUEST_DEBUG2:
		r = KVM_GUESTDBG_VALID_MASK;
		break;
	case KVM_CAP_S390_HPAGE_1M:
		r = 0;
		if (hpage && !kvm_is_ucontrol(kvm))
			r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
	case KVM_CAP_MAX_VCPU_ID:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		if (ext == KVM_CAP_NR_VCPUS)
			r = min_t(unsigned int, num_online_cpus(), r);
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	case KVM_CAP_S390_BPB:
		r = test_facility(82);
		break;
	case KVM_CAP_S390_PROTECTED:
		r = is_prot_virt_host();
		break;
	default:
		r = 0;
	}
	return r;
}
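
/*
 * Hypothetical userspace sketch (not part of the original file): the values
 * returned above are typically consumed through KVM_CHECK_EXTENSION on an
 * open VM file descriptor ("vm_fd" below is assumed), e.g. to learn the
 * maximum KVM_S390_MEM_OP transfer size:
 *
 *	int max_size = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *
 * A positive return corresponds to MEM_OP_MAX_SIZE above; a return of 0
 * means a capability is not available.
 */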
void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
	int i;
	gfn_t cur_gfn, last_gfn;
	unsigned long gaddr, vmaddr;
	struct gmap *gmap = kvm->arch.gmap;
	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);

	/* Loop over all guest segments */
	cur_gfn = memslot->base_gfn;
	last_gfn = memslot->base_gfn + memslot->npages;
	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
		gaddr = gfn_to_gpa(cur_gfn);
		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
		if (kvm_is_error_hva(vmaddr))
			continue;

		bitmap_zero(bitmap, _PAGE_ENTRIES);
		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
		for (i = 0; i < _PAGE_ENTRIES; i++) {
			if (test_bit(i, bitmap))
				mark_page_dirty(kvm, cur_gfn + i);
		}

		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memory_slot *memslot;
	int is_dirty;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}

static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned long i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}

int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
			if (test_facility(148)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 148);
				set_kvm_facility(kvm->arch.model.fac_list, 148);
			}
			if (test_facility(152)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 152);
				set_kvm_facility(kvm->arch.model.fac_list, 152);
			}
			if (test_facility(192)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 192);
				set_kvm_facility(kvm->arch.model.fac_list, 192);
			}
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_AIS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else {
			set_kvm_facility(kvm->arch.model.fac_mask, 72);
			set_kvm_facility(kvm->arch.model.fac_list, 72);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_GS:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(133)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 133);
			set_kvm_facility(kvm->arch.model.fac_list, 133);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_HPAGE_1M:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			r = -EBUSY;
		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
			r = -EINVAL;
		else {
			r = 0;
			mmap_write_lock(kvm->mm);
			kvm->mm->context.allow_gmap_hpage_1m = 1;
			mmap_write_unlock(kvm->mm);
			/*
			 * We might have to create fake 4k page
			 * tables. To avoid that the hardware works on
			 * stale PGSTEs, we emulate these instructions.
			 */
			kvm->arch.use_skf = 0;
			kvm->arch.use_pfmfi = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;
	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			ret = -EBUSY;
		else if (kvm->mm->context.allow_gmap_hpage_1m)
			ret = -EINVAL;
		else {
			kvm->arch.use_cmma = 1;
			/* Not compatible with cmma. */
			kvm->arch.use_pfmfi = 0;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
{
	struct kvm_vcpu *vcpu;
	unsigned long i;

	kvm_s390_vcpu_block_all(kvm);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		/* recreate the shadow crycb by leaving the VSIE handler */
		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
	}

	kvm_s390_vcpu_unblock_all(kvm);
}
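
/*
 * Hypothetical userspace sketch (not part of the original file): the crypto
 * controls below are driven through KVM_SET_DEVICE_ATTR on the VM fd with
 * group KVM_S390_VM_CRYPTO, e.g. to enable AES key wrapping ("vm_fd" is
 * assumed to be an open VM file descriptor):
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CRYPTO,
 *		.attr  = KVM_S390_VM_CRYPTO_ENABLE_AES_KW,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 *
 * The handler below generates a fresh wrapping key mask and then resets the
 * crypto setup of all VCPUs via kvm_s390_vcpu_crypto_reset_all().
 */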

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 1;
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 0;
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_s390_vcpu_crypto_reset_all(kvm);
	mutex_unlock(&kvm->lock);
	return 0;
}

static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
	unsigned long cx;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}

/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_memory_slot *ms;
	struct kvm_memslots *slots;
	unsigned long ram_pages = 0;
	int bkt;

	/* migration mode already enabled */
	if (kvm->arch.migration_mode)
		return 0;
	slots = kvm_memslots(kvm);
	if (!slots || kvm_memslots_empty(slots))
		return -EINVAL;

	if (!kvm->arch.use_cmma) {
		kvm->arch.migration_mode = 1;
		return 0;
	}
	/* mark all the pages in active slots as dirty */
	kvm_for_each_memslot(ms, bkt, slots) {
		if (!ms->dirty_bitmap)
			return -EINVAL;
		/*
		 * The second half of the bitmap is only used on x86,
		 * and would be wasted otherwise, so we put it to good
		 * use here to keep track of the state of the storage
		 * attributes.
		 */
		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
		ram_pages += ms->npages;
	}
	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
	kvm->arch.migration_mode = 1;
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	return 0;
}
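
/*
 * Illustrative note (hypothetical userspace sketch): migration mode is
 * toggled through the VM attribute interface, e.g.:
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MIGRATION,
 *		.attr  = KVM_S390_VM_MIGRATION_START,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 *
 * With CMMA in use, kvm_s390_vm_start_migration() above first marks the
 * storage attributes of all pages as dirty, so that every page's CMMA state
 * gets transferred at least once.
 */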

/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	/* migration mode already disabled */
	if (!kvm->arch.migration_mode)
		return 0;
	kvm->arch.migration_mode = 0;
	if (kvm->arch.use_cmma)
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
	return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int res = -ENXIO;

	mutex_lock(&kvm->slots_lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		res = kvm_s390_vm_start_migration(kvm);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->slots_lock);

	return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = kvm->arch.migration_mode;

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
		return -EINVAL;
	kvm_s390_set_tod_clock(kvm, &gtod);

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		gtod.epoch_idx, gtod.tod);

	return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod = { 0 };

	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
			   sizeof(gtod.tod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, &gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_get_tod_clock(struct kvm *kvm,
				   struct kvm_s390_vm_tod_clock *gtod)
{
	union tod_clock clk;

	preempt_disable();

	store_tod_clock_ext(&clk);

	gtod->tod = clk.tod + kvm->arch.epoch;
	gtod->epoch_idx = 0;
	if (test_kvm_facility(kvm, 139)) {
		gtod->epoch_idx = clk.ei + kvm->arch.epdx;
		if (gtod->tod < clk.tod)
			gtod->epoch_idx += 1;
	}

	preempt_enable();
}
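
/*
 * Worked example for the carry handling in kvm_s390_get_tod_clock() above
 * (illustrative values only): with clk.tod = 0xfffffffffffff000 and
 * kvm->arch.epoch = 0x2000, the 64-bit sum wraps to gtod->tod = 0x1000,
 * which is smaller than clk.tod, so one is added to the epoch index to
 * account for the overflow into the multiple-epoch extension.
 */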

static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));
	kvm_s390_get_tod_clock(kvm, &gtod);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		gtod.epoch_idx, gtod.tod);
	return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}
	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	mutex_unlock(&kvm->lock);
	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}

	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
		mutex_unlock(&kvm->lock);
		return -EFAULT;
	}
	mutex_unlock(&kvm->lock);

	VM_EVENT(kvm, 3, "SET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
	VM_EVENT(kvm, 3, "SET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
	VM_EVENT(kvm, 3, "SET: guest KM subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
	VM_EVENT(kvm, 3, "SET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
	VM_EVENT(kvm, 3, "SET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
	VM_EVENT(kvm, 3, "SET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
	VM_EVENT(kvm, 3, "SET: guest PCC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
	VM_EVENT(kvm, 3, "SET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
	VM_EVENT(kvm, 3, "SET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
	VM_EVENT(kvm, 3, "SET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);

	return 0;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_list[0],
		 kvm->arch.model.fac_list[1],
		 kvm->arch.model.fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
	       sizeof(stfle_fac_list));
	VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_mask[0],
		 mach->fac_mask[1],
		 mach->fac_mask[2]);
	VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_list[0],
		 mach->fac_list[1],
		 mach->fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}
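
/*
 * Hypothetical userspace sketch (not part of the original file): the machine
 * data filled in by kvm_s390_get_machine() above is read via
 * KVM_GET_DEVICE_ATTR, e.g.:
 *
 *	struct kvm_s390_vm_cpu_machine mach;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CPU_MODEL,
 *		.attr  = KVM_S390_VM_CPU_MACHINE,
 *		.addr  = (__u64)&mach,
 *	};
 *	ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);
 *
 * after which mach.fac_mask and mach.fac_list hold the host facility bits
 * ("vm_fd" is assumed to be an open VM file descriptor).
 */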

static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}

static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
			 sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "GET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
	VM_EVENT(kvm, 3, "GET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
	VM_EVENT(kvm, 3, "GET: guest KM subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
	VM_EVENT(kvm, 3, "GET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
	VM_EVENT(kvm, 3, "GET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
	VM_EVENT(kvm, 3, "GET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
	VM_EVENT(kvm, 3, "GET: guest PCC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
	VM_EVENT(kvm, 3, "GET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
	VM_EVENT(kvm, 3, "GET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
	VM_EVENT(kvm, 3, "GET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);

	return 0;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
			 sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "GET: host PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
	VM_EVENT(kvm, 3, "GET: host PTFF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
	VM_EVENT(kvm, 3, "GET: host KMAC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
	VM_EVENT(kvm, 3, "GET: host KMC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
	VM_EVENT(kvm, 3, "GET: host KM subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
	VM_EVENT(kvm, 3, "GET: host KIMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
	VM_EVENT(kvm, 3, "GET: host KLMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
	VM_EVENT(kvm, 3, "GET: host PCKMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
	VM_EVENT(kvm, 3, "GET: host KMCTR subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
	VM_EVENT(kvm, 3, "GET: host KMF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
	VM_EVENT(kvm, 3, "GET: host KMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
	VM_EVENT(kvm, 3, "GET: host PCC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
	VM_EVENT(kvm, 3, "GET: host PPNO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
	VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
	VM_EVENT(kvm, 3, "GET: host KDSA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
	VM_EVENT(kvm, 3, "GET: host SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
	VM_EVENT(kvm, 3, "GET: host DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);

	return 0;
}

static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_set_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_get_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
kvm_device_attr *attr) 1752 { 1753 int ret; 1754 1755 switch (attr->group) { 1756 case KVM_S390_VM_MEM_CTRL: 1757 switch (attr->attr) { 1758 case KVM_S390_VM_MEM_ENABLE_CMMA: 1759 case KVM_S390_VM_MEM_CLR_CMMA: 1760 ret = sclp.has_cmma ? 0 : -ENXIO; 1761 break; 1762 case KVM_S390_VM_MEM_LIMIT_SIZE: 1763 ret = 0; 1764 break; 1765 default: 1766 ret = -ENXIO; 1767 break; 1768 } 1769 break; 1770 case KVM_S390_VM_TOD: 1771 switch (attr->attr) { 1772 case KVM_S390_VM_TOD_LOW: 1773 case KVM_S390_VM_TOD_HIGH: 1774 ret = 0; 1775 break; 1776 default: 1777 ret = -ENXIO; 1778 break; 1779 } 1780 break; 1781 case KVM_S390_VM_CPU_MODEL: 1782 switch (attr->attr) { 1783 case KVM_S390_VM_CPU_PROCESSOR: 1784 case KVM_S390_VM_CPU_MACHINE: 1785 case KVM_S390_VM_CPU_PROCESSOR_FEAT: 1786 case KVM_S390_VM_CPU_MACHINE_FEAT: 1787 case KVM_S390_VM_CPU_MACHINE_SUBFUNC: 1788 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: 1789 ret = 0; 1790 break; 1791 default: 1792 ret = -ENXIO; 1793 break; 1794 } 1795 break; 1796 case KVM_S390_VM_CRYPTO: 1797 switch (attr->attr) { 1798 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW: 1799 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW: 1800 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW: 1801 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW: 1802 ret = 0; 1803 break; 1804 case KVM_S390_VM_CRYPTO_ENABLE_APIE: 1805 case KVM_S390_VM_CRYPTO_DISABLE_APIE: 1806 ret = ap_instructions_available() ? 0 : -ENXIO; 1807 break; 1808 default: 1809 ret = -ENXIO; 1810 break; 1811 } 1812 break; 1813 case KVM_S390_VM_MIGRATION: 1814 ret = 0; 1815 break; 1816 default: 1817 ret = -ENXIO; 1818 break; 1819 } 1820 1821 return ret; 1822 } 1823 1824 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) 1825 { 1826 uint8_t *keys; 1827 uint64_t hva; 1828 int srcu_idx, i, r = 0; 1829 1830 if (args->flags != 0) 1831 return -EINVAL; 1832 1833 /* Is this guest using storage keys? 
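* If not, there is nothing to transfer and userspace is told so via the
* KVM_S390_GET_SKEYS_NONE return value below.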
*/ 1834 if (!mm_uses_skeys(current->mm)) 1835 return KVM_S390_GET_SKEYS_NONE; 1836 1837 /* Enforce sane limit on memory allocation */ 1838 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX) 1839 return -EINVAL; 1840 1841 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT); 1842 if (!keys) 1843 return -ENOMEM; 1844 1845 mmap_read_lock(current->mm); 1846 srcu_idx = srcu_read_lock(&kvm->srcu); 1847 for (i = 0; i < args->count; i++) { 1848 hva = gfn_to_hva(kvm, args->start_gfn + i); 1849 if (kvm_is_error_hva(hva)) { 1850 r = -EFAULT; 1851 break; 1852 } 1853 1854 r = get_guest_storage_key(current->mm, hva, &keys[i]); 1855 if (r) 1856 break; 1857 } 1858 srcu_read_unlock(&kvm->srcu, srcu_idx); 1859 mmap_read_unlock(current->mm); 1860 1861 if (!r) { 1862 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys, 1863 sizeof(uint8_t) * args->count); 1864 if (r) 1865 r = -EFAULT; 1866 } 1867 1868 kvfree(keys); 1869 return r; 1870 } 1871 1872 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) 1873 { 1874 uint8_t *keys; 1875 uint64_t hva; 1876 int srcu_idx, i, r = 0; 1877 bool unlocked; 1878 1879 if (args->flags != 0) 1880 return -EINVAL; 1881 1882 /* Enforce sane limit on memory allocation */ 1883 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX) 1884 return -EINVAL; 1885 1886 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT); 1887 if (!keys) 1888 return -ENOMEM; 1889 1890 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr, 1891 sizeof(uint8_t) * args->count); 1892 if (r) { 1893 r = -EFAULT; 1894 goto out; 1895 } 1896 1897 /* Enable storage key handling for the guest */ 1898 r = s390_enable_skey(); 1899 if (r) 1900 goto out; 1901 1902 i = 0; 1903 mmap_read_lock(current->mm); 1904 srcu_idx = srcu_read_lock(&kvm->srcu); 1905 while (i < args->count) { 1906 unlocked = false; 1907 hva = gfn_to_hva(kvm, args->start_gfn + i); 1908 if (kvm_is_error_hva(hva)) { 1909 r = -EFAULT; 1910 break; 1911 } 1912 1913 /* Lowest order bit is reserved */ 1914 if (keys[i] & 0x01) { 1915 r = -EINVAL; 1916 break; 1917 } 1918 1919 r = set_guest_storage_key(current->mm, hva, keys[i], 0); 1920 if (r) { 1921 r = fixup_user_fault(current->mm, hva, 1922 FAULT_FLAG_WRITE, &unlocked); 1923 if (r) 1924 break; 1925 } 1926 if (!r) 1927 i++; 1928 } 1929 srcu_read_unlock(&kvm->srcu, srcu_idx); 1930 mmap_read_unlock(current->mm); 1931 out: 1932 kvfree(keys); 1933 return r; 1934 } 1935 1936 /* 1937 * Base address and length must be sent at the start of each block, therefore 1938 * it's cheaper to send some clean data, as long as it's less than the size of 1939 * two longs. 1940 */ 1941 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *)) 1942 /* for consistency */ 1943 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX) 1944 1945 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args, 1946 u8 *res, unsigned long bufsize) 1947 { 1948 unsigned long pgstev, hva, cur_gfn = args->start_gfn; 1949 1950 args->count = 0; 1951 while (args->count < bufsize) { 1952 hva = gfn_to_hva(kvm, cur_gfn); 1953 /* 1954 * We return an error if the first value was invalid, but we 1955 * return successfully if at least one value was copied. 1956 */ 1957 if (kvm_is_error_hva(hva)) 1958 return args->count ? 
0 : -EFAULT; 1959 if (get_pgste(kvm->mm, hva, &pgstev) < 0) 1960 pgstev = 0; 1961 res[args->count++] = (pgstev >> 24) & 0x43; 1962 cur_gfn++; 1963 } 1964 1965 return 0; 1966 } 1967 1968 static struct kvm_memory_slot *gfn_to_memslot_approx(struct kvm_memslots *slots, 1969 gfn_t gfn) 1970 { 1971 return ____gfn_to_memslot(slots, gfn, true); 1972 } 1973 1974 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots, 1975 unsigned long cur_gfn) 1976 { 1977 struct kvm_memory_slot *ms = gfn_to_memslot_approx(slots, cur_gfn); 1978 unsigned long ofs = cur_gfn - ms->base_gfn; 1979 struct rb_node *mnode = &ms->gfn_node[slots->node_idx]; 1980 1981 if (ms->base_gfn + ms->npages <= cur_gfn) { 1982 mnode = rb_next(mnode); 1983 /* If we are above the highest slot, wrap around */ 1984 if (!mnode) 1985 mnode = rb_first(&slots->gfn_tree); 1986 1987 ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]); 1988 ofs = 0; 1989 } 1990 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs); 1991 while (ofs >= ms->npages && (mnode = rb_next(mnode))) { 1992 ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]); 1993 ofs = find_first_bit(kvm_second_dirty_bitmap(ms), ms->npages); 1994 } 1995 return ms->base_gfn + ofs; 1996 } 1997 1998 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args, 1999 u8 *res, unsigned long bufsize) 2000 { 2001 unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev; 2002 struct kvm_memslots *slots = kvm_memslots(kvm); 2003 struct kvm_memory_slot *ms; 2004 2005 if (unlikely(kvm_memslots_empty(slots))) 2006 return 0; 2007 2008 cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn); 2009 ms = gfn_to_memslot(kvm, cur_gfn); 2010 args->count = 0; 2011 args->start_gfn = cur_gfn; 2012 if (!ms) 2013 return 0; 2014 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1); 2015 mem_end = kvm_s390_get_gfn_end(slots); 2016 2017 while (args->count < bufsize) { 2018 hva = gfn_to_hva(kvm, cur_gfn); 2019 if (kvm_is_error_hva(hva)) 2020 return 0; 2021 /* Decrement only if we actually flipped the bit to 0 */ 2022 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms))) 2023 atomic64_dec(&kvm->arch.cmma_dirty_pages); 2024 if (get_pgste(kvm->mm, hva, &pgstev) < 0) 2025 pgstev = 0; 2026 /* Save the value */ 2027 res[args->count++] = (pgstev >> 24) & 0x43; 2028 /* If the next bit is too far away, stop. */ 2029 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE) 2030 return 0; 2031 /* If we reached the previous "next", find the next one */ 2032 if (cur_gfn == next_gfn) 2033 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1); 2034 /* Reached the end of memory or of the buffer, stop */ 2035 if ((next_gfn >= mem_end) || 2036 (next_gfn - args->start_gfn >= bufsize)) 2037 return 0; 2038 cur_gfn++; 2039 /* Reached the end of the current memslot, take the next one. */ 2040 if (cur_gfn - ms->base_gfn >= ms->npages) { 2041 ms = gfn_to_memslot(kvm, cur_gfn); 2042 if (!ms) 2043 return 0; 2044 } 2045 } 2046 return 0; 2047 } 2048 2049 /* 2050 * This function searches for the next page with dirty CMMA attributes, and 2051 * saves the attributes in the buffer up to either the end of the buffer or 2052 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found; 2053 * no trailing clean bytes are saved. 2054 * In case no dirty bits were found, or if CMMA was not enabled or used, the 2055 * output buffer will indicate 0 as length. 
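* Each saved byte is the CMMA state taken from the page's PGSTE, i.e.
* (pgstev >> 24) & 0x43 (the usage bits plus the NODAT bit), as stored by
* kvm_s390_peek_cmma() and kvm_s390_get_cmma() above.
*
* A userspace caller would use the interface roughly like this (sketch
* only, error handling omitted; "buf", "bufsize" and "vm_fd" are
* placeholders):
*
*	struct kvm_s390_cmma_log log = {
*		.start_gfn = 0,
*		.count = bufsize,
*		.flags = KVM_S390_CMMA_PEEK,
*		.values = (__u64)buf,
*	};
*	ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log);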
2056 */ 2057 static int kvm_s390_get_cmma_bits(struct kvm *kvm, 2058 struct kvm_s390_cmma_log *args) 2059 { 2060 unsigned long bufsize; 2061 int srcu_idx, peek, ret; 2062 u8 *values; 2063 2064 if (!kvm->arch.use_cmma) 2065 return -ENXIO; 2066 /* Invalid/unsupported flags were specified */ 2067 if (args->flags & ~KVM_S390_CMMA_PEEK) 2068 return -EINVAL; 2069 /* Migration mode query, and we are not doing a migration */ 2070 peek = !!(args->flags & KVM_S390_CMMA_PEEK); 2071 if (!peek && !kvm->arch.migration_mode) 2072 return -EINVAL; 2073 /* CMMA is disabled or was not used, or the buffer has length zero */ 2074 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX); 2075 if (!bufsize || !kvm->mm->context.uses_cmm) { 2076 memset(args, 0, sizeof(*args)); 2077 return 0; 2078 } 2079 /* We are not peeking, and there are no dirty pages */ 2080 if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) { 2081 memset(args, 0, sizeof(*args)); 2082 return 0; 2083 } 2084 2085 values = vmalloc(bufsize); 2086 if (!values) 2087 return -ENOMEM; 2088 2089 mmap_read_lock(kvm->mm); 2090 srcu_idx = srcu_read_lock(&kvm->srcu); 2091 if (peek) 2092 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize); 2093 else 2094 ret = kvm_s390_get_cmma(kvm, args, values, bufsize); 2095 srcu_read_unlock(&kvm->srcu, srcu_idx); 2096 mmap_read_unlock(kvm->mm); 2097 2098 if (kvm->arch.migration_mode) 2099 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages); 2100 else 2101 args->remaining = 0; 2102 2103 if (copy_to_user((void __user *)args->values, values, args->count)) 2104 ret = -EFAULT; 2105 2106 vfree(values); 2107 return ret; 2108 } 2109 2110 /* 2111 * This function sets the CMMA attributes for the given pages. If the input 2112 * buffer has zero length, no action is taken, otherwise the attributes are 2113 * set and the mm->context.uses_cmm flag is set. 
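* The input bytes are shifted into PGSTE position and only the usage and
* NODAT bits of the caller-supplied mask (_PGSTE_GPS_USAGE_MASK |
* _PGSTE_GPS_NODAT) are applied via set_pgste_bits().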
2114 */ 2115 static int kvm_s390_set_cmma_bits(struct kvm *kvm, 2116 const struct kvm_s390_cmma_log *args) 2117 { 2118 unsigned long hva, mask, pgstev, i; 2119 uint8_t *bits; 2120 int srcu_idx, r = 0; 2121 2122 mask = args->mask; 2123 2124 if (!kvm->arch.use_cmma) 2125 return -ENXIO; 2126 /* invalid/unsupported flags */ 2127 if (args->flags != 0) 2128 return -EINVAL; 2129 /* Enforce sane limit on memory allocation */ 2130 if (args->count > KVM_S390_CMMA_SIZE_MAX) 2131 return -EINVAL; 2132 /* Nothing to do */ 2133 if (args->count == 0) 2134 return 0; 2135 2136 bits = vmalloc(array_size(sizeof(*bits), args->count)); 2137 if (!bits) 2138 return -ENOMEM; 2139 2140 r = copy_from_user(bits, (void __user *)args->values, args->count); 2141 if (r) { 2142 r = -EFAULT; 2143 goto out; 2144 } 2145 2146 mmap_read_lock(kvm->mm); 2147 srcu_idx = srcu_read_lock(&kvm->srcu); 2148 for (i = 0; i < args->count; i++) { 2149 hva = gfn_to_hva(kvm, args->start_gfn + i); 2150 if (kvm_is_error_hva(hva)) { 2151 r = -EFAULT; 2152 break; 2153 } 2154 2155 pgstev = bits[i]; 2156 pgstev = pgstev << 24; 2157 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT; 2158 set_pgste_bits(kvm->mm, hva, mask, pgstev); 2159 } 2160 srcu_read_unlock(&kvm->srcu, srcu_idx); 2161 mmap_read_unlock(kvm->mm); 2162 2163 if (!kvm->mm->context.uses_cmm) { 2164 mmap_write_lock(kvm->mm); 2165 kvm->mm->context.uses_cmm = 1; 2166 mmap_write_unlock(kvm->mm); 2167 } 2168 out: 2169 vfree(bits); 2170 return r; 2171 } 2172 2173 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp) 2174 { 2175 struct kvm_vcpu *vcpu; 2176 u16 rc, rrc; 2177 int ret = 0; 2178 unsigned long i; 2179 2180 /* 2181 * We ignore failures and try to destroy as many CPUs as possible. 2182 * At the same time we must not free the assigned resources when 2183 * this fails, as the ultravisor has still access to that memory. 2184 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak 2185 * behind. 2186 * We want to return the first failure rc and rrc, though. 2187 */ 2188 kvm_for_each_vcpu(i, vcpu, kvm) { 2189 mutex_lock(&vcpu->mutex); 2190 if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) { 2191 *rcp = rc; 2192 *rrcp = rrc; 2193 ret = -EIO; 2194 } 2195 mutex_unlock(&vcpu->mutex); 2196 } 2197 /* Ensure that we re-enable gisa if the non-PV guest used it but the PV guest did not. */ 2198 if (use_gisa) 2199 kvm_s390_gisa_enable(kvm); 2200 return ret; 2201 } 2202 2203 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc) 2204 { 2205 unsigned long i; 2206 int r = 0; 2207 u16 dummy; 2208 2209 struct kvm_vcpu *vcpu; 2210 2211 /* Disable the GISA if the ultravisor does not support AIV. */ 2212 if (!test_bit_inv(BIT_UV_FEAT_AIV, &uv_info.uv_feature_indications)) 2213 kvm_s390_gisa_disable(kvm); 2214 2215 kvm_for_each_vcpu(i, vcpu, kvm) { 2216 mutex_lock(&vcpu->mutex); 2217 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc); 2218 mutex_unlock(&vcpu->mutex); 2219 if (r) 2220 break; 2221 } 2222 if (r) 2223 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy); 2224 return r; 2225 } 2226 2227 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) 2228 { 2229 int r = 0; 2230 u16 dummy; 2231 void __user *argp = (void __user *)cmd->data; 2232 2233 switch (cmd->cmd) { 2234 case KVM_PV_ENABLE: { 2235 r = -EINVAL; 2236 if (kvm_s390_pv_is_protected(kvm)) 2237 break; 2238 2239 /* 2240 * FMT 4 SIE needs esca. 
As we never switch back to bsca from 2241 * esca, we need no cleanup in the error cases below 2242 */ 2243 r = sca_switch_to_extended(kvm); 2244 if (r) 2245 break; 2246 2247 mmap_write_lock(current->mm); 2248 r = gmap_mark_unmergeable(); 2249 mmap_write_unlock(current->mm); 2250 if (r) 2251 break; 2252 2253 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc); 2254 if (r) 2255 break; 2256 2257 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc); 2258 if (r) 2259 kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy); 2260 2261 /* we need to block service interrupts from now on */ 2262 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs); 2263 break; 2264 } 2265 case KVM_PV_DISABLE: { 2266 r = -EINVAL; 2267 if (!kvm_s390_pv_is_protected(kvm)) 2268 break; 2269 2270 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc); 2271 /* 2272 * If a CPU could not be destroyed, destroy VM will also fail. 2273 * There is no point in trying to destroy it. Instead return 2274 * the rc and rrc from the first CPU that failed destroying. 2275 */ 2276 if (r) 2277 break; 2278 r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc); 2279 2280 /* no need to block service interrupts any more */ 2281 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs); 2282 break; 2283 } 2284 case KVM_PV_SET_SEC_PARMS: { 2285 struct kvm_s390_pv_sec_parm parms = {}; 2286 void *hdr; 2287 2288 r = -EINVAL; 2289 if (!kvm_s390_pv_is_protected(kvm)) 2290 break; 2291 2292 r = -EFAULT; 2293 if (copy_from_user(&parms, argp, sizeof(parms))) 2294 break; 2295 2296 /* Currently restricted to 8KB */ 2297 r = -EINVAL; 2298 if (parms.length > PAGE_SIZE * 2) 2299 break; 2300 2301 r = -ENOMEM; 2302 hdr = vmalloc(parms.length); 2303 if (!hdr) 2304 break; 2305 2306 r = -EFAULT; 2307 if (!copy_from_user(hdr, (void __user *)parms.origin, 2308 parms.length)) 2309 r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length, 2310 &cmd->rc, &cmd->rrc); 2311 2312 vfree(hdr); 2313 break; 2314 } 2315 case KVM_PV_UNPACK: { 2316 struct kvm_s390_pv_unp unp = {}; 2317 2318 r = -EINVAL; 2319 if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm)) 2320 break; 2321 2322 r = -EFAULT; 2323 if (copy_from_user(&unp, argp, sizeof(unp))) 2324 break; 2325 2326 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak, 2327 &cmd->rc, &cmd->rrc); 2328 break; 2329 } 2330 case KVM_PV_VERIFY: { 2331 r = -EINVAL; 2332 if (!kvm_s390_pv_is_protected(kvm)) 2333 break; 2334 2335 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), 2336 UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc); 2337 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc, 2338 cmd->rrc); 2339 break; 2340 } 2341 case KVM_PV_PREP_RESET: { 2342 r = -EINVAL; 2343 if (!kvm_s390_pv_is_protected(kvm)) 2344 break; 2345 2346 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), 2347 UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc); 2348 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x", 2349 cmd->rc, cmd->rrc); 2350 break; 2351 } 2352 case KVM_PV_UNSHARE_ALL: { 2353 r = -EINVAL; 2354 if (!kvm_s390_pv_is_protected(kvm)) 2355 break; 2356 2357 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), 2358 UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc); 2359 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x", 2360 cmd->rc, cmd->rrc); 2361 break; 2362 } 2363 default: 2364 r = -ENOTTY; 2365 } 2366 return r; 2367 } 2368 2369 static bool access_key_invalid(u8 access_key) 2370 { 2371 return access_key > 0xf; 2372 } 2373 2374 static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop) 2375 { 2376 void __user *uaddr = (void __user 
*)mop->buf; 2377 u64 supported_flags; 2378 void *tmpbuf = NULL; 2379 int r, srcu_idx; 2380 2381 supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION 2382 | KVM_S390_MEMOP_F_CHECK_ONLY; 2383 if (mop->flags & ~supported_flags || !mop->size) 2384 return -EINVAL; 2385 if (mop->size > MEM_OP_MAX_SIZE) 2386 return -E2BIG; 2387 /* 2388 * This is technically a heuristic only, if the kvm->lock is not 2389 * taken, it is not guaranteed that the vm is/remains non-protected. 2390 * This is ok from a kernel perspective, wrongdoing is detected 2391 * on the access, -EFAULT is returned and the vm may crash the 2392 * next time it accesses the memory in question. 2393 * There is no sane usecase to do switching and a memop on two 2394 * different CPUs at the same time. 2395 */ 2396 if (kvm_s390_pv_get_handle(kvm)) 2397 return -EINVAL; 2398 if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) { 2399 if (access_key_invalid(mop->key)) 2400 return -EINVAL; 2401 } else { 2402 mop->key = 0; 2403 } 2404 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) { 2405 tmpbuf = vmalloc(mop->size); 2406 if (!tmpbuf) 2407 return -ENOMEM; 2408 } 2409 2410 srcu_idx = srcu_read_lock(&kvm->srcu); 2411 2412 if (kvm_is_error_gpa(kvm, mop->gaddr)) { 2413 r = PGM_ADDRESSING; 2414 goto out_unlock; 2415 } 2416 2417 switch (mop->op) { 2418 case KVM_S390_MEMOP_ABSOLUTE_READ: { 2419 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { 2420 r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_FETCH, mop->key); 2421 } else { 2422 r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf, 2423 mop->size, GACC_FETCH, mop->key); 2424 if (r == 0) { 2425 if (copy_to_user(uaddr, tmpbuf, mop->size)) 2426 r = -EFAULT; 2427 } 2428 } 2429 break; 2430 } 2431 case KVM_S390_MEMOP_ABSOLUTE_WRITE: { 2432 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { 2433 r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_STORE, mop->key); 2434 } else { 2435 if (copy_from_user(tmpbuf, uaddr, mop->size)) { 2436 r = -EFAULT; 2437 break; 2438 } 2439 r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf, 2440 mop->size, GACC_STORE, mop->key); 2441 } 2442 break; 2443 } 2444 default: 2445 r = -EINVAL; 2446 } 2447 2448 out_unlock: 2449 srcu_read_unlock(&kvm->srcu, srcu_idx); 2450 2451 vfree(tmpbuf); 2452 return r; 2453 } 2454 2455 long kvm_arch_vm_ioctl(struct file *filp, 2456 unsigned int ioctl, unsigned long arg) 2457 { 2458 struct kvm *kvm = filp->private_data; 2459 void __user *argp = (void __user *)arg; 2460 struct kvm_device_attr attr; 2461 int r; 2462 2463 switch (ioctl) { 2464 case KVM_S390_INTERRUPT: { 2465 struct kvm_s390_interrupt s390int; 2466 2467 r = -EFAULT; 2468 if (copy_from_user(&s390int, argp, sizeof(s390int))) 2469 break; 2470 r = kvm_s390_inject_vm(kvm, &s390int); 2471 break; 2472 } 2473 case KVM_CREATE_IRQCHIP: { 2474 struct kvm_irq_routing_entry routing; 2475 2476 r = -EINVAL; 2477 if (kvm->arch.use_irqchip) { 2478 /* Set up dummy routing. 
*/ 2479 memset(&routing, 0, sizeof(routing)); 2480 r = kvm_set_irq_routing(kvm, &routing, 0, 0); 2481 } 2482 break; 2483 } 2484 case KVM_SET_DEVICE_ATTR: { 2485 r = -EFAULT; 2486 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 2487 break; 2488 r = kvm_s390_vm_set_attr(kvm, &attr); 2489 break; 2490 } 2491 case KVM_GET_DEVICE_ATTR: { 2492 r = -EFAULT; 2493 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 2494 break; 2495 r = kvm_s390_vm_get_attr(kvm, &attr); 2496 break; 2497 } 2498 case KVM_HAS_DEVICE_ATTR: { 2499 r = -EFAULT; 2500 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 2501 break; 2502 r = kvm_s390_vm_has_attr(kvm, &attr); 2503 break; 2504 } 2505 case KVM_S390_GET_SKEYS: { 2506 struct kvm_s390_skeys args; 2507 2508 r = -EFAULT; 2509 if (copy_from_user(&args, argp, 2510 sizeof(struct kvm_s390_skeys))) 2511 break; 2512 r = kvm_s390_get_skeys(kvm, &args); 2513 break; 2514 } 2515 case KVM_S390_SET_SKEYS: { 2516 struct kvm_s390_skeys args; 2517 2518 r = -EFAULT; 2519 if (copy_from_user(&args, argp, 2520 sizeof(struct kvm_s390_skeys))) 2521 break; 2522 r = kvm_s390_set_skeys(kvm, &args); 2523 break; 2524 } 2525 case KVM_S390_GET_CMMA_BITS: { 2526 struct kvm_s390_cmma_log args; 2527 2528 r = -EFAULT; 2529 if (copy_from_user(&args, argp, sizeof(args))) 2530 break; 2531 mutex_lock(&kvm->slots_lock); 2532 r = kvm_s390_get_cmma_bits(kvm, &args); 2533 mutex_unlock(&kvm->slots_lock); 2534 if (!r) { 2535 r = copy_to_user(argp, &args, sizeof(args)); 2536 if (r) 2537 r = -EFAULT; 2538 } 2539 break; 2540 } 2541 case KVM_S390_SET_CMMA_BITS: { 2542 struct kvm_s390_cmma_log args; 2543 2544 r = -EFAULT; 2545 if (copy_from_user(&args, argp, sizeof(args))) 2546 break; 2547 mutex_lock(&kvm->slots_lock); 2548 r = kvm_s390_set_cmma_bits(kvm, &args); 2549 mutex_unlock(&kvm->slots_lock); 2550 break; 2551 } 2552 case KVM_S390_PV_COMMAND: { 2553 struct kvm_pv_cmd args; 2554 2555 /* protvirt means user cpu state */ 2556 kvm_s390_set_user_cpu_state_ctrl(kvm); 2557 r = 0; 2558 if (!is_prot_virt_host()) { 2559 r = -EINVAL; 2560 break; 2561 } 2562 if (copy_from_user(&args, argp, sizeof(args))) { 2563 r = -EFAULT; 2564 break; 2565 } 2566 if (args.flags) { 2567 r = -EINVAL; 2568 break; 2569 } 2570 mutex_lock(&kvm->lock); 2571 r = kvm_s390_handle_pv(kvm, &args); 2572 mutex_unlock(&kvm->lock); 2573 if (copy_to_user(argp, &args, sizeof(args))) { 2574 r = -EFAULT; 2575 break; 2576 } 2577 break; 2578 } 2579 case KVM_S390_MEM_OP: { 2580 struct kvm_s390_mem_op mem_op; 2581 2582 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0) 2583 r = kvm_s390_vm_mem_op(kvm, &mem_op); 2584 else 2585 r = -EFAULT; 2586 break; 2587 } 2588 default: 2589 r = -ENOTTY; 2590 } 2591 2592 return r; 2593 } 2594 2595 static int kvm_s390_apxa_installed(void) 2596 { 2597 struct ap_config_info info; 2598 2599 if (ap_instructions_available()) { 2600 if (ap_qci(&info) == 0) 2601 return info.apxa; 2602 } 2603 2604 return 0; 2605 } 2606 2607 /* 2608 * The format of the crypto control block (CRYCB) is specified in the 3 low 2609 * order bits of the CRYCB designation (CRYCBD) field as follows: 2610 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the 2611 * AP extended addressing (APXA) facility are installed. 2612 * Format 1: The APXA facility is not installed but the MSAX3 facility is. 
2613 * Format 2: Both the APXA and MSAX3 facilities are installed 2614 */ 2615 static void kvm_s390_set_crycb_format(struct kvm *kvm) 2616 { 2617 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb; 2618 2619 /* Clear the CRYCB format bits - i.e., set format 0 by default */ 2620 kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK); 2621 2622 /* Check whether MSAX3 is installed */ 2623 if (!test_kvm_facility(kvm, 76)) 2624 return; 2625 2626 if (kvm_s390_apxa_installed()) 2627 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2; 2628 else 2629 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1; 2630 } 2631 2632 /* 2633 * kvm_arch_crypto_set_masks 2634 * 2635 * @kvm: pointer to the target guest's KVM struct containing the crypto masks 2636 * to be set. 2637 * @apm: the mask identifying the accessible AP adapters 2638 * @aqm: the mask identifying the accessible AP domains 2639 * @adm: the mask identifying the accessible AP control domains 2640 * 2641 * Set the masks that identify the adapters, domains and control domains to 2642 * which the KVM guest is granted access. 2643 * 2644 * Note: The kvm->lock mutex must be locked by the caller before invoking this 2645 * function. 2646 */ 2647 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm, 2648 unsigned long *aqm, unsigned long *adm) 2649 { 2650 struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb; 2651 2652 kvm_s390_vcpu_block_all(kvm); 2653 2654 switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) { 2655 case CRYCB_FORMAT2: /* APCB1 use 256 bits */ 2656 memcpy(crycb->apcb1.apm, apm, 32); 2657 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx", 2658 apm[0], apm[1], apm[2], apm[3]); 2659 memcpy(crycb->apcb1.aqm, aqm, 32); 2660 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx", 2661 aqm[0], aqm[1], aqm[2], aqm[3]); 2662 memcpy(crycb->apcb1.adm, adm, 32); 2663 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx", 2664 adm[0], adm[1], adm[2], adm[3]); 2665 break; 2666 case CRYCB_FORMAT1: 2667 case CRYCB_FORMAT0: /* Fall through both use APCB0 */ 2668 memcpy(crycb->apcb0.apm, apm, 8); 2669 memcpy(crycb->apcb0.aqm, aqm, 2); 2670 memcpy(crycb->apcb0.adm, adm, 2); 2671 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x", 2672 apm[0], *((unsigned short *)aqm), 2673 *((unsigned short *)adm)); 2674 break; 2675 default: /* Can not happen */ 2676 break; 2677 } 2678 2679 /* recreate the shadow crycb for each vcpu */ 2680 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART); 2681 kvm_s390_vcpu_unblock_all(kvm); 2682 } 2683 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks); 2684 2685 /* 2686 * kvm_arch_crypto_clear_masks 2687 * 2688 * @kvm: pointer to the target guest's KVM struct containing the crypto masks 2689 * to be cleared. 2690 * 2691 * Clear the masks that identify the adapters, domains and control domains to 2692 * which the KVM guest is granted access. 2693 * 2694 * Note: The kvm->lock mutex must be locked by the caller before invoking this 2695 * function. 
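*
* Both the APCB0 and APCB1 parts of the CRYCB are zeroed and a
* KVM_REQ_VSIE_RESTART is broadcast so that every vCPU rebuilds its
* shadow CRYCB without the old masks.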
2696 */ 2697 void kvm_arch_crypto_clear_masks(struct kvm *kvm) 2698 { 2699 kvm_s390_vcpu_block_all(kvm); 2700 2701 memset(&kvm->arch.crypto.crycb->apcb0, 0, 2702 sizeof(kvm->arch.crypto.crycb->apcb0)); 2703 memset(&kvm->arch.crypto.crycb->apcb1, 0, 2704 sizeof(kvm->arch.crypto.crycb->apcb1)); 2705 2706 VM_EVENT(kvm, 3, "%s", "CLR CRYCB:"); 2707 /* recreate the shadow crycb for each vcpu */ 2708 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART); 2709 kvm_s390_vcpu_unblock_all(kvm); 2710 } 2711 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks); 2712 2713 static u64 kvm_s390_get_initial_cpuid(void) 2714 { 2715 struct cpuid cpuid; 2716 2717 get_cpu_id(&cpuid); 2718 cpuid.version = 0xff; 2719 return *((u64 *) &cpuid); 2720 } 2721 2722 static void kvm_s390_crypto_init(struct kvm *kvm) 2723 { 2724 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb; 2725 kvm_s390_set_crycb_format(kvm); 2726 init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem); 2727 2728 if (!test_kvm_facility(kvm, 76)) 2729 return; 2730 2731 /* Enable AES/DEA protected key functions by default */ 2732 kvm->arch.crypto.aes_kw = 1; 2733 kvm->arch.crypto.dea_kw = 1; 2734 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 2735 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); 2736 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 2737 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); 2738 } 2739 2740 static void sca_dispose(struct kvm *kvm) 2741 { 2742 if (kvm->arch.use_esca) 2743 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block)); 2744 else 2745 free_page((unsigned long)(kvm->arch.sca)); 2746 kvm->arch.sca = NULL; 2747 } 2748 2749 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) 2750 { 2751 gfp_t alloc_flags = GFP_KERNEL_ACCOUNT; 2752 int i, rc; 2753 char debug_name[16]; 2754 static unsigned long sca_offset; 2755 2756 rc = -EINVAL; 2757 #ifdef CONFIG_KVM_S390_UCONTROL 2758 if (type & ~KVM_VM_S390_UCONTROL) 2759 goto out_err; 2760 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN))) 2761 goto out_err; 2762 #else 2763 if (type) 2764 goto out_err; 2765 #endif 2766 2767 rc = s390_enable_sie(); 2768 if (rc) 2769 goto out_err; 2770 2771 rc = -ENOMEM; 2772 2773 if (!sclp.has_64bscao) 2774 alloc_flags |= GFP_DMA; 2775 rwlock_init(&kvm->arch.sca_lock); 2776 /* start with basic SCA */ 2777 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags); 2778 if (!kvm->arch.sca) 2779 goto out_err; 2780 mutex_lock(&kvm_lock); 2781 sca_offset += 16; 2782 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE) 2783 sca_offset = 0; 2784 kvm->arch.sca = (struct bsca_block *) 2785 ((char *) kvm->arch.sca + sca_offset); 2786 mutex_unlock(&kvm_lock); 2787 2788 sprintf(debug_name, "kvm-%u", current->pid); 2789 2790 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long)); 2791 if (!kvm->arch.dbf) 2792 goto out_err; 2793 2794 BUILD_BUG_ON(sizeof(struct sie_page2) != 4096); 2795 kvm->arch.sie_page2 = 2796 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA); 2797 if (!kvm->arch.sie_page2) 2798 goto out_err; 2799 2800 kvm->arch.sie_page2->kvm = kvm; 2801 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list; 2802 2803 for (i = 0; i < kvm_s390_fac_size(); i++) { 2804 kvm->arch.model.fac_mask[i] = stfle_fac_list[i] & 2805 (kvm_s390_fac_base[i] | 2806 kvm_s390_fac_ext[i]); 2807 kvm->arch.model.fac_list[i] = stfle_fac_list[i] & 2808 kvm_s390_fac_base[i]; 2809 } 2810 kvm->arch.model.subfuncs = kvm_s390_available_subfunc; 2811 2812 /* we are always in czam mode - 
even on pre z14 machines */ 2813 set_kvm_facility(kvm->arch.model.fac_mask, 138); 2814 set_kvm_facility(kvm->arch.model.fac_list, 138); 2815 /* we emulate STHYI in kvm */ 2816 set_kvm_facility(kvm->arch.model.fac_mask, 74); 2817 set_kvm_facility(kvm->arch.model.fac_list, 74); 2818 if (MACHINE_HAS_TLB_GUEST) { 2819 set_kvm_facility(kvm->arch.model.fac_mask, 147); 2820 set_kvm_facility(kvm->arch.model.fac_list, 147); 2821 } 2822 2823 if (css_general_characteristics.aiv && test_facility(65)) 2824 set_kvm_facility(kvm->arch.model.fac_mask, 65); 2825 2826 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid(); 2827 kvm->arch.model.ibc = sclp.ibc & 0x0fff; 2828 2829 kvm_s390_crypto_init(kvm); 2830 2831 mutex_init(&kvm->arch.float_int.ais_lock); 2832 spin_lock_init(&kvm->arch.float_int.lock); 2833 for (i = 0; i < FIRQ_LIST_COUNT; i++) 2834 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]); 2835 init_waitqueue_head(&kvm->arch.ipte_wq); 2836 mutex_init(&kvm->arch.ipte_mutex); 2837 2838 debug_register_view(kvm->arch.dbf, &debug_sprintf_view); 2839 VM_EVENT(kvm, 3, "vm created with type %lu", type); 2840 2841 if (type & KVM_VM_S390_UCONTROL) { 2842 kvm->arch.gmap = NULL; 2843 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT; 2844 } else { 2845 if (sclp.hamax == U64_MAX) 2846 kvm->arch.mem_limit = TASK_SIZE_MAX; 2847 else 2848 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX, 2849 sclp.hamax + 1); 2850 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1); 2851 if (!kvm->arch.gmap) 2852 goto out_err; 2853 kvm->arch.gmap->private = kvm; 2854 kvm->arch.gmap->pfault_enabled = 0; 2855 } 2856 2857 kvm->arch.use_pfmfi = sclp.has_pfmfi; 2858 kvm->arch.use_skf = sclp.has_skey; 2859 spin_lock_init(&kvm->arch.start_stop_lock); 2860 kvm_s390_vsie_init(kvm); 2861 if (use_gisa) 2862 kvm_s390_gisa_init(kvm); 2863 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid); 2864 2865 return 0; 2866 out_err: 2867 free_page((unsigned long)kvm->arch.sie_page2); 2868 debug_unregister(kvm->arch.dbf); 2869 sca_dispose(kvm); 2870 KVM_EVENT(3, "creation of vm failed: %d", rc); 2871 return rc; 2872 } 2873 2874 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) 2875 { 2876 u16 rc, rrc; 2877 2878 VCPU_EVENT(vcpu, 3, "%s", "free cpu"); 2879 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id); 2880 kvm_s390_clear_local_irqs(vcpu); 2881 kvm_clear_async_pf_completion_queue(vcpu); 2882 if (!kvm_is_ucontrol(vcpu->kvm)) 2883 sca_del_vcpu(vcpu); 2884 2885 if (kvm_is_ucontrol(vcpu->kvm)) 2886 gmap_remove(vcpu->arch.gmap); 2887 2888 if (vcpu->kvm->arch.use_cmma) 2889 kvm_s390_vcpu_unsetup_cmma(vcpu); 2890 /* We can not hold the vcpu mutex here, we are already dying */ 2891 if (kvm_s390_pv_cpu_get_handle(vcpu)) 2892 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc); 2893 free_page((unsigned long)(vcpu->arch.sie_block)); 2894 } 2895 2896 void kvm_arch_destroy_vm(struct kvm *kvm) 2897 { 2898 u16 rc, rrc; 2899 2900 kvm_destroy_vcpus(kvm); 2901 sca_dispose(kvm); 2902 kvm_s390_gisa_destroy(kvm); 2903 /* 2904 * We are already at the end of life and kvm->lock is not taken. 2905 * This is ok as the file descriptor is closed by now and nobody 2906 * can mess with the pv state. To avoid lockdep_assert_held from 2907 * complaining we do not use kvm_s390_pv_is_protected. 
2908 */ 2909 if (kvm_s390_pv_get_handle(kvm)) 2910 kvm_s390_pv_deinit_vm(kvm, &rc, &rrc); 2911 debug_unregister(kvm->arch.dbf); 2912 free_page((unsigned long)kvm->arch.sie_page2); 2913 if (!kvm_is_ucontrol(kvm)) 2914 gmap_remove(kvm->arch.gmap); 2915 kvm_s390_destroy_adapters(kvm); 2916 kvm_s390_clear_float_irqs(kvm); 2917 kvm_s390_vsie_destroy(kvm); 2918 KVM_EVENT(3, "vm 0x%pK destroyed", kvm); 2919 } 2920 2921 /* Section: vcpu related */ 2922 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu) 2923 { 2924 vcpu->arch.gmap = gmap_create(current->mm, -1UL); 2925 if (!vcpu->arch.gmap) 2926 return -ENOMEM; 2927 vcpu->arch.gmap->private = vcpu->kvm; 2928 2929 return 0; 2930 } 2931 2932 static void sca_del_vcpu(struct kvm_vcpu *vcpu) 2933 { 2934 if (!kvm_s390_use_sca_entries()) 2935 return; 2936 read_lock(&vcpu->kvm->arch.sca_lock); 2937 if (vcpu->kvm->arch.use_esca) { 2938 struct esca_block *sca = vcpu->kvm->arch.sca; 2939 2940 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); 2941 sca->cpu[vcpu->vcpu_id].sda = 0; 2942 } else { 2943 struct bsca_block *sca = vcpu->kvm->arch.sca; 2944 2945 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); 2946 sca->cpu[vcpu->vcpu_id].sda = 0; 2947 } 2948 read_unlock(&vcpu->kvm->arch.sca_lock); 2949 } 2950 2951 static void sca_add_vcpu(struct kvm_vcpu *vcpu) 2952 { 2953 if (!kvm_s390_use_sca_entries()) { 2954 struct bsca_block *sca = vcpu->kvm->arch.sca; 2955 2956 /* we still need the basic sca for the ipte control */ 2957 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2958 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; 2959 return; 2960 } 2961 read_lock(&vcpu->kvm->arch.sca_lock); 2962 if (vcpu->kvm->arch.use_esca) { 2963 struct esca_block *sca = vcpu->kvm->arch.sca; 2964 2965 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; 2966 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2967 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU; 2968 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; 2969 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); 2970 } else { 2971 struct bsca_block *sca = vcpu->kvm->arch.sca; 2972 2973 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; 2974 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2975 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; 2976 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); 2977 } 2978 read_unlock(&vcpu->kvm->arch.sca_lock); 2979 } 2980 2981 /* Basic SCA to Extended SCA data copy routines */ 2982 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s) 2983 { 2984 d->sda = s->sda; 2985 d->sigp_ctrl.c = s->sigp_ctrl.c; 2986 d->sigp_ctrl.scn = s->sigp_ctrl.scn; 2987 } 2988 2989 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s) 2990 { 2991 int i; 2992 2993 d->ipte_control = s->ipte_control; 2994 d->mcn[0] = s->mcn; 2995 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++) 2996 sca_copy_entry(&d->cpu[i], &s->cpu[i]); 2997 } 2998 2999 static int sca_switch_to_extended(struct kvm *kvm) 3000 { 3001 struct bsca_block *old_sca = kvm->arch.sca; 3002 struct esca_block *new_sca; 3003 struct kvm_vcpu *vcpu; 3004 unsigned long vcpu_idx; 3005 u32 scaol, scaoh; 3006 3007 if (kvm->arch.use_esca) 3008 return 0; 3009 3010 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO); 3011 if (!new_sca) 3012 return -ENOMEM; 3013 3014 scaoh = (u32)((u64)(new_sca) >> 32); 3015 scaol = (u32)(u64)(new_sca) & ~0x3fU; 3016 3017 kvm_s390_vcpu_block_all(kvm); 3018 write_lock(&kvm->arch.sca_lock); 
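/*
 * With all vCPUs blocked and the sca_lock held for writing, no CPU can
 * enter SIE against a half-switched SCA while the entries are copied
 * over and each SIE control block is re-pointed to the new ESCA below.
 */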
3019 3020 sca_copy_b_to_e(new_sca, old_sca); 3021 3022 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) { 3023 vcpu->arch.sie_block->scaoh = scaoh; 3024 vcpu->arch.sie_block->scaol = scaol; 3025 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; 3026 } 3027 kvm->arch.sca = new_sca; 3028 kvm->arch.use_esca = 1; 3029 3030 write_unlock(&kvm->arch.sca_lock); 3031 kvm_s390_vcpu_unblock_all(kvm); 3032 3033 free_page((unsigned long)old_sca); 3034 3035 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)", 3036 old_sca, kvm->arch.sca); 3037 return 0; 3038 } 3039 3040 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id) 3041 { 3042 int rc; 3043 3044 if (!kvm_s390_use_sca_entries()) { 3045 if (id < KVM_MAX_VCPUS) 3046 return true; 3047 return false; 3048 } 3049 if (id < KVM_S390_BSCA_CPU_SLOTS) 3050 return true; 3051 if (!sclp.has_esca || !sclp.has_64bscao) 3052 return false; 3053 3054 mutex_lock(&kvm->lock); 3055 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm); 3056 mutex_unlock(&kvm->lock); 3057 3058 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS; 3059 } 3060 3061 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 3062 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3063 { 3064 WARN_ON_ONCE(vcpu->arch.cputm_start != 0); 3065 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 3066 vcpu->arch.cputm_start = get_tod_clock_fast(); 3067 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 3068 } 3069 3070 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 3071 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3072 { 3073 WARN_ON_ONCE(vcpu->arch.cputm_start == 0); 3074 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 3075 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start; 3076 vcpu->arch.cputm_start = 0; 3077 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 3078 } 3079 3080 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 3081 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3082 { 3083 WARN_ON_ONCE(vcpu->arch.cputm_enabled); 3084 vcpu->arch.cputm_enabled = true; 3085 __start_cpu_timer_accounting(vcpu); 3086 } 3087 3088 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 3089 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3090 { 3091 WARN_ON_ONCE(!vcpu->arch.cputm_enabled); 3092 __stop_cpu_timer_accounting(vcpu); 3093 vcpu->arch.cputm_enabled = false; 3094 } 3095 3096 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3097 { 3098 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3099 __enable_cpu_timer_accounting(vcpu); 3100 preempt_enable(); 3101 } 3102 3103 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3104 { 3105 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3106 __disable_cpu_timer_accounting(vcpu); 3107 preempt_enable(); 3108 } 3109 3110 /* set the cpu timer - may only be called from the VCPU thread itself */ 3111 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm) 3112 { 3113 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3114 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 3115 if (vcpu->arch.cputm_enabled) 3116 vcpu->arch.cputm_start = get_tod_clock_fast(); 3117 vcpu->arch.sie_block->cputm = cputm; 3118 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 3119 preempt_enable(); 3120 } 3121 3122 /* update and get the cpu timer - can also be called from other VCPU threads */ 3123 __u64 
kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu) 3124 { 3125 unsigned int seq; 3126 __u64 value; 3127 3128 if (unlikely(!vcpu->arch.cputm_enabled)) 3129 return vcpu->arch.sie_block->cputm; 3130 3131 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3132 do { 3133 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount); 3134 /* 3135 * If the writer would ever execute a read in the critical 3136 * section, e.g. in irq context, we have a deadlock. 3137 */ 3138 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu); 3139 value = vcpu->arch.sie_block->cputm; 3140 /* if cputm_start is 0, accounting is being started/stopped */ 3141 if (likely(vcpu->arch.cputm_start)) 3142 value -= get_tod_clock_fast() - vcpu->arch.cputm_start; 3143 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1)); 3144 preempt_enable(); 3145 return value; 3146 } 3147 3148 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 3149 { 3150 3151 gmap_enable(vcpu->arch.enabled_gmap); 3152 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING); 3153 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) 3154 __start_cpu_timer_accounting(vcpu); 3155 vcpu->cpu = cpu; 3156 } 3157 3158 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 3159 { 3160 vcpu->cpu = -1; 3161 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) 3162 __stop_cpu_timer_accounting(vcpu); 3163 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING); 3164 vcpu->arch.enabled_gmap = gmap_get_enabled(); 3165 gmap_disable(vcpu->arch.enabled_gmap); 3166 3167 } 3168 3169 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) 3170 { 3171 mutex_lock(&vcpu->kvm->lock); 3172 preempt_disable(); 3173 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch; 3174 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx; 3175 preempt_enable(); 3176 mutex_unlock(&vcpu->kvm->lock); 3177 if (!kvm_is_ucontrol(vcpu->kvm)) { 3178 vcpu->arch.gmap = vcpu->kvm->arch.gmap; 3179 sca_add_vcpu(vcpu); 3180 } 3181 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0) 3182 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC; 3183 /* make vcpu_load load the right gmap on the first trigger */ 3184 vcpu->arch.enabled_gmap = vcpu->arch.gmap; 3185 } 3186 3187 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr) 3188 { 3189 if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) && 3190 test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo)) 3191 return true; 3192 return false; 3193 } 3194 3195 static bool kvm_has_pckmo_ecc(struct kvm *kvm) 3196 { 3197 /* At least one ECC subfunction must be present */ 3198 return kvm_has_pckmo_subfunc(kvm, 32) || 3199 kvm_has_pckmo_subfunc(kvm, 33) || 3200 kvm_has_pckmo_subfunc(kvm, 34) || 3201 kvm_has_pckmo_subfunc(kvm, 40) || 3202 kvm_has_pckmo_subfunc(kvm, 41); 3203 3204 } 3205 3206 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu) 3207 { 3208 /* 3209 * If the AP instructions are not being interpreted and the MSAX3 3210 * facility is not configured for the guest, there is nothing to set up. 
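* Otherwise the CRYCB origin is (re)loaded and the AES/DEA wrapping, ECC
* and APIE controls are re-derived below from the VM-wide crypto
* settings.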
3211 */ 3212 if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76)) 3213 return; 3214 3215 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd; 3216 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA); 3217 vcpu->arch.sie_block->eca &= ~ECA_APIE; 3218 vcpu->arch.sie_block->ecd &= ~ECD_ECC; 3219 3220 if (vcpu->kvm->arch.crypto.apie) 3221 vcpu->arch.sie_block->eca |= ECA_APIE; 3222 3223 /* Set up protected key support */ 3224 if (vcpu->kvm->arch.crypto.aes_kw) { 3225 vcpu->arch.sie_block->ecb3 |= ECB3_AES; 3226 /* ecc is also wrapped with AES key */ 3227 if (kvm_has_pckmo_ecc(vcpu->kvm)) 3228 vcpu->arch.sie_block->ecd |= ECD_ECC; 3229 } 3230 3231 if (vcpu->kvm->arch.crypto.dea_kw) 3232 vcpu->arch.sie_block->ecb3 |= ECB3_DEA; 3233 } 3234 3235 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu) 3236 { 3237 free_page(vcpu->arch.sie_block->cbrlo); 3238 vcpu->arch.sie_block->cbrlo = 0; 3239 } 3240 3241 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu) 3242 { 3243 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT); 3244 if (!vcpu->arch.sie_block->cbrlo) 3245 return -ENOMEM; 3246 return 0; 3247 } 3248 3249 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu) 3250 { 3251 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model; 3252 3253 vcpu->arch.sie_block->ibc = model->ibc; 3254 if (test_kvm_facility(vcpu->kvm, 7)) 3255 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list; 3256 } 3257 3258 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu) 3259 { 3260 int rc = 0; 3261 u16 uvrc, uvrrc; 3262 3263 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | 3264 CPUSTAT_SM | 3265 CPUSTAT_STOPPED); 3266 3267 if (test_kvm_facility(vcpu->kvm, 78)) 3268 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2); 3269 else if (test_kvm_facility(vcpu->kvm, 8)) 3270 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED); 3271 3272 kvm_s390_vcpu_setup_model(vcpu); 3273 3274 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */ 3275 if (MACHINE_HAS_ESOP) 3276 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT; 3277 if (test_kvm_facility(vcpu->kvm, 9)) 3278 vcpu->arch.sie_block->ecb |= ECB_SRSI; 3279 if (test_kvm_facility(vcpu->kvm, 73)) 3280 vcpu->arch.sie_block->ecb |= ECB_TE; 3281 if (!kvm_is_ucontrol(vcpu->kvm)) 3282 vcpu->arch.sie_block->ecb |= ECB_SPECI; 3283 3284 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi) 3285 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI; 3286 if (test_kvm_facility(vcpu->kvm, 130)) 3287 vcpu->arch.sie_block->ecb2 |= ECB2_IEP; 3288 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI; 3289 if (sclp.has_cei) 3290 vcpu->arch.sie_block->eca |= ECA_CEI; 3291 if (sclp.has_ib) 3292 vcpu->arch.sie_block->eca |= ECA_IB; 3293 if (sclp.has_siif) 3294 vcpu->arch.sie_block->eca |= ECA_SII; 3295 if (sclp.has_sigpif) 3296 vcpu->arch.sie_block->eca |= ECA_SIGPI; 3297 if (test_kvm_facility(vcpu->kvm, 129)) { 3298 vcpu->arch.sie_block->eca |= ECA_VX; 3299 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; 3300 } 3301 if (test_kvm_facility(vcpu->kvm, 139)) 3302 vcpu->arch.sie_block->ecd |= ECD_MEF; 3303 if (test_kvm_facility(vcpu->kvm, 156)) 3304 vcpu->arch.sie_block->ecd |= ECD_ETOKENF; 3305 if (vcpu->arch.sie_block->gd) { 3306 vcpu->arch.sie_block->eca |= ECA_AIV; 3307 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u", 3308 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id); 3309 } 3310 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx) 3311 | SDNXC; 3312 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb; 
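/*
 * Keyless subset (KSS): if the facility is available, let the vCPU run
 * without storage-key interpretation until the guest actually uses keys;
 * otherwise the key instructions (ISKE/SSKE/RRBE) are intercepted so KVM
 * can handle them itself.
 */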
3313 3314 if (sclp.has_kss) 3315 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS); 3316 else 3317 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; 3318 3319 if (vcpu->kvm->arch.use_cmma) { 3320 rc = kvm_s390_vcpu_setup_cmma(vcpu); 3321 if (rc) 3322 return rc; 3323 } 3324 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 3325 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; 3326 3327 vcpu->arch.sie_block->hpid = HPID_KVM; 3328 3329 kvm_s390_vcpu_crypto_setup(vcpu); 3330 3331 mutex_lock(&vcpu->kvm->lock); 3332 if (kvm_s390_pv_is_protected(vcpu->kvm)) { 3333 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc); 3334 if (rc) 3335 kvm_s390_vcpu_unsetup_cmma(vcpu); 3336 } 3337 mutex_unlock(&vcpu->kvm->lock); 3338 3339 return rc; 3340 } 3341 3342 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) 3343 { 3344 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id)) 3345 return -EINVAL; 3346 return 0; 3347 } 3348 3349 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) 3350 { 3351 struct sie_page *sie_page; 3352 int rc; 3353 3354 BUILD_BUG_ON(sizeof(struct sie_page) != 4096); 3355 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT); 3356 if (!sie_page) 3357 return -ENOMEM; 3358 3359 vcpu->arch.sie_block = &sie_page->sie_block; 3360 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb; 3361 3362 /* the real guest size will always be smaller than msl */ 3363 vcpu->arch.sie_block->mso = 0; 3364 vcpu->arch.sie_block->msl = sclp.hamax; 3365 3366 vcpu->arch.sie_block->icpua = vcpu->vcpu_id; 3367 spin_lock_init(&vcpu->arch.local_int.lock); 3368 vcpu->arch.sie_block->gd = kvm_s390_get_gisa_desc(vcpu->kvm); 3369 seqcount_init(&vcpu->arch.cputm_seqcount); 3370 3371 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 3372 kvm_clear_async_pf_completion_queue(vcpu); 3373 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX | 3374 KVM_SYNC_GPRS | 3375 KVM_SYNC_ACRS | 3376 KVM_SYNC_CRS | 3377 KVM_SYNC_ARCH0 | 3378 KVM_SYNC_PFAULT | 3379 KVM_SYNC_DIAG318; 3380 kvm_s390_set_prefix(vcpu, 0); 3381 if (test_kvm_facility(vcpu->kvm, 64)) 3382 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; 3383 if (test_kvm_facility(vcpu->kvm, 82)) 3384 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC; 3385 if (test_kvm_facility(vcpu->kvm, 133)) 3386 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB; 3387 if (test_kvm_facility(vcpu->kvm, 156)) 3388 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN; 3389 /* fprs can be synchronized via vrs, even if the guest has no vx. With 3390 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format. 
3391 */ 3392 if (MACHINE_HAS_VX) 3393 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS; 3394 else 3395 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS; 3396 3397 if (kvm_is_ucontrol(vcpu->kvm)) { 3398 rc = __kvm_ucontrol_vcpu_init(vcpu); 3399 if (rc) 3400 goto out_free_sie_block; 3401 } 3402 3403 VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", 3404 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block); 3405 trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block); 3406 3407 rc = kvm_s390_vcpu_setup(vcpu); 3408 if (rc) 3409 goto out_ucontrol_uninit; 3410 return 0; 3411 3412 out_ucontrol_uninit: 3413 if (kvm_is_ucontrol(vcpu->kvm)) 3414 gmap_remove(vcpu->arch.gmap); 3415 out_free_sie_block: 3416 free_page((unsigned long)(vcpu->arch.sie_block)); 3417 return rc; 3418 } 3419 3420 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 3421 { 3422 clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask); 3423 return kvm_s390_vcpu_has_irq(vcpu, 0); 3424 } 3425 3426 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) 3427 { 3428 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE); 3429 } 3430 3431 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu) 3432 { 3433 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); 3434 exit_sie(vcpu); 3435 } 3436 3437 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu) 3438 { 3439 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); 3440 } 3441 3442 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu) 3443 { 3444 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20); 3445 exit_sie(vcpu); 3446 } 3447 3448 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu) 3449 { 3450 return atomic_read(&vcpu->arch.sie_block->prog20) & 3451 (PROG_BLOCK_SIE | PROG_REQUEST); 3452 } 3453 3454 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu) 3455 { 3456 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20); 3457 } 3458 3459 /* 3460 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running. 3461 * If the CPU is not running (e.g. waiting as idle) the function will 3462 * return immediately. 
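* This is done by setting the STOP indication (CPUSTAT_STOP_INT), kicking
* a potential vSIE session and busy-waiting until PROG_IN_SIE is cleared
* in prog0c.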
*/ 3463 void exit_sie(struct kvm_vcpu *vcpu) 3464 { 3465 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT); 3466 kvm_s390_vsie_kick(vcpu); 3467 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE) 3468 cpu_relax(); 3469 } 3470 3471 /* Kick a guest cpu out of SIE to process a request synchronously */ 3472 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu) 3473 { 3474 __kvm_make_request(req, vcpu); 3475 kvm_s390_vcpu_request(vcpu); 3476 } 3477 3478 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, 3479 unsigned long end) 3480 { 3481 struct kvm *kvm = gmap->private; 3482 struct kvm_vcpu *vcpu; 3483 unsigned long prefix; 3484 unsigned long i; 3485 3486 if (gmap_is_shadow(gmap)) 3487 return; 3488 if (start >= 1UL << 31) 3489 /* We are only interested in prefix pages */ 3490 return; 3491 kvm_for_each_vcpu(i, vcpu, kvm) { 3492 /* match against both prefix pages */ 3493 prefix = kvm_s390_get_prefix(vcpu); 3494 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) { 3495 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx", 3496 start, end); 3497 kvm_s390_sync_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu); 3498 } 3499 } 3500 } 3501 3502 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) 3503 { 3504 /* do not poll with more than halt_poll_max_steal percent of steal time */ 3505 if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >= 3506 READ_ONCE(halt_poll_max_steal)) { 3507 vcpu->stat.halt_no_poll_steal++; 3508 return true; 3509 } 3510 return false; 3511 } 3512 3513 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) 3514 { 3515 /* kvm common code refers to this, but never calls it */ 3516 BUG(); 3517 return 0; 3518 } 3519 3520 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, 3521 struct kvm_one_reg *reg) 3522 { 3523 int r = -EINVAL; 3524 3525 switch (reg->id) { 3526 case KVM_REG_S390_TODPR: 3527 r = put_user(vcpu->arch.sie_block->todpr, 3528 (u32 __user *)reg->addr); 3529 break; 3530 case KVM_REG_S390_EPOCHDIFF: 3531 r = put_user(vcpu->arch.sie_block->epoch, 3532 (u64 __user *)reg->addr); 3533 break; 3534 case KVM_REG_S390_CPU_TIMER: 3535 r = put_user(kvm_s390_get_cpu_timer(vcpu), 3536 (u64 __user *)reg->addr); 3537 break; 3538 case KVM_REG_S390_CLOCK_COMP: 3539 r = put_user(vcpu->arch.sie_block->ckc, 3540 (u64 __user *)reg->addr); 3541 break; 3542 case KVM_REG_S390_PFTOKEN: 3543 r = put_user(vcpu->arch.pfault_token, 3544 (u64 __user *)reg->addr); 3545 break; 3546 case KVM_REG_S390_PFCOMPARE: 3547 r = put_user(vcpu->arch.pfault_compare, 3548 (u64 __user *)reg->addr); 3549 break; 3550 case KVM_REG_S390_PFSELECT: 3551 r = put_user(vcpu->arch.pfault_select, 3552 (u64 __user *)reg->addr); 3553 break; 3554 case KVM_REG_S390_PP: 3555 r = put_user(vcpu->arch.sie_block->pp, 3556 (u64 __user *)reg->addr); 3557 break; 3558 case KVM_REG_S390_GBEA: 3559 r = put_user(vcpu->arch.sie_block->gbea, 3560 (u64 __user *)reg->addr); 3561 break; 3562 default: 3563 break; 3564 } 3565 3566 return r; 3567 } 3568 3569 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, 3570 struct kvm_one_reg *reg) 3571 { 3572 int r = -EINVAL; 3573 __u64 val; 3574 3575 switch (reg->id) { 3576 case KVM_REG_S390_TODPR: 3577 r = get_user(vcpu->arch.sie_block->todpr, 3578 (u32 __user *)reg->addr); 3579 break; 3580 case KVM_REG_S390_EPOCHDIFF: 3581 r = get_user(vcpu->arch.sie_block->epoch, 3582 (u64 __user *)reg->addr); 3583 break; 3584 case KVM_REG_S390_CPU_TIMER: 3585 r = get_user(val, (u64 __user *)reg->addr); 3586 if (!r) 3587 kvm_s390_set_cpu_timer(vcpu, val); 3588 break; 3589 case 
KVM_REG_S390_CLOCK_COMP: 3590 r = get_user(vcpu->arch.sie_block->ckc, 3591 (u64 __user *)reg->addr); 3592 break; 3593 case KVM_REG_S390_PFTOKEN: 3594 r = get_user(vcpu->arch.pfault_token, 3595 (u64 __user *)reg->addr); 3596 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 3597 kvm_clear_async_pf_completion_queue(vcpu); 3598 break; 3599 case KVM_REG_S390_PFCOMPARE: 3600 r = get_user(vcpu->arch.pfault_compare, 3601 (u64 __user *)reg->addr); 3602 break; 3603 case KVM_REG_S390_PFSELECT: 3604 r = get_user(vcpu->arch.pfault_select, 3605 (u64 __user *)reg->addr); 3606 break; 3607 case KVM_REG_S390_PP: 3608 r = get_user(vcpu->arch.sie_block->pp, 3609 (u64 __user *)reg->addr); 3610 break; 3611 case KVM_REG_S390_GBEA: 3612 r = get_user(vcpu->arch.sie_block->gbea, 3613 (u64 __user *)reg->addr); 3614 break; 3615 default: 3616 break; 3617 } 3618 3619 return r; 3620 } 3621 3622 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu) 3623 { 3624 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI; 3625 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 3626 memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb)); 3627 3628 kvm_clear_async_pf_completion_queue(vcpu); 3629 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) 3630 kvm_s390_vcpu_stop(vcpu); 3631 kvm_s390_clear_local_irqs(vcpu); 3632 } 3633 3634 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) 3635 { 3636 /* Initial reset is a superset of the normal reset */ 3637 kvm_arch_vcpu_ioctl_normal_reset(vcpu); 3638 3639 /* 3640 * This equals initial cpu reset in pop, but we don't switch to ESA. 3641 * We do not only reset the internal data, but also ... 3642 */ 3643 vcpu->arch.sie_block->gpsw.mask = 0; 3644 vcpu->arch.sie_block->gpsw.addr = 0; 3645 kvm_s390_set_prefix(vcpu, 0); 3646 kvm_s390_set_cpu_timer(vcpu, 0); 3647 vcpu->arch.sie_block->ckc = 0; 3648 memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr)); 3649 vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK; 3650 vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK; 3651 3652 /* ... the data in sync regs */ 3653 memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs)); 3654 vcpu->run->s.regs.ckc = 0; 3655 vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK; 3656 vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK; 3657 vcpu->run->psw_addr = 0; 3658 vcpu->run->psw_mask = 0; 3659 vcpu->run->s.regs.todpr = 0; 3660 vcpu->run->s.regs.cputm = 0; 3661 vcpu->run->s.regs.ckc = 0; 3662 vcpu->run->s.regs.pp = 0; 3663 vcpu->run->s.regs.gbea = 1; 3664 vcpu->run->s.regs.fpc = 0; 3665 /* 3666 * Do not reset these registers in the protected case, as some of 3667 * them are overlayed and they are not accessible in this case 3668 * anyway. 
3669 */ 3670 if (!kvm_s390_pv_cpu_is_protected(vcpu)) { 3671 vcpu->arch.sie_block->gbea = 1; 3672 vcpu->arch.sie_block->pp = 0; 3673 vcpu->arch.sie_block->fpf &= ~FPF_BPBC; 3674 vcpu->arch.sie_block->todpr = 0; 3675 } 3676 } 3677 3678 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu) 3679 { 3680 struct kvm_sync_regs *regs = &vcpu->run->s.regs; 3681 3682 /* Clear reset is a superset of the initial reset */ 3683 kvm_arch_vcpu_ioctl_initial_reset(vcpu); 3684 3685 memset(&regs->gprs, 0, sizeof(regs->gprs)); 3686 memset(&regs->vrs, 0, sizeof(regs->vrs)); 3687 memset(&regs->acrs, 0, sizeof(regs->acrs)); 3688 memset(&regs->gscb, 0, sizeof(regs->gscb)); 3689 3690 regs->etoken = 0; 3691 regs->etoken_extension = 0; 3692 } 3693 3694 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 3695 { 3696 vcpu_load(vcpu); 3697 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs)); 3698 vcpu_put(vcpu); 3699 return 0; 3700 } 3701 3702 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 3703 { 3704 vcpu_load(vcpu); 3705 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs)); 3706 vcpu_put(vcpu); 3707 return 0; 3708 } 3709 3710 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 3711 struct kvm_sregs *sregs) 3712 { 3713 vcpu_load(vcpu); 3714 3715 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs)); 3716 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs)); 3717 3718 vcpu_put(vcpu); 3719 return 0; 3720 } 3721 3722 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, 3723 struct kvm_sregs *sregs) 3724 { 3725 vcpu_load(vcpu); 3726 3727 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs)); 3728 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs)); 3729 3730 vcpu_put(vcpu); 3731 return 0; 3732 } 3733 3734 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 3735 { 3736 int ret = 0; 3737 3738 vcpu_load(vcpu); 3739 3740 if (test_fp_ctl(fpu->fpc)) { 3741 ret = -EINVAL; 3742 goto out; 3743 } 3744 vcpu->run->s.regs.fpc = fpu->fpc; 3745 if (MACHINE_HAS_VX) 3746 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs, 3747 (freg_t *) fpu->fprs); 3748 else 3749 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs)); 3750 3751 out: 3752 vcpu_put(vcpu); 3753 return ret; 3754 } 3755 3756 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 3757 { 3758 vcpu_load(vcpu); 3759 3760 /* make sure we have the latest values */ 3761 save_fpu_regs(); 3762 if (MACHINE_HAS_VX) 3763 convert_vx_to_fp((freg_t *) fpu->fprs, 3764 (__vector128 *) vcpu->run->s.regs.vrs); 3765 else 3766 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs)); 3767 fpu->fpc = vcpu->run->s.regs.fpc; 3768 3769 vcpu_put(vcpu); 3770 return 0; 3771 } 3772 3773 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw) 3774 { 3775 int rc = 0; 3776 3777 if (!is_vcpu_stopped(vcpu)) 3778 rc = -EBUSY; 3779 else { 3780 vcpu->run->psw_mask = psw.mask; 3781 vcpu->run->psw_addr = psw.addr; 3782 } 3783 return rc; 3784 } 3785 3786 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, 3787 struct kvm_translation *tr) 3788 { 3789 return -EINVAL; /* not implemented yet */ 3790 } 3791 3792 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \ 3793 KVM_GUESTDBG_USE_HW_BP | \ 3794 KVM_GUESTDBG_ENABLE) 3795 3796 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, 3797 struct kvm_guest_debug *dbg) 3798 { 3799 int rc = 0; 3800 3801 vcpu_load(vcpu); 3802
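/* Start from a clean debug state: only the flags in VALID_GUESTDBG_FLAGS are accepted and guest PER needs the SCLP GPERE facility. While debugging is enabled, CPUSTAT_P enforces PER for the guest; any error below rolls the debug state back completely. */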
3803 vcpu->guest_debug = 0; 3804 kvm_s390_clear_bp_data(vcpu); 3805 3806 if (dbg->control & ~VALID_GUESTDBG_FLAGS) { 3807 rc = -EINVAL; 3808 goto out; 3809 } 3810 if (!sclp.has_gpere) { 3811 rc = -EINVAL; 3812 goto out; 3813 } 3814 3815 if (dbg->control & KVM_GUESTDBG_ENABLE) { 3816 vcpu->guest_debug = dbg->control; 3817 /* enforce guest PER */ 3818 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P); 3819 3820 if (dbg->control & KVM_GUESTDBG_USE_HW_BP) 3821 rc = kvm_s390_import_bp_data(vcpu, dbg); 3822 } else { 3823 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P); 3824 vcpu->arch.guestdbg.last_bp = 0; 3825 } 3826 3827 if (rc) { 3828 vcpu->guest_debug = 0; 3829 kvm_s390_clear_bp_data(vcpu); 3830 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P); 3831 } 3832 3833 out: 3834 vcpu_put(vcpu); 3835 return rc; 3836 } 3837 3838 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, 3839 struct kvm_mp_state *mp_state) 3840 { 3841 int ret; 3842 3843 vcpu_load(vcpu); 3844 3845 /* CHECK_STOP and LOAD are not supported yet */ 3846 ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED : 3847 KVM_MP_STATE_OPERATING; 3848 3849 vcpu_put(vcpu); 3850 return ret; 3851 } 3852 3853 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, 3854 struct kvm_mp_state *mp_state) 3855 { 3856 int rc = 0; 3857 3858 vcpu_load(vcpu); 3859 3860 /* user space knows about this interface - let it control the state */ 3861 kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm); 3862 3863 switch (mp_state->mp_state) { 3864 case KVM_MP_STATE_STOPPED: 3865 rc = kvm_s390_vcpu_stop(vcpu); 3866 break; 3867 case KVM_MP_STATE_OPERATING: 3868 rc = kvm_s390_vcpu_start(vcpu); 3869 break; 3870 case KVM_MP_STATE_LOAD: 3871 if (!kvm_s390_pv_cpu_is_protected(vcpu)) { 3872 rc = -ENXIO; 3873 break; 3874 } 3875 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD); 3876 break; 3877 case KVM_MP_STATE_CHECK_STOP: 3878 fallthrough; /* CHECK_STOP and LOAD are not supported yet */ 3879 default: 3880 rc = -ENXIO; 3881 } 3882 3883 vcpu_put(vcpu); 3884 return rc; 3885 } 3886 3887 static bool ibs_enabled(struct kvm_vcpu *vcpu) 3888 { 3889 return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS); 3890 } 3891 3892 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) 3893 { 3894 retry: 3895 kvm_s390_vcpu_request_handled(vcpu); 3896 if (!kvm_request_pending(vcpu)) 3897 return 0; 3898 /* 3899 * If the guest prefix changed, re-arm the ipte notifier for the 3900 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock. 3901 * This ensures that the ipte instruction for this request has 3902 * already finished. We might race against a second unmapper that 3903 * wants to set the blocking bit. Lets just retry the request loop. 
3904 */ 3905 if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) { 3906 int rc; 3907 rc = gmap_mprotect_notify(vcpu->arch.gmap, 3908 kvm_s390_get_prefix(vcpu), 3909 PAGE_SIZE * 2, PROT_WRITE); 3910 if (rc) { 3911 kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu); 3912 return rc; 3913 } 3914 goto retry; 3915 } 3916 3917 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) { 3918 vcpu->arch.sie_block->ihcpu = 0xffff; 3919 goto retry; 3920 } 3921 3922 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) { 3923 if (!ibs_enabled(vcpu)) { 3924 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1); 3925 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS); 3926 } 3927 goto retry; 3928 } 3929 3930 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) { 3931 if (ibs_enabled(vcpu)) { 3932 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0); 3933 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS); 3934 } 3935 goto retry; 3936 } 3937 3938 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) { 3939 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC; 3940 goto retry; 3941 } 3942 3943 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) { 3944 /* 3945 * Disable CMM virtualization; we will emulate the ESSA 3946 * instruction manually, in order to provide additional 3947 * functionalities needed for live migration. 3948 */ 3949 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA; 3950 goto retry; 3951 } 3952 3953 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) { 3954 /* 3955 * Re-enable CMM virtualization if CMMA is available and 3956 * CMM has been used. 3957 */ 3958 if ((vcpu->kvm->arch.use_cmma) && 3959 (vcpu->kvm->mm->context.uses_cmm)) 3960 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA; 3961 goto retry; 3962 } 3963 3964 /* nothing to do, just clear the request */ 3965 kvm_clear_request(KVM_REQ_UNHALT, vcpu); 3966 /* we left the vsie handler, nothing to do, just clear the request */ 3967 kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu); 3968 3969 return 0; 3970 } 3971 3972 static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) 3973 { 3974 struct kvm_vcpu *vcpu; 3975 union tod_clock clk; 3976 unsigned long i; 3977 3978 preempt_disable(); 3979 3980 store_tod_clock_ext(&clk); 3981 3982 kvm->arch.epoch = gtod->tod - clk.tod; 3983 kvm->arch.epdx = 0; 3984 if (test_kvm_facility(kvm, 139)) { 3985 kvm->arch.epdx = gtod->epoch_idx - clk.ei; 3986 if (kvm->arch.epoch > gtod->tod) 3987 kvm->arch.epdx -= 1; 3988 } 3989 3990 kvm_s390_vcpu_block_all(kvm); 3991 kvm_for_each_vcpu(i, vcpu, kvm) { 3992 vcpu->arch.sie_block->epoch = kvm->arch.epoch; 3993 vcpu->arch.sie_block->epdx = kvm->arch.epdx; 3994 } 3995 3996 kvm_s390_vcpu_unblock_all(kvm); 3997 preempt_enable(); 3998 } 3999 4000 void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) 4001 { 4002 mutex_lock(&kvm->lock); 4003 __kvm_s390_set_tod_clock(kvm, gtod); 4004 mutex_unlock(&kvm->lock); 4005 } 4006 4007 int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) 4008 { 4009 if (!mutex_trylock(&kvm->lock)) 4010 return 0; 4011 __kvm_s390_set_tod_clock(kvm, gtod); 4012 mutex_unlock(&kvm->lock); 4013 return 1; 4014 } 4015 4016 /** 4017 * kvm_arch_fault_in_page - fault-in guest page if necessary 4018 * @vcpu: The corresponding virtual cpu 4019 * @gpa: Guest physical address 4020 * @writable: Whether the page should be writable or not 4021 * 4022 * Make sure that a guest page has been faulted-in on the host. 4023 * 4024 * Return: Zero on success, negative error code otherwise. 
4025 */ 4026 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable) 4027 { 4028 return gmap_fault(vcpu->arch.gmap, gpa, 4029 writable ? FAULT_FLAG_WRITE : 0); 4030 } 4031 4032 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token, 4033 unsigned long token) 4034 { 4035 struct kvm_s390_interrupt inti; 4036 struct kvm_s390_irq irq; 4037 4038 if (start_token) { 4039 irq.u.ext.ext_params2 = token; 4040 irq.type = KVM_S390_INT_PFAULT_INIT; 4041 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq)); 4042 } else { 4043 inti.type = KVM_S390_INT_PFAULT_DONE; 4044 inti.parm64 = token; 4045 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti)); 4046 } 4047 } 4048 4049 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, 4050 struct kvm_async_pf *work) 4051 { 4052 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token); 4053 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token); 4054 4055 return true; 4056 } 4057 4058 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, 4059 struct kvm_async_pf *work) 4060 { 4061 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token); 4062 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token); 4063 } 4064 4065 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, 4066 struct kvm_async_pf *work) 4067 { 4068 /* s390 will always inject the page directly */ 4069 } 4070 4071 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu) 4072 { 4073 /* 4074 * s390 will always inject the page directly, 4075 * but we still want check_async_completion to cleanup 4076 */ 4077 return true; 4078 } 4079 4080 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu) 4081 { 4082 hva_t hva; 4083 struct kvm_arch_async_pf arch; 4084 4085 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 4086 return false; 4087 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) != 4088 vcpu->arch.pfault_compare) 4089 return false; 4090 if (psw_extint_disabled(vcpu)) 4091 return false; 4092 if (kvm_s390_vcpu_has_irq(vcpu, 0)) 4093 return false; 4094 if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK)) 4095 return false; 4096 if (!vcpu->arch.gmap->pfault_enabled) 4097 return false; 4098 4099 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr)); 4100 hva += current->thread.gmap_addr & ~PAGE_MASK; 4101 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8)) 4102 return false; 4103 4104 return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch); 4105 } 4106 4107 static int vcpu_pre_run(struct kvm_vcpu *vcpu) 4108 { 4109 int rc, cpuflags; 4110 4111 /* 4112 * On s390 notifications for arriving pages will be delivered directly 4113 * to the guest but the house keeping for completed pfaults is 4114 * handled outside the worker. 
4115 */ 4116 kvm_check_async_pf_completion(vcpu); 4117 4118 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14]; 4119 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15]; 4120 4121 if (need_resched()) 4122 schedule(); 4123 4124 if (!kvm_is_ucontrol(vcpu->kvm)) { 4125 rc = kvm_s390_deliver_pending_interrupts(vcpu); 4126 if (rc) 4127 return rc; 4128 } 4129 4130 rc = kvm_s390_handle_requests(vcpu); 4131 if (rc) 4132 return rc; 4133 4134 if (guestdbg_enabled(vcpu)) { 4135 kvm_s390_backup_guest_per_regs(vcpu); 4136 kvm_s390_patch_guest_per_regs(vcpu); 4137 } 4138 4139 clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask); 4140 4141 vcpu->arch.sie_block->icptcode = 0; 4142 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags); 4143 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags); 4144 trace_kvm_s390_sie_enter(vcpu, cpuflags); 4145 4146 return 0; 4147 } 4148 4149 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) 4150 { 4151 struct kvm_s390_pgm_info pgm_info = { 4152 .code = PGM_ADDRESSING, 4153 }; 4154 u8 opcode, ilen; 4155 int rc; 4156 4157 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); 4158 trace_kvm_s390_sie_fault(vcpu); 4159 4160 /* 4161 * We want to inject an addressing exception, which is defined as a 4162 * suppressing or terminating exception. However, since we came here 4163 * by a DAT access exception, the PSW still points to the faulting 4164 * instruction since DAT exceptions are nullifying. So we've got 4165 * to look up the current opcode to get the length of the instruction 4166 * to be able to forward the PSW. 4167 */ 4168 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1); 4169 ilen = insn_length(opcode); 4170 if (rc < 0) { 4171 return rc; 4172 } else if (rc) { 4173 /* Instruction-Fetching Exceptions - we can't detect the ilen. 4174 * Forward by arbitrary ilc, injection will take care of 4175 * nullification if necessary. 
4176 */ 4177 pgm_info = vcpu->arch.pgm; 4178 ilen = 4; 4179 } 4180 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID; 4181 kvm_s390_forward_psw(vcpu, ilen); 4182 return kvm_s390_inject_prog_irq(vcpu, &pgm_info); 4183 } 4184 4185 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) 4186 { 4187 struct mcck_volatile_info *mcck_info; 4188 struct sie_page *sie_page; 4189 4190 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", 4191 vcpu->arch.sie_block->icptcode); 4192 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); 4193 4194 if (guestdbg_enabled(vcpu)) 4195 kvm_s390_restore_guest_per_regs(vcpu); 4196 4197 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14; 4198 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15; 4199 4200 if (exit_reason == -EINTR) { 4201 VCPU_EVENT(vcpu, 3, "%s", "machine check"); 4202 sie_page = container_of(vcpu->arch.sie_block, 4203 struct sie_page, sie_block); 4204 mcck_info = &sie_page->mcck_info; 4205 kvm_s390_reinject_machine_check(vcpu, mcck_info); 4206 return 0; 4207 } 4208 4209 if (vcpu->arch.sie_block->icptcode > 0) { 4210 int rc = kvm_handle_sie_intercept(vcpu); 4211 4212 if (rc != -EOPNOTSUPP) 4213 return rc; 4214 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC; 4215 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; 4216 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; 4217 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; 4218 return -EREMOTE; 4219 } else if (exit_reason != -EFAULT) { 4220 vcpu->stat.exit_null++; 4221 return 0; 4222 } else if (kvm_is_ucontrol(vcpu->kvm)) { 4223 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; 4224 vcpu->run->s390_ucontrol.trans_exc_code = 4225 current->thread.gmap_addr; 4226 vcpu->run->s390_ucontrol.pgm_code = 0x10; 4227 return -EREMOTE; 4228 } else if (current->thread.gmap_pfault) { 4229 trace_kvm_s390_major_guest_pfault(vcpu); 4230 current->thread.gmap_pfault = 0; 4231 if (kvm_arch_setup_async_pf(vcpu)) 4232 return 0; 4233 vcpu->stat.pfault_sync++; 4234 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1); 4235 } 4236 return vcpu_post_run_fault_in_sie(vcpu); 4237 } 4238 4239 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK) 4240 static int __vcpu_run(struct kvm_vcpu *vcpu) 4241 { 4242 int rc, exit_reason; 4243 struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block; 4244 4245 /* 4246 * We try to hold kvm->srcu during most of vcpu_run (except when run- 4247 * ning the guest), so that memslots (and other stuff) are protected 4248 */ 4249 kvm_vcpu_srcu_read_lock(vcpu); 4250 4251 do { 4252 rc = vcpu_pre_run(vcpu); 4253 if (rc) 4254 break; 4255 4256 kvm_vcpu_srcu_read_unlock(vcpu); 4257 /* 4258 * As PF_VCPU will be used in fault handler, between 4259 * guest_enter and guest_exit should be no uaccess. 4260 */ 4261 local_irq_disable(); 4262 guest_enter_irqoff(); 4263 __disable_cpu_timer_accounting(vcpu); 4264 local_irq_enable(); 4265 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4266 memcpy(sie_page->pv_grregs, 4267 vcpu->run->s.regs.gprs, 4268 sizeof(sie_page->pv_grregs)); 4269 } 4270 if (test_cpu_flag(CIF_FPU)) 4271 load_fpu_regs(); 4272 exit_reason = sie64a(vcpu->arch.sie_block, 4273 vcpu->run->s.regs.gprs); 4274 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4275 memcpy(vcpu->run->s.regs.gprs, 4276 sie_page->pv_grregs, 4277 sizeof(sie_page->pv_grregs)); 4278 /* 4279 * We're not allowed to inject interrupts on intercepts 4280 * that leave the guest state in an "in-between" state 4281 * where the next SIE entry will do a continuation. 
4282 * Fence interrupts in our "internal" PSW. 4283 */ 4284 if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR || 4285 vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) { 4286 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK; 4287 } 4288 } 4289 local_irq_disable(); 4290 __enable_cpu_timer_accounting(vcpu); 4291 guest_exit_irqoff(); 4292 local_irq_enable(); 4293 kvm_vcpu_srcu_read_lock(vcpu); 4294 4295 rc = vcpu_post_run(vcpu, exit_reason); 4296 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc); 4297 4298 kvm_vcpu_srcu_read_unlock(vcpu); 4299 return rc; 4300 } 4301 4302 static void sync_regs_fmt2(struct kvm_vcpu *vcpu) 4303 { 4304 struct kvm_run *kvm_run = vcpu->run; 4305 struct runtime_instr_cb *riccb; 4306 struct gs_cb *gscb; 4307 4308 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb; 4309 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb; 4310 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; 4311 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; 4312 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) { 4313 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr; 4314 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp; 4315 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea; 4316 } 4317 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) { 4318 vcpu->arch.pfault_token = kvm_run->s.regs.pft; 4319 vcpu->arch.pfault_select = kvm_run->s.regs.pfs; 4320 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc; 4321 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 4322 kvm_clear_async_pf_completion_queue(vcpu); 4323 } 4324 if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) { 4325 vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318; 4326 vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc; 4327 VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc); 4328 } 4329 /* 4330 * If userspace sets the riccb (e.g. after migration) to a valid state, 4331 * we should enable RI here instead of doing the lazy enablement. 4332 */ 4333 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) && 4334 test_kvm_facility(vcpu->kvm, 64) && 4335 riccb->v && 4336 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) { 4337 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)"); 4338 vcpu->arch.sie_block->ecb3 |= ECB3_RI; 4339 } 4340 /* 4341 * If userspace sets the gscb (e.g. after migration) to non-zero, 4342 * we should enable GS here instead of doing the lazy enablement. 4343 */ 4344 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) && 4345 test_kvm_facility(vcpu->kvm, 133) && 4346 gscb->gssm && 4347 !vcpu->arch.gs_enabled) { 4348 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)"); 4349 vcpu->arch.sie_block->ecb |= ECB_GS; 4350 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; 4351 vcpu->arch.gs_enabled = 1; 4352 } 4353 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) && 4354 test_kvm_facility(vcpu->kvm, 82)) { 4355 vcpu->arch.sie_block->fpf &= ~FPF_BPBC; 4356 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? 
FPF_BPBC : 0; 4357 } 4358 if (MACHINE_HAS_GS) { 4359 preempt_disable(); 4360 __ctl_set_bit(2, 4); 4361 if (current->thread.gs_cb) { 4362 vcpu->arch.host_gscb = current->thread.gs_cb; 4363 save_gs_cb(vcpu->arch.host_gscb); 4364 } 4365 if (vcpu->arch.gs_enabled) { 4366 current->thread.gs_cb = (struct gs_cb *) 4367 &vcpu->run->s.regs.gscb; 4368 restore_gs_cb(current->thread.gs_cb); 4369 } 4370 preempt_enable(); 4371 } 4372 /* SIE will load etoken directly from SDNX and therefore kvm_run */ 4373 } 4374 4375 static void sync_regs(struct kvm_vcpu *vcpu) 4376 { 4377 struct kvm_run *kvm_run = vcpu->run; 4378 4379 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) 4380 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); 4381 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) { 4382 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128); 4383 /* some control register changes require a tlb flush */ 4384 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 4385 } 4386 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) { 4387 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm); 4388 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc; 4389 } 4390 save_access_regs(vcpu->arch.host_acrs); 4391 restore_access_regs(vcpu->run->s.regs.acrs); 4392 /* save host (userspace) fprs/vrs */ 4393 save_fpu_regs(); 4394 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc; 4395 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs; 4396 if (MACHINE_HAS_VX) 4397 current->thread.fpu.regs = vcpu->run->s.regs.vrs; 4398 else 4399 current->thread.fpu.regs = vcpu->run->s.regs.fprs; 4400 current->thread.fpu.fpc = vcpu->run->s.regs.fpc; 4401 if (test_fp_ctl(current->thread.fpu.fpc)) 4402 /* User space provided an invalid FPC, let's clear it */ 4403 current->thread.fpu.fpc = 0; 4404 4405 /* Sync fmt2 only data */ 4406 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) { 4407 sync_regs_fmt2(vcpu); 4408 } else { 4409 /* 4410 * In several places we have to modify our internal view to 4411 * not do things that are disallowed by the ultravisor. For 4412 * example we must not inject interrupts after specific exits 4413 * (e.g. 112 prefix page not secure). We do this by turning 4414 * off the machine check, external and I/O interrupt bits 4415 * of our PSW copy. To avoid getting validity intercepts, we 4416 * do only accept the condition code from userspace. 
4417 */ 4418 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC; 4419 vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask & 4420 PSW_MASK_CC; 4421 } 4422 4423 kvm_run->kvm_dirty_regs = 0; 4424 } 4425 4426 static void store_regs_fmt2(struct kvm_vcpu *vcpu) 4427 { 4428 struct kvm_run *kvm_run = vcpu->run; 4429 4430 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr; 4431 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp; 4432 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea; 4433 kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC; 4434 kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val; 4435 if (MACHINE_HAS_GS) { 4436 preempt_disable(); 4437 __ctl_set_bit(2, 4); 4438 if (vcpu->arch.gs_enabled) 4439 save_gs_cb(current->thread.gs_cb); 4440 current->thread.gs_cb = vcpu->arch.host_gscb; 4441 restore_gs_cb(vcpu->arch.host_gscb); 4442 if (!vcpu->arch.host_gscb) 4443 __ctl_clear_bit(2, 4); 4444 vcpu->arch.host_gscb = NULL; 4445 preempt_enable(); 4446 } 4447 /* SIE will save etoken directly into SDNX and therefore kvm_run */ 4448 } 4449 4450 static void store_regs(struct kvm_vcpu *vcpu) 4451 { 4452 struct kvm_run *kvm_run = vcpu->run; 4453 4454 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask; 4455 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; 4456 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu); 4457 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128); 4458 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu); 4459 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc; 4460 kvm_run->s.regs.pft = vcpu->arch.pfault_token; 4461 kvm_run->s.regs.pfs = vcpu->arch.pfault_select; 4462 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare; 4463 save_access_regs(vcpu->run->s.regs.acrs); 4464 restore_access_regs(vcpu->arch.host_acrs); 4465 /* Save guest register state */ 4466 save_fpu_regs(); 4467 vcpu->run->s.regs.fpc = current->thread.fpu.fpc; 4468 /* Restore will be done lazily at return */ 4469 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc; 4470 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs; 4471 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) 4472 store_regs_fmt2(vcpu); 4473 } 4474 4475 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) 4476 { 4477 struct kvm_run *kvm_run = vcpu->run; 4478 int rc; 4479 4480 if (kvm_run->immediate_exit) 4481 return -EINTR; 4482 4483 if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS || 4484 kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS) 4485 return -EINVAL; 4486 4487 vcpu_load(vcpu); 4488 4489 if (guestdbg_exit_pending(vcpu)) { 4490 kvm_s390_prepare_debug_exit(vcpu); 4491 rc = 0; 4492 goto out; 4493 } 4494 4495 kvm_sigset_activate(vcpu); 4496 4497 /* 4498 * no need to check the return value of vcpu_start as it can only have 4499 * an error for protvirt, but protvirt means user cpu state 4500 */ 4501 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) { 4502 kvm_s390_vcpu_start(vcpu); 4503 } else if (is_vcpu_stopped(vcpu)) { 4504 pr_err_ratelimited("can't run stopped vcpu %d\n", 4505 vcpu->vcpu_id); 4506 rc = -EINVAL; 4507 goto out; 4508 } 4509 4510 sync_regs(vcpu); 4511 enable_cpu_timer_accounting(vcpu); 4512 4513 might_fault(); 4514 rc = __vcpu_run(vcpu); 4515 4516 if (signal_pending(current) && !rc) { 4517 kvm_run->exit_reason = KVM_EXIT_INTR; 4518 rc = -EINTR; 4519 } 4520 4521 if (guestdbg_exit_pending(vcpu) && !rc) { 4522 kvm_s390_prepare_debug_exit(vcpu); 4523 rc = 0; 4524 } 4525 4526 if (rc == -EREMOTE) { 4527 /* userspace support is needed, kvm_run has been prepared */ 4528 rc = 0; 4529 } 4530 4531 
disable_cpu_timer_accounting(vcpu); 4532 store_regs(vcpu); 4533 4534 kvm_sigset_deactivate(vcpu); 4535 4536 vcpu->stat.exit_userspace++; 4537 out: 4538 vcpu_put(vcpu); 4539 return rc; 4540 } 4541 4542 /* 4543 * store status at address 4544 * we have two special cases: 4545 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit 4546 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix 4547 */ 4548 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa) 4549 { 4550 unsigned char archmode = 1; 4551 freg_t fprs[NUM_FPRS]; 4552 unsigned int px; 4553 u64 clkcomp, cputm; 4554 int rc; 4555 4556 px = kvm_s390_get_prefix(vcpu); 4557 if (gpa == KVM_S390_STORE_STATUS_NOADDR) { 4558 if (write_guest_abs(vcpu, 163, &archmode, 1)) 4559 return -EFAULT; 4560 gpa = 0; 4561 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) { 4562 if (write_guest_real(vcpu, 163, &archmode, 1)) 4563 return -EFAULT; 4564 gpa = px; 4565 } else 4566 gpa -= __LC_FPREGS_SAVE_AREA; 4567 4568 /* manually convert vector registers if necessary */ 4569 if (MACHINE_HAS_VX) { 4570 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs); 4571 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, 4572 fprs, 128); 4573 } else { 4574 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, 4575 vcpu->run->s.regs.fprs, 128); 4576 } 4577 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA, 4578 vcpu->run->s.regs.gprs, 128); 4579 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA, 4580 &vcpu->arch.sie_block->gpsw, 16); 4581 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA, 4582 &px, 4); 4583 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA, 4584 &vcpu->run->s.regs.fpc, 4); 4585 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA, 4586 &vcpu->arch.sie_block->todpr, 4); 4587 cputm = kvm_s390_get_cpu_timer(vcpu); 4588 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA, 4589 &cputm, 8); 4590 clkcomp = vcpu->arch.sie_block->ckc >> 8; 4591 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA, 4592 &clkcomp, 8); 4593 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA, 4594 &vcpu->run->s.regs.acrs, 64); 4595 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA, 4596 &vcpu->arch.sie_block->gcr, 128); 4597 return rc ? -EFAULT : 0; 4598 } 4599 4600 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) 4601 { 4602 /* 4603 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy 4604 * switch in the run ioctl.
Let's update our copies before we save 4605 * it into the save area 4606 */ 4607 save_fpu_regs(); 4608 vcpu->run->s.regs.fpc = current->thread.fpu.fpc; 4609 save_access_regs(vcpu->run->s.regs.acrs); 4610 4611 return kvm_s390_store_status_unloaded(vcpu, addr); 4612 } 4613 4614 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu) 4615 { 4616 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu); 4617 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu); 4618 } 4619 4620 static void __disable_ibs_on_all_vcpus(struct kvm *kvm) 4621 { 4622 unsigned long i; 4623 struct kvm_vcpu *vcpu; 4624 4625 kvm_for_each_vcpu(i, vcpu, kvm) { 4626 __disable_ibs_on_vcpu(vcpu); 4627 } 4628 } 4629 4630 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu) 4631 { 4632 if (!sclp.has_ibs) 4633 return; 4634 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu); 4635 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu); 4636 } 4637 4638 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu) 4639 { 4640 int i, online_vcpus, r = 0, started_vcpus = 0; 4641 4642 if (!is_vcpu_stopped(vcpu)) 4643 return 0; 4644 4645 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1); 4646 /* Only one cpu at a time may enter/leave the STOPPED state. */ 4647 spin_lock(&vcpu->kvm->arch.start_stop_lock); 4648 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); 4649 4650 /* Let's tell the UV that we want to change into the operating state */ 4651 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4652 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR); 4653 if (r) { 4654 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 4655 return r; 4656 } 4657 } 4658 4659 for (i = 0; i < online_vcpus; i++) { 4660 if (!is_vcpu_stopped(kvm_get_vcpu(vcpu->kvm, i))) 4661 started_vcpus++; 4662 } 4663 4664 if (started_vcpus == 0) { 4665 /* we're the only active VCPU -> speed it up */ 4666 __enable_ibs_on_vcpu(vcpu); 4667 } else if (started_vcpus == 1) { 4668 /* 4669 * As we are starting a second VCPU, we have to disable 4670 * the IBS facility on all VCPUs to remove potentially 4671 * outstanding ENABLE requests. 4672 */ 4673 __disable_ibs_on_all_vcpus(vcpu->kvm); 4674 } 4675 4676 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED); 4677 /* 4678 * The real PSW might have changed due to a RESTART interpreted by the 4679 * ultravisor. We block all interrupts and let the next sie exit 4680 * refresh our view. 4681 */ 4682 if (kvm_s390_pv_cpu_is_protected(vcpu)) 4683 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK; 4684 /* 4685 * Another VCPU might have used IBS while we were offline. 4686 * Let's play safe and flush the VCPU at startup. 4687 */ 4688 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 4689 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 4690 return 0; 4691 } 4692 4693 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu) 4694 { 4695 int i, online_vcpus, r = 0, started_vcpus = 0; 4696 struct kvm_vcpu *started_vcpu = NULL; 4697 4698 if (is_vcpu_stopped(vcpu)) 4699 return 0; 4700 4701 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0); 4702 /* Only one cpu at a time may enter/leave the STOPPED state. 
*/ 4703 spin_lock(&vcpu->kvm->arch.start_stop_lock); 4704 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); 4705 4706 /* Let's tell the UV that we want to change into the stopped state */ 4707 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4708 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP); 4709 if (r) { 4710 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 4711 return r; 4712 } 4713 } 4714 4715 /* 4716 * Set the VCPU to STOPPED and THEN clear the interrupt flag, 4717 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders 4718 * have been fully processed. This will ensure that the VCPU 4719 * is kept BUSY if another VCPU is inquiring with SIGP SENSE. 4720 */ 4721 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED); 4722 kvm_s390_clear_stop_irq(vcpu); 4723 4724 __disable_ibs_on_vcpu(vcpu); 4725 4726 for (i = 0; i < online_vcpus; i++) { 4727 struct kvm_vcpu *tmp = kvm_get_vcpu(vcpu->kvm, i); 4728 4729 if (!is_vcpu_stopped(tmp)) { 4730 started_vcpus++; 4731 started_vcpu = tmp; 4732 } 4733 } 4734 4735 if (started_vcpus == 1) { 4736 /* 4737 * As we only have one VCPU left, we want to enable the 4738 * IBS facility for that VCPU to speed it up. 4739 */ 4740 __enable_ibs_on_vcpu(started_vcpu); 4741 } 4742 4743 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 4744 return 0; 4745 } 4746 4747 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, 4748 struct kvm_enable_cap *cap) 4749 { 4750 int r; 4751 4752 if (cap->flags) 4753 return -EINVAL; 4754 4755 switch (cap->cap) { 4756 case KVM_CAP_S390_CSS_SUPPORT: 4757 if (!vcpu->kvm->arch.css_support) { 4758 vcpu->kvm->arch.css_support = 1; 4759 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support"); 4760 trace_kvm_s390_enable_css(vcpu->kvm); 4761 } 4762 r = 0; 4763 break; 4764 default: 4765 r = -EINVAL; 4766 break; 4767 } 4768 return r; 4769 } 4770 4771 static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu, 4772 struct kvm_s390_mem_op *mop) 4773 { 4774 void __user *uaddr = (void __user *)mop->buf; 4775 int r = 0; 4776 4777 if (mop->flags || !mop->size) 4778 return -EINVAL; 4779 if (mop->size + mop->sida_offset < mop->size) 4780 return -EINVAL; 4781 if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block)) 4782 return -E2BIG; 4783 if (!kvm_s390_pv_cpu_is_protected(vcpu)) 4784 return -EINVAL; 4785 4786 switch (mop->op) { 4787 case KVM_S390_MEMOP_SIDA_READ: 4788 if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) + 4789 mop->sida_offset), mop->size)) 4790 r = -EFAULT; 4791 4792 break; 4793 case KVM_S390_MEMOP_SIDA_WRITE: 4794 if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) + 4795 mop->sida_offset), uaddr, mop->size)) 4796 r = -EFAULT; 4797 break; 4798 } 4799 return r; 4800 } 4801 4802 static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu, 4803 struct kvm_s390_mem_op *mop) 4804 { 4805 void __user *uaddr = (void __user *)mop->buf; 4806 void *tmpbuf = NULL; 4807 int r = 0; 4808 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION 4809 | KVM_S390_MEMOP_F_CHECK_ONLY 4810 | KVM_S390_MEMOP_F_SKEY_PROTECTION; 4811 4812 if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size) 4813 return -EINVAL; 4814 if (mop->size > MEM_OP_MAX_SIZE) 4815 return -E2BIG; 4816 if (kvm_s390_pv_cpu_is_protected(vcpu)) 4817 return -EINVAL; 4818 if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) { 4819 if (access_key_invalid(mop->key)) 4820 return -EINVAL; 4821 } else { 4822 mop->key = 0; 4823 } 4824 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) { 4825 tmpbuf = vmalloc(mop->size); 4826 if (!tmpbuf) 4827 return 
-ENOMEM; 4828 } 4829 4830 switch (mop->op) { 4831 case KVM_S390_MEMOP_LOGICAL_READ: 4832 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { 4833 r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, 4834 GACC_FETCH, mop->key); 4835 break; 4836 } 4837 r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf, 4838 mop->size, mop->key); 4839 if (r == 0) { 4840 if (copy_to_user(uaddr, tmpbuf, mop->size)) 4841 r = -EFAULT; 4842 } 4843 break; 4844 case KVM_S390_MEMOP_LOGICAL_WRITE: 4845 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { 4846 r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, 4847 GACC_STORE, mop->key); 4848 break; 4849 } 4850 if (copy_from_user(tmpbuf, uaddr, mop->size)) { 4851 r = -EFAULT; 4852 break; 4853 } 4854 r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf, 4855 mop->size, mop->key); 4856 break; 4857 } 4858 4859 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0) 4860 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); 4861 4862 vfree(tmpbuf); 4863 return r; 4864 } 4865 4866 static long kvm_s390_vcpu_memsida_op(struct kvm_vcpu *vcpu, 4867 struct kvm_s390_mem_op *mop) 4868 { 4869 int r, srcu_idx; 4870 4871 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 4872 4873 switch (mop->op) { 4874 case KVM_S390_MEMOP_LOGICAL_READ: 4875 case KVM_S390_MEMOP_LOGICAL_WRITE: 4876 r = kvm_s390_vcpu_mem_op(vcpu, mop); 4877 break; 4878 case KVM_S390_MEMOP_SIDA_READ: 4879 case KVM_S390_MEMOP_SIDA_WRITE: 4880 /* we are locked against sida going away by the vcpu->mutex */ 4881 r = kvm_s390_vcpu_sida_op(vcpu, mop); 4882 break; 4883 default: 4884 r = -EINVAL; 4885 } 4886 4887 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); 4888 return r; 4889 } 4890 4891 long kvm_arch_vcpu_async_ioctl(struct file *filp, 4892 unsigned int ioctl, unsigned long arg) 4893 { 4894 struct kvm_vcpu *vcpu = filp->private_data; 4895 void __user *argp = (void __user *)arg; 4896 4897 switch (ioctl) { 4898 case KVM_S390_IRQ: { 4899 struct kvm_s390_irq s390irq; 4900 4901 if (copy_from_user(&s390irq, argp, sizeof(s390irq))) 4902 return -EFAULT; 4903 return kvm_s390_inject_vcpu(vcpu, &s390irq); 4904 } 4905 case KVM_S390_INTERRUPT: { 4906 struct kvm_s390_interrupt s390int; 4907 struct kvm_s390_irq s390irq = {}; 4908 4909 if (copy_from_user(&s390int, argp, sizeof(s390int))) 4910 return -EFAULT; 4911 if (s390int_to_s390irq(&s390int, &s390irq)) 4912 return -EINVAL; 4913 return kvm_s390_inject_vcpu(vcpu, &s390irq); 4914 } 4915 } 4916 return -ENOIOCTLCMD; 4917 } 4918 4919 long kvm_arch_vcpu_ioctl(struct file *filp, 4920 unsigned int ioctl, unsigned long arg) 4921 { 4922 struct kvm_vcpu *vcpu = filp->private_data; 4923 void __user *argp = (void __user *)arg; 4924 int idx; 4925 long r; 4926 u16 rc, rrc; 4927 4928 vcpu_load(vcpu); 4929 4930 switch (ioctl) { 4931 case KVM_S390_STORE_STATUS: 4932 idx = srcu_read_lock(&vcpu->kvm->srcu); 4933 r = kvm_s390_store_status_unloaded(vcpu, arg); 4934 srcu_read_unlock(&vcpu->kvm->srcu, idx); 4935 break; 4936 case KVM_S390_SET_INITIAL_PSW: { 4937 psw_t psw; 4938 4939 r = -EFAULT; 4940 if (copy_from_user(&psw, argp, sizeof(psw))) 4941 break; 4942 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw); 4943 break; 4944 } 4945 case KVM_S390_CLEAR_RESET: 4946 r = 0; 4947 kvm_arch_vcpu_ioctl_clear_reset(vcpu); 4948 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4949 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), 4950 UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc); 4951 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x", 4952 rc, rrc); 4953 } 4954 break; 4955 case KVM_S390_INITIAL_RESET: 
4956 r = 0; 4957 kvm_arch_vcpu_ioctl_initial_reset(vcpu); 4958 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4959 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), 4960 UVC_CMD_CPU_RESET_INITIAL, 4961 &rc, &rrc); 4962 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x", 4963 rc, rrc); 4964 } 4965 break; 4966 case KVM_S390_NORMAL_RESET: 4967 r = 0; 4968 kvm_arch_vcpu_ioctl_normal_reset(vcpu); 4969 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4970 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), 4971 UVC_CMD_CPU_RESET, &rc, &rrc); 4972 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x", 4973 rc, rrc); 4974 } 4975 break; 4976 case KVM_SET_ONE_REG: 4977 case KVM_GET_ONE_REG: { 4978 struct kvm_one_reg reg; 4979 r = -EINVAL; 4980 if (kvm_s390_pv_cpu_is_protected(vcpu)) 4981 break; 4982 r = -EFAULT; 4983 if (copy_from_user(&reg, argp, sizeof(reg))) 4984 break; 4985 if (ioctl == KVM_SET_ONE_REG) 4986 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg); 4987 else 4988 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg); 4989 break; 4990 } 4991 #ifdef CONFIG_KVM_S390_UCONTROL 4992 case KVM_S390_UCAS_MAP: { 4993 struct kvm_s390_ucas_mapping ucasmap; 4994 4995 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) { 4996 r = -EFAULT; 4997 break; 4998 } 4999 5000 if (!kvm_is_ucontrol(vcpu->kvm)) { 5001 r = -EINVAL; 5002 break; 5003 } 5004 5005 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr, 5006 ucasmap.vcpu_addr, ucasmap.length); 5007 break; 5008 } 5009 case KVM_S390_UCAS_UNMAP: { 5010 struct kvm_s390_ucas_mapping ucasmap; 5011 5012 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) { 5013 r = -EFAULT; 5014 break; 5015 } 5016 5017 if (!kvm_is_ucontrol(vcpu->kvm)) { 5018 r = -EINVAL; 5019 break; 5020 } 5021 5022 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr, 5023 ucasmap.length); 5024 break; 5025 } 5026 #endif 5027 case KVM_S390_VCPU_FAULT: { 5028 r = gmap_fault(vcpu->arch.gmap, arg, 0); 5029 break; 5030 } 5031 case KVM_ENABLE_CAP: 5032 { 5033 struct kvm_enable_cap cap; 5034 r = -EFAULT; 5035 if (copy_from_user(&cap, argp, sizeof(cap))) 5036 break; 5037 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); 5038 break; 5039 } 5040 case KVM_S390_MEM_OP: { 5041 struct kvm_s390_mem_op mem_op; 5042 5043 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0) 5044 r = kvm_s390_vcpu_memsida_op(vcpu, &mem_op); 5045 else 5046 r = -EFAULT; 5047 break; 5048 } 5049 case KVM_S390_SET_IRQ_STATE: { 5050 struct kvm_s390_irq_state irq_state; 5051 5052 r = -EFAULT; 5053 if (copy_from_user(&irq_state, argp, sizeof(irq_state))) 5054 break; 5055 if (irq_state.len > VCPU_IRQS_MAX_BUF || 5056 irq_state.len == 0 || 5057 irq_state.len % sizeof(struct kvm_s390_irq) > 0) { 5058 r = -EINVAL; 5059 break; 5060 } 5061 /* do not use irq_state.flags, it will break old QEMUs */ 5062 r = kvm_s390_set_irq_state(vcpu, 5063 (void __user *) irq_state.buf, 5064 irq_state.len); 5065 break; 5066 } 5067 case KVM_S390_GET_IRQ_STATE: { 5068 struct kvm_s390_irq_state irq_state; 5069 5070 r = -EFAULT; 5071 if (copy_from_user(&irq_state, argp, sizeof(irq_state))) 5072 break; 5073 if (irq_state.len == 0) { 5074 r = -EINVAL; 5075 break; 5076 } 5077 /* do not use irq_state.flags, it will break old QEMUs */ 5078 r = kvm_s390_get_irq_state(vcpu, 5079 (__u8 __user *) irq_state.buf, 5080 irq_state.len); 5081 break; 5082 } 5083 default: 5084 r = -ENOTTY; 5085 } 5086 5087 vcpu_put(vcpu); 5088 return r; 5089 } 5090 5091 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) 5092 { 5093 #ifdef CONFIG_KVM_S390_UCONTROL 5094
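/* For user-controlled VMs userspace may mmap() the SIE control block through the vcpu fd at KVM_S390_SIE_PAGE_OFFSET; any other access faults with SIGBUS. */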
if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET) 5095 && (kvm_is_ucontrol(vcpu->kvm))) { 5096 vmf->page = virt_to_page(vcpu->arch.sie_block); 5097 get_page(vmf->page); 5098 return 0; 5099 } 5100 #endif 5101 return VM_FAULT_SIGBUS; 5102 } 5103 5104 /* Section: memory related */ 5105 int kvm_arch_prepare_memory_region(struct kvm *kvm, 5106 const struct kvm_memory_slot *old, 5107 struct kvm_memory_slot *new, 5108 enum kvm_mr_change change) 5109 { 5110 gpa_t size; 5111 5112 /* When we are protected, we should not change the memory slots */ 5113 if (kvm_s390_pv_get_handle(kvm)) 5114 return -EINVAL; 5115 5116 if (change == KVM_MR_DELETE || change == KVM_MR_FLAGS_ONLY) 5117 return 0; 5118 5119 /* A few sanity checks. We can have memory slots which have to be 5120 located/ended at a segment boundary (1MB). The memory in userland is 5121 ok to be fragmented into various different vmas. It is okay to mmap() 5122 and munmap() stuff in this slot after doing this call at any time */ 5123 5124 if (new->userspace_addr & 0xffffful) 5125 return -EINVAL; 5126 5127 size = new->npages * PAGE_SIZE; 5128 if (size & 0xffffful) 5129 return -EINVAL; 5130 5131 if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit) 5132 return -EINVAL; 5133 5134 return 0; 5135 } 5136 5137 void kvm_arch_commit_memory_region(struct kvm *kvm, 5138 struct kvm_memory_slot *old, 5139 const struct kvm_memory_slot *new, 5140 enum kvm_mr_change change) 5141 { 5142 int rc = 0; 5143 5144 switch (change) { 5145 case KVM_MR_DELETE: 5146 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE, 5147 old->npages * PAGE_SIZE); 5148 break; 5149 case KVM_MR_MOVE: 5150 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE, 5151 old->npages * PAGE_SIZE); 5152 if (rc) 5153 break; 5154 fallthrough; 5155 case KVM_MR_CREATE: 5156 rc = gmap_map_segment(kvm->arch.gmap, new->userspace_addr, 5157 new->base_gfn * PAGE_SIZE, 5158 new->npages * PAGE_SIZE); 5159 break; 5160 case KVM_MR_FLAGS_ONLY: 5161 break; 5162 default: 5163 WARN(1, "Unknown KVM MR CHANGE: %d\n", change); 5164 } 5165 if (rc) 5166 pr_warn("failed to commit memory region\n"); 5167 return; 5168 } 5169 5170 static inline unsigned long nonhyp_mask(int i) 5171 { 5172 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30; 5173 5174 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4); 5175 } 5176 5177 static int __init kvm_s390_init(void) 5178 { 5179 int i; 5180 5181 if (!sclp.has_sief2) { 5182 pr_info("SIE is not available\n"); 5183 return -ENODEV; 5184 } 5185 5186 if (nested && hpage) { 5187 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n"); 5188 return -EINVAL; 5189 } 5190 5191 for (i = 0; i < 16; i++) 5192 kvm_s390_fac_base[i] |= 5193 stfle_fac_list[i] & nonhyp_mask(i); 5194 5195 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); 5196 } 5197 5198 static void __exit kvm_s390_exit(void) 5199 { 5200 kvm_exit(); 5201 } 5202 5203 module_init(kvm_s390_init); 5204 module_exit(kvm_s390_exit); 5205 5206 /* 5207 * Enable autoloading of the kvm module. 5208 * Note that we add the module alias here instead of virt/kvm/kvm_main.c 5209 * since x86 takes a different approach. 5210 */ 5211 #include <linux/miscdevice.h> 5212 MODULE_ALIAS_MISCDEV(KVM_MINOR); 5213 MODULE_ALIAS("devname:kvm"); 5214
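/* Module load path in short: kvm_s390_init() refuses to load without the SIE interpretation facility (sclp.has_sief2), rejects the nested+hpage combination, merges the host facility bits allowed by nonhyp_mask() into kvm_s390_fac_base and then registers with the generic KVM core via kvm_init(), which provides the /dev/kvm misc device referenced by the module aliases above. */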