/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/pgtable.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/facility.h>
#include <asm/sclp.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU

struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running",
	  VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "diagnose_10", VCPU_STAT(diagnose_10) },
	{ "diagnose_44", VCPU_STAT(diagnose_44) },
	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
	{ NULL }
};

unsigned long *vfacilities;
static struct gmap_notifier gmap_notifier;

/* test availability of vfacility */
int test_vfacility(unsigned long nr)
{
	return __test_facility(nr, (void *) vfacilities);
}

/* Section: not file related */
int kvm_arch_hardware_enable(void *garbage)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

void kvm_arch_hardware_disable(void *garbage)
{
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);

int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_ipte_notifier(&gmap_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_ipte_notifier(&gmap_notifier);
}

void kvm_arch_check_processor_compat(void *rtn)
{
}

int kvm_arch_init(void *opaque)
{
	return 0;
}

void kvm_arch_exit(void)
{
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}

int kvm_dev_ioctl_check_extension(long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IRQFD:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
		r = 1;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_MAX_VCPUS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	default:
		r = 0;
	}
	return r;
}

static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				    struct kvm_memory_slot *memslot)
{
	gfn_t cur_gfn, last_gfn;
	unsigned long address;
	struct gmap *gmap = kvm->arch.gmap;

	down_read(&gmap->mm->mmap_sem);
	/* Loop over all guest pages */
	last_gfn = memslot->base_gfn + memslot->npages;
	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
		address = gfn_to_hva_memslot(memslot, cur_gfn);

		if (gmap_test_and_clear_dirty(address, gmap))
			mark_page_dirty(kvm, cur_gfn);
	}
	up_read(&gmap->mm->mmap_sem);
}

/* Section: vm related */
/*
 * Get (and clear) the dirty memory log for a memory slot.
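 * The dirty bits are first synchronized from the gmap into the memslot's
 * dirty bitmap (kvm_s390_sync_dirty_log), then handed to the generic
 * helper and cleared.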
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	memslot = id_to_memslot(kvm->memslots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}

static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

static int kvm_s390_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;
	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus) == 0) {
			kvm->arch.use_cmma = 1;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		page_table_reset_pgste(kvm->arch.gmap->mm, 0, TASK_SIZE, false);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_mem_control(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	return -ENXIO;
}

static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			kvm_set_irq_routing(kvm, &routing, 0, 0);
			r = 0;
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}

int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	int rc;
	char debug_name[16];
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	rc = -ENOMEM;

	kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
	if (!kvm->arch.sca)
		goto out_err;
	spin_lock(&kvm_lock);
	sca_offset = (sca_offset + 16) & 0x7f0;
	kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
	spin_unlock(&kvm_lock);

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_nodbf;

	spin_lock_init(&kvm->arch.float_int.lock);
	INIT_LIST_HEAD(&kvm->arch.float_int.list);
	init_waitqueue_head(&kvm->arch.ipte_wq);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "%s", "vm created");

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
	} else {
		kvm->arch.gmap = gmap_alloc(current->mm);
		if (!kvm->arch.gmap)
			goto out_nogmap;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.css_support = 0;
	kvm->arch.use_irqchip = 0;

	spin_lock_init(&kvm->arch.start_stop_lock);

	return 0;
out_nogmap:
	debug_unregister(kvm->arch.dbf);
out_nodbf:
	free_page((unsigned long)(kvm->arch.sca));
out_err:
	return rc;
}

void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	kvm_s390_clear_local_irqs(vcpu);
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		clear_bit(63 - vcpu->vcpu_id,
			  (unsigned long *) &vcpu->kvm->arch.sca->mcn);
		if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
		    (__u64) vcpu->arch.sie_block)
			vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
	}
	smp_mb();

	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_free(vcpu->arch.gmap);

	if (kvm_s390_cmma_enabled(vcpu->kvm))
		kvm_s390_vcpu_unsetup_cmma(vcpu);
	free_page((unsigned long)(vcpu->arch.sie_block));

	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}

static void kvm_free_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}

void kvm_arch_sync_events(struct kvm *kvm)
{
}

void kvm_arch_destroy_vm(struct kvm *kvm)
{
	kvm_free_vcpus(kvm);
	free_page((unsigned long)(kvm->arch.sca));
	debug_unregister(kvm->arch.dbf);
	if (!kvm_is_ucontrol(kvm))
		gmap_free(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
}

/* Section: vcpu related */
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->arch.gmap = gmap_alloc(current->mm);
		if (!vcpu->arch.gmap)
			return -ENOMEM;
		vcpu->arch.gmap->private = vcpu->kvm;
		return 0;
	}

	vcpu->arch.gmap = vcpu->kvm->arch.gmap;
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
				    KVM_SYNC_GPRS |
				    KVM_SYNC_ACRS |
				    KVM_SYNC_CRS;
	return 0;
}

void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
	/* Nothing to do */
}

void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	save_fp_ctl(&vcpu->arch.host_fpregs.fpc);
	save_fp_regs(vcpu->arch.host_fpregs.fprs);
	save_access_regs(vcpu->arch.host_acrs);
	restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
	restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	gmap_enable(vcpu->arch.gmap);
	atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	gmap_disable(vcpu->arch.gmap);
	save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
	save_fp_regs(vcpu->arch.guest_fpregs.fprs);
	save_access_regs(vcpu->run->s.regs.acrs);
	restore_fp_ctl(&vcpu->arch.host_fpregs.fpc);
	restore_fp_regs(vcpu->arch.host_fpregs.fprs);
	restore_access_regs(vcpu->arch.host_acrs);
}

static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
	/* this equals initial cpu reset in pop, but we don't switch to ESA */
	vcpu->arch.sie_block->gpsw.mask = 0UL;
	vcpu->arch.sie_block->gpsw.addr = 0UL;
	kvm_s390_set_prefix(vcpu, 0);
	vcpu->arch.sie_block->cputm = 0UL;
	vcpu->arch.sie_block->ckc = 0UL;
	vcpu->arch.sie_block->todpr = 0;
	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
	vcpu->arch.sie_block->gcr[0] = 0xE0UL;
	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
	vcpu->arch.guest_fpregs.fpc = 0;
	asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
	vcpu->arch.sie_block->gbea = 1;
	vcpu->arch.sie_block->pp = 0;
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
		kvm_s390_vcpu_stop(vcpu);
	kvm_s390_clear_local_irqs(vcpu);
}

int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	return 0;
}

void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
	free_page(vcpu->arch.sie_block->cbrlo);
	vcpu->arch.sie_block->cbrlo = 0;
}

int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
{
	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
	if (!vcpu->arch.sie_block->cbrlo)
		return -ENOMEM;

	vcpu->arch.sie_block->ecb2 |= 0x80;
	vcpu->arch.sie_block->ecb2 &= ~0x08;
	return 0;
}
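
/*
 * Initial setup of the SIE control block: CPU state flags, execution
 * controls (ecb/ecb2/eca), the facility list pointer, interception
 * controls and, if enabled, the CMMA buffer and the clock-comparator
 * wakeup timer.
 */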
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
	int rc = 0;

	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
						    CPUSTAT_SM |
						    CPUSTAT_STOPPED |
						    CPUSTAT_GED);
	vcpu->arch.sie_block->ecb = 6;
	if (test_vfacility(50) && test_vfacility(73))
		vcpu->arch.sie_block->ecb |= 0x10;

	vcpu->arch.sie_block->ecb2 = 8;
	vcpu->arch.sie_block->eca = 0xD1002000U;
	if (sclp_has_siif())
		vcpu->arch.sie_block->eca |= 1;
	vcpu->arch.sie_block->fac = (int) (long) vfacilities;
	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE |
				      ICTL_TPROT;

	if (kvm_s390_cmma_enabled(vcpu->kvm)) {
		rc = kvm_s390_vcpu_setup_cmma(vcpu);
		if (rc)
			return rc;
	}
	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
	tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet,
		     (unsigned long) vcpu);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
	get_cpu_id(&vcpu->arch.cpu_id);
	vcpu->arch.cpu_id.version = 0xff;
	return rc;
}

struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
				      unsigned int id)
{
	struct kvm_vcpu *vcpu;
	struct sie_page *sie_page;
	int rc = -EINVAL;

	if (id >= KVM_MAX_VCPUS)
		goto out;

	rc = -ENOMEM;

	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu)
		goto out;

	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
	if (!sie_page)
		goto out_free_cpu;

	vcpu->arch.sie_block = &sie_page->sie_block;
	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

	vcpu->arch.sie_block->icpua = id;
	if (!kvm_is_ucontrol(kvm)) {
		if (!kvm->arch.sca) {
			WARN_ON_ONCE(1);
			goto out_free_cpu;
		}
		if (!kvm->arch.sca->cpu[id].sda)
			kvm->arch.sca->cpu[id].sda =
				(__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh =
			(__u32)(((__u64)kvm->arch.sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
		set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
	}

	spin_lock_init(&vcpu->arch.local_int.lock);
	INIT_LIST_HEAD(&vcpu->arch.local_int.list);
	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
	vcpu->arch.local_int.wq = &vcpu->wq;
	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;

	rc = kvm_vcpu_init(vcpu, kvm, id);
	if (rc)
		goto out_free_sie_block;
	VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
		 vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);

	return vcpu;
out_free_sie_block:
	free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(rc);
}

int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_cpu_has_interrupt(vcpu);
}

void s390_vcpu_block(struct kvm_vcpu *vcpu)
{
	atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

void s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
	atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

/*
 * Kick a guest cpu out of SIE and wait until SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 */
void exit_sie(struct kvm_vcpu *vcpu)
{
	atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}

/* Kick a guest cpu out of SIE and prevent SIE-reentry */
void exit_sie_sync(struct kvm_vcpu *vcpu)
{
	s390_vcpu_block(vcpu);
	exit_sie(vcpu);
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
{
	int i;
	struct kvm *kvm = gmap->private;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		/* match against both prefix pages */
		if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
			exit_sie_sync(vcpu);
		}
	}
}

int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}

static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = put_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = put_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = put_user(vcpu->arch.sie_block->cputm,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = put_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = put_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = put_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = put_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = put_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = put_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}

static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = get_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = get_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = get_user(vcpu->arch.sie_block->cputm,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = get_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = get_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = get_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = get_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = get_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = get_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}

static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
	restore_access_regs(vcpu->run->s.regs.acrs);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
	return 0;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	if (test_fp_ctl(fpu->fpc))
		return -EINVAL;
	memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
	vcpu->arch.guest_fpregs.fpc = fpu->fpc;
	restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
	restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->arch.guest_fpregs.fpc;
	return 0;
}

static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!is_vcpu_stopped(vcpu))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}

#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	int rc = 0;

	vcpu->guest_debug = 0;
	kvm_s390_clear_bp_data(vcpu);

	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
		return -EINVAL;

	if (dbg->control & KVM_GUESTDBG_ENABLE) {
		vcpu->guest_debug = dbg->control;
		/* enforce guest PER */
		atomic_set_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);

		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
			rc = kvm_s390_import_bp_data(vcpu, dbg);
	} else {
		atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
		vcpu->arch.guestdbg.last_bp = 0;
	}

	if (rc) {
		vcpu->guest_debug = 0;
		kvm_s390_clear_bp_data(vcpu);
		atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
	}

	return rc;
}

int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	/* CHECK_STOP and LOAD are not supported yet */
	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
				       KVM_MP_STATE_OPERATING;
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int rc = 0;

	/* user space knows about this interface - let it control the state */
	vcpu->kvm->arch.user_cpu_state_ctrl = 1;

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_STOPPED:
		kvm_s390_vcpu_stop(vcpu);
		break;
	case KVM_MP_STATE_OPERATING:
		kvm_s390_vcpu_start(vcpu);
		break;
	case KVM_MP_STATE_LOAD:
	case KVM_MP_STATE_CHECK_STOP:
		/* fall through - CHECK_STOP and LOAD are not supported yet */
	default:
		rc = -ENXIO;
	}

	return rc;
}

bool kvm_s390_cmma_enabled(struct kvm *kvm)
{
	if (!MACHINE_IS_LPAR)
		return false;
	/* only enable for z10 and later */
	if (!MACHINE_HAS_EDAT1)
		return false;
	if (!kvm->arch.use_cmma)
		return false;
	return true;
}

static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
}

static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	s390_vcpu_unblock(vcpu);
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Lets just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;
		rc = gmap_ipte_notify(vcpu->arch.gmap,
				      kvm_s390_get_prefix(vcpu),
				      PAGE_SIZE * 2);
		if (rc)
			return rc;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			atomic_set_mask(CPUSTAT_IBS,
					&vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			atomic_clear_mask(CPUSTAT_IBS,
					  &vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	/* nothing to do, just clear the request */
	clear_bit(KVM_REQ_UNHALT, &vcpu->requests);

	return 0;
}

/**
 * kvm_arch_fault_in_page - fault-in guest page if necessary
 * @vcpu: The corresponding virtual cpu
 * @gpa: Guest physical address
 * @writable: Whether the page should be writable or not
 *
 * Make sure that a guest page has been faulted-in on the host.
 *
 * Return: Zero on success, negative error code otherwise.
 */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
{
	struct mm_struct *mm = current->mm;
	hva_t hva;
	long rc;

	hva = gmap_fault(gpa, vcpu->arch.gmap);
	if (IS_ERR_VALUE(hva))
		return (long)hva;
	down_read(&mm->mmap_sem);
	rc = get_user_pages(current, mm, hva, 1, writable, 0, NULL, NULL);
	up_read(&mm->mmap_sem);

	return rc < 0 ? rc : 0;
}

static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				      unsigned long token)
{
	struct kvm_s390_interrupt inti;
	inti.parm64 = token;

	if (start_token) {
		inti.type = KVM_S390_INT_PFAULT_INIT;
		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &inti));
	} else {
		inti.type = KVM_S390_INT_PFAULT_DONE;
		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}
}

void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to cleanup
	 */
	return true;
}

static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
	hva_t hva;
	struct kvm_arch_async_pf arch;
	int rc;

	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
		return 0;
	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
		return 0;
	if (psw_extint_disabled(vcpu))
		return 0;
	if (kvm_cpu_has_interrupt(vcpu))
		return 0;
	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
		return 0;
	if (!vcpu->arch.gmap->pfault_enabled)
		return 0;

	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
	hva += current->thread.gmap_addr & ~PAGE_MASK;
	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
		return 0;

	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
	return rc;
}

static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the house keeping for completed pfaults is
	 * handled outside the worker.
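	 * kvm_check_async_pf_completion() below does this housekeeping and
	 * injects the PFAULT_DONE interrupts for requests that have completed.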
	 */
	kvm_check_async_pf_completion(vcpu);

	memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);

	if (need_resched())
		schedule();

	if (test_cpu_flag(CIF_MCCK_PENDING))
		s390_handle_mcck();

	if (!kvm_is_ucontrol(vcpu->kvm))
		kvm_s390_deliver_pending_interrupts(vcpu);

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	if (guestdbg_enabled(vcpu)) {
		kvm_s390_backup_guest_per_regs(vcpu);
		kvm_s390_patch_guest_per_regs(vcpu);
	}

	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);

	return 0;
}

static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
	int rc = -1;

	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

	if (guestdbg_enabled(vcpu))
		kvm_s390_restore_guest_per_regs(vcpu);

	if (exit_reason >= 0) {
		rc = 0;
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
		vcpu->run->s390_ucontrol.trans_exc_code =
						current->thread.gmap_addr;
		vcpu->run->s390_ucontrol.pgm_code = 0x10;
		rc = -EREMOTE;

	} else if (current->thread.gmap_pfault) {
		trace_kvm_s390_major_guest_pfault(vcpu);
		current->thread.gmap_pfault = 0;
		if (kvm_arch_setup_async_pf(vcpu)) {
			rc = 0;
		} else {
			gpa_t gpa = current->thread.gmap_addr;
			rc = kvm_arch_fault_in_page(vcpu, gpa, 1);
		}
	}

	if (rc == -1) {
		VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
		trace_kvm_s390_sie_fault(vcpu);
		rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
	}

	memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);

	if (rc == 0) {
		if (kvm_is_ucontrol(vcpu->kvm))
			/* Don't exit for host interrupts. */
			rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0;
		else
			rc = kvm_handle_sie_intercept(vcpu);
	}

	return rc;
}

static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
	 * ning the guest), so that memslots (and other stuff) are protected
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in fault handler, between
		 * guest_enter and guest_exit should be no uaccess.
		 */
		preempt_disable();
		kvm_guest_enter();
		preempt_enable();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		kvm_guest_exit();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}

int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int rc;
	sigset_t sigsaved;

	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		return 0;
	}

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("kvm-s390: can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		return -EINVAL;
	}

	switch (kvm_run->exit_reason) {
	case KVM_EXIT_S390_SIEIC:
	case KVM_EXIT_UNKNOWN:
	case KVM_EXIT_INTR:
	case KVM_EXIT_S390_RESET:
	case KVM_EXIT_S390_UCONTROL:
	case KVM_EXIT_S390_TSCH:
	case KVM_EXIT_DEBUG:
		break;
	default:
		BUG();
	}

	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) {
		kvm_run->kvm_dirty_regs &= ~KVM_SYNC_PREFIX;
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		kvm_run->kvm_dirty_regs &= ~KVM_SYNC_CRS;
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	}

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EOPNOTSUPP) {
		/* intercept cannot be handled in-kernel, prepare kvm-run */
		kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
		kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
		kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* intercept was handled, but userspace support is needed
		 * kvm_run has been prepared by the handler */
		rc = 0;
	}

	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);

	vcpu->stat.exit_userspace++;
	return rc;
}

/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	unsigned int px;
	u64 clkcomp;
	int rc;

	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = SAVE_AREA_BASE;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE);
	}
	rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs),
			     vcpu->arch.guest_fpregs.fprs, 128);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs),
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw),
			      &vcpu->arch.sie_block->gpsw, 16);
	px = kvm_s390_get_prefix(vcpu);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg),
			      &px, 4);
	rc |= write_guest_abs(vcpu,
			      gpa + offsetof(struct save_area, fp_ctrl_reg),
			      &vcpu->arch.guest_fpregs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg),
			      &vcpu->arch.sie_block->todpr, 4);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer),
			      &vcpu->arch.sie_block->cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp),
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs),
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs),
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}

int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * copying in vcpu load/put. Lets update our copies before we save
	 * it into the save area
	 */
	save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
	save_fp_regs(vcpu->arch.guest_fpregs.fprs);
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}

static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_make_request(KVM_REQ_DISABLE_IBS, vcpu);
	exit_sie_sync(vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_make_request(KVM_REQ_ENABLE_IBS, vcpu);
	exit_sie_sync(vcpu);
}

void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock_bh(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	vcpu->arch.sie_block->ihcpu = 0xffff;
	spin_unlock_bh(&vcpu->kvm->arch.start_stop_lock);
	return;
}

void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock_bh(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* Need to lock access to action_bits to avoid a SIGP race condition */
	spin_lock(&vcpu->arch.local_int.lock);
	atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);

	/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
	vcpu->arch.local_int.action_bits &=
		~(ACTION_STOP_ON_STOP | ACTION_STORE_ON_STOP);
	spin_unlock(&vcpu->arch.local_int.lock);

	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock_bh(&vcpu->kvm->arch.start_stop_lock);
	return;
}

static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vcpu(vcpu, &s390int);
		break;
	}
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;
		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(arg, vcpu->arch.gmap);
		if (!IS_ERR_VALUE(r))
			r = 0;
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}

int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
			   struct kvm_memory_slot *dont)
{
}

int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}

void kvm_arch_memslots_updated(struct kvm *kvm)
{
}

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. We can have memory slots which have to be
	   located/ended at a segment boundary (1MB). The memory in userland is
	   ok to be fragmented into various different vmas. It is okay to mmap()
	   and munmap() stuff in this slot after doing this call at any time */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	return 0;
}

void kvm_arch_commit_memory_region(struct kvm *kvm,
				   struct kvm_userspace_memory_region *mem,
				   const struct kvm_memory_slot *old,
				   enum kvm_mr_change change)
{
	int rc;

	/* If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
			      mem->guest_phys_addr, mem->memory_size);
	if (rc)
		printk(KERN_WARNING "kvm-s390: failed to commit memory region\n");
	return;
}

void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
}

void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
				   struct kvm_memory_slot *slot)
{
}

static int __init kvm_s390_init(void)
{
	int ret;
	ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
	if (ret)
		return ret;

	/*
	 * guests can ask for up to 255+1 double words, we need a full page
	 * to hold the maximum amount of facilities. On the other hand, we
	 * only set facilities that are known to work in KVM.
	 */
	vfacilities = (unsigned long *) get_zeroed_page(GFP_KERNEL|GFP_DMA);
	if (!vfacilities) {
		kvm_exit();
		return -ENOMEM;
	}
	memcpy(vfacilities, S390_lowcore.stfle_fac_list, 16);
	vfacilities[0] &= 0xff82fff3f4fc2000UL;
	vfacilities[1] &= 0x005c000000000000UL;
	return 0;
}

static void __exit kvm_s390_exit(void)
{
	free_page((unsigned long) vfacilities);
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");