1 /* 2 * hosting zSeries kernel virtual machines 3 * 4 * Copyright IBM Corp. 2008, 2009 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License (version 2 only) 8 * as published by the Free Software Foundation. 9 * 10 * Author(s): Carsten Otte <cotte@de.ibm.com> 11 * Christian Borntraeger <borntraeger@de.ibm.com> 12 * Heiko Carstens <heiko.carstens@de.ibm.com> 13 * Christian Ehrhardt <ehrhardt@de.ibm.com> 14 * Jason J. Herne <jjherne@us.ibm.com> 15 */ 16 17 #include <linux/compiler.h> 18 #include <linux/err.h> 19 #include <linux/fs.h> 20 #include <linux/hrtimer.h> 21 #include <linux/init.h> 22 #include <linux/kvm.h> 23 #include <linux/kvm_host.h> 24 #include <linux/module.h> 25 #include <linux/slab.h> 26 #include <linux/timer.h> 27 #include <asm/asm-offsets.h> 28 #include <asm/lowcore.h> 29 #include <asm/pgtable.h> 30 #include <asm/nmi.h> 31 #include <asm/switch_to.h> 32 #include <asm/facility.h> 33 #include <asm/sclp.h> 34 #include "kvm-s390.h" 35 #include "gaccess.h" 36 37 #define CREATE_TRACE_POINTS 38 #include "trace.h" 39 #include "trace-s390.h" 40 41 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU 42 43 struct kvm_stats_debugfs_item debugfs_entries[] = { 44 { "userspace_handled", VCPU_STAT(exit_userspace) }, 45 { "exit_null", VCPU_STAT(exit_null) }, 46 { "exit_validity", VCPU_STAT(exit_validity) }, 47 { "exit_stop_request", VCPU_STAT(exit_stop_request) }, 48 { "exit_external_request", VCPU_STAT(exit_external_request) }, 49 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) }, 50 { "exit_instruction", VCPU_STAT(exit_instruction) }, 51 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) }, 52 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) }, 53 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) }, 54 { "instruction_lctl", VCPU_STAT(instruction_lctl) }, 55 { "instruction_stctl", VCPU_STAT(instruction_stctl) }, 56 { "instruction_stctg", VCPU_STAT(instruction_stctg) }, 57 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) }, 58 { "deliver_external_call", VCPU_STAT(deliver_external_call) }, 59 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) }, 60 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) }, 61 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) }, 62 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) }, 63 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) }, 64 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) }, 65 { "exit_wait_state", VCPU_STAT(exit_wait_state) }, 66 { "instruction_pfmf", VCPU_STAT(instruction_pfmf) }, 67 { "instruction_stidp", VCPU_STAT(instruction_stidp) }, 68 { "instruction_spx", VCPU_STAT(instruction_spx) }, 69 { "instruction_stpx", VCPU_STAT(instruction_stpx) }, 70 { "instruction_stap", VCPU_STAT(instruction_stap) }, 71 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) }, 72 { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) }, 73 { "instruction_stsch", VCPU_STAT(instruction_stsch) }, 74 { "instruction_chsc", VCPU_STAT(instruction_chsc) }, 75 { "instruction_essa", VCPU_STAT(instruction_essa) }, 76 { "instruction_stsi", VCPU_STAT(instruction_stsi) }, 77 { "instruction_stfl", VCPU_STAT(instruction_stfl) }, 78 { "instruction_tprot", VCPU_STAT(instruction_tprot) }, 79 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) }, 80 { "instruction_sigp_sense_running", 
VCPU_STAT(instruction_sigp_sense_running) }, 81 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) }, 82 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) }, 83 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) }, 84 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) }, 85 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) }, 86 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) }, 87 { "diagnose_10", VCPU_STAT(diagnose_10) }, 88 { "diagnose_44", VCPU_STAT(diagnose_44) }, 89 { "diagnose_9c", VCPU_STAT(diagnose_9c) }, 90 { NULL } 91 }; 92 93 unsigned long *vfacilities; 94 static struct gmap_notifier gmap_notifier; 95 96 /* test availability of vfacility */ 97 int test_vfacility(unsigned long nr) 98 { 99 return __test_facility(nr, (void *) vfacilities); 100 } 101 102 /* Section: not file related */ 103 int kvm_arch_hardware_enable(void *garbage) 104 { 105 /* every s390 is virtualization enabled ;-) */ 106 return 0; 107 } 108 109 void kvm_arch_hardware_disable(void *garbage) 110 { 111 } 112 113 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address); 114 115 int kvm_arch_hardware_setup(void) 116 { 117 gmap_notifier.notifier_call = kvm_gmap_notifier; 118 gmap_register_ipte_notifier(&gmap_notifier); 119 return 0; 120 } 121 122 void kvm_arch_hardware_unsetup(void) 123 { 124 gmap_unregister_ipte_notifier(&gmap_notifier); 125 } 126 127 void kvm_arch_check_processor_compat(void *rtn) 128 { 129 } 130 131 int kvm_arch_init(void *opaque) 132 { 133 return 0; 134 } 135 136 void kvm_arch_exit(void) 137 { 138 } 139 140 /* Section: device related */ 141 long kvm_arch_dev_ioctl(struct file *filp, 142 unsigned int ioctl, unsigned long arg) 143 { 144 if (ioctl == KVM_S390_ENABLE_SIE) 145 return s390_enable_sie(); 146 return -EINVAL; 147 } 148 149 int kvm_dev_ioctl_check_extension(long ext) 150 { 151 int r; 152 153 switch (ext) { 154 case KVM_CAP_S390_PSW: 155 case KVM_CAP_S390_GMAP: 156 case KVM_CAP_SYNC_MMU: 157 #ifdef CONFIG_KVM_S390_UCONTROL 158 case KVM_CAP_S390_UCONTROL: 159 #endif 160 case KVM_CAP_ASYNC_PF: 161 case KVM_CAP_SYNC_REGS: 162 case KVM_CAP_ONE_REG: 163 case KVM_CAP_ENABLE_CAP: 164 case KVM_CAP_S390_CSS_SUPPORT: 165 case KVM_CAP_IRQFD: 166 case KVM_CAP_IOEVENTFD: 167 case KVM_CAP_DEVICE_CTRL: 168 case KVM_CAP_ENABLE_CAP_VM: 169 case KVM_CAP_VM_ATTRIBUTES: 170 r = 1; 171 break; 172 case KVM_CAP_NR_VCPUS: 173 case KVM_CAP_MAX_VCPUS: 174 r = KVM_MAX_VCPUS; 175 break; 176 case KVM_CAP_NR_MEMSLOTS: 177 r = KVM_USER_MEM_SLOTS; 178 break; 179 case KVM_CAP_S390_COW: 180 r = MACHINE_HAS_ESOP; 181 break; 182 default: 183 r = 0; 184 } 185 return r; 186 } 187 188 static void kvm_s390_sync_dirty_log(struct kvm *kvm, 189 struct kvm_memory_slot *memslot) 190 { 191 gfn_t cur_gfn, last_gfn; 192 unsigned long address; 193 struct gmap *gmap = kvm->arch.gmap; 194 195 down_read(&gmap->mm->mmap_sem); 196 /* Loop over all guest pages */ 197 last_gfn = memslot->base_gfn + memslot->npages; 198 for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) { 199 address = gfn_to_hva_memslot(memslot, cur_gfn); 200 201 if (gmap_test_and_clear_dirty(address, gmap)) 202 mark_page_dirty(kvm, cur_gfn); 203 } 204 up_read(&gmap->mm->mmap_sem); 205 } 206 207 /* Section: vm related */ 208 /* 209 * Get (and clear) the dirty memory log for a memory slot. 
210 */ 211 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, 212 struct kvm_dirty_log *log) 213 { 214 int r; 215 unsigned long n; 216 struct kvm_memory_slot *memslot; 217 int is_dirty = 0; 218 219 mutex_lock(&kvm->slots_lock); 220 221 r = -EINVAL; 222 if (log->slot >= KVM_USER_MEM_SLOTS) 223 goto out; 224 225 memslot = id_to_memslot(kvm->memslots, log->slot); 226 r = -ENOENT; 227 if (!memslot->dirty_bitmap) 228 goto out; 229 230 kvm_s390_sync_dirty_log(kvm, memslot); 231 r = kvm_get_dirty_log(kvm, log, &is_dirty); 232 if (r) 233 goto out; 234 235 /* Clear the dirty log */ 236 if (is_dirty) { 237 n = kvm_dirty_bitmap_bytes(memslot); 238 memset(memslot->dirty_bitmap, 0, n); 239 } 240 r = 0; 241 out: 242 mutex_unlock(&kvm->slots_lock); 243 return r; 244 } 245 246 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) 247 { 248 int r; 249 250 if (cap->flags) 251 return -EINVAL; 252 253 switch (cap->cap) { 254 case KVM_CAP_S390_IRQCHIP: 255 kvm->arch.use_irqchip = 1; 256 r = 0; 257 break; 258 default: 259 r = -EINVAL; 260 break; 261 } 262 return r; 263 } 264 265 static int kvm_s390_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) 266 { 267 int ret; 268 unsigned int idx; 269 switch (attr->attr) { 270 case KVM_S390_VM_MEM_ENABLE_CMMA: 271 ret = -EBUSY; 272 mutex_lock(&kvm->lock); 273 if (atomic_read(&kvm->online_vcpus) == 0) { 274 kvm->arch.use_cmma = 1; 275 ret = 0; 276 } 277 mutex_unlock(&kvm->lock); 278 break; 279 case KVM_S390_VM_MEM_CLR_CMMA: 280 mutex_lock(&kvm->lock); 281 idx = srcu_read_lock(&kvm->srcu); 282 page_table_reset_pgste(kvm->arch.gmap->mm, 0, TASK_SIZE, false); 283 srcu_read_unlock(&kvm->srcu, idx); 284 mutex_unlock(&kvm->lock); 285 ret = 0; 286 break; 287 default: 288 ret = -ENXIO; 289 break; 290 } 291 return ret; 292 } 293 294 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr) 295 { 296 int ret; 297 298 switch (attr->group) { 299 case KVM_S390_VM_MEM_CTRL: 300 ret = kvm_s390_mem_control(kvm, attr); 301 break; 302 default: 303 ret = -ENXIO; 304 break; 305 } 306 307 return ret; 308 } 309 310 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr) 311 { 312 return -ENXIO; 313 } 314 315 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) 316 { 317 int ret; 318 319 switch (attr->group) { 320 case KVM_S390_VM_MEM_CTRL: 321 switch (attr->attr) { 322 case KVM_S390_VM_MEM_ENABLE_CMMA: 323 case KVM_S390_VM_MEM_CLR_CMMA: 324 ret = 0; 325 break; 326 default: 327 ret = -ENXIO; 328 break; 329 } 330 break; 331 default: 332 ret = -ENXIO; 333 break; 334 } 335 336 return ret; 337 } 338 339 long kvm_arch_vm_ioctl(struct file *filp, 340 unsigned int ioctl, unsigned long arg) 341 { 342 struct kvm *kvm = filp->private_data; 343 void __user *argp = (void __user *)arg; 344 struct kvm_device_attr attr; 345 int r; 346 347 switch (ioctl) { 348 case KVM_S390_INTERRUPT: { 349 struct kvm_s390_interrupt s390int; 350 351 r = -EFAULT; 352 if (copy_from_user(&s390int, argp, sizeof(s390int))) 353 break; 354 r = kvm_s390_inject_vm(kvm, &s390int); 355 break; 356 } 357 case KVM_ENABLE_CAP: { 358 struct kvm_enable_cap cap; 359 r = -EFAULT; 360 if (copy_from_user(&cap, argp, sizeof(cap))) 361 break; 362 r = kvm_vm_ioctl_enable_cap(kvm, &cap); 363 break; 364 } 365 case KVM_CREATE_IRQCHIP: { 366 struct kvm_irq_routing_entry routing; 367 368 r = -EINVAL; 369 if (kvm->arch.use_irqchip) { 370 /* Set up dummy routing. 
*/ 371 memset(&routing, 0, sizeof(routing)); 372 kvm_set_irq_routing(kvm, &routing, 0, 0); 373 r = 0; 374 } 375 break; 376 } 377 case KVM_SET_DEVICE_ATTR: { 378 r = -EFAULT; 379 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 380 break; 381 r = kvm_s390_vm_set_attr(kvm, &attr); 382 break; 383 } 384 case KVM_GET_DEVICE_ATTR: { 385 r = -EFAULT; 386 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 387 break; 388 r = kvm_s390_vm_get_attr(kvm, &attr); 389 break; 390 } 391 case KVM_HAS_DEVICE_ATTR: { 392 r = -EFAULT; 393 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 394 break; 395 r = kvm_s390_vm_has_attr(kvm, &attr); 396 break; 397 } 398 default: 399 r = -ENOTTY; 400 } 401 402 return r; 403 } 404 405 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) 406 { 407 int rc; 408 char debug_name[16]; 409 static unsigned long sca_offset; 410 411 rc = -EINVAL; 412 #ifdef CONFIG_KVM_S390_UCONTROL 413 if (type & ~KVM_VM_S390_UCONTROL) 414 goto out_err; 415 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN))) 416 goto out_err; 417 #else 418 if (type) 419 goto out_err; 420 #endif 421 422 rc = s390_enable_sie(); 423 if (rc) 424 goto out_err; 425 426 rc = -ENOMEM; 427 428 kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL); 429 if (!kvm->arch.sca) 430 goto out_err; 431 spin_lock(&kvm_lock); 432 sca_offset = (sca_offset + 16) & 0x7f0; 433 kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset); 434 spin_unlock(&kvm_lock); 435 436 sprintf(debug_name, "kvm-%u", current->pid); 437 438 kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long)); 439 if (!kvm->arch.dbf) 440 goto out_nodbf; 441 442 spin_lock_init(&kvm->arch.float_int.lock); 443 INIT_LIST_HEAD(&kvm->arch.float_int.list); 444 init_waitqueue_head(&kvm->arch.ipte_wq); 445 446 debug_register_view(kvm->arch.dbf, &debug_sprintf_view); 447 VM_EVENT(kvm, 3, "%s", "vm created"); 448 449 if (type & KVM_VM_S390_UCONTROL) { 450 kvm->arch.gmap = NULL; 451 } else { 452 kvm->arch.gmap = gmap_alloc(current->mm); 453 if (!kvm->arch.gmap) 454 goto out_nogmap; 455 kvm->arch.gmap->private = kvm; 456 kvm->arch.gmap->pfault_enabled = 0; 457 } 458 459 kvm->arch.css_support = 0; 460 kvm->arch.use_irqchip = 0; 461 462 spin_lock_init(&kvm->arch.start_stop_lock); 463 464 return 0; 465 out_nogmap: 466 debug_unregister(kvm->arch.dbf); 467 out_nodbf: 468 free_page((unsigned long)(kvm->arch.sca)); 469 out_err: 470 return rc; 471 } 472 473 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) 474 { 475 VCPU_EVENT(vcpu, 3, "%s", "free cpu"); 476 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id); 477 kvm_s390_clear_local_irqs(vcpu); 478 kvm_clear_async_pf_completion_queue(vcpu); 479 if (!kvm_is_ucontrol(vcpu->kvm)) { 480 clear_bit(63 - vcpu->vcpu_id, 481 (unsigned long *) &vcpu->kvm->arch.sca->mcn); 482 if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda == 483 (__u64) vcpu->arch.sie_block) 484 vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0; 485 } 486 smp_mb(); 487 488 if (kvm_is_ucontrol(vcpu->kvm)) 489 gmap_free(vcpu->arch.gmap); 490 491 if (kvm_s390_cmma_enabled(vcpu->kvm)) 492 kvm_s390_vcpu_unsetup_cmma(vcpu); 493 free_page((unsigned long)(vcpu->arch.sie_block)); 494 495 kvm_vcpu_uninit(vcpu); 496 kmem_cache_free(kvm_vcpu_cache, vcpu); 497 } 498 499 static void kvm_free_vcpus(struct kvm *kvm) 500 { 501 unsigned int i; 502 struct kvm_vcpu *vcpu; 503 504 kvm_for_each_vcpu(i, vcpu, kvm) 505 kvm_arch_vcpu_destroy(vcpu); 506 507 mutex_lock(&kvm->lock); 508 for (i = 0; i < atomic_read(&kvm->online_vcpus); 
i++) 509 kvm->vcpus[i] = NULL; 510 511 atomic_set(&kvm->online_vcpus, 0); 512 mutex_unlock(&kvm->lock); 513 } 514 515 void kvm_arch_sync_events(struct kvm *kvm) 516 { 517 } 518 519 void kvm_arch_destroy_vm(struct kvm *kvm) 520 { 521 kvm_free_vcpus(kvm); 522 free_page((unsigned long)(kvm->arch.sca)); 523 debug_unregister(kvm->arch.dbf); 524 if (!kvm_is_ucontrol(kvm)) 525 gmap_free(kvm->arch.gmap); 526 kvm_s390_destroy_adapters(kvm); 527 kvm_s390_clear_float_irqs(kvm); 528 } 529 530 /* Section: vcpu related */ 531 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) 532 { 533 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 534 kvm_clear_async_pf_completion_queue(vcpu); 535 if (kvm_is_ucontrol(vcpu->kvm)) { 536 vcpu->arch.gmap = gmap_alloc(current->mm); 537 if (!vcpu->arch.gmap) 538 return -ENOMEM; 539 vcpu->arch.gmap->private = vcpu->kvm; 540 return 0; 541 } 542 543 vcpu->arch.gmap = vcpu->kvm->arch.gmap; 544 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX | 545 KVM_SYNC_GPRS | 546 KVM_SYNC_ACRS | 547 KVM_SYNC_CRS; 548 return 0; 549 } 550 551 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) 552 { 553 /* Nothing todo */ 554 } 555 556 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 557 { 558 save_fp_ctl(&vcpu->arch.host_fpregs.fpc); 559 save_fp_regs(vcpu->arch.host_fpregs.fprs); 560 save_access_regs(vcpu->arch.host_acrs); 561 restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc); 562 restore_fp_regs(vcpu->arch.guest_fpregs.fprs); 563 restore_access_regs(vcpu->run->s.regs.acrs); 564 gmap_enable(vcpu->arch.gmap); 565 atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); 566 } 567 568 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 569 { 570 atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); 571 gmap_disable(vcpu->arch.gmap); 572 save_fp_ctl(&vcpu->arch.guest_fpregs.fpc); 573 save_fp_regs(vcpu->arch.guest_fpregs.fprs); 574 save_access_regs(vcpu->run->s.regs.acrs); 575 restore_fp_ctl(&vcpu->arch.host_fpregs.fpc); 576 restore_fp_regs(vcpu->arch.host_fpregs.fprs); 577 restore_access_regs(vcpu->arch.host_acrs); 578 } 579 580 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) 581 { 582 /* this equals initial cpu reset in pop, but we don't switch to ESA */ 583 vcpu->arch.sie_block->gpsw.mask = 0UL; 584 vcpu->arch.sie_block->gpsw.addr = 0UL; 585 kvm_s390_set_prefix(vcpu, 0); 586 vcpu->arch.sie_block->cputm = 0UL; 587 vcpu->arch.sie_block->ckc = 0UL; 588 vcpu->arch.sie_block->todpr = 0; 589 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64)); 590 vcpu->arch.sie_block->gcr[0] = 0xE0UL; 591 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL; 592 vcpu->arch.guest_fpregs.fpc = 0; 593 asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc)); 594 vcpu->arch.sie_block->gbea = 1; 595 vcpu->arch.sie_block->pp = 0; 596 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 597 kvm_clear_async_pf_completion_queue(vcpu); 598 kvm_s390_vcpu_stop(vcpu); 599 kvm_s390_clear_local_irqs(vcpu); 600 } 601 602 int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) 603 { 604 return 0; 605 } 606 607 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu) 608 { 609 free_page(vcpu->arch.sie_block->cbrlo); 610 vcpu->arch.sie_block->cbrlo = 0; 611 } 612 613 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu) 614 { 615 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL); 616 if (!vcpu->arch.sie_block->cbrlo) 617 return -ENOMEM; 618 619 vcpu->arch.sie_block->ecb2 |= 0x80; 620 vcpu->arch.sie_block->ecb2 &= ~0x08; 621 return 0; 622 } 623 624 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) 
625 { 626 int rc = 0; 627 628 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | 629 CPUSTAT_SM | 630 CPUSTAT_STOPPED | 631 CPUSTAT_GED); 632 vcpu->arch.sie_block->ecb = 6; 633 if (test_vfacility(50) && test_vfacility(73)) 634 vcpu->arch.sie_block->ecb |= 0x10; 635 636 vcpu->arch.sie_block->ecb2 = 8; 637 vcpu->arch.sie_block->eca = 0xD1002000U; 638 if (sclp_has_siif()) 639 vcpu->arch.sie_block->eca |= 1; 640 vcpu->arch.sie_block->fac = (int) (long) vfacilities; 641 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE | 642 ICTL_TPROT; 643 644 if (kvm_s390_cmma_enabled(vcpu->kvm)) { 645 rc = kvm_s390_vcpu_setup_cmma(vcpu); 646 if (rc) 647 return rc; 648 } 649 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); 650 tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet, 651 (unsigned long) vcpu); 652 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; 653 get_cpu_id(&vcpu->arch.cpu_id); 654 vcpu->arch.cpu_id.version = 0xff; 655 return rc; 656 } 657 658 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, 659 unsigned int id) 660 { 661 struct kvm_vcpu *vcpu; 662 struct sie_page *sie_page; 663 int rc = -EINVAL; 664 665 if (id >= KVM_MAX_VCPUS) 666 goto out; 667 668 rc = -ENOMEM; 669 670 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); 671 if (!vcpu) 672 goto out; 673 674 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL); 675 if (!sie_page) 676 goto out_free_cpu; 677 678 vcpu->arch.sie_block = &sie_page->sie_block; 679 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb; 680 681 vcpu->arch.sie_block->icpua = id; 682 if (!kvm_is_ucontrol(kvm)) { 683 if (!kvm->arch.sca) { 684 WARN_ON_ONCE(1); 685 goto out_free_cpu; 686 } 687 if (!kvm->arch.sca->cpu[id].sda) 688 kvm->arch.sca->cpu[id].sda = 689 (__u64) vcpu->arch.sie_block; 690 vcpu->arch.sie_block->scaoh = 691 (__u32)(((__u64)kvm->arch.sca) >> 32); 692 vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca; 693 set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn); 694 } 695 696 spin_lock_init(&vcpu->arch.local_int.lock); 697 INIT_LIST_HEAD(&vcpu->arch.local_int.list); 698 vcpu->arch.local_int.float_int = &kvm->arch.float_int; 699 vcpu->arch.local_int.wq = &vcpu->wq; 700 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags; 701 702 rc = kvm_vcpu_init(vcpu, kvm, id); 703 if (rc) 704 goto out_free_sie_block; 705 VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu, 706 vcpu->arch.sie_block); 707 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block); 708 709 return vcpu; 710 out_free_sie_block: 711 free_page((unsigned long)(vcpu->arch.sie_block)); 712 out_free_cpu: 713 kmem_cache_free(kvm_vcpu_cache, vcpu); 714 out: 715 return ERR_PTR(rc); 716 } 717 718 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 719 { 720 return kvm_cpu_has_interrupt(vcpu); 721 } 722 723 void s390_vcpu_block(struct kvm_vcpu *vcpu) 724 { 725 atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); 726 } 727 728 void s390_vcpu_unblock(struct kvm_vcpu *vcpu) 729 { 730 atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); 731 } 732 733 /* 734 * Kick a guest cpu out of SIE and wait until SIE is not running. 735 * If the CPU is not running (e.g. waiting as idle) the function will 736 * return immediately. 
*/ 737 void exit_sie(struct kvm_vcpu *vcpu) 738 { 739 atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags); 740 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE) 741 cpu_relax(); 742 } 743 744 /* Kick a guest cpu out of SIE and prevent SIE-reentry */ 745 void exit_sie_sync(struct kvm_vcpu *vcpu) 746 { 747 s390_vcpu_block(vcpu); 748 exit_sie(vcpu); 749 } 750 751 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address) 752 { 753 int i; 754 struct kvm *kvm = gmap->private; 755 struct kvm_vcpu *vcpu; 756 757 kvm_for_each_vcpu(i, vcpu, kvm) { 758 /* match against both prefix pages */ 759 if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) { 760 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address); 761 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); 762 exit_sie_sync(vcpu); 763 } 764 } 765 } 766 767 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) 768 { 769 /* kvm common code refers to this, but never calls it */ 770 BUG(); 771 return 0; 772 } 773 774 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, 775 struct kvm_one_reg *reg) 776 { 777 int r = -EINVAL; 778 779 switch (reg->id) { 780 case KVM_REG_S390_TODPR: 781 r = put_user(vcpu->arch.sie_block->todpr, 782 (u32 __user *)reg->addr); 783 break; 784 case KVM_REG_S390_EPOCHDIFF: 785 r = put_user(vcpu->arch.sie_block->epoch, 786 (u64 __user *)reg->addr); 787 break; 788 case KVM_REG_S390_CPU_TIMER: 789 r = put_user(vcpu->arch.sie_block->cputm, 790 (u64 __user *)reg->addr); 791 break; 792 case KVM_REG_S390_CLOCK_COMP: 793 r = put_user(vcpu->arch.sie_block->ckc, 794 (u64 __user *)reg->addr); 795 break; 796 case KVM_REG_S390_PFTOKEN: 797 r = put_user(vcpu->arch.pfault_token, 798 (u64 __user *)reg->addr); 799 break; 800 case KVM_REG_S390_PFCOMPARE: 801 r = put_user(vcpu->arch.pfault_compare, 802 (u64 __user *)reg->addr); 803 break; 804 case KVM_REG_S390_PFSELECT: 805 r = put_user(vcpu->arch.pfault_select, 806 (u64 __user *)reg->addr); 807 break; 808 case KVM_REG_S390_PP: 809 r = put_user(vcpu->arch.sie_block->pp, 810 (u64 __user *)reg->addr); 811 break; 812 case KVM_REG_S390_GBEA: 813 r = put_user(vcpu->arch.sie_block->gbea, 814 (u64 __user *)reg->addr); 815 break; 816 default: 817 break; 818 } 819 820 return r; 821 } 822 823 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, 824 struct kvm_one_reg *reg) 825 { 826 int r = -EINVAL; 827 828 switch (reg->id) { 829 case KVM_REG_S390_TODPR: 830 r = get_user(vcpu->arch.sie_block->todpr, 831 (u32 __user *)reg->addr); 832 break; 833 case KVM_REG_S390_EPOCHDIFF: 834 r = get_user(vcpu->arch.sie_block->epoch, 835 (u64 __user *)reg->addr); 836 break; 837 case KVM_REG_S390_CPU_TIMER: 838 r = get_user(vcpu->arch.sie_block->cputm, 839 (u64 __user *)reg->addr); 840 break; 841 case KVM_REG_S390_CLOCK_COMP: 842 r = get_user(vcpu->arch.sie_block->ckc, 843 (u64 __user *)reg->addr); 844 break; 845 case KVM_REG_S390_PFTOKEN: 846 r = get_user(vcpu->arch.pfault_token, 847 (u64 __user *)reg->addr); 848 break; 849 case KVM_REG_S390_PFCOMPARE: 850 r = get_user(vcpu->arch.pfault_compare, 851 (u64 __user *)reg->addr); 852 break; 853 case KVM_REG_S390_PFSELECT: 854 r = get_user(vcpu->arch.pfault_select, 855 (u64 __user *)reg->addr); 856 break; 857 case KVM_REG_S390_PP: 858 r = get_user(vcpu->arch.sie_block->pp, 859 (u64 __user *)reg->addr); 860 break; 861 case KVM_REG_S390_GBEA: 862 r = get_user(vcpu->arch.sie_block->gbea, 863 (u64 __user *)reg->addr); 864 break; 865 default: 866 break; 867 } 868 869 return r; 870 } 871 872 static int 
kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) 873 { 874 kvm_s390_vcpu_initial_reset(vcpu); 875 return 0; 876 } 877 878 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 879 { 880 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs)); 881 return 0; 882 } 883 884 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 885 { 886 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs)); 887 return 0; 888 } 889 890 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 891 struct kvm_sregs *sregs) 892 { 893 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs)); 894 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs)); 895 restore_access_regs(vcpu->run->s.regs.acrs); 896 return 0; 897 } 898 899 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, 900 struct kvm_sregs *sregs) 901 { 902 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs)); 903 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs)); 904 return 0; 905 } 906 907 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 908 { 909 if (test_fp_ctl(fpu->fpc)) 910 return -EINVAL; 911 memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs)); 912 vcpu->arch.guest_fpregs.fpc = fpu->fpc; 913 restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc); 914 restore_fp_regs(vcpu->arch.guest_fpregs.fprs); 915 return 0; 916 } 917 918 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 919 { 920 memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs)); 921 fpu->fpc = vcpu->arch.guest_fpregs.fpc; 922 return 0; 923 } 924 925 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw) 926 { 927 int rc = 0; 928 929 if (!(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED)) 930 rc = -EBUSY; 931 else { 932 vcpu->run->psw_mask = psw.mask; 933 vcpu->run->psw_addr = psw.addr; 934 } 935 return rc; 936 } 937 938 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, 939 struct kvm_translation *tr) 940 { 941 return -EINVAL; /* not implemented yet */ 942 } 943 944 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \ 945 KVM_GUESTDBG_USE_HW_BP | \ 946 KVM_GUESTDBG_ENABLE) 947 948 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, 949 struct kvm_guest_debug *dbg) 950 { 951 int rc = 0; 952 953 vcpu->guest_debug = 0; 954 kvm_s390_clear_bp_data(vcpu); 955 956 if (dbg->control & ~VALID_GUESTDBG_FLAGS) 957 return -EINVAL; 958 959 if (dbg->control & KVM_GUESTDBG_ENABLE) { 960 vcpu->guest_debug = dbg->control; 961 /* enforce guest PER */ 962 atomic_set_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags); 963 964 if (dbg->control & KVM_GUESTDBG_USE_HW_BP) 965 rc = kvm_s390_import_bp_data(vcpu, dbg); 966 } else { 967 atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags); 968 vcpu->arch.guestdbg.last_bp = 0; 969 } 970 971 if (rc) { 972 vcpu->guest_debug = 0; 973 kvm_s390_clear_bp_data(vcpu); 974 atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags); 975 } 976 977 return rc; 978 } 979 980 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, 981 struct kvm_mp_state *mp_state) 982 { 983 return -EINVAL; /* not implemented yet */ 984 } 985 986 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, 987 struct kvm_mp_state *mp_state) 988 { 989 return -EINVAL; /* not implemented yet */ 990 } 991 992 bool kvm_s390_cmma_enabled(struct kvm *kvm) 993 { 994 if (!MACHINE_IS_LPAR) 995 return false; 996 /* only enable for z10 and
later */ 997 if (!MACHINE_HAS_EDAT1) 998 return false; 999 if (!kvm->arch.use_cmma) 1000 return false; 1001 return true; 1002 } 1003 1004 static bool ibs_enabled(struct kvm_vcpu *vcpu) 1005 { 1006 return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS; 1007 } 1008 1009 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) 1010 { 1011 retry: 1012 s390_vcpu_unblock(vcpu); 1013 /* 1014 * We use MMU_RELOAD just to re-arm the ipte notifier for the 1015 * guest prefix page. gmap_ipte_notify will wait on the ptl lock. 1016 * This ensures that the ipte instruction for this request has 1017 * already finished. We might race against a second unmapper that 1018 * wants to set the blocking bit. Lets just retry the request loop. 1019 */ 1020 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) { 1021 int rc; 1022 rc = gmap_ipte_notify(vcpu->arch.gmap, 1023 kvm_s390_get_prefix(vcpu), 1024 PAGE_SIZE * 2); 1025 if (rc) 1026 return rc; 1027 goto retry; 1028 } 1029 1030 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) { 1031 if (!ibs_enabled(vcpu)) { 1032 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1); 1033 atomic_set_mask(CPUSTAT_IBS, 1034 &vcpu->arch.sie_block->cpuflags); 1035 } 1036 goto retry; 1037 } 1038 1039 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) { 1040 if (ibs_enabled(vcpu)) { 1041 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0); 1042 atomic_clear_mask(CPUSTAT_IBS, 1043 &vcpu->arch.sie_block->cpuflags); 1044 } 1045 goto retry; 1046 } 1047 1048 return 0; 1049 } 1050 1051 /** 1052 * kvm_arch_fault_in_page - fault-in guest page if necessary 1053 * @vcpu: The corresponding virtual cpu 1054 * @gpa: Guest physical address 1055 * @writable: Whether the page should be writable or not 1056 * 1057 * Make sure that a guest page has been faulted-in on the host. 1058 * 1059 * Return: Zero on success, negative error code otherwise. 1060 */ 1061 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable) 1062 { 1063 struct mm_struct *mm = current->mm; 1064 hva_t hva; 1065 long rc; 1066 1067 hva = gmap_fault(gpa, vcpu->arch.gmap); 1068 if (IS_ERR_VALUE(hva)) 1069 return (long)hva; 1070 down_read(&mm->mmap_sem); 1071 rc = get_user_pages(current, mm, hva, 1, writable, 0, NULL, NULL); 1072 up_read(&mm->mmap_sem); 1073 1074 return rc < 0 ? 
rc : 0; 1075 } 1076 1077 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token, 1078 unsigned long token) 1079 { 1080 struct kvm_s390_interrupt inti; 1081 inti.parm64 = token; 1082 1083 if (start_token) { 1084 inti.type = KVM_S390_INT_PFAULT_INIT; 1085 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &inti)); 1086 } else { 1087 inti.type = KVM_S390_INT_PFAULT_DONE; 1088 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti)); 1089 } 1090 } 1091 1092 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, 1093 struct kvm_async_pf *work) 1094 { 1095 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token); 1096 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token); 1097 } 1098 1099 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, 1100 struct kvm_async_pf *work) 1101 { 1102 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token); 1103 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token); 1104 } 1105 1106 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, 1107 struct kvm_async_pf *work) 1108 { 1109 /* s390 will always inject the page directly */ 1110 } 1111 1112 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu) 1113 { 1114 /* 1115 * s390 will always inject the page directly, 1116 * but we still want check_async_completion to cleanup 1117 */ 1118 return true; 1119 } 1120 1121 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu) 1122 { 1123 hva_t hva; 1124 struct kvm_arch_async_pf arch; 1125 int rc; 1126 1127 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 1128 return 0; 1129 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) != 1130 vcpu->arch.pfault_compare) 1131 return 0; 1132 if (psw_extint_disabled(vcpu)) 1133 return 0; 1134 if (kvm_cpu_has_interrupt(vcpu)) 1135 return 0; 1136 if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul)) 1137 return 0; 1138 if (!vcpu->arch.gmap->pfault_enabled) 1139 return 0; 1140 1141 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr)); 1142 hva += current->thread.gmap_addr & ~PAGE_MASK; 1143 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8)) 1144 return 0; 1145 1146 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch); 1147 return rc; 1148 } 1149 1150 static int vcpu_pre_run(struct kvm_vcpu *vcpu) 1151 { 1152 int rc, cpuflags; 1153 1154 /* 1155 * On s390 notifications for arriving pages will be delivered directly 1156 * to the guest but the house keeping for completed pfaults is 1157 * handled outside the worker. 
1158 */ 1159 kvm_check_async_pf_completion(vcpu); 1160 1161 memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16); 1162 1163 if (need_resched()) 1164 schedule(); 1165 1166 if (test_cpu_flag(CIF_MCCK_PENDING)) 1167 s390_handle_mcck(); 1168 1169 if (!kvm_is_ucontrol(vcpu->kvm)) 1170 kvm_s390_deliver_pending_interrupts(vcpu); 1171 1172 rc = kvm_s390_handle_requests(vcpu); 1173 if (rc) 1174 return rc; 1175 1176 if (guestdbg_enabled(vcpu)) { 1177 kvm_s390_backup_guest_per_regs(vcpu); 1178 kvm_s390_patch_guest_per_regs(vcpu); 1179 } 1180 1181 vcpu->arch.sie_block->icptcode = 0; 1182 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags); 1183 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags); 1184 trace_kvm_s390_sie_enter(vcpu, cpuflags); 1185 1186 return 0; 1187 } 1188 1189 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) 1190 { 1191 int rc = -1; 1192 1193 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", 1194 vcpu->arch.sie_block->icptcode); 1195 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); 1196 1197 if (guestdbg_enabled(vcpu)) 1198 kvm_s390_restore_guest_per_regs(vcpu); 1199 1200 if (exit_reason >= 0) { 1201 rc = 0; 1202 } else if (kvm_is_ucontrol(vcpu->kvm)) { 1203 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; 1204 vcpu->run->s390_ucontrol.trans_exc_code = 1205 current->thread.gmap_addr; 1206 vcpu->run->s390_ucontrol.pgm_code = 0x10; 1207 rc = -EREMOTE; 1208 1209 } else if (current->thread.gmap_pfault) { 1210 trace_kvm_s390_major_guest_pfault(vcpu); 1211 current->thread.gmap_pfault = 0; 1212 if (kvm_arch_setup_async_pf(vcpu)) { 1213 rc = 0; 1214 } else { 1215 gpa_t gpa = current->thread.gmap_addr; 1216 rc = kvm_arch_fault_in_page(vcpu, gpa, 1); 1217 } 1218 } 1219 1220 if (rc == -1) { 1221 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); 1222 trace_kvm_s390_sie_fault(vcpu); 1223 rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 1224 } 1225 1226 memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16); 1227 1228 if (rc == 0) { 1229 if (kvm_is_ucontrol(vcpu->kvm)) 1230 /* Don't exit for host interrupts. */ 1231 rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0; 1232 else 1233 rc = kvm_handle_sie_intercept(vcpu); 1234 } 1235 1236 return rc; 1237 } 1238 1239 static int __vcpu_run(struct kvm_vcpu *vcpu) 1240 { 1241 int rc, exit_reason; 1242 1243 /* 1244 * We try to hold kvm->srcu during most of vcpu_run (except when run- 1245 * ning the guest), so that memslots (and other stuff) are protected 1246 */ 1247 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 1248 1249 do { 1250 rc = vcpu_pre_run(vcpu); 1251 if (rc) 1252 break; 1253 1254 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 1255 /* 1256 * As PF_VCPU will be used in fault handler, between 1257 * guest_enter and guest_exit should be no uaccess. 
1258 */ 1259 preempt_disable(); 1260 kvm_guest_enter(); 1261 preempt_enable(); 1262 exit_reason = sie64a(vcpu->arch.sie_block, 1263 vcpu->run->s.regs.gprs); 1264 kvm_guest_exit(); 1265 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 1266 1267 rc = vcpu_post_run(vcpu, exit_reason); 1268 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc); 1269 1270 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 1271 return rc; 1272 } 1273 1274 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 1275 { 1276 int rc; 1277 sigset_t sigsaved; 1278 1279 if (guestdbg_exit_pending(vcpu)) { 1280 kvm_s390_prepare_debug_exit(vcpu); 1281 return 0; 1282 } 1283 1284 if (vcpu->sigset_active) 1285 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); 1286 1287 kvm_s390_vcpu_start(vcpu); 1288 1289 switch (kvm_run->exit_reason) { 1290 case KVM_EXIT_S390_SIEIC: 1291 case KVM_EXIT_UNKNOWN: 1292 case KVM_EXIT_INTR: 1293 case KVM_EXIT_S390_RESET: 1294 case KVM_EXIT_S390_UCONTROL: 1295 case KVM_EXIT_S390_TSCH: 1296 case KVM_EXIT_DEBUG: 1297 break; 1298 default: 1299 BUG(); 1300 } 1301 1302 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; 1303 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; 1304 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) { 1305 kvm_run->kvm_dirty_regs &= ~KVM_SYNC_PREFIX; 1306 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); 1307 } 1308 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) { 1309 kvm_run->kvm_dirty_regs &= ~KVM_SYNC_CRS; 1310 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128); 1311 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); 1312 } 1313 1314 might_fault(); 1315 rc = __vcpu_run(vcpu); 1316 1317 if (signal_pending(current) && !rc) { 1318 kvm_run->exit_reason = KVM_EXIT_INTR; 1319 rc = -EINTR; 1320 } 1321 1322 if (guestdbg_exit_pending(vcpu) && !rc) { 1323 kvm_s390_prepare_debug_exit(vcpu); 1324 rc = 0; 1325 } 1326 1327 if (rc == -EOPNOTSUPP) { 1328 /* intercept cannot be handled in-kernel, prepare kvm-run */ 1329 kvm_run->exit_reason = KVM_EXIT_S390_SIEIC; 1330 kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; 1331 kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; 1332 kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; 1333 rc = 0; 1334 } 1335 1336 if (rc == -EREMOTE) { 1337 /* intercept was handled, but userspace support is needed 1338 * kvm_run has been prepared by the handler */ 1339 rc = 0; 1340 } 1341 1342 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask; 1343 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; 1344 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu); 1345 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128); 1346 1347 if (vcpu->sigset_active) 1348 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 1349 1350 vcpu->stat.exit_userspace++; 1351 return rc; 1352 } 1353 1354 /* 1355 * store status at address 1356 * we have two special cases: 1357 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit 1358 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix 1359 */ 1360 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa) 1361 { 1362 unsigned char archmode = 1; 1363 unsigned int px; 1364 u64 clkcomp; 1365 int rc; 1366 1367 if (gpa == KVM_S390_STORE_STATUS_NOADDR) { 1368 if (write_guest_abs(vcpu, 163, &archmode, 1)) 1369 return -EFAULT; 1370 gpa = SAVE_AREA_BASE; 1371 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) { 1372 if (write_guest_real(vcpu, 163, &archmode, 1)) 1373 return -EFAULT; 1374 gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE); 1375 } 1376 rc =
write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs), 1377 vcpu->arch.guest_fpregs.fprs, 128); 1378 rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs), 1379 vcpu->run->s.regs.gprs, 128); 1380 rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw), 1381 &vcpu->arch.sie_block->gpsw, 16); 1382 px = kvm_s390_get_prefix(vcpu); 1383 rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg), 1384 &px, 4); 1385 rc |= write_guest_abs(vcpu, 1386 gpa + offsetof(struct save_area, fp_ctrl_reg), 1387 &vcpu->arch.guest_fpregs.fpc, 4); 1388 rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg), 1389 &vcpu->arch.sie_block->todpr, 4); 1390 rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer), 1391 &vcpu->arch.sie_block->cputm, 8); 1392 clkcomp = vcpu->arch.sie_block->ckc >> 8; 1393 rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp), 1394 &clkcomp, 8); 1395 rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs), 1396 &vcpu->run->s.regs.acrs, 64); 1397 rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs), 1398 &vcpu->arch.sie_block->gcr, 128); 1399 return rc ? -EFAULT : 0; 1400 } 1401 1402 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) 1403 { 1404 /* 1405 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy 1406 * copying in vcpu load/put. Let's update our copies before we save 1407 * them into the save area 1408 */ 1409 save_fp_ctl(&vcpu->arch.guest_fpregs.fpc); 1410 save_fp_regs(vcpu->arch.guest_fpregs.fprs); 1411 save_access_regs(vcpu->run->s.regs.acrs); 1412 1413 return kvm_s390_store_status_unloaded(vcpu, addr); 1414 } 1415 1416 static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu) 1417 { 1418 return atomic_read(&(vcpu)->arch.sie_block->cpuflags) & CPUSTAT_STOPPED; 1419 } 1420 1421 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu) 1422 { 1423 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu); 1424 kvm_make_request(KVM_REQ_DISABLE_IBS, vcpu); 1425 exit_sie_sync(vcpu); 1426 } 1427 1428 static void __disable_ibs_on_all_vcpus(struct kvm *kvm) 1429 { 1430 unsigned int i; 1431 struct kvm_vcpu *vcpu; 1432 1433 kvm_for_each_vcpu(i, vcpu, kvm) { 1434 __disable_ibs_on_vcpu(vcpu); 1435 } 1436 } 1437 1438 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu) 1439 { 1440 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu); 1441 kvm_make_request(KVM_REQ_ENABLE_IBS, vcpu); 1442 exit_sie_sync(vcpu); 1443 } 1444 1445 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu) 1446 { 1447 int i, online_vcpus, started_vcpus = 0; 1448 1449 if (!is_vcpu_stopped(vcpu)) 1450 return; 1451 1452 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1); 1453 /* Only one cpu at a time may enter/leave the STOPPED state. */ 1454 spin_lock_bh(&vcpu->kvm->arch.start_stop_lock); 1455 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); 1456 1457 for (i = 0; i < online_vcpus; i++) { 1458 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) 1459 started_vcpus++; 1460 } 1461 1462 if (started_vcpus == 0) { 1463 /* we're the only active VCPU -> speed it up */ 1464 __enable_ibs_on_vcpu(vcpu); 1465 } else if (started_vcpus == 1) { 1466 /* 1467 * As we are starting a second VCPU, we have to disable 1468 * the IBS facility on all VCPUs to remove potentially 1469 * outstanding ENABLE requests. 1470 */ 1471 __disable_ibs_on_all_vcpus(vcpu->kvm); 1472 } 1473 1474 atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); 1475 /* 1476 * Another VCPU might have used IBS while we were offline.
1477 * Let's play safe and flush the VCPU at startup. 1478 */ 1479 vcpu->arch.sie_block->ihcpu = 0xffff; 1480 spin_unlock_bh(&vcpu->kvm->arch.start_stop_lock); 1481 return; 1482 } 1483 1484 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu) 1485 { 1486 int i, online_vcpus, started_vcpus = 0; 1487 struct kvm_vcpu *started_vcpu = NULL; 1488 1489 if (is_vcpu_stopped(vcpu)) 1490 return; 1491 1492 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0); 1493 /* Only one cpu at a time may enter/leave the STOPPED state. */ 1494 spin_lock_bh(&vcpu->kvm->arch.start_stop_lock); 1495 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); 1496 1497 atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); 1498 __disable_ibs_on_vcpu(vcpu); 1499 1500 for (i = 0; i < online_vcpus; i++) { 1501 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) { 1502 started_vcpus++; 1503 started_vcpu = vcpu->kvm->vcpus[i]; 1504 } 1505 } 1506 1507 if (started_vcpus == 1) { 1508 /* 1509 * As we only have one VCPU left, we want to enable the 1510 * IBS facility for that VCPU to speed it up. 1511 */ 1512 __enable_ibs_on_vcpu(started_vcpu); 1513 } 1514 1515 spin_unlock_bh(&vcpu->kvm->arch.start_stop_lock); 1516 return; 1517 } 1518 1519 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, 1520 struct kvm_enable_cap *cap) 1521 { 1522 int r; 1523 1524 if (cap->flags) 1525 return -EINVAL; 1526 1527 switch (cap->cap) { 1528 case KVM_CAP_S390_CSS_SUPPORT: 1529 if (!vcpu->kvm->arch.css_support) { 1530 vcpu->kvm->arch.css_support = 1; 1531 trace_kvm_s390_enable_css(vcpu->kvm); 1532 } 1533 r = 0; 1534 break; 1535 default: 1536 r = -EINVAL; 1537 break; 1538 } 1539 return r; 1540 } 1541 1542 long kvm_arch_vcpu_ioctl(struct file *filp, 1543 unsigned int ioctl, unsigned long arg) 1544 { 1545 struct kvm_vcpu *vcpu = filp->private_data; 1546 void __user *argp = (void __user *)arg; 1547 int idx; 1548 long r; 1549 1550 switch (ioctl) { 1551 case KVM_S390_INTERRUPT: { 1552 struct kvm_s390_interrupt s390int; 1553 1554 r = -EFAULT; 1555 if (copy_from_user(&s390int, argp, sizeof(s390int))) 1556 break; 1557 r = kvm_s390_inject_vcpu(vcpu, &s390int); 1558 break; 1559 } 1560 case KVM_S390_STORE_STATUS: 1561 idx = srcu_read_lock(&vcpu->kvm->srcu); 1562 r = kvm_s390_vcpu_store_status(vcpu, arg); 1563 srcu_read_unlock(&vcpu->kvm->srcu, idx); 1564 break; 1565 case KVM_S390_SET_INITIAL_PSW: { 1566 psw_t psw; 1567 1568 r = -EFAULT; 1569 if (copy_from_user(&psw, argp, sizeof(psw))) 1570 break; 1571 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw); 1572 break; 1573 } 1574 case KVM_S390_INITIAL_RESET: 1575 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu); 1576 break; 1577 case KVM_SET_ONE_REG: 1578 case KVM_GET_ONE_REG: { 1579 struct kvm_one_reg reg; 1580 r = -EFAULT; 1581 if (copy_from_user(&reg, argp, sizeof(reg))) 1582 break; 1583 if (ioctl == KVM_SET_ONE_REG) 1584 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg); 1585 else 1586 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg); 1587 break; 1588 } 1589 #ifdef CONFIG_KVM_S390_UCONTROL 1590 case KVM_S390_UCAS_MAP: { 1591 struct kvm_s390_ucas_mapping ucasmap; 1592 1593 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) { 1594 r = -EFAULT; 1595 break; 1596 } 1597 1598 if (!kvm_is_ucontrol(vcpu->kvm)) { 1599 r = -EINVAL; 1600 break; 1601 } 1602 1603 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr, 1604 ucasmap.vcpu_addr, ucasmap.length); 1605 break; 1606 } 1607 case KVM_S390_UCAS_UNMAP: { 1608 struct kvm_s390_ucas_mapping ucasmap; 1609 1610 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) { 1611 r = -EFAULT; 1612
break; 1613 } 1614 1615 if (!kvm_is_ucontrol(vcpu->kvm)) { 1616 r = -EINVAL; 1617 break; 1618 } 1619 1620 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr, 1621 ucasmap.length); 1622 break; 1623 } 1624 #endif 1625 case KVM_S390_VCPU_FAULT: { 1626 r = gmap_fault(arg, vcpu->arch.gmap); 1627 if (!IS_ERR_VALUE(r)) 1628 r = 0; 1629 break; 1630 } 1631 case KVM_ENABLE_CAP: 1632 { 1633 struct kvm_enable_cap cap; 1634 r = -EFAULT; 1635 if (copy_from_user(&cap, argp, sizeof(cap))) 1636 break; 1637 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); 1638 break; 1639 } 1640 default: 1641 r = -ENOTTY; 1642 } 1643 return r; 1644 } 1645 1646 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) 1647 { 1648 #ifdef CONFIG_KVM_S390_UCONTROL 1649 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET) 1650 && (kvm_is_ucontrol(vcpu->kvm))) { 1651 vmf->page = virt_to_page(vcpu->arch.sie_block); 1652 get_page(vmf->page); 1653 return 0; 1654 } 1655 #endif 1656 return VM_FAULT_SIGBUS; 1657 } 1658 1659 void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, 1660 struct kvm_memory_slot *dont) 1661 { 1662 } 1663 1664 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, 1665 unsigned long npages) 1666 { 1667 return 0; 1668 } 1669 1670 void kvm_arch_memslots_updated(struct kvm *kvm) 1671 { 1672 } 1673 1674 /* Section: memory related */ 1675 int kvm_arch_prepare_memory_region(struct kvm *kvm, 1676 struct kvm_memory_slot *memslot, 1677 struct kvm_userspace_memory_region *mem, 1678 enum kvm_mr_change change) 1679 { 1680 /* A few sanity checks. We can have memory slots which have to be 1681 located/ended at a segment boundary (1MB). The memory in userland is 1682 ok to be fragmented into various different vmas. It is okay to mmap() 1683 and munmap() stuff in this slot after doing this call at any time */ 1684 1685 if (mem->userspace_addr & 0xffffful) 1686 return -EINVAL; 1687 1688 if (mem->memory_size & 0xffffful) 1689 return -EINVAL; 1690 1691 return 0; 1692 } 1693 1694 void kvm_arch_commit_memory_region(struct kvm *kvm, 1695 struct kvm_userspace_memory_region *mem, 1696 const struct kvm_memory_slot *old, 1697 enum kvm_mr_change change) 1698 { 1699 int rc; 1700 1701 /* If the basics of the memslot do not change, we do not want 1702 * to update the gmap. Every update causes several unnecessary 1703 * segment translation exceptions. This is usually handled just 1704 * fine by the normal fault handler + gmap, but it will also 1705 * cause faults on the prefix page of running guest CPUs. 1706 */ 1707 if (old->userspace_addr == mem->userspace_addr && 1708 old->base_gfn * PAGE_SIZE == mem->guest_phys_addr && 1709 old->npages * PAGE_SIZE == mem->memory_size) 1710 return; 1711 1712 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr, 1713 mem->guest_phys_addr, mem->memory_size); 1714 if (rc) 1715 printk(KERN_WARNING "kvm-s390: failed to commit memory region\n"); 1716 return; 1717 } 1718 1719 void kvm_arch_flush_shadow_all(struct kvm *kvm) 1720 { 1721 } 1722 1723 void kvm_arch_flush_shadow_memslot(struct kvm *kvm, 1724 struct kvm_memory_slot *slot) 1725 { 1726 } 1727 1728 static int __init kvm_s390_init(void) 1729 { 1730 int ret; 1731 ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); 1732 if (ret) 1733 return ret; 1734 1735 /* 1736 * guests can ask for up to 255+1 double words, we need a full page 1737 * to hold the maximum amount of facilities. On the other hand, we 1738 * only set facilities that are known to work in KVM. 
1739 */ 1740 vfacilities = (unsigned long *) get_zeroed_page(GFP_KERNEL|GFP_DMA); 1741 if (!vfacilities) { 1742 kvm_exit(); 1743 return -ENOMEM; 1744 } 1745 memcpy(vfacilities, S390_lowcore.stfle_fac_list, 16); 1746 vfacilities[0] &= 0xff82fff3f4fc2000UL; 1747 vfacilities[1] &= 0x005c000000000000UL; 1748 return 0; 1749 } 1750 1751 static void __exit kvm_s390_exit(void) 1752 { 1753 free_page((unsigned long) vfacilities); 1754 kvm_exit(); 1755 } 1756 1757 module_init(kvm_s390_init); 1758 module_exit(kvm_s390_exit); 1759 1760 /* 1761 * Enable autoloading of the kvm module. 1762 * Note that we add the module alias here instead of virt/kvm/kvm_main.c 1763 * since x86 takes a different approach. 1764 */ 1765 #include <linux/miscdevice.h> 1766 MODULE_ALIAS_MISCDEV(KVM_MINOR); 1767 MODULE_ALIAS("devname:kvm"); 1768