/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/pgtable.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/facility.h>
#include <asm/sclp.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU

struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running",
	  VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "diagnose_10", VCPU_STAT(diagnose_10) },
	{ "diagnose_44", VCPU_STAT(diagnose_44) },
	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
	{ NULL }
};

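/*
 * Bit list of virtualized facilities that KVM reports to its guests.
 * It is filled in kvm_s390_init() with a masked copy of the host's
 * STFLE facility list; test_vfacility() below checks a single bit.
 */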
unsigned long *vfacilities;
static struct gmap_notifier gmap_notifier;

/* test availability of vfacility */
int test_vfacility(unsigned long nr)
{
	return __test_facility(nr, (void *) vfacilities);
}

/* Section: not file related */
int kvm_arch_hardware_enable(void *garbage)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

void kvm_arch_hardware_disable(void *garbage)
{
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);

int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_ipte_notifier(&gmap_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_ipte_notifier(&gmap_notifier);
}

void kvm_arch_check_processor_compat(void *rtn)
{
}

int kvm_arch_init(void *opaque)
{
	return 0;
}

void kvm_arch_exit(void)
{
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}

int kvm_dev_ioctl_check_extension(long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_VM_ATTRIBUTES:
		r = 1;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_MAX_VCPUS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	default:
		r = 0;
	}
	return r;
}

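/*
 * Transfer the dirty state of all pages backing @memslot from the gmap
 * into the memslot's dirty bitmap, so that KVM_GET_DIRTY_LOG can report
 * them to userspace. Called with kvm->slots_lock held.
 */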
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				    struct kvm_memory_slot *memslot)
{
	gfn_t cur_gfn, last_gfn;
	unsigned long address;
	struct gmap *gmap = kvm->arch.gmap;

	down_read(&gmap->mm->mmap_sem);
	/* Loop over all guest pages */
	last_gfn = memslot->base_gfn + memslot->npages;
	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
		address = gfn_to_hva_memslot(memslot, cur_gfn);

		if (gmap_test_and_clear_dirty(address, gmap))
			mark_page_dirty(kvm, cur_gfn);
	}
	up_read(&gmap->mm->mmap_sem);
}

/* Section: vm related */
/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	memslot = id_to_memslot(kvm->memslots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}

static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

static int kvm_s390_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;
	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus) == 0) {
			kvm->arch.use_cmma = 1;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		page_table_reset_pgste(kvm->arch.gmap->mm, 0, TASK_SIZE, false);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_mem_control(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	return -ENXIO;
}

static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

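/*
 * Dispatcher for the VM ioctls handled by the s390 backend: floating
 * interrupt injection, capability enablement, creation of the irqchip
 * routing stub and the KVM_{SET,GET,HAS}_DEVICE_ATTR plumbing for the
 * attribute groups implemented above.
 */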
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			kvm_set_irq_routing(kvm, &routing, 0, 0);
			r = 0;
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}

int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	int rc;
	char debug_name[16];
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	rc = -ENOMEM;

	kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
	if (!kvm->arch.sca)
		goto out_err;
	spin_lock(&kvm_lock);
	sca_offset = (sca_offset + 16) & 0x7f0;
	kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
	spin_unlock(&kvm_lock);

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_nodbf;

	spin_lock_init(&kvm->arch.float_int.lock);
	INIT_LIST_HEAD(&kvm->arch.float_int.list);
	init_waitqueue_head(&kvm->arch.ipte_wq);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "%s", "vm created");

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
	} else {
		kvm->arch.gmap = gmap_alloc(current->mm);
		if (!kvm->arch.gmap)
			goto out_nogmap;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.css_support = 0;
	kvm->arch.use_irqchip = 0;

	spin_lock_init(&kvm->arch.start_stop_lock);

	return 0;
out_nogmap:
	debug_unregister(kvm->arch.dbf);
out_nodbf:
	free_page((unsigned long)(kvm->arch.sca));
out_err:
	return rc;
}

void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	kvm_s390_clear_local_irqs(vcpu);
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		clear_bit(63 - vcpu->vcpu_id,
			  (unsigned long *) &vcpu->kvm->arch.sca->mcn);
		if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
		    (__u64) vcpu->arch.sie_block)
			vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
	}
	smp_mb();

	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_free(vcpu->arch.gmap);

	if (kvm_s390_cmma_enabled(vcpu->kvm))
		kvm_s390_vcpu_unsetup_cmma(vcpu);
	free_page((unsigned long)(vcpu->arch.sie_block));

	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}

static void kvm_free_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}

void kvm_arch_sync_events(struct kvm *kvm)
{
}

void kvm_arch_destroy_vm(struct kvm *kvm)
{
	kvm_free_vcpus(kvm);
	free_page((unsigned long)(kvm->arch.sca));
	debug_unregister(kvm->arch.dbf);
	if (!kvm_is_ucontrol(kvm))
		gmap_free(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
}

/* Section: vcpu related */
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->arch.gmap = gmap_alloc(current->mm);
		if (!vcpu->arch.gmap)
			return -ENOMEM;
		vcpu->arch.gmap->private = vcpu->kvm;
		return 0;
	}

	vcpu->arch.gmap = vcpu->kvm->arch.gmap;
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
				    KVM_SYNC_GPRS |
				    KVM_SYNC_ACRS |
				    KVM_SYNC_CRS;
	return 0;
}

void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
	/* Nothing to do */
}

void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	save_fp_ctl(&vcpu->arch.host_fpregs.fpc);
	save_fp_regs(vcpu->arch.host_fpregs.fprs);
	save_access_regs(vcpu->arch.host_acrs);
	restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
	restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	gmap_enable(vcpu->arch.gmap);
	atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	gmap_disable(vcpu->arch.gmap);
	save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
	save_fp_regs(vcpu->arch.guest_fpregs.fprs);
	save_access_regs(vcpu->run->s.regs.acrs);
	restore_fp_ctl(&vcpu->arch.host_fpregs.fpc);
	restore_fp_regs(vcpu->arch.host_fpregs.fprs);
	restore_access_regs(vcpu->arch.host_acrs);
}

static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
	/* this equals initial cpu reset in pop, but we don't switch to ESA */
	vcpu->arch.sie_block->gpsw.mask = 0UL;
	vcpu->arch.sie_block->gpsw.addr = 0UL;
	kvm_s390_set_prefix(vcpu, 0);
	vcpu->arch.sie_block->cputm = 0UL;
	vcpu->arch.sie_block->ckc = 0UL;
	vcpu->arch.sie_block->todpr = 0;
	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
	vcpu->arch.sie_block->gcr[0] = 0xE0UL;
	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
	vcpu->arch.guest_fpregs.fpc = 0;
	asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
	vcpu->arch.sie_block->gbea = 1;
	vcpu->arch.sie_block->pp = 0;
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	kvm_s390_vcpu_stop(vcpu);
	kvm_s390_clear_local_irqs(vcpu);
}

int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	return 0;
}

void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
	free_page(vcpu->arch.sie_block->cbrlo);
	vcpu->arch.sie_block->cbrlo = 0;
}

int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
{
	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
	if (!vcpu->arch.sie_block->cbrlo)
		return -ENOMEM;

	vcpu->arch.sie_block->ecb2 |= 0x80;
	vcpu->arch.sie_block->ecb2 &= ~0x08;
	return 0;
}

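/*
 * Set up the SIE control block for a freshly created VCPU: CPU state
 * flags, execution-control bits (ecb/ecb2/eca) depending on the
 * available facilities, the facility list pointer, storage-key
 * instruction intercepts, optional CMMA state and the clock-comparator
 * wakeup timer.
 */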
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
	int rc = 0;

	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
						    CPUSTAT_SM |
						    CPUSTAT_STOPPED |
						    CPUSTAT_GED);
	vcpu->arch.sie_block->ecb = 6;
	if (test_vfacility(50) && test_vfacility(73))
		vcpu->arch.sie_block->ecb |= 0x10;

	vcpu->arch.sie_block->ecb2 = 8;
	vcpu->arch.sie_block->eca = 0xC1002000U;
	if (sclp_has_siif())
		vcpu->arch.sie_block->eca |= 1;
	vcpu->arch.sie_block->fac = (int) (long) vfacilities;
	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
	if (kvm_s390_cmma_enabled(vcpu->kvm)) {
		rc = kvm_s390_vcpu_setup_cmma(vcpu);
		if (rc)
			return rc;
	}
	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
	tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet,
		     (unsigned long) vcpu);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
	get_cpu_id(&vcpu->arch.cpu_id);
	vcpu->arch.cpu_id.version = 0xff;
	return rc;
}

struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
				      unsigned int id)
{
	struct kvm_vcpu *vcpu;
	struct sie_page *sie_page;
	int rc = -EINVAL;

	if (id >= KVM_MAX_VCPUS)
		goto out;

	rc = -ENOMEM;

	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu)
		goto out;

	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
	if (!sie_page)
		goto out_free_cpu;

	vcpu->arch.sie_block = &sie_page->sie_block;
	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

	vcpu->arch.sie_block->icpua = id;
	if (!kvm_is_ucontrol(kvm)) {
		if (!kvm->arch.sca) {
			WARN_ON_ONCE(1);
			goto out_free_cpu;
		}
		if (!kvm->arch.sca->cpu[id].sda)
			kvm->arch.sca->cpu[id].sda =
				(__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh =
			(__u32)(((__u64)kvm->arch.sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
		set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
	}

	spin_lock_init(&vcpu->arch.local_int.lock);
	INIT_LIST_HEAD(&vcpu->arch.local_int.list);
	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
	vcpu->arch.local_int.wq = &vcpu->wq;
	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;

	rc = kvm_vcpu_init(vcpu, kvm, id);
	if (rc)
		goto out_free_sie_block;
	VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
		 vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);

	return vcpu;
out_free_sie_block:
	free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(rc);
}

int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_cpu_has_interrupt(vcpu);
}

void s390_vcpu_block(struct kvm_vcpu *vcpu)
{
	atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

void s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
	atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

/*
 * Kick a guest cpu out of SIE and wait until SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 */
void exit_sie(struct kvm_vcpu *vcpu)
{
	atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}

/* Kick a guest cpu out of SIE and prevent SIE-reentry */
void exit_sie_sync(struct kvm_vcpu *vcpu)
{
	s390_vcpu_block(vcpu);
	exit_sie(vcpu);
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
{
	int i;
	struct kvm *kvm = gmap->private;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		/* match against both prefix pages */
		if (vcpu->arch.sie_block->prefix == (address & ~0x1000UL)) {
			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
			exit_sie_sync(vcpu);
		}
	}
}

int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}

static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = put_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = put_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = put_user(vcpu->arch.sie_block->cputm,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = put_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = put_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = put_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = put_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = put_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = put_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}

static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = get_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = get_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = get_user(vcpu->arch.sie_block->cputm,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = get_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = get_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = get_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = get_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = get_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = get_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}

static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
	restore_access_regs(vcpu->run->s.regs.acrs);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
	return 0;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	if (test_fp_ctl(fpu->fpc))
		return -EINVAL;
	memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
	vcpu->arch.guest_fpregs.fpc = fpu->fpc;
	restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
	restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->arch.guest_fpregs.fpc;
	return 0;
}

static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}

#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	int rc = 0;

	vcpu->guest_debug = 0;
	kvm_s390_clear_bp_data(vcpu);

	/* validate the requested flags, not the just-cleared state */
	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
		return -EINVAL;

	if (dbg->control & KVM_GUESTDBG_ENABLE) {
		vcpu->guest_debug = dbg->control;
		/* enforce guest PER */
		atomic_set_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);

		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
			rc = kvm_s390_import_bp_data(vcpu, dbg);
	} else {
		atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
		vcpu->arch.guestdbg.last_bp = 0;
	}

	if (rc) {
		vcpu->guest_debug = 0;
		kvm_s390_clear_bp_data(vcpu);
		atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
	}

	return rc;
}

int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	return -EINVAL; /* not implemented yet */
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	return -EINVAL; /* not implemented yet */
}

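/*
 * Collaborative memory management (CMMA) is only usable when running
 * in an LPAR on hardware with EDAT1, and when userspace has explicitly
 * enabled it via the KVM_S390_VM_MEM_ENABLE_CMMA attribute.
 */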
bool kvm_s390_cmma_enabled(struct kvm *kvm)
{
	if (!MACHINE_IS_LPAR)
		return false;
	/* only enable for z10 and later */
	if (!MACHINE_HAS_EDAT1)
		return false;
	if (!kvm->arch.use_cmma)
		return false;
	return true;
}

static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
}

static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	s390_vcpu_unblock(vcpu);
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Let's just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;
		rc = gmap_ipte_notify(vcpu->arch.gmap,
				      vcpu->arch.sie_block->prefix,
				      PAGE_SIZE * 2);
		if (rc)
			return rc;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			atomic_set_mask(CPUSTAT_IBS,
					&vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			atomic_clear_mask(CPUSTAT_IBS,
					  &vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	return 0;
}

/**
 * kvm_arch_fault_in_page - fault-in guest page if necessary
 * @vcpu: The corresponding virtual cpu
 * @gpa: Guest physical address
 * @writable: Whether the page should be writable or not
 *
 * Make sure that a guest page has been faulted-in on the host.
 *
 * Return: Zero on success, negative error code otherwise.
 */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
{
	struct mm_struct *mm = current->mm;
	hva_t hva;
	long rc;

	hva = gmap_fault(gpa, vcpu->arch.gmap);
	if (IS_ERR_VALUE(hva))
		return (long)hva;
	down_read(&mm->mmap_sem);
	rc = get_user_pages(current, mm, hva, 1, writable, 0, NULL, NULL);
	up_read(&mm->mmap_sem);

	return rc < 0 ? rc : 0;
}

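/*
 * Pseudo page fault ("pfault") notifications for async page faults:
 * an INIT interrupt tells the guest that a page is not yet present and
 * that it may schedule other work, while a DONE interrupt (delivered
 * as a floating interrupt) signals that the page has become available.
 */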
static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				      unsigned long token)
{
	struct kvm_s390_interrupt inti;
	inti.parm64 = token;

	if (start_token) {
		inti.type = KVM_S390_INT_PFAULT_INIT;
		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &inti));
	} else {
		inti.type = KVM_S390_INT_PFAULT_DONE;
		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}
}

void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to cleanup
	 */
	return true;
}

static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
	hva_t hva;
	struct kvm_arch_async_pf arch;
	int rc;

	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
		return 0;
	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
		return 0;
	if (psw_extint_disabled(vcpu))
		return 0;
	if (kvm_cpu_has_interrupt(vcpu))
		return 0;
	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
		return 0;
	if (!vcpu->arch.gmap->pfault_enabled)
		return 0;

	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
	hva += current->thread.gmap_addr & ~PAGE_MASK;
	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
		return 0;

	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
	return rc;
}

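/*
 * Everything that has to happen before we enter SIE: async page fault
 * housekeeping, rescheduling, machine check handling, delivery of
 * pending interrupts, handling of outstanding VCPU requests and, if
 * guest debugging is enabled, patching of the guest PER registers.
 */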
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the housekeeping for completed pfaults is
	 * handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);

	if (need_resched())
		schedule();

	if (test_thread_flag(TIF_MCCK_PENDING))
		s390_handle_mcck();

	if (!kvm_is_ucontrol(vcpu->kvm))
		kvm_s390_deliver_pending_interrupts(vcpu);

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	if (guestdbg_enabled(vcpu)) {
		kvm_s390_backup_guest_per_regs(vcpu);
		kvm_s390_patch_guest_per_regs(vcpu);
	}

	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);

	return 0;
}

static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
	int rc = -1;

	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

	if (guestdbg_enabled(vcpu))
		kvm_s390_restore_guest_per_regs(vcpu);

	if (exit_reason >= 0) {
		rc = 0;
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
		vcpu->run->s390_ucontrol.trans_exc_code =
						current->thread.gmap_addr;
		vcpu->run->s390_ucontrol.pgm_code = 0x10;
		rc = -EREMOTE;

	} else if (current->thread.gmap_pfault) {
		trace_kvm_s390_major_guest_pfault(vcpu);
		current->thread.gmap_pfault = 0;
		if (kvm_arch_setup_async_pf(vcpu)) {
			rc = 0;
		} else {
			gpa_t gpa = current->thread.gmap_addr;
			rc = kvm_arch_fault_in_page(vcpu, gpa, 1);
		}
	}

	if (rc == -1) {
		VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
		trace_kvm_s390_sie_fault(vcpu);
		rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
	}

	memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);

	if (rc == 0) {
		if (kvm_is_ucontrol(vcpu->kvm))
			/* Don't exit for host interrupts. */
			rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0;
		else
			rc = kvm_handle_sie_intercept(vcpu);
	}

	return rc;
}

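/*
 * The main run loop: prepare the VCPU, drop the srcu lock, enter SIE
 * via sie64a() and post-process the exit, until an intercept has to be
 * handled in userspace, a signal is pending or a debug exit is
 * requested.
 */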
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
	 * ning the guest), so that memslots (and other stuff) are protected
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in fault handler, between
		 * guest_enter and guest_exit should be no uaccess.
		 */
		preempt_disable();
		kvm_guest_enter();
		preempt_enable();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		kvm_guest_exit();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}

int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int rc;
	sigset_t sigsaved;

	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		return 0;
	}

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

	kvm_s390_vcpu_start(vcpu);

	switch (kvm_run->exit_reason) {
	case KVM_EXIT_S390_SIEIC:
	case KVM_EXIT_UNKNOWN:
	case KVM_EXIT_INTR:
	case KVM_EXIT_S390_RESET:
	case KVM_EXIT_S390_UCONTROL:
	case KVM_EXIT_S390_TSCH:
	case KVM_EXIT_DEBUG:
		break;
	default:
		BUG();
	}

	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) {
		kvm_run->kvm_dirty_regs &= ~KVM_SYNC_PREFIX;
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		kvm_run->kvm_dirty_regs &= ~KVM_SYNC_CRS;
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	}

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EOPNOTSUPP) {
		/* intercept cannot be handled in-kernel, prepare kvm-run */
		kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
		kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
		kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* intercept was handled, but userspace support is needed
		 * kvm_run has been prepared by the handler */
		rc = 0;
	}

	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = vcpu->arch.sie_block->prefix;
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);

	vcpu->stat.exit_userspace++;
	return rc;
}

/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	u64 clkcomp;
	int rc;

	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = SAVE_AREA_BASE;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE);
	}
	rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs),
			     vcpu->arch.guest_fpregs.fprs, 128);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs),
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw),
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg),
			      &vcpu->arch.sie_block->prefix, 4);
	rc |= write_guest_abs(vcpu,
			      gpa + offsetof(struct save_area, fp_ctrl_reg),
			      &vcpu->arch.guest_fpregs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg),
			      &vcpu->arch.sie_block->todpr, 4);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer),
			      &vcpu->arch.sie_block->cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp),
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs),
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs),
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}

int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * copying in vcpu load/put. Let's update our copies before we save
	 * it into the save area
	 */
	save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
	save_fp_regs(vcpu->arch.guest_fpregs.fprs);
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}

static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu)
{
	return atomic_read(&(vcpu)->arch.sie_block->cpuflags) & CPUSTAT_STOPPED;
}

static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_make_request(KVM_REQ_DISABLE_IBS, vcpu);
	exit_sie_sync(vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_make_request(KVM_REQ_ENABLE_IBS, vcpu);
	exit_sie_sync(vcpu);
}

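/*
 * Starting and stopping VCPUs also manages the IBS facility: IBS is
 * only kept enabled while a single VCPU is running, since it speeds up
 * that case, and it is dropped again as soon as a second VCPU starts.
 */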
void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock_bh(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	vcpu->arch.sie_block->ihcpu = 0xffff;
	spin_unlock_bh(&vcpu->kvm->arch.start_stop_lock);
	return;
}

void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock_bh(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock_bh(&vcpu->kvm->arch.start_stop_lock);
	return;
}

static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vcpu(vcpu, &s390int);
		break;
	}
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;
		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(arg, vcpu->arch.gmap);
		if (!IS_ERR_VALUE(r))
			r = 0;
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}

int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
			   struct kvm_memory_slot *dont)
{
}

int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}

void kvm_arch_memslots_updated(struct kvm *kvm)
{
}

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. We can have memory slots which have to be
	   located/ended at a segment boundary (1MB). The memory in userland is
	   ok to be fragmented into various different vmas. It is okay to mmap()
	   and munmap() stuff in this slot after doing this call at any time */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	return 0;
}

void kvm_arch_commit_memory_region(struct kvm *kvm,
				struct kvm_userspace_memory_region *mem,
				const struct kvm_memory_slot *old,
				enum kvm_mr_change change)
{
	int rc;

	/* If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
		mem->guest_phys_addr, mem->memory_size);
	if (rc)
		printk(KERN_WARNING "kvm-s390: failed to commit memory region\n");
	return;
}

void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
}

void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
				   struct kvm_memory_slot *slot)
{
}

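/*
 * Module initialization: register with the common KVM code and set up
 * the facility list that is reported to guests (a masked copy of the
 * host's STFLE data, limited to facilities known to work under KVM).
 */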
static int __init kvm_s390_init(void)
{
	int ret;
	ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
	if (ret)
		return ret;

	/*
	 * guests can ask for up to 255+1 double words, we need a full page
	 * to hold the maximum amount of facilities. On the other hand, we
	 * only set facilities that are known to work in KVM.
	 */
	vfacilities = (unsigned long *) get_zeroed_page(GFP_KERNEL|GFP_DMA);
	if (!vfacilities) {
		kvm_exit();
		return -ENOMEM;
	}
	memcpy(vfacilities, S390_lowcore.stfle_fac_list, 16);
	vfacilities[0] &= 0xff82fff3f4fc2000UL;
	vfacilities[1] &= 0x005c000000000000UL;
	return 0;
}

static void __exit kvm_s390_exit(void)
{
	free_page((unsigned long) vfacilities);
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");