1 /* 2 * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator 3 * 4 * PAPR Virtualized Interrupt System, aka ICS/ICP aka xics, in-kernel emulation 5 * 6 * Copyright (c) 2013 David Gibson, IBM Corporation. 7 * 8 * Permission is hereby granted, free of charge, to any person obtaining a copy 9 * of this software and associated documentation files (the "Software"), to deal 10 * in the Software without restriction, including without limitation the rights 11 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 * copies of the Software, and to permit persons to whom the Software is 13 * furnished to do so, subject to the following conditions: 14 * 15 * The above copyright notice and this permission notice shall be included in 16 * all copies or substantial portions of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 * THE SOFTWARE. 25 * 26 */ 27 28 #include "qemu/osdep.h" 29 #include "qapi/error.h" 30 #include "qemu-common.h" 31 #include "cpu.h" 32 #include "hw/hw.h" 33 #include "trace.h" 34 #include "sysemu/kvm.h" 35 #include "hw/ppc/spapr.h" 36 #include "hw/ppc/xics.h" 37 #include "kvm_ppc.h" 38 #include "qemu/config-file.h" 39 #include "qemu/error-report.h" 40 41 #include <sys/ioctl.h> 42 43 static int kernel_xics_fd = -1; 44 45 typedef struct KVMEnabledICP { 46 unsigned long vcpu_id; 47 QLIST_ENTRY(KVMEnabledICP) node; 48 } KVMEnabledICP; 49 50 static QLIST_HEAD(, KVMEnabledICP) 51 kvm_enabled_icps = QLIST_HEAD_INITIALIZER(&kvm_enabled_icps); 52 53 /* 54 * ICP-KVM 55 */ 56 static void icp_get_kvm_state(ICPState *icp) 57 { 58 uint64_t state; 59 struct kvm_one_reg reg = { 60 .id = KVM_REG_PPC_ICP_STATE, 61 .addr = (uintptr_t)&state, 62 }; 63 int ret; 64 65 /* ICP for this CPU thread is not in use, exiting */ 66 if (!icp->cs) { 67 return; 68 } 69 70 ret = kvm_vcpu_ioctl(icp->cs, KVM_GET_ONE_REG, ®); 71 if (ret != 0) { 72 error_report("Unable to retrieve KVM interrupt controller state" 73 " for CPU %ld: %s", kvm_arch_vcpu_id(icp->cs), strerror(errno)); 74 exit(1); 75 } 76 77 icp->xirr = state >> KVM_REG_PPC_ICP_XISR_SHIFT; 78 icp->mfrr = (state >> KVM_REG_PPC_ICP_MFRR_SHIFT) 79 & KVM_REG_PPC_ICP_MFRR_MASK; 80 icp->pending_priority = (state >> KVM_REG_PPC_ICP_PPRI_SHIFT) 81 & KVM_REG_PPC_ICP_PPRI_MASK; 82 } 83 84 static int icp_set_kvm_state(ICPState *icp, int version_id) 85 { 86 uint64_t state; 87 struct kvm_one_reg reg = { 88 .id = KVM_REG_PPC_ICP_STATE, 89 .addr = (uintptr_t)&state, 90 }; 91 int ret; 92 93 /* ICP for this CPU thread is not in use, exiting */ 94 if (!icp->cs) { 95 return 0; 96 } 97 98 state = ((uint64_t)icp->xirr << KVM_REG_PPC_ICP_XISR_SHIFT) 99 | ((uint64_t)icp->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT) 100 | ((uint64_t)icp->pending_priority << KVM_REG_PPC_ICP_PPRI_SHIFT); 101 102 ret = kvm_vcpu_ioctl(icp->cs, KVM_SET_ONE_REG, ®); 103 if (ret != 0) { 104 error_report("Unable to restore KVM interrupt controller state (0x%" 105 PRIx64 ") for CPU %ld: %s", state, kvm_arch_vcpu_id(icp->cs), 106 strerror(errno)); 107 return ret; 108 } 109 110 return 0; 111 } 112 113 static void icp_kvm_reset(void *dev) 114 { 115 ICPState *icp = ICP(dev); 116 117 icp->xirr = 0; 118 icp->pending_priority = 0xff; 119 icp->mfrr = 0xff; 120 121 /* Make all outputs as deasserted only if the CPU thread is in use */ 122 if (icp->output) { 123 qemu_set_irq(icp->output, 0); 124 } 125 126 icp_set_kvm_state(icp, 1); 127 } 128 129 static void icp_kvm_cpu_setup(ICPState *icp, PowerPCCPU *cpu) 130 { 131 CPUState *cs = CPU(cpu); 132 KVMEnabledICP *enabled_icp; 133 unsigned long vcpu_id = kvm_arch_vcpu_id(cs); 134 int ret; 135 136 if (kernel_xics_fd == -1) { 137 abort(); 138 } 139 140 /* 141 * If we are reusing a parked vCPU fd corresponding to the CPU 142 * which was hot-removed earlier we don't have to renable 143 * KVM_CAP_IRQ_XICS capability again. 144 */ 145 QLIST_FOREACH(enabled_icp, &kvm_enabled_icps, node) { 146 if (enabled_icp->vcpu_id == vcpu_id) { 147 return; 148 } 149 } 150 151 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_IRQ_XICS, 0, kernel_xics_fd, vcpu_id); 152 if (ret < 0) { 153 error_report("Unable to connect CPU%ld to kernel XICS: %s", vcpu_id, 154 strerror(errno)); 155 exit(1); 156 } 157 enabled_icp = g_malloc(sizeof(*enabled_icp)); 158 enabled_icp->vcpu_id = vcpu_id; 159 QLIST_INSERT_HEAD(&kvm_enabled_icps, enabled_icp, node); 160 } 161 162 static void icp_kvm_realize(DeviceState *dev, Error **errp) 163 { 164 qemu_register_reset(icp_kvm_reset, dev); 165 } 166 167 static void icp_kvm_unrealize(DeviceState *dev, Error **errp) 168 { 169 qemu_unregister_reset(icp_kvm_reset, dev); 170 } 171 172 static void icp_kvm_class_init(ObjectClass *klass, void *data) 173 { 174 DeviceClass *dc = DEVICE_CLASS(klass); 175 ICPStateClass *icpc = ICP_CLASS(klass); 176 177 dc->realize = icp_kvm_realize; 178 dc->unrealize = icp_kvm_unrealize; 179 icpc->pre_save = icp_get_kvm_state; 180 icpc->post_load = icp_set_kvm_state; 181 icpc->cpu_setup = icp_kvm_cpu_setup; 182 } 183 184 static const TypeInfo icp_kvm_info = { 185 .name = TYPE_KVM_ICP, 186 .parent = TYPE_ICP, 187 .instance_size = sizeof(ICPState), 188 .class_init = icp_kvm_class_init, 189 .class_size = sizeof(ICPStateClass), 190 }; 191 192 /* 193 * ICS-KVM 194 */ 195 static void ics_get_kvm_state(ICSState *ics) 196 { 197 uint64_t state; 198 struct kvm_device_attr attr = { 199 .flags = 0, 200 .group = KVM_DEV_XICS_GRP_SOURCES, 201 .addr = (uint64_t)(uintptr_t)&state, 202 }; 203 int i; 204 205 for (i = 0; i < ics->nr_irqs; i++) { 206 ICSIRQState *irq = &ics->irqs[i]; 207 int ret; 208 209 attr.attr = i + ics->offset; 210 211 ret = ioctl(kernel_xics_fd, KVM_GET_DEVICE_ATTR, &attr); 212 if (ret != 0) { 213 error_report("Unable to retrieve KVM interrupt controller state" 214 " for IRQ %d: %s", i + ics->offset, strerror(errno)); 215 exit(1); 216 } 217 218 irq->server = state & KVM_XICS_DESTINATION_MASK; 219 irq->saved_priority = (state >> KVM_XICS_PRIORITY_SHIFT) 220 & KVM_XICS_PRIORITY_MASK; 221 /* 222 * To be consistent with the software emulation in xics.c, we 223 * split out the masked state + priority that we get from the 224 * kernel into 'current priority' (0xff if masked) and 225 * 'saved priority' (if masked, this is the priority the 226 * interrupt had before it was masked). Masking and unmasking 227 * are done with the ibm,int-off and ibm,int-on RTAS calls. 228 */ 229 if (state & KVM_XICS_MASKED) { 230 irq->priority = 0xff; 231 } else { 232 irq->priority = irq->saved_priority; 233 } 234 235 irq->status = 0; 236 if (state & KVM_XICS_PENDING) { 237 if (state & KVM_XICS_LEVEL_SENSITIVE) { 238 irq->status |= XICS_STATUS_ASSERTED; 239 } else { 240 /* 241 * A pending edge-triggered interrupt (or MSI) 242 * must have been rejected previously when we 243 * first detected it and tried to deliver it, 244 * so mark it as pending and previously rejected 245 * for consistency with how xics.c works. 246 */ 247 irq->status |= XICS_STATUS_MASKED_PENDING 248 | XICS_STATUS_REJECTED; 249 } 250 } 251 if (state & KVM_XICS_PRESENTED) { 252 irq->status |= XICS_STATUS_PRESENTED; 253 } 254 if (state & KVM_XICS_QUEUED) { 255 irq->status |= XICS_STATUS_QUEUED; 256 } 257 } 258 } 259 260 static int ics_set_kvm_state(ICSState *ics, int version_id) 261 { 262 uint64_t state; 263 struct kvm_device_attr attr = { 264 .flags = 0, 265 .group = KVM_DEV_XICS_GRP_SOURCES, 266 .addr = (uint64_t)(uintptr_t)&state, 267 }; 268 int i; 269 270 for (i = 0; i < ics->nr_irqs; i++) { 271 ICSIRQState *irq = &ics->irqs[i]; 272 int ret; 273 274 attr.attr = i + ics->offset; 275 276 state = irq->server; 277 state |= (uint64_t)(irq->saved_priority & KVM_XICS_PRIORITY_MASK) 278 << KVM_XICS_PRIORITY_SHIFT; 279 if (irq->priority != irq->saved_priority) { 280 assert(irq->priority == 0xff); 281 state |= KVM_XICS_MASKED; 282 } 283 284 if (ics->irqs[i].flags & XICS_FLAGS_IRQ_LSI) { 285 state |= KVM_XICS_LEVEL_SENSITIVE; 286 if (irq->status & XICS_STATUS_ASSERTED) { 287 state |= KVM_XICS_PENDING; 288 } 289 } else { 290 if (irq->status & XICS_STATUS_MASKED_PENDING) { 291 state |= KVM_XICS_PENDING; 292 } 293 } 294 if (irq->status & XICS_STATUS_PRESENTED) { 295 state |= KVM_XICS_PRESENTED; 296 } 297 if (irq->status & XICS_STATUS_QUEUED) { 298 state |= KVM_XICS_QUEUED; 299 } 300 301 ret = ioctl(kernel_xics_fd, KVM_SET_DEVICE_ATTR, &attr); 302 if (ret != 0) { 303 error_report("Unable to restore KVM interrupt controller state" 304 " for IRQs %d: %s", i + ics->offset, strerror(errno)); 305 return ret; 306 } 307 } 308 309 return 0; 310 } 311 312 static void ics_kvm_set_irq(void *opaque, int srcno, int val) 313 { 314 ICSState *ics = opaque; 315 struct kvm_irq_level args; 316 int rc; 317 318 args.irq = srcno + ics->offset; 319 if (ics->irqs[srcno].flags & XICS_FLAGS_IRQ_MSI) { 320 if (!val) { 321 return; 322 } 323 args.level = KVM_INTERRUPT_SET; 324 } else { 325 args.level = val ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET; 326 } 327 rc = kvm_vm_ioctl(kvm_state, KVM_IRQ_LINE, &args); 328 if (rc < 0) { 329 perror("kvm_irq_line"); 330 } 331 } 332 333 static void ics_kvm_reset(void *dev) 334 { 335 ICSState *ics = ICS_SIMPLE(dev); 336 int i; 337 uint8_t flags[ics->nr_irqs]; 338 339 for (i = 0; i < ics->nr_irqs; i++) { 340 flags[i] = ics->irqs[i].flags; 341 } 342 343 memset(ics->irqs, 0, sizeof(ICSIRQState) * ics->nr_irqs); 344 345 for (i = 0; i < ics->nr_irqs; i++) { 346 ics->irqs[i].priority = 0xff; 347 ics->irqs[i].saved_priority = 0xff; 348 ics->irqs[i].flags = flags[i]; 349 } 350 351 ics_set_kvm_state(ics, 1); 352 } 353 354 static void ics_kvm_realize(DeviceState *dev, Error **errp) 355 { 356 ICSState *ics = ICS_SIMPLE(dev); 357 358 if (!ics->nr_irqs) { 359 error_setg(errp, "Number of interrupts needs to be greater 0"); 360 return; 361 } 362 ics->irqs = g_malloc0(ics->nr_irqs * sizeof(ICSIRQState)); 363 ics->qirqs = qemu_allocate_irqs(ics_kvm_set_irq, ics, ics->nr_irqs); 364 365 qemu_register_reset(ics_kvm_reset, dev); 366 } 367 368 static void ics_kvm_class_init(ObjectClass *klass, void *data) 369 { 370 ICSStateClass *icsc = ICS_BASE_CLASS(klass); 371 372 icsc->realize = ics_kvm_realize; 373 icsc->pre_save = ics_get_kvm_state; 374 icsc->post_load = ics_set_kvm_state; 375 } 376 377 static const TypeInfo ics_kvm_info = { 378 .name = TYPE_ICS_KVM, 379 .parent = TYPE_ICS_SIMPLE, 380 .instance_size = sizeof(ICSState), 381 .class_init = ics_kvm_class_init, 382 }; 383 384 /* 385 * XICS-KVM 386 */ 387 388 static void rtas_dummy(PowerPCCPU *cpu, sPAPRMachineState *spapr, 389 uint32_t token, 390 uint32_t nargs, target_ulong args, 391 uint32_t nret, target_ulong rets) 392 { 393 error_report("pseries: %s must never be called for in-kernel XICS", 394 __func__); 395 } 396 397 int xics_kvm_init(sPAPRMachineState *spapr, Error **errp) 398 { 399 int rc; 400 struct kvm_create_device xics_create_device = { 401 .type = KVM_DEV_TYPE_XICS, 402 .flags = 0, 403 }; 404 405 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_IRQ_XICS)) { 406 error_setg(errp, 407 "KVM and IRQ_XICS capability must be present for in-kernel XICS"); 408 goto fail; 409 } 410 411 spapr_rtas_register(RTAS_IBM_SET_XIVE, "ibm,set-xive", rtas_dummy); 412 spapr_rtas_register(RTAS_IBM_GET_XIVE, "ibm,get-xive", rtas_dummy); 413 spapr_rtas_register(RTAS_IBM_INT_OFF, "ibm,int-off", rtas_dummy); 414 spapr_rtas_register(RTAS_IBM_INT_ON, "ibm,int-on", rtas_dummy); 415 416 rc = kvmppc_define_rtas_kernel_token(RTAS_IBM_SET_XIVE, "ibm,set-xive"); 417 if (rc < 0) { 418 error_setg(errp, "kvmppc_define_rtas_kernel_token: ibm,set-xive"); 419 goto fail; 420 } 421 422 rc = kvmppc_define_rtas_kernel_token(RTAS_IBM_GET_XIVE, "ibm,get-xive"); 423 if (rc < 0) { 424 error_setg(errp, "kvmppc_define_rtas_kernel_token: ibm,get-xive"); 425 goto fail; 426 } 427 428 rc = kvmppc_define_rtas_kernel_token(RTAS_IBM_INT_ON, "ibm,int-on"); 429 if (rc < 0) { 430 error_setg(errp, "kvmppc_define_rtas_kernel_token: ibm,int-on"); 431 goto fail; 432 } 433 434 rc = kvmppc_define_rtas_kernel_token(RTAS_IBM_INT_OFF, "ibm,int-off"); 435 if (rc < 0) { 436 error_setg(errp, "kvmppc_define_rtas_kernel_token: ibm,int-off"); 437 goto fail; 438 } 439 440 /* Create the kernel ICP */ 441 rc = kvm_vm_ioctl(kvm_state, KVM_CREATE_DEVICE, &xics_create_device); 442 if (rc < 0) { 443 error_setg_errno(errp, -rc, "Error on KVM_CREATE_DEVICE for XICS"); 444 goto fail; 445 } 446 447 kernel_xics_fd = xics_create_device.fd; 448 449 kvm_kernel_irqchip = true; 450 kvm_msi_via_irqfd_allowed = true; 451 kvm_gsi_direct_mapping = true; 452 453 return rc; 454 455 fail: 456 kvmppc_define_rtas_kernel_token(0, "ibm,set-xive"); 457 kvmppc_define_rtas_kernel_token(0, "ibm,get-xive"); 458 kvmppc_define_rtas_kernel_token(0, "ibm,int-on"); 459 kvmppc_define_rtas_kernel_token(0, "ibm,int-off"); 460 return -1; 461 } 462 463 static void xics_kvm_register_types(void) 464 { 465 type_register_static(&ics_kvm_info); 466 type_register_static(&icp_kvm_info); 467 } 468 469 type_init(xics_kvm_register_types) 470