1 /* 2 * vfio based subchannel assignment support 3 * 4 * Copyright 2017 IBM Corp. 5 * Copyright 2019 Red Hat, Inc. 6 * 7 * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com> 8 * Xiao Feng Ren <renxiaof@linux.vnet.ibm.com> 9 * Pierre Morel <pmorel@linux.vnet.ibm.com> 10 * Cornelia Huck <cohuck@redhat.com> 11 * 12 * This work is licensed under the terms of the GNU GPL, version 2 or (at 13 * your option) any later version. See the COPYING file in the top-level 14 * directory. 15 */ 16 17 #include "qemu/osdep.h" 18 #include <linux/vfio.h> 19 #include <linux/vfio_ccw.h> 20 #include <sys/ioctl.h> 21 22 #include "qapi/error.h" 23 #include "hw/sysbus.h" 24 #include "hw/vfio/vfio.h" 25 #include "hw/vfio/vfio-common.h" 26 #include "hw/s390x/s390-ccw.h" 27 #include "hw/s390x/vfio-ccw.h" 28 #include "hw/s390x/ccw-device.h" 29 #include "exec/address-spaces.h" 30 #include "qemu/error-report.h" 31 #include "qemu/main-loop.h" 32 #include "qemu/module.h" 33 34 struct VFIOCCWDevice { 35 S390CCWDevice cdev; 36 VFIODevice vdev; 37 uint64_t io_region_size; 38 uint64_t io_region_offset; 39 struct ccw_io_region *io_region; 40 uint64_t async_cmd_region_size; 41 uint64_t async_cmd_region_offset; 42 struct ccw_cmd_region *async_cmd_region; 43 EventNotifier io_notifier; 44 bool force_orb_pfch; 45 bool warned_orb_pfch; 46 }; 47 48 static inline void warn_once_pfch(VFIOCCWDevice *vcdev, SubchDev *sch, 49 const char *msg) 50 { 51 warn_report_once_cond(&vcdev->warned_orb_pfch, 52 "vfio-ccw (devno %x.%x.%04x): %s", 53 sch->cssid, sch->ssid, sch->devno, msg); 54 } 55 56 static void vfio_ccw_compute_needs_reset(VFIODevice *vdev) 57 { 58 vdev->needs_reset = false; 59 } 60 61 /* 62 * We don't need vfio_hot_reset_multi and vfio_eoi operations for 63 * vfio_ccw device now. 64 */ 65 struct VFIODeviceOps vfio_ccw_ops = { 66 .vfio_compute_needs_reset = vfio_ccw_compute_needs_reset, 67 }; 68 69 static IOInstEnding vfio_ccw_handle_request(SubchDev *sch) 70 { 71 S390CCWDevice *cdev = sch->driver_data; 72 VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev); 73 struct ccw_io_region *region = vcdev->io_region; 74 int ret; 75 76 if (!(sch->orb.ctrl0 & ORB_CTRL0_MASK_PFCH)) { 77 if (!(vcdev->force_orb_pfch)) { 78 warn_once_pfch(vcdev, sch, "requires PFCH flag set"); 79 sch_gen_unit_exception(sch); 80 css_inject_io_interrupt(sch); 81 return IOINST_CC_EXPECTED; 82 } else { 83 sch->orb.ctrl0 |= ORB_CTRL0_MASK_PFCH; 84 warn_once_pfch(vcdev, sch, "PFCH flag forced"); 85 } 86 } 87 88 QEMU_BUILD_BUG_ON(sizeof(region->orb_area) != sizeof(ORB)); 89 QEMU_BUILD_BUG_ON(sizeof(region->scsw_area) != sizeof(SCSW)); 90 QEMU_BUILD_BUG_ON(sizeof(region->irb_area) != sizeof(IRB)); 91 92 memset(region, 0, sizeof(*region)); 93 94 memcpy(region->orb_area, &sch->orb, sizeof(ORB)); 95 memcpy(region->scsw_area, &sch->curr_status.scsw, sizeof(SCSW)); 96 97 again: 98 ret = pwrite(vcdev->vdev.fd, region, 99 vcdev->io_region_size, vcdev->io_region_offset); 100 if (ret != vcdev->io_region_size) { 101 if (errno == EAGAIN) { 102 goto again; 103 } 104 error_report("vfio-ccw: wirte I/O region failed with errno=%d", errno); 105 ret = -errno; 106 } else { 107 ret = region->ret_code; 108 } 109 switch (ret) { 110 case 0: 111 return IOINST_CC_EXPECTED; 112 case -EBUSY: 113 return IOINST_CC_BUSY; 114 case -ENODEV: 115 case -EACCES: 116 return IOINST_CC_NOT_OPERATIONAL; 117 case -EFAULT: 118 default: 119 sch_gen_unit_exception(sch); 120 css_inject_io_interrupt(sch); 121 return IOINST_CC_EXPECTED; 122 } 123 } 124 125 static int vfio_ccw_handle_clear(SubchDev *sch) 126 { 127 S390CCWDevice *cdev = sch->driver_data; 128 VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev); 129 struct ccw_cmd_region *region = vcdev->async_cmd_region; 130 int ret; 131 132 if (!vcdev->async_cmd_region) { 133 /* Async command region not available, fall back to emulation */ 134 return -ENOSYS; 135 } 136 137 memset(region, 0, sizeof(*region)); 138 region->command = VFIO_CCW_ASYNC_CMD_CSCH; 139 140 again: 141 ret = pwrite(vcdev->vdev.fd, region, 142 vcdev->async_cmd_region_size, vcdev->async_cmd_region_offset); 143 if (ret != vcdev->async_cmd_region_size) { 144 if (errno == EAGAIN) { 145 goto again; 146 } 147 error_report("vfio-ccw: write cmd region failed with errno=%d", errno); 148 ret = -errno; 149 } else { 150 ret = region->ret_code; 151 } 152 switch (ret) { 153 case 0: 154 case -ENODEV: 155 case -EACCES: 156 return 0; 157 case -EFAULT: 158 default: 159 sch_gen_unit_exception(sch); 160 css_inject_io_interrupt(sch); 161 return 0; 162 } 163 } 164 165 static int vfio_ccw_handle_halt(SubchDev *sch) 166 { 167 S390CCWDevice *cdev = sch->driver_data; 168 VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev); 169 struct ccw_cmd_region *region = vcdev->async_cmd_region; 170 int ret; 171 172 if (!vcdev->async_cmd_region) { 173 /* Async command region not available, fall back to emulation */ 174 return -ENOSYS; 175 } 176 177 memset(region, 0, sizeof(*region)); 178 region->command = VFIO_CCW_ASYNC_CMD_HSCH; 179 180 again: 181 ret = pwrite(vcdev->vdev.fd, region, 182 vcdev->async_cmd_region_size, vcdev->async_cmd_region_offset); 183 if (ret != vcdev->async_cmd_region_size) { 184 if (errno == EAGAIN) { 185 goto again; 186 } 187 error_report("vfio-ccw: write cmd region failed with errno=%d", errno); 188 ret = -errno; 189 } else { 190 ret = region->ret_code; 191 } 192 switch (ret) { 193 case 0: 194 case -EBUSY: 195 case -ENODEV: 196 case -EACCES: 197 return 0; 198 case -EFAULT: 199 default: 200 sch_gen_unit_exception(sch); 201 css_inject_io_interrupt(sch); 202 return 0; 203 } 204 } 205 206 static void vfio_ccw_reset(DeviceState *dev) 207 { 208 CcwDevice *ccw_dev = DO_UPCAST(CcwDevice, parent_obj, dev); 209 S390CCWDevice *cdev = DO_UPCAST(S390CCWDevice, parent_obj, ccw_dev); 210 VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev); 211 212 ioctl(vcdev->vdev.fd, VFIO_DEVICE_RESET); 213 } 214 215 static void vfio_ccw_io_notifier_handler(void *opaque) 216 { 217 VFIOCCWDevice *vcdev = opaque; 218 struct ccw_io_region *region = vcdev->io_region; 219 S390CCWDevice *cdev = S390_CCW_DEVICE(vcdev); 220 CcwDevice *ccw_dev = CCW_DEVICE(cdev); 221 SubchDev *sch = ccw_dev->sch; 222 SCHIB *schib = &sch->curr_status; 223 SCSW s; 224 IRB irb; 225 int size; 226 227 if (!event_notifier_test_and_clear(&vcdev->io_notifier)) { 228 return; 229 } 230 231 size = pread(vcdev->vdev.fd, region, vcdev->io_region_size, 232 vcdev->io_region_offset); 233 if (size == -1) { 234 switch (errno) { 235 case ENODEV: 236 /* Generate a deferred cc 3 condition. */ 237 schib->scsw.flags |= SCSW_FLAGS_MASK_CC; 238 schib->scsw.ctrl &= ~SCSW_CTRL_MASK_STCTL; 239 schib->scsw.ctrl |= (SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND); 240 goto read_err; 241 case EFAULT: 242 /* Memory problem, generate channel data check. */ 243 schib->scsw.ctrl &= ~SCSW_ACTL_START_PEND; 244 schib->scsw.cstat = SCSW_CSTAT_DATA_CHECK; 245 schib->scsw.ctrl &= ~SCSW_CTRL_MASK_STCTL; 246 schib->scsw.ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY | 247 SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND; 248 goto read_err; 249 default: 250 /* Error, generate channel program check. */ 251 schib->scsw.ctrl &= ~SCSW_ACTL_START_PEND; 252 schib->scsw.cstat = SCSW_CSTAT_PROG_CHECK; 253 schib->scsw.ctrl &= ~SCSW_CTRL_MASK_STCTL; 254 schib->scsw.ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY | 255 SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND; 256 goto read_err; 257 } 258 } else if (size != vcdev->io_region_size) { 259 /* Information transfer error, generate channel-control check. */ 260 schib->scsw.ctrl &= ~SCSW_ACTL_START_PEND; 261 schib->scsw.cstat = SCSW_CSTAT_CHN_CTRL_CHK; 262 schib->scsw.ctrl &= ~SCSW_CTRL_MASK_STCTL; 263 schib->scsw.ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY | 264 SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND; 265 goto read_err; 266 } 267 268 memcpy(&irb, region->irb_area, sizeof(IRB)); 269 270 /* Update control block via irb. */ 271 s = schib->scsw; 272 copy_scsw_to_guest(&s, &irb.scsw); 273 schib->scsw = s; 274 275 /* If a uint check is pending, copy sense data. */ 276 if ((schib->scsw.dstat & SCSW_DSTAT_UNIT_CHECK) && 277 (schib->pmcw.chars & PMCW_CHARS_MASK_CSENSE)) { 278 memcpy(sch->sense_data, irb.ecw, sizeof(irb.ecw)); 279 } 280 281 read_err: 282 css_inject_io_interrupt(sch); 283 } 284 285 static void vfio_ccw_register_io_notifier(VFIOCCWDevice *vcdev, Error **errp) 286 { 287 VFIODevice *vdev = &vcdev->vdev; 288 struct vfio_irq_info *irq_info; 289 size_t argsz; 290 int fd; 291 292 if (vdev->num_irqs < VFIO_CCW_IO_IRQ_INDEX + 1) { 293 error_setg(errp, "vfio: unexpected number of io irqs %u", 294 vdev->num_irqs); 295 return; 296 } 297 298 argsz = sizeof(*irq_info); 299 irq_info = g_malloc0(argsz); 300 irq_info->index = VFIO_CCW_IO_IRQ_INDEX; 301 irq_info->argsz = argsz; 302 if (ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO, 303 irq_info) < 0 || irq_info->count < 1) { 304 error_setg_errno(errp, errno, "vfio: Error getting irq info"); 305 goto out_free_info; 306 } 307 308 if (event_notifier_init(&vcdev->io_notifier, 0)) { 309 error_setg_errno(errp, errno, 310 "vfio: Unable to init event notifier for IO"); 311 goto out_free_info; 312 } 313 314 fd = event_notifier_get_fd(&vcdev->io_notifier); 315 qemu_set_fd_handler(fd, vfio_ccw_io_notifier_handler, NULL, vcdev); 316 317 if (vfio_set_irq_signaling(vdev, VFIO_CCW_IO_IRQ_INDEX, 0, 318 VFIO_IRQ_SET_ACTION_TRIGGER, fd, errp)) { 319 qemu_set_fd_handler(fd, NULL, NULL, vcdev); 320 event_notifier_cleanup(&vcdev->io_notifier); 321 } 322 323 out_free_info: 324 g_free(irq_info); 325 } 326 327 static void vfio_ccw_unregister_io_notifier(VFIOCCWDevice *vcdev) 328 { 329 Error *err = NULL; 330 331 if (vfio_set_irq_signaling(&vcdev->vdev, VFIO_CCW_IO_IRQ_INDEX, 0, 332 VFIO_IRQ_SET_ACTION_TRIGGER, -1, &err)) { 333 error_reportf_err(err, VFIO_MSG_PREFIX, vcdev->vdev.name); 334 } 335 336 qemu_set_fd_handler(event_notifier_get_fd(&vcdev->io_notifier), 337 NULL, NULL, vcdev); 338 event_notifier_cleanup(&vcdev->io_notifier); 339 } 340 341 static void vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp) 342 { 343 VFIODevice *vdev = &vcdev->vdev; 344 struct vfio_region_info *info; 345 int ret; 346 347 /* Sanity check device */ 348 if (!(vdev->flags & VFIO_DEVICE_FLAGS_CCW)) { 349 error_setg(errp, "vfio: Um, this isn't a vfio-ccw device"); 350 return; 351 } 352 353 /* 354 * We always expect at least the I/O region to be present. We also 355 * may have a variable number of regions governed by capabilities. 356 */ 357 if (vdev->num_regions < VFIO_CCW_CONFIG_REGION_INDEX + 1) { 358 error_setg(errp, "vfio: too few regions (%u), expected at least %u", 359 vdev->num_regions, VFIO_CCW_CONFIG_REGION_INDEX + 1); 360 return; 361 } 362 363 ret = vfio_get_region_info(vdev, VFIO_CCW_CONFIG_REGION_INDEX, &info); 364 if (ret) { 365 error_setg_errno(errp, -ret, "vfio: Error getting config info"); 366 return; 367 } 368 369 vcdev->io_region_size = info->size; 370 if (sizeof(*vcdev->io_region) != vcdev->io_region_size) { 371 error_setg(errp, "vfio: Unexpected size of the I/O region"); 372 g_free(info); 373 return; 374 } 375 376 vcdev->io_region_offset = info->offset; 377 vcdev->io_region = g_malloc0(info->size); 378 379 /* check for the optional async command region */ 380 ret = vfio_get_dev_region_info(vdev, VFIO_REGION_TYPE_CCW, 381 VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD, &info); 382 if (!ret) { 383 vcdev->async_cmd_region_size = info->size; 384 if (sizeof(*vcdev->async_cmd_region) != vcdev->async_cmd_region_size) { 385 error_setg(errp, "vfio: Unexpected size of the async cmd region"); 386 g_free(vcdev->io_region); 387 g_free(info); 388 return; 389 } 390 vcdev->async_cmd_region_offset = info->offset; 391 vcdev->async_cmd_region = g_malloc0(info->size); 392 } 393 394 g_free(info); 395 } 396 397 static void vfio_ccw_put_region(VFIOCCWDevice *vcdev) 398 { 399 g_free(vcdev->async_cmd_region); 400 g_free(vcdev->io_region); 401 } 402 403 static void vfio_ccw_put_device(VFIOCCWDevice *vcdev) 404 { 405 g_free(vcdev->vdev.name); 406 vfio_put_base_device(&vcdev->vdev); 407 } 408 409 static void vfio_ccw_get_device(VFIOGroup *group, VFIOCCWDevice *vcdev, 410 Error **errp) 411 { 412 char *name = g_strdup_printf("%x.%x.%04x", vcdev->cdev.hostid.cssid, 413 vcdev->cdev.hostid.ssid, 414 vcdev->cdev.hostid.devid); 415 VFIODevice *vbasedev; 416 417 QLIST_FOREACH(vbasedev, &group->device_list, next) { 418 if (strcmp(vbasedev->name, name) == 0) { 419 error_setg(errp, "vfio: subchannel %s has already been attached", 420 name); 421 goto out_err; 422 } 423 } 424 425 /* 426 * All vfio-ccw devices are believed to operate in a way compatible with 427 * memory ballooning, ie. pages pinned in the host are in the current 428 * working set of the guest driver and therefore never overlap with pages 429 * available to the guest balloon driver. This needs to be set before 430 * vfio_get_device() for vfio common to handle the balloon inhibitor. 431 */ 432 vcdev->vdev.balloon_allowed = true; 433 434 if (vfio_get_device(group, vcdev->cdev.mdevid, &vcdev->vdev, errp)) { 435 goto out_err; 436 } 437 438 vcdev->vdev.ops = &vfio_ccw_ops; 439 vcdev->vdev.type = VFIO_DEVICE_TYPE_CCW; 440 vcdev->vdev.name = name; 441 vcdev->vdev.dev = &vcdev->cdev.parent_obj.parent_obj; 442 443 return; 444 445 out_err: 446 g_free(name); 447 } 448 449 static VFIOGroup *vfio_ccw_get_group(S390CCWDevice *cdev, Error **errp) 450 { 451 char *tmp, group_path[PATH_MAX]; 452 ssize_t len; 453 int groupid; 454 455 tmp = g_strdup_printf("/sys/bus/css/devices/%x.%x.%04x/%s/iommu_group", 456 cdev->hostid.cssid, cdev->hostid.ssid, 457 cdev->hostid.devid, cdev->mdevid); 458 len = readlink(tmp, group_path, sizeof(group_path)); 459 g_free(tmp); 460 461 if (len <= 0 || len >= sizeof(group_path)) { 462 error_setg(errp, "vfio: no iommu_group found"); 463 return NULL; 464 } 465 466 group_path[len] = 0; 467 468 if (sscanf(basename(group_path), "%d", &groupid) != 1) { 469 error_setg(errp, "vfio: failed to read %s", group_path); 470 return NULL; 471 } 472 473 return vfio_get_group(groupid, &address_space_memory, errp); 474 } 475 476 static void vfio_ccw_realize(DeviceState *dev, Error **errp) 477 { 478 VFIOGroup *group; 479 CcwDevice *ccw_dev = DO_UPCAST(CcwDevice, parent_obj, dev); 480 S390CCWDevice *cdev = DO_UPCAST(S390CCWDevice, parent_obj, ccw_dev); 481 VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev); 482 S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(cdev); 483 Error *err = NULL; 484 485 /* Call the class init function for subchannel. */ 486 if (cdc->realize) { 487 cdc->realize(cdev, vcdev->vdev.sysfsdev, &err); 488 if (err) { 489 goto out_err_propagate; 490 } 491 } 492 493 group = vfio_ccw_get_group(cdev, &err); 494 if (!group) { 495 goto out_group_err; 496 } 497 498 vfio_ccw_get_device(group, vcdev, &err); 499 if (err) { 500 goto out_device_err; 501 } 502 503 vfio_ccw_get_region(vcdev, &err); 504 if (err) { 505 goto out_region_err; 506 } 507 508 vfio_ccw_register_io_notifier(vcdev, &err); 509 if (err) { 510 goto out_notifier_err; 511 } 512 513 return; 514 515 out_notifier_err: 516 vfio_ccw_put_region(vcdev); 517 out_region_err: 518 vfio_ccw_put_device(vcdev); 519 out_device_err: 520 vfio_put_group(group); 521 out_group_err: 522 if (cdc->unrealize) { 523 cdc->unrealize(cdev, NULL); 524 } 525 out_err_propagate: 526 error_propagate(errp, err); 527 } 528 529 static void vfio_ccw_unrealize(DeviceState *dev, Error **errp) 530 { 531 CcwDevice *ccw_dev = DO_UPCAST(CcwDevice, parent_obj, dev); 532 S390CCWDevice *cdev = DO_UPCAST(S390CCWDevice, parent_obj, ccw_dev); 533 VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev); 534 S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(cdev); 535 VFIOGroup *group = vcdev->vdev.group; 536 537 vfio_ccw_unregister_io_notifier(vcdev); 538 vfio_ccw_put_region(vcdev); 539 vfio_ccw_put_device(vcdev); 540 vfio_put_group(group); 541 542 if (cdc->unrealize) { 543 cdc->unrealize(cdev, errp); 544 } 545 } 546 547 static Property vfio_ccw_properties[] = { 548 DEFINE_PROP_STRING("sysfsdev", VFIOCCWDevice, vdev.sysfsdev), 549 DEFINE_PROP_BOOL("force-orb-pfch", VFIOCCWDevice, force_orb_pfch, false), 550 DEFINE_PROP_END_OF_LIST(), 551 }; 552 553 static const VMStateDescription vfio_ccw_vmstate = { 554 .name = "vfio-ccw", 555 .unmigratable = 1, 556 }; 557 558 static void vfio_ccw_class_init(ObjectClass *klass, void *data) 559 { 560 DeviceClass *dc = DEVICE_CLASS(klass); 561 S390CCWDeviceClass *cdc = S390_CCW_DEVICE_CLASS(klass); 562 563 dc->props = vfio_ccw_properties; 564 dc->vmsd = &vfio_ccw_vmstate; 565 dc->desc = "VFIO-based subchannel assignment"; 566 set_bit(DEVICE_CATEGORY_MISC, dc->categories); 567 dc->realize = vfio_ccw_realize; 568 dc->unrealize = vfio_ccw_unrealize; 569 dc->reset = vfio_ccw_reset; 570 571 cdc->handle_request = vfio_ccw_handle_request; 572 cdc->handle_halt = vfio_ccw_handle_halt; 573 cdc->handle_clear = vfio_ccw_handle_clear; 574 } 575 576 static const TypeInfo vfio_ccw_info = { 577 .name = TYPE_VFIO_CCW, 578 .parent = TYPE_S390_CCW, 579 .instance_size = sizeof(VFIOCCWDevice), 580 .class_init = vfio_ccw_class_init, 581 }; 582 583 static void register_vfio_ccw_type(void) 584 { 585 type_register_static(&vfio_ccw_info); 586 } 587 588 type_init(register_vfio_ccw_type) 589