1 /* 2 * vfio based subchannel assignment support 3 * 4 * Copyright 2017 IBM Corp. 5 * Copyright 2019 Red Hat, Inc. 6 * 7 * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com> 8 * Xiao Feng Ren <renxiaof@linux.vnet.ibm.com> 9 * Pierre Morel <pmorel@linux.vnet.ibm.com> 10 * Cornelia Huck <cohuck@redhat.com> 11 * 12 * This work is licensed under the terms of the GNU GPL, version 2 or (at 13 * your option) any later version. See the COPYING file in the top-level 14 * directory. 15 */ 16 17 #include "qemu/osdep.h" 18 #include <linux/vfio.h> 19 #include <linux/vfio_ccw.h> 20 #include <sys/ioctl.h> 21 22 #include "qapi/error.h" 23 #include "hw/sysbus.h" 24 #include "hw/vfio/vfio.h" 25 #include "hw/vfio/vfio-common.h" 26 #include "hw/s390x/s390-ccw.h" 27 #include "hw/s390x/vfio-ccw.h" 28 #include "hw/qdev-properties.h" 29 #include "hw/s390x/ccw-device.h" 30 #include "exec/address-spaces.h" 31 #include "qemu/error-report.h" 32 #include "qemu/main-loop.h" 33 #include "qemu/module.h" 34 35 struct VFIOCCWDevice { 36 S390CCWDevice cdev; 37 VFIODevice vdev; 38 uint64_t io_region_size; 39 uint64_t io_region_offset; 40 struct ccw_io_region *io_region; 41 uint64_t async_cmd_region_size; 42 uint64_t async_cmd_region_offset; 43 struct ccw_cmd_region *async_cmd_region; 44 EventNotifier io_notifier; 45 bool force_orb_pfch; 46 bool warned_orb_pfch; 47 }; 48 49 static inline void warn_once_pfch(VFIOCCWDevice *vcdev, SubchDev *sch, 50 const char *msg) 51 { 52 warn_report_once_cond(&vcdev->warned_orb_pfch, 53 "vfio-ccw (devno %x.%x.%04x): %s", 54 sch->cssid, sch->ssid, sch->devno, msg); 55 } 56 57 static void vfio_ccw_compute_needs_reset(VFIODevice *vdev) 58 { 59 vdev->needs_reset = false; 60 } 61 62 /* 63 * We don't need vfio_hot_reset_multi and vfio_eoi operations for 64 * vfio_ccw device now. 65 */ 66 struct VFIODeviceOps vfio_ccw_ops = { 67 .vfio_compute_needs_reset = vfio_ccw_compute_needs_reset, 68 }; 69 70 static IOInstEnding vfio_ccw_handle_request(SubchDev *sch) 71 { 72 S390CCWDevice *cdev = sch->driver_data; 73 VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev); 74 struct ccw_io_region *region = vcdev->io_region; 75 int ret; 76 77 if (!(sch->orb.ctrl0 & ORB_CTRL0_MASK_PFCH) && vcdev->force_orb_pfch) { 78 sch->orb.ctrl0 |= ORB_CTRL0_MASK_PFCH; 79 warn_once_pfch(vcdev, sch, "PFCH flag forced"); 80 } 81 82 QEMU_BUILD_BUG_ON(sizeof(region->orb_area) != sizeof(ORB)); 83 QEMU_BUILD_BUG_ON(sizeof(region->scsw_area) != sizeof(SCSW)); 84 QEMU_BUILD_BUG_ON(sizeof(region->irb_area) != sizeof(IRB)); 85 86 memset(region, 0, sizeof(*region)); 87 88 memcpy(region->orb_area, &sch->orb, sizeof(ORB)); 89 memcpy(region->scsw_area, &sch->curr_status.scsw, sizeof(SCSW)); 90 91 again: 92 ret = pwrite(vcdev->vdev.fd, region, 93 vcdev->io_region_size, vcdev->io_region_offset); 94 if (ret != vcdev->io_region_size) { 95 if (errno == EAGAIN) { 96 goto again; 97 } 98 error_report("vfio-ccw: write I/O region failed with errno=%d", errno); 99 ret = -errno; 100 } else { 101 ret = region->ret_code; 102 } 103 switch (ret) { 104 case 0: 105 return IOINST_CC_EXPECTED; 106 case -EBUSY: 107 return IOINST_CC_BUSY; 108 case -ENODEV: 109 case -EACCES: 110 return IOINST_CC_NOT_OPERATIONAL; 111 case -EFAULT: 112 default: 113 sch_gen_unit_exception(sch); 114 css_inject_io_interrupt(sch); 115 return IOINST_CC_EXPECTED; 116 } 117 } 118 119 static int vfio_ccw_handle_clear(SubchDev *sch) 120 { 121 S390CCWDevice *cdev = sch->driver_data; 122 VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev); 123 struct ccw_cmd_region *region = vcdev->async_cmd_region; 124 int ret; 125 126 if (!vcdev->async_cmd_region) { 127 /* Async command region not available, fall back to emulation */ 128 return -ENOSYS; 129 } 130 131 memset(region, 0, sizeof(*region)); 132 region->command = VFIO_CCW_ASYNC_CMD_CSCH; 133 134 again: 135 ret = pwrite(vcdev->vdev.fd, region, 136 vcdev->async_cmd_region_size, vcdev->async_cmd_region_offset); 137 if (ret != vcdev->async_cmd_region_size) { 138 if (errno == EAGAIN) { 139 goto again; 140 } 141 error_report("vfio-ccw: write cmd region failed with errno=%d", errno); 142 ret = -errno; 143 } else { 144 ret = region->ret_code; 145 } 146 switch (ret) { 147 case 0: 148 case -ENODEV: 149 case -EACCES: 150 return 0; 151 case -EFAULT: 152 default: 153 sch_gen_unit_exception(sch); 154 css_inject_io_interrupt(sch); 155 return 0; 156 } 157 } 158 159 static int vfio_ccw_handle_halt(SubchDev *sch) 160 { 161 S390CCWDevice *cdev = sch->driver_data; 162 VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev); 163 struct ccw_cmd_region *region = vcdev->async_cmd_region; 164 int ret; 165 166 if (!vcdev->async_cmd_region) { 167 /* Async command region not available, fall back to emulation */ 168 return -ENOSYS; 169 } 170 171 memset(region, 0, sizeof(*region)); 172 region->command = VFIO_CCW_ASYNC_CMD_HSCH; 173 174 again: 175 ret = pwrite(vcdev->vdev.fd, region, 176 vcdev->async_cmd_region_size, vcdev->async_cmd_region_offset); 177 if (ret != vcdev->async_cmd_region_size) { 178 if (errno == EAGAIN) { 179 goto again; 180 } 181 error_report("vfio-ccw: write cmd region failed with errno=%d", errno); 182 ret = -errno; 183 } else { 184 ret = region->ret_code; 185 } 186 switch (ret) { 187 case 0: 188 case -EBUSY: 189 case -ENODEV: 190 case -EACCES: 191 return 0; 192 case -EFAULT: 193 default: 194 sch_gen_unit_exception(sch); 195 css_inject_io_interrupt(sch); 196 return 0; 197 } 198 } 199 200 static void vfio_ccw_reset(DeviceState *dev) 201 { 202 CcwDevice *ccw_dev = DO_UPCAST(CcwDevice, parent_obj, dev); 203 S390CCWDevice *cdev = DO_UPCAST(S390CCWDevice, parent_obj, ccw_dev); 204 VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev); 205 206 ioctl(vcdev->vdev.fd, VFIO_DEVICE_RESET); 207 } 208 209 static void vfio_ccw_io_notifier_handler(void *opaque) 210 { 211 VFIOCCWDevice *vcdev = opaque; 212 struct ccw_io_region *region = vcdev->io_region; 213 S390CCWDevice *cdev = S390_CCW_DEVICE(vcdev); 214 CcwDevice *ccw_dev = CCW_DEVICE(cdev); 215 SubchDev *sch = ccw_dev->sch; 216 SCHIB *schib = &sch->curr_status; 217 SCSW s; 218 IRB irb; 219 int size; 220 221 if (!event_notifier_test_and_clear(&vcdev->io_notifier)) { 222 return; 223 } 224 225 size = pread(vcdev->vdev.fd, region, vcdev->io_region_size, 226 vcdev->io_region_offset); 227 if (size == -1) { 228 switch (errno) { 229 case ENODEV: 230 /* Generate a deferred cc 3 condition. */ 231 schib->scsw.flags |= SCSW_FLAGS_MASK_CC; 232 schib->scsw.ctrl &= ~SCSW_CTRL_MASK_STCTL; 233 schib->scsw.ctrl |= (SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND); 234 goto read_err; 235 case EFAULT: 236 /* Memory problem, generate channel data check. */ 237 schib->scsw.ctrl &= ~SCSW_ACTL_START_PEND; 238 schib->scsw.cstat = SCSW_CSTAT_DATA_CHECK; 239 schib->scsw.ctrl &= ~SCSW_CTRL_MASK_STCTL; 240 schib->scsw.ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY | 241 SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND; 242 goto read_err; 243 default: 244 /* Error, generate channel program check. */ 245 schib->scsw.ctrl &= ~SCSW_ACTL_START_PEND; 246 schib->scsw.cstat = SCSW_CSTAT_PROG_CHECK; 247 schib->scsw.ctrl &= ~SCSW_CTRL_MASK_STCTL; 248 schib->scsw.ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY | 249 SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND; 250 goto read_err; 251 } 252 } else if (size != vcdev->io_region_size) { 253 /* Information transfer error, generate channel-control check. */ 254 schib->scsw.ctrl &= ~SCSW_ACTL_START_PEND; 255 schib->scsw.cstat = SCSW_CSTAT_CHN_CTRL_CHK; 256 schib->scsw.ctrl &= ~SCSW_CTRL_MASK_STCTL; 257 schib->scsw.ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY | 258 SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND; 259 goto read_err; 260 } 261 262 memcpy(&irb, region->irb_area, sizeof(IRB)); 263 264 /* Update control block via irb. */ 265 s = schib->scsw; 266 copy_scsw_to_guest(&s, &irb.scsw); 267 schib->scsw = s; 268 269 /* If a uint check is pending, copy sense data. */ 270 if ((schib->scsw.dstat & SCSW_DSTAT_UNIT_CHECK) && 271 (schib->pmcw.chars & PMCW_CHARS_MASK_CSENSE)) { 272 memcpy(sch->sense_data, irb.ecw, sizeof(irb.ecw)); 273 } 274 275 read_err: 276 css_inject_io_interrupt(sch); 277 } 278 279 static void vfio_ccw_register_io_notifier(VFIOCCWDevice *vcdev, Error **errp) 280 { 281 VFIODevice *vdev = &vcdev->vdev; 282 struct vfio_irq_info *irq_info; 283 size_t argsz; 284 int fd; 285 286 if (vdev->num_irqs < VFIO_CCW_IO_IRQ_INDEX + 1) { 287 error_setg(errp, "vfio: unexpected number of io irqs %u", 288 vdev->num_irqs); 289 return; 290 } 291 292 argsz = sizeof(*irq_info); 293 irq_info = g_malloc0(argsz); 294 irq_info->index = VFIO_CCW_IO_IRQ_INDEX; 295 irq_info->argsz = argsz; 296 if (ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO, 297 irq_info) < 0 || irq_info->count < 1) { 298 error_setg_errno(errp, errno, "vfio: Error getting irq info"); 299 goto out_free_info; 300 } 301 302 if (event_notifier_init(&vcdev->io_notifier, 0)) { 303 error_setg_errno(errp, errno, 304 "vfio: Unable to init event notifier for IO"); 305 goto out_free_info; 306 } 307 308 fd = event_notifier_get_fd(&vcdev->io_notifier); 309 qemu_set_fd_handler(fd, vfio_ccw_io_notifier_handler, NULL, vcdev); 310 311 if (vfio_set_irq_signaling(vdev, VFIO_CCW_IO_IRQ_INDEX, 0, 312 VFIO_IRQ_SET_ACTION_TRIGGER, fd, errp)) { 313 qemu_set_fd_handler(fd, NULL, NULL, vcdev); 314 event_notifier_cleanup(&vcdev->io_notifier); 315 } 316 317 out_free_info: 318 g_free(irq_info); 319 } 320 321 static void vfio_ccw_unregister_io_notifier(VFIOCCWDevice *vcdev) 322 { 323 Error *err = NULL; 324 325 if (vfio_set_irq_signaling(&vcdev->vdev, VFIO_CCW_IO_IRQ_INDEX, 0, 326 VFIO_IRQ_SET_ACTION_TRIGGER, -1, &err)) { 327 error_reportf_err(err, VFIO_MSG_PREFIX, vcdev->vdev.name); 328 } 329 330 qemu_set_fd_handler(event_notifier_get_fd(&vcdev->io_notifier), 331 NULL, NULL, vcdev); 332 event_notifier_cleanup(&vcdev->io_notifier); 333 } 334 335 static void vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp) 336 { 337 VFIODevice *vdev = &vcdev->vdev; 338 struct vfio_region_info *info; 339 int ret; 340 341 /* Sanity check device */ 342 if (!(vdev->flags & VFIO_DEVICE_FLAGS_CCW)) { 343 error_setg(errp, "vfio: Um, this isn't a vfio-ccw device"); 344 return; 345 } 346 347 /* 348 * We always expect at least the I/O region to be present. We also 349 * may have a variable number of regions governed by capabilities. 350 */ 351 if (vdev->num_regions < VFIO_CCW_CONFIG_REGION_INDEX + 1) { 352 error_setg(errp, "vfio: too few regions (%u), expected at least %u", 353 vdev->num_regions, VFIO_CCW_CONFIG_REGION_INDEX + 1); 354 return; 355 } 356 357 ret = vfio_get_region_info(vdev, VFIO_CCW_CONFIG_REGION_INDEX, &info); 358 if (ret) { 359 error_setg_errno(errp, -ret, "vfio: Error getting config info"); 360 return; 361 } 362 363 vcdev->io_region_size = info->size; 364 if (sizeof(*vcdev->io_region) != vcdev->io_region_size) { 365 error_setg(errp, "vfio: Unexpected size of the I/O region"); 366 g_free(info); 367 return; 368 } 369 370 vcdev->io_region_offset = info->offset; 371 vcdev->io_region = g_malloc0(info->size); 372 373 /* check for the optional async command region */ 374 ret = vfio_get_dev_region_info(vdev, VFIO_REGION_TYPE_CCW, 375 VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD, &info); 376 if (!ret) { 377 vcdev->async_cmd_region_size = info->size; 378 if (sizeof(*vcdev->async_cmd_region) != vcdev->async_cmd_region_size) { 379 error_setg(errp, "vfio: Unexpected size of the async cmd region"); 380 g_free(vcdev->io_region); 381 g_free(info); 382 return; 383 } 384 vcdev->async_cmd_region_offset = info->offset; 385 vcdev->async_cmd_region = g_malloc0(info->size); 386 } 387 388 g_free(info); 389 } 390 391 static void vfio_ccw_put_region(VFIOCCWDevice *vcdev) 392 { 393 g_free(vcdev->async_cmd_region); 394 g_free(vcdev->io_region); 395 } 396 397 static void vfio_ccw_put_device(VFIOCCWDevice *vcdev) 398 { 399 g_free(vcdev->vdev.name); 400 vfio_put_base_device(&vcdev->vdev); 401 } 402 403 static void vfio_ccw_get_device(VFIOGroup *group, VFIOCCWDevice *vcdev, 404 Error **errp) 405 { 406 char *name = g_strdup_printf("%x.%x.%04x", vcdev->cdev.hostid.cssid, 407 vcdev->cdev.hostid.ssid, 408 vcdev->cdev.hostid.devid); 409 VFIODevice *vbasedev; 410 411 QLIST_FOREACH(vbasedev, &group->device_list, next) { 412 if (strcmp(vbasedev->name, name) == 0) { 413 error_setg(errp, "vfio: subchannel %s has already been attached", 414 name); 415 goto out_err; 416 } 417 } 418 419 /* 420 * All vfio-ccw devices are believed to operate in a way compatible with 421 * memory ballooning, ie. pages pinned in the host are in the current 422 * working set of the guest driver and therefore never overlap with pages 423 * available to the guest balloon driver. This needs to be set before 424 * vfio_get_device() for vfio common to handle the balloon inhibitor. 425 */ 426 vcdev->vdev.balloon_allowed = true; 427 428 if (vfio_get_device(group, vcdev->cdev.mdevid, &vcdev->vdev, errp)) { 429 goto out_err; 430 } 431 432 vcdev->vdev.ops = &vfio_ccw_ops; 433 vcdev->vdev.type = VFIO_DEVICE_TYPE_CCW; 434 vcdev->vdev.name = name; 435 vcdev->vdev.dev = &vcdev->cdev.parent_obj.parent_obj; 436 437 return; 438 439 out_err: 440 g_free(name); 441 } 442 443 static VFIOGroup *vfio_ccw_get_group(S390CCWDevice *cdev, Error **errp) 444 { 445 char *tmp, group_path[PATH_MAX]; 446 ssize_t len; 447 int groupid; 448 449 tmp = g_strdup_printf("/sys/bus/css/devices/%x.%x.%04x/%s/iommu_group", 450 cdev->hostid.cssid, cdev->hostid.ssid, 451 cdev->hostid.devid, cdev->mdevid); 452 len = readlink(tmp, group_path, sizeof(group_path)); 453 g_free(tmp); 454 455 if (len <= 0 || len >= sizeof(group_path)) { 456 error_setg(errp, "vfio: no iommu_group found"); 457 return NULL; 458 } 459 460 group_path[len] = 0; 461 462 if (sscanf(basename(group_path), "%d", &groupid) != 1) { 463 error_setg(errp, "vfio: failed to read %s", group_path); 464 return NULL; 465 } 466 467 return vfio_get_group(groupid, &address_space_memory, errp); 468 } 469 470 static void vfio_ccw_realize(DeviceState *dev, Error **errp) 471 { 472 VFIOGroup *group; 473 CcwDevice *ccw_dev = DO_UPCAST(CcwDevice, parent_obj, dev); 474 S390CCWDevice *cdev = DO_UPCAST(S390CCWDevice, parent_obj, ccw_dev); 475 VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev); 476 S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(cdev); 477 Error *err = NULL; 478 479 /* Call the class init function for subchannel. */ 480 if (cdc->realize) { 481 cdc->realize(cdev, vcdev->vdev.sysfsdev, &err); 482 if (err) { 483 goto out_err_propagate; 484 } 485 } 486 487 group = vfio_ccw_get_group(cdev, &err); 488 if (!group) { 489 goto out_group_err; 490 } 491 492 vfio_ccw_get_device(group, vcdev, &err); 493 if (err) { 494 goto out_device_err; 495 } 496 497 vfio_ccw_get_region(vcdev, &err); 498 if (err) { 499 goto out_region_err; 500 } 501 502 vfio_ccw_register_io_notifier(vcdev, &err); 503 if (err) { 504 goto out_notifier_err; 505 } 506 507 return; 508 509 out_notifier_err: 510 vfio_ccw_put_region(vcdev); 511 out_region_err: 512 vfio_ccw_put_device(vcdev); 513 out_device_err: 514 vfio_put_group(group); 515 out_group_err: 516 if (cdc->unrealize) { 517 cdc->unrealize(cdev); 518 } 519 out_err_propagate: 520 error_propagate(errp, err); 521 } 522 523 static void vfio_ccw_unrealize(DeviceState *dev) 524 { 525 CcwDevice *ccw_dev = DO_UPCAST(CcwDevice, parent_obj, dev); 526 S390CCWDevice *cdev = DO_UPCAST(S390CCWDevice, parent_obj, ccw_dev); 527 VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev); 528 S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(cdev); 529 VFIOGroup *group = vcdev->vdev.group; 530 531 vfio_ccw_unregister_io_notifier(vcdev); 532 vfio_ccw_put_region(vcdev); 533 vfio_ccw_put_device(vcdev); 534 vfio_put_group(group); 535 536 if (cdc->unrealize) { 537 cdc->unrealize(cdev); 538 } 539 } 540 541 static Property vfio_ccw_properties[] = { 542 DEFINE_PROP_STRING("sysfsdev", VFIOCCWDevice, vdev.sysfsdev), 543 DEFINE_PROP_BOOL("force-orb-pfch", VFIOCCWDevice, force_orb_pfch, false), 544 DEFINE_PROP_END_OF_LIST(), 545 }; 546 547 static const VMStateDescription vfio_ccw_vmstate = { 548 .name = "vfio-ccw", 549 .unmigratable = 1, 550 }; 551 552 static void vfio_ccw_class_init(ObjectClass *klass, void *data) 553 { 554 DeviceClass *dc = DEVICE_CLASS(klass); 555 S390CCWDeviceClass *cdc = S390_CCW_DEVICE_CLASS(klass); 556 557 device_class_set_props(dc, vfio_ccw_properties); 558 dc->vmsd = &vfio_ccw_vmstate; 559 dc->desc = "VFIO-based subchannel assignment"; 560 set_bit(DEVICE_CATEGORY_MISC, dc->categories); 561 dc->realize = vfio_ccw_realize; 562 dc->unrealize = vfio_ccw_unrealize; 563 dc->reset = vfio_ccw_reset; 564 565 cdc->handle_request = vfio_ccw_handle_request; 566 cdc->handle_halt = vfio_ccw_handle_halt; 567 cdc->handle_clear = vfio_ccw_handle_clear; 568 } 569 570 static const TypeInfo vfio_ccw_info = { 571 .name = TYPE_VFIO_CCW, 572 .parent = TYPE_S390_CCW, 573 .instance_size = sizeof(VFIOCCWDevice), 574 .class_init = vfio_ccw_class_init, 575 }; 576 577 static void register_vfio_ccw_type(void) 578 { 579 type_register_static(&vfio_ccw_info); 580 } 581 582 type_init(register_vfio_ccw_type) 583