1 /* 2 * vfio based subchannel assignment support 3 * 4 * Copyright 2017 IBM Corp. 5 * Copyright 2019 Red Hat, Inc. 6 * 7 * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com> 8 * Xiao Feng Ren <renxiaof@linux.vnet.ibm.com> 9 * Pierre Morel <pmorel@linux.vnet.ibm.com> 10 * Cornelia Huck <cohuck@redhat.com> 11 * 12 * This work is licensed under the terms of the GNU GPL, version 2 or (at 13 * your option) any later version. See the COPYING file in the top-level 14 * directory. 15 */ 16 17 #include "qemu/osdep.h" 18 #include <linux/vfio.h> 19 #include <linux/vfio_ccw.h> 20 #include <sys/ioctl.h> 21 22 #include "qapi/error.h" 23 #include "hw/sysbus.h" 24 #include "hw/vfio/vfio.h" 25 #include "hw/vfio/vfio-common.h" 26 #include "hw/s390x/s390-ccw.h" 27 #include "hw/s390x/vfio-ccw.h" 28 #include "hw/s390x/ccw-device.h" 29 #include "exec/address-spaces.h" 30 #include "qemu/error-report.h" 31 #include "qemu/module.h" 32 33 struct VFIOCCWDevice { 34 S390CCWDevice cdev; 35 VFIODevice vdev; 36 uint64_t io_region_size; 37 uint64_t io_region_offset; 38 struct ccw_io_region *io_region; 39 uint64_t async_cmd_region_size; 40 uint64_t async_cmd_region_offset; 41 struct ccw_cmd_region *async_cmd_region; 42 EventNotifier io_notifier; 43 bool force_orb_pfch; 44 bool warned_orb_pfch; 45 }; 46 47 static inline void warn_once_pfch(VFIOCCWDevice *vcdev, SubchDev *sch, 48 const char *msg) 49 { 50 warn_report_once_cond(&vcdev->warned_orb_pfch, 51 "vfio-ccw (devno %x.%x.%04x): %s", 52 sch->cssid, sch->ssid, sch->devno, msg); 53 } 54 55 static void vfio_ccw_compute_needs_reset(VFIODevice *vdev) 56 { 57 vdev->needs_reset = false; 58 } 59 60 /* 61 * We don't need vfio_hot_reset_multi and vfio_eoi operations for 62 * vfio_ccw device now. 63 */ 64 struct VFIODeviceOps vfio_ccw_ops = { 65 .vfio_compute_needs_reset = vfio_ccw_compute_needs_reset, 66 }; 67 68 static IOInstEnding vfio_ccw_handle_request(SubchDev *sch) 69 { 70 S390CCWDevice *cdev = sch->driver_data; 71 VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev); 72 struct ccw_io_region *region = vcdev->io_region; 73 int ret; 74 75 if (!(sch->orb.ctrl0 & ORB_CTRL0_MASK_PFCH)) { 76 if (!(vcdev->force_orb_pfch)) { 77 warn_once_pfch(vcdev, sch, "requires PFCH flag set"); 78 sch_gen_unit_exception(sch); 79 css_inject_io_interrupt(sch); 80 return IOINST_CC_EXPECTED; 81 } else { 82 sch->orb.ctrl0 |= ORB_CTRL0_MASK_PFCH; 83 warn_once_pfch(vcdev, sch, "PFCH flag forced"); 84 } 85 } 86 87 QEMU_BUILD_BUG_ON(sizeof(region->orb_area) != sizeof(ORB)); 88 QEMU_BUILD_BUG_ON(sizeof(region->scsw_area) != sizeof(SCSW)); 89 QEMU_BUILD_BUG_ON(sizeof(region->irb_area) != sizeof(IRB)); 90 91 memset(region, 0, sizeof(*region)); 92 93 memcpy(region->orb_area, &sch->orb, sizeof(ORB)); 94 memcpy(region->scsw_area, &sch->curr_status.scsw, sizeof(SCSW)); 95 96 again: 97 ret = pwrite(vcdev->vdev.fd, region, 98 vcdev->io_region_size, vcdev->io_region_offset); 99 if (ret != vcdev->io_region_size) { 100 if (errno == EAGAIN) { 101 goto again; 102 } 103 error_report("vfio-ccw: wirte I/O region failed with errno=%d", errno); 104 ret = -errno; 105 } else { 106 ret = region->ret_code; 107 } 108 switch (ret) { 109 case 0: 110 return IOINST_CC_EXPECTED; 111 case -EBUSY: 112 return IOINST_CC_BUSY; 113 case -ENODEV: 114 case -EACCES: 115 return IOINST_CC_NOT_OPERATIONAL; 116 case -EFAULT: 117 default: 118 sch_gen_unit_exception(sch); 119 css_inject_io_interrupt(sch); 120 return IOINST_CC_EXPECTED; 121 } 122 } 123 124 static int vfio_ccw_handle_clear(SubchDev *sch) 125 { 126 S390CCWDevice *cdev = sch->driver_data; 127 VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev); 128 struct ccw_cmd_region *region = vcdev->async_cmd_region; 129 int ret; 130 131 if (!vcdev->async_cmd_region) { 132 /* Async command region not available, fall back to emulation */ 133 return -ENOSYS; 134 } 135 136 memset(region, 0, sizeof(*region)); 137 region->command = VFIO_CCW_ASYNC_CMD_CSCH; 138 139 again: 140 ret = pwrite(vcdev->vdev.fd, region, 141 vcdev->async_cmd_region_size, vcdev->async_cmd_region_offset); 142 if (ret != vcdev->async_cmd_region_size) { 143 if (errno == EAGAIN) { 144 goto again; 145 } 146 error_report("vfio-ccw: write cmd region failed with errno=%d", errno); 147 ret = -errno; 148 } else { 149 ret = region->ret_code; 150 } 151 switch (ret) { 152 case 0: 153 case -ENODEV: 154 case -EACCES: 155 return 0; 156 case -EFAULT: 157 default: 158 sch_gen_unit_exception(sch); 159 css_inject_io_interrupt(sch); 160 return 0; 161 } 162 } 163 164 static int vfio_ccw_handle_halt(SubchDev *sch) 165 { 166 S390CCWDevice *cdev = sch->driver_data; 167 VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev); 168 struct ccw_cmd_region *region = vcdev->async_cmd_region; 169 int ret; 170 171 if (!vcdev->async_cmd_region) { 172 /* Async command region not available, fall back to emulation */ 173 return -ENOSYS; 174 } 175 176 memset(region, 0, sizeof(*region)); 177 region->command = VFIO_CCW_ASYNC_CMD_HSCH; 178 179 again: 180 ret = pwrite(vcdev->vdev.fd, region, 181 vcdev->async_cmd_region_size, vcdev->async_cmd_region_offset); 182 if (ret != vcdev->async_cmd_region_size) { 183 if (errno == EAGAIN) { 184 goto again; 185 } 186 error_report("vfio-ccw: write cmd region failed with errno=%d", errno); 187 ret = -errno; 188 } else { 189 ret = region->ret_code; 190 } 191 switch (ret) { 192 case 0: 193 case -EBUSY: 194 case -ENODEV: 195 case -EACCES: 196 return 0; 197 case -EFAULT: 198 default: 199 sch_gen_unit_exception(sch); 200 css_inject_io_interrupt(sch); 201 return 0; 202 } 203 } 204 205 static void vfio_ccw_reset(DeviceState *dev) 206 { 207 CcwDevice *ccw_dev = DO_UPCAST(CcwDevice, parent_obj, dev); 208 S390CCWDevice *cdev = DO_UPCAST(S390CCWDevice, parent_obj, ccw_dev); 209 VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev); 210 211 ioctl(vcdev->vdev.fd, VFIO_DEVICE_RESET); 212 } 213 214 static void vfio_ccw_io_notifier_handler(void *opaque) 215 { 216 VFIOCCWDevice *vcdev = opaque; 217 struct ccw_io_region *region = vcdev->io_region; 218 S390CCWDevice *cdev = S390_CCW_DEVICE(vcdev); 219 CcwDevice *ccw_dev = CCW_DEVICE(cdev); 220 SubchDev *sch = ccw_dev->sch; 221 SCHIB *schib = &sch->curr_status; 222 SCSW s; 223 IRB irb; 224 int size; 225 226 if (!event_notifier_test_and_clear(&vcdev->io_notifier)) { 227 return; 228 } 229 230 size = pread(vcdev->vdev.fd, region, vcdev->io_region_size, 231 vcdev->io_region_offset); 232 if (size == -1) { 233 switch (errno) { 234 case ENODEV: 235 /* Generate a deferred cc 3 condition. */ 236 schib->scsw.flags |= SCSW_FLAGS_MASK_CC; 237 schib->scsw.ctrl &= ~SCSW_CTRL_MASK_STCTL; 238 schib->scsw.ctrl |= (SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND); 239 goto read_err; 240 case EFAULT: 241 /* Memory problem, generate channel data check. */ 242 schib->scsw.ctrl &= ~SCSW_ACTL_START_PEND; 243 schib->scsw.cstat = SCSW_CSTAT_DATA_CHECK; 244 schib->scsw.ctrl &= ~SCSW_CTRL_MASK_STCTL; 245 schib->scsw.ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY | 246 SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND; 247 goto read_err; 248 default: 249 /* Error, generate channel program check. */ 250 schib->scsw.ctrl &= ~SCSW_ACTL_START_PEND; 251 schib->scsw.cstat = SCSW_CSTAT_PROG_CHECK; 252 schib->scsw.ctrl &= ~SCSW_CTRL_MASK_STCTL; 253 schib->scsw.ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY | 254 SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND; 255 goto read_err; 256 } 257 } else if (size != vcdev->io_region_size) { 258 /* Information transfer error, generate channel-control check. */ 259 schib->scsw.ctrl &= ~SCSW_ACTL_START_PEND; 260 schib->scsw.cstat = SCSW_CSTAT_CHN_CTRL_CHK; 261 schib->scsw.ctrl &= ~SCSW_CTRL_MASK_STCTL; 262 schib->scsw.ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY | 263 SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND; 264 goto read_err; 265 } 266 267 memcpy(&irb, region->irb_area, sizeof(IRB)); 268 269 /* Update control block via irb. */ 270 s = schib->scsw; 271 copy_scsw_to_guest(&s, &irb.scsw); 272 schib->scsw = s; 273 274 /* If a uint check is pending, copy sense data. */ 275 if ((schib->scsw.dstat & SCSW_DSTAT_UNIT_CHECK) && 276 (schib->pmcw.chars & PMCW_CHARS_MASK_CSENSE)) { 277 memcpy(sch->sense_data, irb.ecw, sizeof(irb.ecw)); 278 } 279 280 read_err: 281 css_inject_io_interrupt(sch); 282 } 283 284 static void vfio_ccw_register_io_notifier(VFIOCCWDevice *vcdev, Error **errp) 285 { 286 VFIODevice *vdev = &vcdev->vdev; 287 struct vfio_irq_info *irq_info; 288 size_t argsz; 289 int fd; 290 291 if (vdev->num_irqs < VFIO_CCW_IO_IRQ_INDEX + 1) { 292 error_setg(errp, "vfio: unexpected number of io irqs %u", 293 vdev->num_irqs); 294 return; 295 } 296 297 argsz = sizeof(*irq_info); 298 irq_info = g_malloc0(argsz); 299 irq_info->index = VFIO_CCW_IO_IRQ_INDEX; 300 irq_info->argsz = argsz; 301 if (ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO, 302 irq_info) < 0 || irq_info->count < 1) { 303 error_setg_errno(errp, errno, "vfio: Error getting irq info"); 304 goto out_free_info; 305 } 306 307 if (event_notifier_init(&vcdev->io_notifier, 0)) { 308 error_setg_errno(errp, errno, 309 "vfio: Unable to init event notifier for IO"); 310 goto out_free_info; 311 } 312 313 fd = event_notifier_get_fd(&vcdev->io_notifier); 314 qemu_set_fd_handler(fd, vfio_ccw_io_notifier_handler, NULL, vcdev); 315 316 if (vfio_set_irq_signaling(vdev, VFIO_CCW_IO_IRQ_INDEX, 0, 317 VFIO_IRQ_SET_ACTION_TRIGGER, fd, errp)) { 318 qemu_set_fd_handler(fd, NULL, NULL, vcdev); 319 event_notifier_cleanup(&vcdev->io_notifier); 320 } 321 322 out_free_info: 323 g_free(irq_info); 324 } 325 326 static void vfio_ccw_unregister_io_notifier(VFIOCCWDevice *vcdev) 327 { 328 Error *err = NULL; 329 330 if (vfio_set_irq_signaling(&vcdev->vdev, VFIO_CCW_IO_IRQ_INDEX, 0, 331 VFIO_IRQ_SET_ACTION_TRIGGER, -1, &err)) { 332 error_reportf_err(err, VFIO_MSG_PREFIX, vcdev->vdev.name); 333 } 334 335 qemu_set_fd_handler(event_notifier_get_fd(&vcdev->io_notifier), 336 NULL, NULL, vcdev); 337 event_notifier_cleanup(&vcdev->io_notifier); 338 } 339 340 static void vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp) 341 { 342 VFIODevice *vdev = &vcdev->vdev; 343 struct vfio_region_info *info; 344 int ret; 345 346 /* Sanity check device */ 347 if (!(vdev->flags & VFIO_DEVICE_FLAGS_CCW)) { 348 error_setg(errp, "vfio: Um, this isn't a vfio-ccw device"); 349 return; 350 } 351 352 /* 353 * We always expect at least the I/O region to be present. We also 354 * may have a variable number of regions governed by capabilities. 355 */ 356 if (vdev->num_regions < VFIO_CCW_CONFIG_REGION_INDEX + 1) { 357 error_setg(errp, "vfio: too few regions (%u), expected at least %u", 358 vdev->num_regions, VFIO_CCW_CONFIG_REGION_INDEX + 1); 359 return; 360 } 361 362 ret = vfio_get_region_info(vdev, VFIO_CCW_CONFIG_REGION_INDEX, &info); 363 if (ret) { 364 error_setg_errno(errp, -ret, "vfio: Error getting config info"); 365 return; 366 } 367 368 vcdev->io_region_size = info->size; 369 if (sizeof(*vcdev->io_region) != vcdev->io_region_size) { 370 error_setg(errp, "vfio: Unexpected size of the I/O region"); 371 g_free(info); 372 return; 373 } 374 375 vcdev->io_region_offset = info->offset; 376 vcdev->io_region = g_malloc0(info->size); 377 378 /* check for the optional async command region */ 379 ret = vfio_get_dev_region_info(vdev, VFIO_REGION_TYPE_CCW, 380 VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD, &info); 381 if (!ret) { 382 vcdev->async_cmd_region_size = info->size; 383 if (sizeof(*vcdev->async_cmd_region) != vcdev->async_cmd_region_size) { 384 error_setg(errp, "vfio: Unexpected size of the async cmd region"); 385 g_free(vcdev->io_region); 386 g_free(info); 387 return; 388 } 389 vcdev->async_cmd_region_offset = info->offset; 390 vcdev->async_cmd_region = g_malloc0(info->size); 391 } 392 393 g_free(info); 394 } 395 396 static void vfio_ccw_put_region(VFIOCCWDevice *vcdev) 397 { 398 g_free(vcdev->async_cmd_region); 399 g_free(vcdev->io_region); 400 } 401 402 static void vfio_ccw_put_device(VFIOCCWDevice *vcdev) 403 { 404 g_free(vcdev->vdev.name); 405 vfio_put_base_device(&vcdev->vdev); 406 } 407 408 static void vfio_ccw_get_device(VFIOGroup *group, VFIOCCWDevice *vcdev, 409 Error **errp) 410 { 411 char *name = g_strdup_printf("%x.%x.%04x", vcdev->cdev.hostid.cssid, 412 vcdev->cdev.hostid.ssid, 413 vcdev->cdev.hostid.devid); 414 VFIODevice *vbasedev; 415 416 QLIST_FOREACH(vbasedev, &group->device_list, next) { 417 if (strcmp(vbasedev->name, name) == 0) { 418 error_setg(errp, "vfio: subchannel %s has already been attached", 419 name); 420 goto out_err; 421 } 422 } 423 424 /* 425 * All vfio-ccw devices are believed to operate in a way compatible with 426 * memory ballooning, ie. pages pinned in the host are in the current 427 * working set of the guest driver and therefore never overlap with pages 428 * available to the guest balloon driver. This needs to be set before 429 * vfio_get_device() for vfio common to handle the balloon inhibitor. 430 */ 431 vcdev->vdev.balloon_allowed = true; 432 433 if (vfio_get_device(group, vcdev->cdev.mdevid, &vcdev->vdev, errp)) { 434 goto out_err; 435 } 436 437 vcdev->vdev.ops = &vfio_ccw_ops; 438 vcdev->vdev.type = VFIO_DEVICE_TYPE_CCW; 439 vcdev->vdev.name = name; 440 vcdev->vdev.dev = &vcdev->cdev.parent_obj.parent_obj; 441 442 return; 443 444 out_err: 445 g_free(name); 446 } 447 448 static VFIOGroup *vfio_ccw_get_group(S390CCWDevice *cdev, Error **errp) 449 { 450 char *tmp, group_path[PATH_MAX]; 451 ssize_t len; 452 int groupid; 453 454 tmp = g_strdup_printf("/sys/bus/css/devices/%x.%x.%04x/%s/iommu_group", 455 cdev->hostid.cssid, cdev->hostid.ssid, 456 cdev->hostid.devid, cdev->mdevid); 457 len = readlink(tmp, group_path, sizeof(group_path)); 458 g_free(tmp); 459 460 if (len <= 0 || len >= sizeof(group_path)) { 461 error_setg(errp, "vfio: no iommu_group found"); 462 return NULL; 463 } 464 465 group_path[len] = 0; 466 467 if (sscanf(basename(group_path), "%d", &groupid) != 1) { 468 error_setg(errp, "vfio: failed to read %s", group_path); 469 return NULL; 470 } 471 472 return vfio_get_group(groupid, &address_space_memory, errp); 473 } 474 475 static void vfio_ccw_realize(DeviceState *dev, Error **errp) 476 { 477 VFIOGroup *group; 478 CcwDevice *ccw_dev = DO_UPCAST(CcwDevice, parent_obj, dev); 479 S390CCWDevice *cdev = DO_UPCAST(S390CCWDevice, parent_obj, ccw_dev); 480 VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev); 481 S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(cdev); 482 Error *err = NULL; 483 484 /* Call the class init function for subchannel. */ 485 if (cdc->realize) { 486 cdc->realize(cdev, vcdev->vdev.sysfsdev, &err); 487 if (err) { 488 goto out_err_propagate; 489 } 490 } 491 492 group = vfio_ccw_get_group(cdev, &err); 493 if (!group) { 494 goto out_group_err; 495 } 496 497 vfio_ccw_get_device(group, vcdev, &err); 498 if (err) { 499 goto out_device_err; 500 } 501 502 vfio_ccw_get_region(vcdev, &err); 503 if (err) { 504 goto out_region_err; 505 } 506 507 vfio_ccw_register_io_notifier(vcdev, &err); 508 if (err) { 509 goto out_notifier_err; 510 } 511 512 return; 513 514 out_notifier_err: 515 vfio_ccw_put_region(vcdev); 516 out_region_err: 517 vfio_ccw_put_device(vcdev); 518 out_device_err: 519 vfio_put_group(group); 520 out_group_err: 521 if (cdc->unrealize) { 522 cdc->unrealize(cdev, NULL); 523 } 524 out_err_propagate: 525 error_propagate(errp, err); 526 } 527 528 static void vfio_ccw_unrealize(DeviceState *dev, Error **errp) 529 { 530 CcwDevice *ccw_dev = DO_UPCAST(CcwDevice, parent_obj, dev); 531 S390CCWDevice *cdev = DO_UPCAST(S390CCWDevice, parent_obj, ccw_dev); 532 VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev); 533 S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(cdev); 534 VFIOGroup *group = vcdev->vdev.group; 535 536 vfio_ccw_unregister_io_notifier(vcdev); 537 vfio_ccw_put_region(vcdev); 538 vfio_ccw_put_device(vcdev); 539 vfio_put_group(group); 540 541 if (cdc->unrealize) { 542 cdc->unrealize(cdev, errp); 543 } 544 } 545 546 static Property vfio_ccw_properties[] = { 547 DEFINE_PROP_STRING("sysfsdev", VFIOCCWDevice, vdev.sysfsdev), 548 DEFINE_PROP_BOOL("force-orb-pfch", VFIOCCWDevice, force_orb_pfch, false), 549 DEFINE_PROP_END_OF_LIST(), 550 }; 551 552 static const VMStateDescription vfio_ccw_vmstate = { 553 .name = "vfio-ccw", 554 .unmigratable = 1, 555 }; 556 557 static void vfio_ccw_class_init(ObjectClass *klass, void *data) 558 { 559 DeviceClass *dc = DEVICE_CLASS(klass); 560 S390CCWDeviceClass *cdc = S390_CCW_DEVICE_CLASS(klass); 561 562 dc->props = vfio_ccw_properties; 563 dc->vmsd = &vfio_ccw_vmstate; 564 dc->desc = "VFIO-based subchannel assignment"; 565 set_bit(DEVICE_CATEGORY_MISC, dc->categories); 566 dc->realize = vfio_ccw_realize; 567 dc->unrealize = vfio_ccw_unrealize; 568 dc->reset = vfio_ccw_reset; 569 570 cdc->handle_request = vfio_ccw_handle_request; 571 cdc->handle_halt = vfio_ccw_handle_halt; 572 cdc->handle_clear = vfio_ccw_handle_clear; 573 } 574 575 static const TypeInfo vfio_ccw_info = { 576 .name = TYPE_VFIO_CCW, 577 .parent = TYPE_S390_CCW, 578 .instance_size = sizeof(VFIOCCWDevice), 579 .class_init = vfio_ccw_class_init, 580 }; 581 582 static void register_vfio_ccw_type(void) 583 { 584 type_register_static(&vfio_ccw_info); 585 } 586 587 type_init(register_vfio_ccw_type) 588