1 /* 2 * vfio based subchannel assignment support 3 * 4 * Copyright 2017 IBM Corp. 5 * Copyright 2019 Red Hat, Inc. 6 * 7 * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com> 8 * Xiao Feng Ren <renxiaof@linux.vnet.ibm.com> 9 * Pierre Morel <pmorel@linux.vnet.ibm.com> 10 * Cornelia Huck <cohuck@redhat.com> 11 * 12 * This work is licensed under the terms of the GNU GPL, version 2 or (at 13 * your option) any later version. See the COPYING file in the top-level 14 * directory. 15 */ 16 17 #include "qemu/osdep.h" 18 #include <linux/vfio.h> 19 #include <linux/vfio_ccw.h> 20 #include <sys/ioctl.h> 21 22 #include "qapi/error.h" 23 #include "hw/sysbus.h" 24 #include "hw/vfio/vfio.h" 25 #include "hw/vfio/vfio-common.h" 26 #include "hw/s390x/s390-ccw.h" 27 #include "hw/s390x/vfio-ccw.h" 28 #include "hw/s390x/ccw-device.h" 29 #include "exec/address-spaces.h" 30 #include "qemu/error-report.h" 31 #include "qemu/module.h" 32 33 struct VFIOCCWDevice { 34 S390CCWDevice cdev; 35 VFIODevice vdev; 36 uint64_t io_region_size; 37 uint64_t io_region_offset; 38 struct ccw_io_region *io_region; 39 uint64_t async_cmd_region_size; 40 uint64_t async_cmd_region_offset; 41 struct ccw_cmd_region *async_cmd_region; 42 EventNotifier io_notifier; 43 bool force_orb_pfch; 44 bool warned_orb_pfch; 45 }; 46 47 static inline void warn_once_pfch(VFIOCCWDevice *vcdev, SubchDev *sch, 48 const char *msg) 49 { 50 warn_report_once_cond(&vcdev->warned_orb_pfch, 51 "vfio-ccw (devno %x.%x.%04x): %s", 52 sch->cssid, sch->ssid, sch->devno, msg); 53 } 54 55 static void vfio_ccw_compute_needs_reset(VFIODevice *vdev) 56 { 57 vdev->needs_reset = false; 58 } 59 60 /* 61 * We don't need vfio_hot_reset_multi and vfio_eoi operations for 62 * vfio_ccw device now. 63 */ 64 struct VFIODeviceOps vfio_ccw_ops = { 65 .vfio_compute_needs_reset = vfio_ccw_compute_needs_reset, 66 }; 67 68 static IOInstEnding vfio_ccw_handle_request(SubchDev *sch) 69 { 70 S390CCWDevice *cdev = sch->driver_data; 71 VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev); 72 struct ccw_io_region *region = vcdev->io_region; 73 int ret; 74 75 if (!(sch->orb.ctrl0 & ORB_CTRL0_MASK_PFCH)) { 76 if (!(vcdev->force_orb_pfch)) { 77 warn_once_pfch(vcdev, sch, "requires PFCH flag set"); 78 sch_gen_unit_exception(sch); 79 css_inject_io_interrupt(sch); 80 return IOINST_CC_EXPECTED; 81 } else { 82 sch->orb.ctrl0 |= ORB_CTRL0_MASK_PFCH; 83 warn_once_pfch(vcdev, sch, "PFCH flag forced"); 84 } 85 } 86 87 QEMU_BUILD_BUG_ON(sizeof(region->orb_area) != sizeof(ORB)); 88 QEMU_BUILD_BUG_ON(sizeof(region->scsw_area) != sizeof(SCSW)); 89 QEMU_BUILD_BUG_ON(sizeof(region->irb_area) != sizeof(IRB)); 90 91 memset(region, 0, sizeof(*region)); 92 93 memcpy(region->orb_area, &sch->orb, sizeof(ORB)); 94 memcpy(region->scsw_area, &sch->curr_status.scsw, sizeof(SCSW)); 95 96 again: 97 ret = pwrite(vcdev->vdev.fd, region, 98 vcdev->io_region_size, vcdev->io_region_offset); 99 if (ret != vcdev->io_region_size) { 100 if (errno == EAGAIN) { 101 goto again; 102 } 103 error_report("vfio-ccw: wirte I/O region failed with errno=%d", errno); 104 ret = -errno; 105 } else { 106 ret = region->ret_code; 107 } 108 switch (ret) { 109 case 0: 110 return IOINST_CC_EXPECTED; 111 case -EBUSY: 112 return IOINST_CC_BUSY; 113 case -ENODEV: 114 case -EACCES: 115 return IOINST_CC_NOT_OPERATIONAL; 116 case -EFAULT: 117 default: 118 sch_gen_unit_exception(sch); 119 css_inject_io_interrupt(sch); 120 return IOINST_CC_EXPECTED; 121 } 122 } 123 124 static int vfio_ccw_handle_clear(SubchDev *sch) 125 { 126 S390CCWDevice *cdev = sch->driver_data; 127 VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev); 128 struct ccw_cmd_region *region = vcdev->async_cmd_region; 129 int ret; 130 131 if (!vcdev->async_cmd_region) { 132 /* Async command region not available, fall back to emulation */ 133 return -ENOSYS; 134 } 135 136 memset(region, 0, sizeof(*region)); 137 region->command = VFIO_CCW_ASYNC_CMD_CSCH; 138 139 again: 140 ret = pwrite(vcdev->vdev.fd, region, 141 vcdev->async_cmd_region_size, vcdev->async_cmd_region_offset); 142 if (ret != vcdev->async_cmd_region_size) { 143 if (errno == EAGAIN) { 144 goto again; 145 } 146 error_report("vfio-ccw: write cmd region failed with errno=%d", errno); 147 ret = -errno; 148 } else { 149 ret = region->ret_code; 150 } 151 switch (ret) { 152 case 0: 153 case -ENODEV: 154 case -EACCES: 155 return 0; 156 case -EFAULT: 157 default: 158 sch_gen_unit_exception(sch); 159 css_inject_io_interrupt(sch); 160 return 0; 161 } 162 } 163 164 static int vfio_ccw_handle_halt(SubchDev *sch) 165 { 166 S390CCWDevice *cdev = sch->driver_data; 167 VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev); 168 struct ccw_cmd_region *region = vcdev->async_cmd_region; 169 int ret; 170 171 if (!vcdev->async_cmd_region) { 172 /* Async command region not available, fall back to emulation */ 173 return -ENOSYS; 174 } 175 176 memset(region, 0, sizeof(*region)); 177 region->command = VFIO_CCW_ASYNC_CMD_HSCH; 178 179 again: 180 ret = pwrite(vcdev->vdev.fd, region, 181 vcdev->async_cmd_region_size, vcdev->async_cmd_region_offset); 182 if (ret != vcdev->async_cmd_region_size) { 183 if (errno == EAGAIN) { 184 goto again; 185 } 186 error_report("vfio-ccw: write cmd region failed with errno=%d", errno); 187 ret = -errno; 188 } else { 189 ret = region->ret_code; 190 } 191 switch (ret) { 192 case 0: 193 case -EBUSY: 194 case -ENODEV: 195 case -EACCES: 196 return 0; 197 case -EFAULT: 198 default: 199 sch_gen_unit_exception(sch); 200 css_inject_io_interrupt(sch); 201 return 0; 202 } 203 } 204 205 static void vfio_ccw_reset(DeviceState *dev) 206 { 207 CcwDevice *ccw_dev = DO_UPCAST(CcwDevice, parent_obj, dev); 208 S390CCWDevice *cdev = DO_UPCAST(S390CCWDevice, parent_obj, ccw_dev); 209 VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev); 210 211 ioctl(vcdev->vdev.fd, VFIO_DEVICE_RESET); 212 } 213 214 static void vfio_ccw_io_notifier_handler(void *opaque) 215 { 216 VFIOCCWDevice *vcdev = opaque; 217 struct ccw_io_region *region = vcdev->io_region; 218 S390CCWDevice *cdev = S390_CCW_DEVICE(vcdev); 219 CcwDevice *ccw_dev = CCW_DEVICE(cdev); 220 SubchDev *sch = ccw_dev->sch; 221 SCHIB *schib = &sch->curr_status; 222 SCSW s; 223 IRB irb; 224 int size; 225 226 if (!event_notifier_test_and_clear(&vcdev->io_notifier)) { 227 return; 228 } 229 230 size = pread(vcdev->vdev.fd, region, vcdev->io_region_size, 231 vcdev->io_region_offset); 232 if (size == -1) { 233 switch (errno) { 234 case ENODEV: 235 /* Generate a deferred cc 3 condition. */ 236 schib->scsw.flags |= SCSW_FLAGS_MASK_CC; 237 schib->scsw.ctrl &= ~SCSW_CTRL_MASK_STCTL; 238 schib->scsw.ctrl |= (SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND); 239 goto read_err; 240 case EFAULT: 241 /* Memory problem, generate channel data check. */ 242 schib->scsw.ctrl &= ~SCSW_ACTL_START_PEND; 243 schib->scsw.cstat = SCSW_CSTAT_DATA_CHECK; 244 schib->scsw.ctrl &= ~SCSW_CTRL_MASK_STCTL; 245 schib->scsw.ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY | 246 SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND; 247 goto read_err; 248 default: 249 /* Error, generate channel program check. */ 250 schib->scsw.ctrl &= ~SCSW_ACTL_START_PEND; 251 schib->scsw.cstat = SCSW_CSTAT_PROG_CHECK; 252 schib->scsw.ctrl &= ~SCSW_CTRL_MASK_STCTL; 253 schib->scsw.ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY | 254 SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND; 255 goto read_err; 256 } 257 } else if (size != vcdev->io_region_size) { 258 /* Information transfer error, generate channel-control check. */ 259 schib->scsw.ctrl &= ~SCSW_ACTL_START_PEND; 260 schib->scsw.cstat = SCSW_CSTAT_CHN_CTRL_CHK; 261 schib->scsw.ctrl &= ~SCSW_CTRL_MASK_STCTL; 262 schib->scsw.ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY | 263 SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND; 264 goto read_err; 265 } 266 267 memcpy(&irb, region->irb_area, sizeof(IRB)); 268 269 /* Update control block via irb. */ 270 s = schib->scsw; 271 copy_scsw_to_guest(&s, &irb.scsw); 272 schib->scsw = s; 273 274 /* If a uint check is pending, copy sense data. */ 275 if ((schib->scsw.dstat & SCSW_DSTAT_UNIT_CHECK) && 276 (schib->pmcw.chars & PMCW_CHARS_MASK_CSENSE)) { 277 memcpy(sch->sense_data, irb.ecw, sizeof(irb.ecw)); 278 } 279 280 read_err: 281 css_inject_io_interrupt(sch); 282 } 283 284 static void vfio_ccw_register_io_notifier(VFIOCCWDevice *vcdev, Error **errp) 285 { 286 VFIODevice *vdev = &vcdev->vdev; 287 struct vfio_irq_info *irq_info; 288 size_t argsz; 289 int fd; 290 291 if (vdev->num_irqs < VFIO_CCW_IO_IRQ_INDEX + 1) { 292 error_setg(errp, "vfio: unexpected number of io irqs %u", 293 vdev->num_irqs); 294 return; 295 } 296 297 argsz = sizeof(*irq_info); 298 irq_info = g_malloc0(argsz); 299 irq_info->index = VFIO_CCW_IO_IRQ_INDEX; 300 irq_info->argsz = argsz; 301 if (ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO, 302 irq_info) < 0 || irq_info->count < 1) { 303 error_setg_errno(errp, errno, "vfio: Error getting irq info"); 304 goto out_free_info; 305 } 306 307 if (event_notifier_init(&vcdev->io_notifier, 0)) { 308 error_setg_errno(errp, errno, 309 "vfio: Unable to init event notifier for IO"); 310 goto out_free_info; 311 } 312 313 fd = event_notifier_get_fd(&vcdev->io_notifier); 314 qemu_set_fd_handler(fd, vfio_ccw_io_notifier_handler, NULL, vcdev); 315 316 if (vfio_set_irq_signaling(vdev, VFIO_CCW_IO_IRQ_INDEX, 0, 317 VFIO_IRQ_SET_ACTION_TRIGGER, fd, errp)) { 318 qemu_set_fd_handler(fd, NULL, NULL, vcdev); 319 event_notifier_cleanup(&vcdev->io_notifier); 320 } 321 322 out_free_info: 323 g_free(irq_info); 324 } 325 326 static void vfio_ccw_unregister_io_notifier(VFIOCCWDevice *vcdev) 327 { 328 Error *err = NULL; 329 330 vfio_set_irq_signaling(&vcdev->vdev, VFIO_CCW_IO_IRQ_INDEX, 0, 331 VFIO_IRQ_SET_ACTION_TRIGGER, -1, &err); 332 if (err) { 333 error_reportf_err(err, VFIO_MSG_PREFIX, vcdev->vdev.name); 334 } 335 336 qemu_set_fd_handler(event_notifier_get_fd(&vcdev->io_notifier), 337 NULL, NULL, vcdev); 338 event_notifier_cleanup(&vcdev->io_notifier); 339 } 340 341 static void vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp) 342 { 343 VFIODevice *vdev = &vcdev->vdev; 344 struct vfio_region_info *info; 345 int ret; 346 347 /* Sanity check device */ 348 if (!(vdev->flags & VFIO_DEVICE_FLAGS_CCW)) { 349 error_setg(errp, "vfio: Um, this isn't a vfio-ccw device"); 350 return; 351 } 352 353 /* 354 * We always expect at least the I/O region to be present. We also 355 * may have a variable number of regions governed by capabilities. 356 */ 357 if (vdev->num_regions < VFIO_CCW_CONFIG_REGION_INDEX + 1) { 358 error_setg(errp, "vfio: too few regions (%u), expected at least %u", 359 vdev->num_regions, VFIO_CCW_CONFIG_REGION_INDEX + 1); 360 return; 361 } 362 363 ret = vfio_get_region_info(vdev, VFIO_CCW_CONFIG_REGION_INDEX, &info); 364 if (ret) { 365 error_setg_errno(errp, -ret, "vfio: Error getting config info"); 366 return; 367 } 368 369 vcdev->io_region_size = info->size; 370 if (sizeof(*vcdev->io_region) != vcdev->io_region_size) { 371 error_setg(errp, "vfio: Unexpected size of the I/O region"); 372 g_free(info); 373 return; 374 } 375 376 vcdev->io_region_offset = info->offset; 377 vcdev->io_region = g_malloc0(info->size); 378 379 /* check for the optional async command region */ 380 ret = vfio_get_dev_region_info(vdev, VFIO_REGION_TYPE_CCW, 381 VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD, &info); 382 if (!ret) { 383 vcdev->async_cmd_region_size = info->size; 384 if (sizeof(*vcdev->async_cmd_region) != vcdev->async_cmd_region_size) { 385 error_setg(errp, "vfio: Unexpected size of the async cmd region"); 386 g_free(vcdev->io_region); 387 g_free(info); 388 return; 389 } 390 vcdev->async_cmd_region_offset = info->offset; 391 vcdev->async_cmd_region = g_malloc0(info->size); 392 } 393 394 g_free(info); 395 } 396 397 static void vfio_ccw_put_region(VFIOCCWDevice *vcdev) 398 { 399 g_free(vcdev->async_cmd_region); 400 g_free(vcdev->io_region); 401 } 402 403 static void vfio_ccw_put_device(VFIOCCWDevice *vcdev) 404 { 405 g_free(vcdev->vdev.name); 406 vfio_put_base_device(&vcdev->vdev); 407 } 408 409 static void vfio_ccw_get_device(VFIOGroup *group, VFIOCCWDevice *vcdev, 410 Error **errp) 411 { 412 char *name = g_strdup_printf("%x.%x.%04x", vcdev->cdev.hostid.cssid, 413 vcdev->cdev.hostid.ssid, 414 vcdev->cdev.hostid.devid); 415 VFIODevice *vbasedev; 416 417 QLIST_FOREACH(vbasedev, &group->device_list, next) { 418 if (strcmp(vbasedev->name, name) == 0) { 419 error_setg(errp, "vfio: subchannel %s has already been attached", 420 name); 421 goto out_err; 422 } 423 } 424 425 /* 426 * All vfio-ccw devices are believed to operate in a way compatible with 427 * memory ballooning, ie. pages pinned in the host are in the current 428 * working set of the guest driver and therefore never overlap with pages 429 * available to the guest balloon driver. This needs to be set before 430 * vfio_get_device() for vfio common to handle the balloon inhibitor. 431 */ 432 vcdev->vdev.balloon_allowed = true; 433 434 if (vfio_get_device(group, vcdev->cdev.mdevid, &vcdev->vdev, errp)) { 435 goto out_err; 436 } 437 438 vcdev->vdev.ops = &vfio_ccw_ops; 439 vcdev->vdev.type = VFIO_DEVICE_TYPE_CCW; 440 vcdev->vdev.name = name; 441 vcdev->vdev.dev = &vcdev->cdev.parent_obj.parent_obj; 442 443 return; 444 445 out_err: 446 g_free(name); 447 } 448 449 static VFIOGroup *vfio_ccw_get_group(S390CCWDevice *cdev, Error **errp) 450 { 451 char *tmp, group_path[PATH_MAX]; 452 ssize_t len; 453 int groupid; 454 455 tmp = g_strdup_printf("/sys/bus/css/devices/%x.%x.%04x/%s/iommu_group", 456 cdev->hostid.cssid, cdev->hostid.ssid, 457 cdev->hostid.devid, cdev->mdevid); 458 len = readlink(tmp, group_path, sizeof(group_path)); 459 g_free(tmp); 460 461 if (len <= 0 || len >= sizeof(group_path)) { 462 error_setg(errp, "vfio: no iommu_group found"); 463 return NULL; 464 } 465 466 group_path[len] = 0; 467 468 if (sscanf(basename(group_path), "%d", &groupid) != 1) { 469 error_setg(errp, "vfio: failed to read %s", group_path); 470 return NULL; 471 } 472 473 return vfio_get_group(groupid, &address_space_memory, errp); 474 } 475 476 static void vfio_ccw_realize(DeviceState *dev, Error **errp) 477 { 478 VFIOGroup *group; 479 CcwDevice *ccw_dev = DO_UPCAST(CcwDevice, parent_obj, dev); 480 S390CCWDevice *cdev = DO_UPCAST(S390CCWDevice, parent_obj, ccw_dev); 481 VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev); 482 S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(cdev); 483 Error *err = NULL; 484 485 /* Call the class init function for subchannel. */ 486 if (cdc->realize) { 487 cdc->realize(cdev, vcdev->vdev.sysfsdev, &err); 488 if (err) { 489 goto out_err_propagate; 490 } 491 } 492 493 group = vfio_ccw_get_group(cdev, &err); 494 if (!group) { 495 goto out_group_err; 496 } 497 498 vfio_ccw_get_device(group, vcdev, &err); 499 if (err) { 500 goto out_device_err; 501 } 502 503 vfio_ccw_get_region(vcdev, &err); 504 if (err) { 505 goto out_region_err; 506 } 507 508 vfio_ccw_register_io_notifier(vcdev, &err); 509 if (err) { 510 goto out_notifier_err; 511 } 512 513 return; 514 515 out_notifier_err: 516 vfio_ccw_put_region(vcdev); 517 out_region_err: 518 vfio_ccw_put_device(vcdev); 519 out_device_err: 520 vfio_put_group(group); 521 out_group_err: 522 if (cdc->unrealize) { 523 cdc->unrealize(cdev, NULL); 524 } 525 out_err_propagate: 526 error_propagate(errp, err); 527 } 528 529 static void vfio_ccw_unrealize(DeviceState *dev, Error **errp) 530 { 531 CcwDevice *ccw_dev = DO_UPCAST(CcwDevice, parent_obj, dev); 532 S390CCWDevice *cdev = DO_UPCAST(S390CCWDevice, parent_obj, ccw_dev); 533 VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev); 534 S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(cdev); 535 VFIOGroup *group = vcdev->vdev.group; 536 537 vfio_ccw_unregister_io_notifier(vcdev); 538 vfio_ccw_put_region(vcdev); 539 vfio_ccw_put_device(vcdev); 540 vfio_put_group(group); 541 542 if (cdc->unrealize) { 543 cdc->unrealize(cdev, errp); 544 } 545 } 546 547 static Property vfio_ccw_properties[] = { 548 DEFINE_PROP_STRING("sysfsdev", VFIOCCWDevice, vdev.sysfsdev), 549 DEFINE_PROP_BOOL("force-orb-pfch", VFIOCCWDevice, force_orb_pfch, false), 550 DEFINE_PROP_END_OF_LIST(), 551 }; 552 553 static const VMStateDescription vfio_ccw_vmstate = { 554 .name = "vfio-ccw", 555 .unmigratable = 1, 556 }; 557 558 static void vfio_ccw_class_init(ObjectClass *klass, void *data) 559 { 560 DeviceClass *dc = DEVICE_CLASS(klass); 561 S390CCWDeviceClass *cdc = S390_CCW_DEVICE_CLASS(klass); 562 563 dc->props = vfio_ccw_properties; 564 dc->vmsd = &vfio_ccw_vmstate; 565 dc->desc = "VFIO-based subchannel assignment"; 566 set_bit(DEVICE_CATEGORY_MISC, dc->categories); 567 dc->realize = vfio_ccw_realize; 568 dc->unrealize = vfio_ccw_unrealize; 569 dc->reset = vfio_ccw_reset; 570 571 cdc->handle_request = vfio_ccw_handle_request; 572 cdc->handle_halt = vfio_ccw_handle_halt; 573 cdc->handle_clear = vfio_ccw_handle_clear; 574 } 575 576 static const TypeInfo vfio_ccw_info = { 577 .name = TYPE_VFIO_CCW, 578 .parent = TYPE_S390_CCW, 579 .instance_size = sizeof(VFIOCCWDevice), 580 .class_init = vfio_ccw_class_init, 581 }; 582 583 static void register_vfio_ccw_type(void) 584 { 585 type_register_static(&vfio_ccw_info); 586 } 587 588 type_init(register_vfio_ccw_type) 589