1 /* 2 * vfio based subchannel assignment support 3 * 4 * Copyright 2017 IBM Corp. 5 * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com> 6 * Xiao Feng Ren <renxiaof@linux.vnet.ibm.com> 7 * Pierre Morel <pmorel@linux.vnet.ibm.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2 or (at 10 * your option) any later version. See the COPYING file in the top-level 11 * directory. 12 */ 13 14 #include "qemu/osdep.h" 15 #include <linux/vfio.h> 16 #include <linux/vfio_ccw.h> 17 #include <sys/ioctl.h> 18 19 #include "qapi/error.h" 20 #include "hw/sysbus.h" 21 #include "hw/vfio/vfio.h" 22 #include "hw/vfio/vfio-common.h" 23 #include "hw/s390x/s390-ccw.h" 24 #include "hw/s390x/vfio-ccw.h" 25 #include "hw/s390x/ccw-device.h" 26 #include "exec/address-spaces.h" 27 #include "qemu/error-report.h" 28 29 struct VFIOCCWDevice { 30 S390CCWDevice cdev; 31 VFIODevice vdev; 32 uint64_t io_region_size; 33 uint64_t io_region_offset; 34 struct ccw_io_region *io_region; 35 EventNotifier io_notifier; 36 bool force_orb_pfch; 37 bool warned_orb_pfch; 38 }; 39 40 static inline void warn_once_pfch(VFIOCCWDevice *vcdev, SubchDev *sch, 41 const char *msg) 42 { 43 warn_report_once_cond(&vcdev->warned_orb_pfch, 44 "vfio-ccw (devno %x.%x.%04x): %s", 45 sch->cssid, sch->ssid, sch->devno, msg); 46 } 47 48 static void vfio_ccw_compute_needs_reset(VFIODevice *vdev) 49 { 50 vdev->needs_reset = false; 51 } 52 53 /* 54 * We don't need vfio_hot_reset_multi and vfio_eoi operations for 55 * vfio_ccw device now. 56 */ 57 struct VFIODeviceOps vfio_ccw_ops = { 58 .vfio_compute_needs_reset = vfio_ccw_compute_needs_reset, 59 }; 60 61 static IOInstEnding vfio_ccw_handle_request(SubchDev *sch) 62 { 63 S390CCWDevice *cdev = sch->driver_data; 64 VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev); 65 struct ccw_io_region *region = vcdev->io_region; 66 int ret; 67 68 if (!(sch->orb.ctrl0 & ORB_CTRL0_MASK_PFCH)) { 69 if (!(vcdev->force_orb_pfch)) { 70 warn_once_pfch(vcdev, sch, "requires PFCH flag set"); 71 sch_gen_unit_exception(sch); 72 css_inject_io_interrupt(sch); 73 return IOINST_CC_EXPECTED; 74 } else { 75 sch->orb.ctrl0 |= ORB_CTRL0_MASK_PFCH; 76 warn_once_pfch(vcdev, sch, "PFCH flag forced"); 77 } 78 } 79 80 QEMU_BUILD_BUG_ON(sizeof(region->orb_area) != sizeof(ORB)); 81 QEMU_BUILD_BUG_ON(sizeof(region->scsw_area) != sizeof(SCSW)); 82 QEMU_BUILD_BUG_ON(sizeof(region->irb_area) != sizeof(IRB)); 83 84 memset(region, 0, sizeof(*region)); 85 86 memcpy(region->orb_area, &sch->orb, sizeof(ORB)); 87 memcpy(region->scsw_area, &sch->curr_status.scsw, sizeof(SCSW)); 88 89 again: 90 ret = pwrite(vcdev->vdev.fd, region, 91 vcdev->io_region_size, vcdev->io_region_offset); 92 if (ret != vcdev->io_region_size) { 93 if (errno == EAGAIN) { 94 goto again; 95 } 96 error_report("vfio-ccw: wirte I/O region failed with errno=%d", errno); 97 ret = -errno; 98 } else { 99 ret = region->ret_code; 100 } 101 switch (ret) { 102 case 0: 103 return IOINST_CC_EXPECTED; 104 case -EBUSY: 105 return IOINST_CC_BUSY; 106 case -ENODEV: 107 case -EACCES: 108 return IOINST_CC_NOT_OPERATIONAL; 109 case -EFAULT: 110 default: 111 sch_gen_unit_exception(sch); 112 css_inject_io_interrupt(sch); 113 return IOINST_CC_EXPECTED; 114 } 115 } 116 117 static void vfio_ccw_reset(DeviceState *dev) 118 { 119 CcwDevice *ccw_dev = DO_UPCAST(CcwDevice, parent_obj, dev); 120 S390CCWDevice *cdev = DO_UPCAST(S390CCWDevice, parent_obj, ccw_dev); 121 VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev); 122 123 ioctl(vcdev->vdev.fd, VFIO_DEVICE_RESET); 124 } 125 126 static void vfio_ccw_io_notifier_handler(void *opaque) 127 { 128 VFIOCCWDevice *vcdev = opaque; 129 struct ccw_io_region *region = vcdev->io_region; 130 S390CCWDevice *cdev = S390_CCW_DEVICE(vcdev); 131 CcwDevice *ccw_dev = CCW_DEVICE(cdev); 132 SubchDev *sch = ccw_dev->sch; 133 SCHIB *schib = &sch->curr_status; 134 SCSW s; 135 IRB irb; 136 int size; 137 138 if (!event_notifier_test_and_clear(&vcdev->io_notifier)) { 139 return; 140 } 141 142 size = pread(vcdev->vdev.fd, region, vcdev->io_region_size, 143 vcdev->io_region_offset); 144 if (size == -1) { 145 switch (errno) { 146 case ENODEV: 147 /* Generate a deferred cc 3 condition. */ 148 schib->scsw.flags |= SCSW_FLAGS_MASK_CC; 149 schib->scsw.ctrl &= ~SCSW_CTRL_MASK_STCTL; 150 schib->scsw.ctrl |= (SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND); 151 goto read_err; 152 case EFAULT: 153 /* Memory problem, generate channel data check. */ 154 schib->scsw.ctrl &= ~SCSW_ACTL_START_PEND; 155 schib->scsw.cstat = SCSW_CSTAT_DATA_CHECK; 156 schib->scsw.ctrl &= ~SCSW_CTRL_MASK_STCTL; 157 schib->scsw.ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY | 158 SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND; 159 goto read_err; 160 default: 161 /* Error, generate channel program check. */ 162 schib->scsw.ctrl &= ~SCSW_ACTL_START_PEND; 163 schib->scsw.cstat = SCSW_CSTAT_PROG_CHECK; 164 schib->scsw.ctrl &= ~SCSW_CTRL_MASK_STCTL; 165 schib->scsw.ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY | 166 SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND; 167 goto read_err; 168 } 169 } else if (size != vcdev->io_region_size) { 170 /* Information transfer error, generate channel-control check. */ 171 schib->scsw.ctrl &= ~SCSW_ACTL_START_PEND; 172 schib->scsw.cstat = SCSW_CSTAT_CHN_CTRL_CHK; 173 schib->scsw.ctrl &= ~SCSW_CTRL_MASK_STCTL; 174 schib->scsw.ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY | 175 SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND; 176 goto read_err; 177 } 178 179 memcpy(&irb, region->irb_area, sizeof(IRB)); 180 181 /* Update control block via irb. */ 182 s = schib->scsw; 183 copy_scsw_to_guest(&s, &irb.scsw); 184 schib->scsw = s; 185 186 /* If a uint check is pending, copy sense data. */ 187 if ((schib->scsw.dstat & SCSW_DSTAT_UNIT_CHECK) && 188 (schib->pmcw.chars & PMCW_CHARS_MASK_CSENSE)) { 189 memcpy(sch->sense_data, irb.ecw, sizeof(irb.ecw)); 190 } 191 192 read_err: 193 css_inject_io_interrupt(sch); 194 } 195 196 static void vfio_ccw_register_io_notifier(VFIOCCWDevice *vcdev, Error **errp) 197 { 198 VFIODevice *vdev = &vcdev->vdev; 199 struct vfio_irq_info *irq_info; 200 struct vfio_irq_set *irq_set; 201 size_t argsz; 202 int32_t *pfd; 203 204 if (vdev->num_irqs < VFIO_CCW_IO_IRQ_INDEX + 1) { 205 error_setg(errp, "vfio: unexpected number of io irqs %u", 206 vdev->num_irqs); 207 return; 208 } 209 210 argsz = sizeof(*irq_info); 211 irq_info = g_malloc0(argsz); 212 irq_info->index = VFIO_CCW_IO_IRQ_INDEX; 213 irq_info->argsz = argsz; 214 if (ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO, 215 irq_info) < 0 || irq_info->count < 1) { 216 error_setg_errno(errp, errno, "vfio: Error getting irq info"); 217 goto out_free_info; 218 } 219 220 if (event_notifier_init(&vcdev->io_notifier, 0)) { 221 error_setg_errno(errp, errno, 222 "vfio: Unable to init event notifier for IO"); 223 goto out_free_info; 224 } 225 226 argsz = sizeof(*irq_set) + sizeof(*pfd); 227 irq_set = g_malloc0(argsz); 228 irq_set->argsz = argsz; 229 irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | 230 VFIO_IRQ_SET_ACTION_TRIGGER; 231 irq_set->index = VFIO_CCW_IO_IRQ_INDEX; 232 irq_set->start = 0; 233 irq_set->count = 1; 234 pfd = (int32_t *) &irq_set->data; 235 236 *pfd = event_notifier_get_fd(&vcdev->io_notifier); 237 qemu_set_fd_handler(*pfd, vfio_ccw_io_notifier_handler, NULL, vcdev); 238 if (ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set)) { 239 error_setg(errp, "vfio: Failed to set up io notification"); 240 qemu_set_fd_handler(*pfd, NULL, NULL, vcdev); 241 event_notifier_cleanup(&vcdev->io_notifier); 242 } 243 244 g_free(irq_set); 245 246 out_free_info: 247 g_free(irq_info); 248 } 249 250 static void vfio_ccw_unregister_io_notifier(VFIOCCWDevice *vcdev) 251 { 252 struct vfio_irq_set *irq_set; 253 size_t argsz; 254 int32_t *pfd; 255 256 argsz = sizeof(*irq_set) + sizeof(*pfd); 257 irq_set = g_malloc0(argsz); 258 irq_set->argsz = argsz; 259 irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | 260 VFIO_IRQ_SET_ACTION_TRIGGER; 261 irq_set->index = VFIO_CCW_IO_IRQ_INDEX; 262 irq_set->start = 0; 263 irq_set->count = 1; 264 pfd = (int32_t *) &irq_set->data; 265 *pfd = -1; 266 267 if (ioctl(vcdev->vdev.fd, VFIO_DEVICE_SET_IRQS, irq_set)) { 268 error_report("vfio: Failed to de-assign device io fd: %m"); 269 } 270 271 qemu_set_fd_handler(event_notifier_get_fd(&vcdev->io_notifier), 272 NULL, NULL, vcdev); 273 event_notifier_cleanup(&vcdev->io_notifier); 274 275 g_free(irq_set); 276 } 277 278 static void vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp) 279 { 280 VFIODevice *vdev = &vcdev->vdev; 281 struct vfio_region_info *info; 282 int ret; 283 284 /* Sanity check device */ 285 if (!(vdev->flags & VFIO_DEVICE_FLAGS_CCW)) { 286 error_setg(errp, "vfio: Um, this isn't a vfio-ccw device"); 287 return; 288 } 289 290 if (vdev->num_regions < VFIO_CCW_CONFIG_REGION_INDEX + 1) { 291 error_setg(errp, "vfio: Unexpected number of the I/O region %u", 292 vdev->num_regions); 293 return; 294 } 295 296 ret = vfio_get_region_info(vdev, VFIO_CCW_CONFIG_REGION_INDEX, &info); 297 if (ret) { 298 error_setg_errno(errp, -ret, "vfio: Error getting config info"); 299 return; 300 } 301 302 vcdev->io_region_size = info->size; 303 if (sizeof(*vcdev->io_region) != vcdev->io_region_size) { 304 error_setg(errp, "vfio: Unexpected size of the I/O region"); 305 g_free(info); 306 return; 307 } 308 309 vcdev->io_region_offset = info->offset; 310 vcdev->io_region = g_malloc0(info->size); 311 312 g_free(info); 313 } 314 315 static void vfio_ccw_put_region(VFIOCCWDevice *vcdev) 316 { 317 g_free(vcdev->io_region); 318 } 319 320 static void vfio_ccw_put_device(VFIOCCWDevice *vcdev) 321 { 322 g_free(vcdev->vdev.name); 323 vfio_put_base_device(&vcdev->vdev); 324 } 325 326 static void vfio_ccw_get_device(VFIOGroup *group, VFIOCCWDevice *vcdev, 327 Error **errp) 328 { 329 char *name = g_strdup_printf("%x.%x.%04x", vcdev->cdev.hostid.cssid, 330 vcdev->cdev.hostid.ssid, 331 vcdev->cdev.hostid.devid); 332 VFIODevice *vbasedev; 333 334 QLIST_FOREACH(vbasedev, &group->device_list, next) { 335 if (strcmp(vbasedev->name, name) == 0) { 336 error_setg(errp, "vfio: subchannel %s has already been attached", 337 name); 338 goto out_err; 339 } 340 } 341 342 /* 343 * All vfio-ccw devices are believed to operate in a way compatible with 344 * memory ballooning, ie. pages pinned in the host are in the current 345 * working set of the guest driver and therefore never overlap with pages 346 * available to the guest balloon driver. This needs to be set before 347 * vfio_get_device() for vfio common to handle the balloon inhibitor. 348 */ 349 vcdev->vdev.balloon_allowed = true; 350 351 if (vfio_get_device(group, vcdev->cdev.mdevid, &vcdev->vdev, errp)) { 352 goto out_err; 353 } 354 355 vcdev->vdev.ops = &vfio_ccw_ops; 356 vcdev->vdev.type = VFIO_DEVICE_TYPE_CCW; 357 vcdev->vdev.name = name; 358 vcdev->vdev.dev = &vcdev->cdev.parent_obj.parent_obj; 359 360 return; 361 362 out_err: 363 g_free(name); 364 } 365 366 static VFIOGroup *vfio_ccw_get_group(S390CCWDevice *cdev, Error **errp) 367 { 368 char *tmp, group_path[PATH_MAX]; 369 ssize_t len; 370 int groupid; 371 372 tmp = g_strdup_printf("/sys/bus/css/devices/%x.%x.%04x/%s/iommu_group", 373 cdev->hostid.cssid, cdev->hostid.ssid, 374 cdev->hostid.devid, cdev->mdevid); 375 len = readlink(tmp, group_path, sizeof(group_path)); 376 g_free(tmp); 377 378 if (len <= 0 || len >= sizeof(group_path)) { 379 error_setg(errp, "vfio: no iommu_group found"); 380 return NULL; 381 } 382 383 group_path[len] = 0; 384 385 if (sscanf(basename(group_path), "%d", &groupid) != 1) { 386 error_setg(errp, "vfio: failed to read %s", group_path); 387 return NULL; 388 } 389 390 return vfio_get_group(groupid, &address_space_memory, errp); 391 } 392 393 static void vfio_ccw_realize(DeviceState *dev, Error **errp) 394 { 395 VFIOGroup *group; 396 CcwDevice *ccw_dev = DO_UPCAST(CcwDevice, parent_obj, dev); 397 S390CCWDevice *cdev = DO_UPCAST(S390CCWDevice, parent_obj, ccw_dev); 398 VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev); 399 S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(cdev); 400 Error *err = NULL; 401 402 /* Call the class init function for subchannel. */ 403 if (cdc->realize) { 404 cdc->realize(cdev, vcdev->vdev.sysfsdev, &err); 405 if (err) { 406 goto out_err_propagate; 407 } 408 } 409 410 group = vfio_ccw_get_group(cdev, &err); 411 if (!group) { 412 goto out_group_err; 413 } 414 415 vfio_ccw_get_device(group, vcdev, &err); 416 if (err) { 417 goto out_device_err; 418 } 419 420 vfio_ccw_get_region(vcdev, &err); 421 if (err) { 422 goto out_region_err; 423 } 424 425 vfio_ccw_register_io_notifier(vcdev, &err); 426 if (err) { 427 goto out_notifier_err; 428 } 429 430 return; 431 432 out_notifier_err: 433 vfio_ccw_put_region(vcdev); 434 out_region_err: 435 vfio_ccw_put_device(vcdev); 436 out_device_err: 437 vfio_put_group(group); 438 out_group_err: 439 if (cdc->unrealize) { 440 cdc->unrealize(cdev, NULL); 441 } 442 out_err_propagate: 443 error_propagate(errp, err); 444 } 445 446 static void vfio_ccw_unrealize(DeviceState *dev, Error **errp) 447 { 448 CcwDevice *ccw_dev = DO_UPCAST(CcwDevice, parent_obj, dev); 449 S390CCWDevice *cdev = DO_UPCAST(S390CCWDevice, parent_obj, ccw_dev); 450 VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev); 451 S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(cdev); 452 VFIOGroup *group = vcdev->vdev.group; 453 454 vfio_ccw_unregister_io_notifier(vcdev); 455 vfio_ccw_put_region(vcdev); 456 vfio_ccw_put_device(vcdev); 457 vfio_put_group(group); 458 459 if (cdc->unrealize) { 460 cdc->unrealize(cdev, errp); 461 } 462 } 463 464 static Property vfio_ccw_properties[] = { 465 DEFINE_PROP_STRING("sysfsdev", VFIOCCWDevice, vdev.sysfsdev), 466 DEFINE_PROP_BOOL("force-orb-pfch", VFIOCCWDevice, force_orb_pfch, false), 467 DEFINE_PROP_END_OF_LIST(), 468 }; 469 470 static const VMStateDescription vfio_ccw_vmstate = { 471 .name = TYPE_VFIO_CCW, 472 .unmigratable = 1, 473 }; 474 475 static void vfio_ccw_class_init(ObjectClass *klass, void *data) 476 { 477 DeviceClass *dc = DEVICE_CLASS(klass); 478 S390CCWDeviceClass *cdc = S390_CCW_DEVICE_CLASS(klass); 479 480 dc->props = vfio_ccw_properties; 481 dc->vmsd = &vfio_ccw_vmstate; 482 dc->desc = "VFIO-based subchannel assignment"; 483 set_bit(DEVICE_CATEGORY_MISC, dc->categories); 484 dc->realize = vfio_ccw_realize; 485 dc->unrealize = vfio_ccw_unrealize; 486 dc->reset = vfio_ccw_reset; 487 488 cdc->handle_request = vfio_ccw_handle_request; 489 } 490 491 static const TypeInfo vfio_ccw_info = { 492 .name = TYPE_VFIO_CCW, 493 .parent = TYPE_S390_CCW, 494 .instance_size = sizeof(VFIOCCWDevice), 495 .class_init = vfio_ccw_class_init, 496 }; 497 498 static void register_vfio_ccw_type(void) 499 { 500 type_register_static(&vfio_ccw_info); 501 } 502 503 type_init(register_vfio_ccw_type) 504