// SPDX-License-Identifier: GPL-2.0+
/*
 * Adjunct processor matrix VFIO device driver callbacks.
 *
 * Copyright IBM Corp. 2018
 *
 * Author(s): Tony Krowiak <akrowiak@linux.ibm.com>
 *	      Halil Pasic <pasic@linux.ibm.com>
 *	      Pierre Morel <pmorel@linux.ibm.com>
 */
#include <linux/string.h>
#include <linux/vfio.h>
#include <linux/device.h>
#include <linux/list.h>
#include <linux/ctype.h>
#include <linux/bitops.h>
#include <linux/kvm_host.h>
#include <linux/module.h>
#include <asm/kvm.h>
#include <asm/zcrypt.h>

#include "vfio_ap_private.h"

#define VFIO_AP_MDEV_TYPE_HWVIRT "passthrough"
#define VFIO_AP_MDEV_NAME_HWVIRT "VFIO AP Passthrough Device"

static int vfio_ap_mdev_reset_queues(struct mdev_device *mdev);

static int match_apqn(struct device *dev, const void *data)
{
	struct vfio_ap_queue *q = dev_get_drvdata(dev);

	return (q->apqn == *(int *)(data)) ? 1 : 0;
}

/**
 * vfio_ap_get_queue: Retrieve a queue with a specific APQN from a list
 * @matrix_mdev: the associated mediated matrix device
 * @apqn: The queue APQN
 *
 * Retrieves the queue with the specified APQN from the list of devices
 * bound to the vfio_ap driver, after verifying that both the APID and the
 * APQI are set in the mediated device's matrix.
 *
 * Returns the pointer to the associated vfio_ap_queue, or NULL if the APQN
 * is not assigned to the matrix or no matching queue device is bound.
 */
static struct vfio_ap_queue *vfio_ap_get_queue(
					struct ap_matrix_mdev *matrix_mdev,
					int apqn)
{
	struct vfio_ap_queue *q;
	struct device *dev;

	if (!test_bit_inv(AP_QID_CARD(apqn), matrix_mdev->matrix.apm))
		return NULL;
	if (!test_bit_inv(AP_QID_QUEUE(apqn), matrix_mdev->matrix.aqm))
		return NULL;

	dev = driver_find_device(&matrix_dev->vfio_ap_drv->driver, NULL,
				 &apqn, match_apqn);
	if (!dev)
		return NULL;
	q = dev_get_drvdata(dev);
	q->matrix_mdev = matrix_mdev;
	put_device(dev);

	return q;
}

/**
 * vfio_ap_wait_for_irqclear
 * @apqn: The AP Queue number
 *
 * Checks the IRQ bit in the status of this APQN using ap_tapq.
 * Returns when ap_tapq succeeds and the bit is clear, or when ap_tapq fails
 * because the AP is invalid, deconfigured or checkstopped.
 * Otherwise retries up to 5 times, waiting 20ms between attempts.
 */
static void vfio_ap_wait_for_irqclear(int apqn)
{
	struct ap_queue_status status;
	int retry = 5;

	do {
		status = ap_tapq(apqn, NULL);
		switch (status.response_code) {
		case AP_RESPONSE_NORMAL:
		case AP_RESPONSE_RESET_IN_PROGRESS:
			if (!status.irq_enabled)
				return;
			/* Fall through */
		case AP_RESPONSE_BUSY:
			msleep(20);
			break;
		case AP_RESPONSE_Q_NOT_AVAIL:
		case AP_RESPONSE_DECONFIGURED:
		case AP_RESPONSE_CHECKSTOPPED:
		default:
			WARN_ONCE(1, "%s: tapq rc %02x: %04x\n", __func__,
				  status.response_code, apqn);
			return;
		}
	} while (--retry);

	WARN_ONCE(1, "%s: tapq rc %02x: %04x could not clear IR bit\n",
		  __func__, status.response_code, apqn);
}
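/*
 * Editorial illustration (not used by the driver): an APQN packs the adapter
 * number (APID) and the queue index (APQI) that the lookup in
 * vfio_ap_get_queue() above operates on. Assuming the usual helper macros
 * from <asm/ap.h>:
 *
 *	int apqn = AP_MKQID(0x04, 0x47);  // apqn == 0x0447
 *	AP_QID_CARD(apqn);   // 0x04, tested against matrix.apm above
 *	AP_QID_QUEUE(apqn);  // 0x47, tested against matrix.aqm above
 */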
/**
 * vfio_ap_free_aqic_resources
 * @q: The vfio_ap_queue
 *
 * Unregisters the ISC from the GIB when the saved ISC is valid.
 * Unpins the guest's page holding the NIB when it exists.
 * Resets saved_pfn and saved_isc to invalid values.
 */
static void vfio_ap_free_aqic_resources(struct vfio_ap_queue *q)
{
	if (q->saved_isc != VFIO_AP_ISC_INVALID && q->matrix_mdev)
		kvm_s390_gisc_unregister(q->matrix_mdev->kvm, q->saved_isc);
	if (q->saved_pfn && q->matrix_mdev)
		vfio_unpin_pages(mdev_dev(q->matrix_mdev->mdev),
				 &q->saved_pfn, 1);
	q->saved_pfn = 0;
	q->saved_isc = VFIO_AP_ISC_INVALID;
}

/**
 * vfio_ap_irq_disable
 * @q: The vfio_ap_queue
 *
 * Uses ap_aqic to disable the interruption. On success, when a reset is
 * already in progress, or when an IRQ disable has already been initiated,
 * calls vfio_ap_wait_for_irqclear() to wait for the IRQ bit to clear, then
 * calls vfio_ap_free_aqic_resources() to free the resources associated with
 * AP interrupt handling.
 *
 * In case the AP is busy or a reset is in progress, retries after 20ms,
 * up to 5 times.
 *
 * Gives up immediately if ap_aqic fails because the AP is invalid,
 * deconfigured or checkstopped.
 *
 * Returns the status of the last ap_aqic call.
 */
struct ap_queue_status vfio_ap_irq_disable(struct vfio_ap_queue *q)
{
	struct ap_qirq_ctrl aqic_gisa = {};
	struct ap_queue_status status;
	int retries = 5;

	do {
		status = ap_aqic(q->apqn, aqic_gisa, NULL);
		switch (status.response_code) {
		case AP_RESPONSE_OTHERWISE_CHANGED:
		case AP_RESPONSE_NORMAL:
			vfio_ap_wait_for_irqclear(q->apqn);
			goto end_free;
		case AP_RESPONSE_RESET_IN_PROGRESS:
		case AP_RESPONSE_BUSY:
			msleep(20);
			break;
		case AP_RESPONSE_Q_NOT_AVAIL:
		case AP_RESPONSE_DECONFIGURED:
		case AP_RESPONSE_CHECKSTOPPED:
		case AP_RESPONSE_INVALID_ADDRESS:
		default:
			/* All other cases mean the AP is not operational */
			WARN_ONCE(1, "%s: ap_aqic status %d\n", __func__,
				  status.response_code);
			goto end_free;
		}
	} while (retries--);

	WARN_ONCE(1, "%s: ap_aqic status %d\n", __func__,
		  status.response_code);
end_free:
	vfio_ap_free_aqic_resources(q);
	q->matrix_mdev = NULL;
	return status;
}
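/*
 * Reader's note: there is no dedicated "clear interruption" function code;
 * issuing PQAP/AQIC with a zeroed struct ap_qirq_ctrl (i.e. the ir bit off)
 * is what requests disablement, which is why vfio_ap_irq_disable() above
 * simply passes an empty control block to ap_aqic().
 */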
/**
 * vfio_ap_irq_enable: Enable interruption for an APQN
 *
 * @q: the vfio_ap_queue holding the AQIC parameters
 * @isc: the guest interruption subclass
 * @nib: the guest address of the notification indicator byte
 *
 * Pins the guest page holding the NIB.
 * Registers the guest ISC with the GIB interface and retrieves the host ISC
 * needed to issue the host-side PQAP/AQIC.
 *
 * status.response_code is set to AP_RESPONSE_INVALID_ADDRESS in case
 * vfio_pin_pages() failed.
 *
 * Otherwise returns the ap_queue_status returned by ap_aqic();
 * all retry handling is done by the guest.
 */
static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q,
						 int isc,
						 unsigned long nib)
{
	struct ap_qirq_ctrl aqic_gisa = {};
	struct ap_queue_status status = {};
	struct kvm_s390_gisa *gisa;
	struct kvm *kvm;
	unsigned long h_nib, g_pfn, h_pfn;
	int ret;

	g_pfn = nib >> PAGE_SHIFT;
	ret = vfio_pin_pages(mdev_dev(q->matrix_mdev->mdev), &g_pfn, 1,
			     IOMMU_READ | IOMMU_WRITE, &h_pfn);
	switch (ret) {
	case 1:
		break;
	default:
		status.response_code = AP_RESPONSE_INVALID_ADDRESS;
		return status;
	}

	kvm = q->matrix_mdev->kvm;
	gisa = kvm->arch.gisa_int.origin;

	h_nib = (h_pfn << PAGE_SHIFT) | (nib & ~PAGE_MASK);
	aqic_gisa.gisc = isc;
	aqic_gisa.isc = kvm_s390_gisc_register(kvm, isc);
	aqic_gisa.ir = 1;
	aqic_gisa.gisa = (uint64_t)gisa >> 4;

	status = ap_aqic(q->apqn, aqic_gisa, (void *)h_nib);
	switch (status.response_code) {
	case AP_RESPONSE_NORMAL:
		/* Free an older IRQ configuration, if one was set up */
		vfio_ap_free_aqic_resources(q);
		q->saved_pfn = g_pfn;
		q->saved_isc = isc;
		break;
	case AP_RESPONSE_OTHERWISE_CHANGED:
		/* We could not modify IRQ settings: clear new configuration */
		vfio_unpin_pages(mdev_dev(q->matrix_mdev->mdev), &g_pfn, 1);
		kvm_s390_gisc_unregister(kvm, isc);
		break;
	default:
		pr_warn("%s: apqn %04x: response: %02x\n", __func__, q->apqn,
			status.response_code);
		vfio_ap_irq_disable(q);
		break;
	}

	return status;
}
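/*
 * Worked example for the NIB translation above (illustrative values, 4K
 * pages, PAGE_SHIFT == 12): for nib = 0x12345678,
 *
 *	g_pfn  = nib >> PAGE_SHIFT	= 0x12345
 *	offset = nib & ~PAGE_MASK	= 0x678
 *
 * and if pinning maps g_pfn to h_pfn = 0xabcde, then
 *
 *	h_nib = (h_pfn << PAGE_SHIFT) | offset = 0xabcde678
 *
 * i.e. only the page frame is translated; the offset within the page is
 * preserved.
 */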
274 */ 275 static int handle_pqap(struct kvm_vcpu *vcpu) 276 { 277 uint64_t status; 278 uint16_t apqn; 279 struct vfio_ap_queue *q; 280 struct ap_queue_status qstatus = { 281 .response_code = AP_RESPONSE_Q_NOT_AVAIL, }; 282 struct ap_matrix_mdev *matrix_mdev; 283 284 /* If we do not use the AIV facility just go to userland */ 285 if (!(vcpu->arch.sie_block->eca & ECA_AIV)) 286 return -EOPNOTSUPP; 287 288 apqn = vcpu->run->s.regs.gprs[0] & 0xffff; 289 mutex_lock(&matrix_dev->lock); 290 291 if (!vcpu->kvm->arch.crypto.pqap_hook) 292 goto out_unlock; 293 matrix_mdev = container_of(vcpu->kvm->arch.crypto.pqap_hook, 294 struct ap_matrix_mdev, pqap_hook); 295 296 q = vfio_ap_get_queue(matrix_mdev, apqn); 297 if (!q) 298 goto out_unlock; 299 300 status = vcpu->run->s.regs.gprs[1]; 301 302 /* If IR bit(16) is set we enable the interrupt */ 303 if ((status >> (63 - 16)) & 0x01) 304 qstatus = vfio_ap_irq_enable(q, status & 0x07, 305 vcpu->run->s.regs.gprs[2]); 306 else 307 qstatus = vfio_ap_irq_disable(q); 308 309 out_unlock: 310 memcpy(&vcpu->run->s.regs.gprs[1], &qstatus, sizeof(qstatus)); 311 vcpu->run->s.regs.gprs[1] >>= 32; 312 mutex_unlock(&matrix_dev->lock); 313 return 0; 314 } 315 316 static void vfio_ap_matrix_init(struct ap_config_info *info, 317 struct ap_matrix *matrix) 318 { 319 matrix->apm_max = info->apxa ? info->Na : 63; 320 matrix->aqm_max = info->apxa ? info->Nd : 15; 321 matrix->adm_max = info->apxa ? info->Nd : 15; 322 } 323 324 static int vfio_ap_mdev_create(struct kobject *kobj, struct mdev_device *mdev) 325 { 326 struct ap_matrix_mdev *matrix_mdev; 327 328 if ((atomic_dec_if_positive(&matrix_dev->available_instances) < 0)) 329 return -EPERM; 330 331 matrix_mdev = kzalloc(sizeof(*matrix_mdev), GFP_KERNEL); 332 if (!matrix_mdev) { 333 atomic_inc(&matrix_dev->available_instances); 334 return -ENOMEM; 335 } 336 337 matrix_mdev->mdev = mdev; 338 vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->matrix); 339 mdev_set_drvdata(mdev, matrix_mdev); 340 matrix_mdev->pqap_hook.hook = handle_pqap; 341 matrix_mdev->pqap_hook.owner = THIS_MODULE; 342 mutex_lock(&matrix_dev->lock); 343 list_add(&matrix_mdev->node, &matrix_dev->mdev_list); 344 mutex_unlock(&matrix_dev->lock); 345 346 return 0; 347 } 348 349 static int vfio_ap_mdev_remove(struct mdev_device *mdev) 350 { 351 struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev); 352 353 if (matrix_mdev->kvm) 354 return -EBUSY; 355 356 mutex_lock(&matrix_dev->lock); 357 vfio_ap_mdev_reset_queues(mdev); 358 list_del(&matrix_mdev->node); 359 mutex_unlock(&matrix_dev->lock); 360 361 kfree(matrix_mdev); 362 mdev_set_drvdata(mdev, NULL); 363 atomic_inc(&matrix_dev->available_instances); 364 365 return 0; 366 } 367 368 static ssize_t name_show(struct kobject *kobj, struct device *dev, char *buf) 369 { 370 return sprintf(buf, "%s\n", VFIO_AP_MDEV_NAME_HWVIRT); 371 } 372 373 static MDEV_TYPE_ATTR_RO(name); 374 375 static ssize_t available_instances_show(struct kobject *kobj, 376 struct device *dev, char *buf) 377 { 378 return sprintf(buf, "%d\n", 379 atomic_read(&matrix_dev->available_instances)); 380 } 381 382 static MDEV_TYPE_ATTR_RO(available_instances); 383 384 static ssize_t device_api_show(struct kobject *kobj, struct device *dev, 385 char *buf) 386 { 387 return sprintf(buf, "%s\n", VFIO_DEVICE_API_AP_STRING); 388 } 389 390 static MDEV_TYPE_ATTR_RO(device_api); 391 392 static struct attribute *vfio_ap_mdev_type_attrs[] = { 393 &mdev_type_attr_name.attr, 394 &mdev_type_attr_device_api.attr, 395 &mdev_type_attr_available_instances.attr, 396 
static struct attribute_group vfio_ap_mdev_hwvirt_type_group = {
	.name = VFIO_AP_MDEV_TYPE_HWVIRT,
	.attrs = vfio_ap_mdev_type_attrs,
};

static struct attribute_group *vfio_ap_mdev_type_groups[] = {
	&vfio_ap_mdev_hwvirt_type_group,
	NULL,
};

struct vfio_ap_queue_reserved {
	unsigned long *apid;
	unsigned long *apqi;
	bool reserved;
};

/**
 * vfio_ap_has_queue
 *
 * @dev: an AP queue device
 * @data: a struct vfio_ap_queue_reserved reference
 *
 * Flags whether the AP queue device (@dev) has a queue ID containing the APQN,
 * apid or apqi specified in @data:
 *
 * - If @data contains both an apid and apqi value, then @data will be flagged
 *   as reserved if the APID and APQI fields for the AP queue device match
 *
 * - If @data contains only an apid value, @data will be flagged as
 *   reserved if the APID field in the AP queue device matches
 *
 * - If @data contains only an apqi value, @data will be flagged as
 *   reserved if the APQI field in the AP queue device matches
 *
 * Returns 0 if the function succeeded. Returns -EINVAL if @data contains
 * neither an apid nor an apqi.
 */
static int vfio_ap_has_queue(struct device *dev, void *data)
{
	struct vfio_ap_queue_reserved *qres = data;
	struct ap_queue *ap_queue = to_ap_queue(dev);
	ap_qid_t qid;
	unsigned long id;

	if (qres->apid && qres->apqi) {
		qid = AP_MKQID(*qres->apid, *qres->apqi);
		if (qid == ap_queue->qid)
			qres->reserved = true;
	} else if (qres->apid && !qres->apqi) {
		id = AP_QID_CARD(ap_queue->qid);
		if (id == *qres->apid)
			qres->reserved = true;
	} else if (!qres->apid && qres->apqi) {
		id = AP_QID_QUEUE(ap_queue->qid);
		if (id == *qres->apqi)
			qres->reserved = true;
	} else {
		return -EINVAL;
	}

	return 0;
}
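/*
 * Matching example for vfio_ap_has_queue() above (illustrative): an AP queue
 * device with qid 0x0447 flags @data as reserved when @data carries
 * apid = 0x04 and apqi = 0x47, apid = 0x04 alone, or apqi = 0x47 alone.
 */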
483 */ 484 static int vfio_ap_verify_queue_reserved(unsigned long *apid, 485 unsigned long *apqi) 486 { 487 int ret; 488 struct vfio_ap_queue_reserved qres; 489 490 qres.apid = apid; 491 qres.apqi = apqi; 492 qres.reserved = false; 493 494 ret = driver_for_each_device(&matrix_dev->vfio_ap_drv->driver, NULL, 495 &qres, vfio_ap_has_queue); 496 if (ret) 497 return ret; 498 499 if (qres.reserved) 500 return 0; 501 502 return -EADDRNOTAVAIL; 503 } 504 505 static int 506 vfio_ap_mdev_verify_queues_reserved_for_apid(struct ap_matrix_mdev *matrix_mdev, 507 unsigned long apid) 508 { 509 int ret; 510 unsigned long apqi; 511 unsigned long nbits = matrix_mdev->matrix.aqm_max + 1; 512 513 if (find_first_bit_inv(matrix_mdev->matrix.aqm, nbits) >= nbits) 514 return vfio_ap_verify_queue_reserved(&apid, NULL); 515 516 for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, nbits) { 517 ret = vfio_ap_verify_queue_reserved(&apid, &apqi); 518 if (ret) 519 return ret; 520 } 521 522 return 0; 523 } 524 525 /** 526 * vfio_ap_mdev_verify_no_sharing 527 * 528 * Verifies that the APQNs derived from the cross product of the AP adapter IDs 529 * and AP queue indexes comprising the AP matrix are not configured for another 530 * mediated device. AP queue sharing is not allowed. 531 * 532 * @matrix_mdev: the mediated matrix device 533 * 534 * Returns 0 if the APQNs are not shared, otherwise; returns -EADDRINUSE. 535 */ 536 static int vfio_ap_mdev_verify_no_sharing(struct ap_matrix_mdev *matrix_mdev) 537 { 538 struct ap_matrix_mdev *lstdev; 539 DECLARE_BITMAP(apm, AP_DEVICES); 540 DECLARE_BITMAP(aqm, AP_DOMAINS); 541 542 list_for_each_entry(lstdev, &matrix_dev->mdev_list, node) { 543 if (matrix_mdev == lstdev) 544 continue; 545 546 memset(apm, 0, sizeof(apm)); 547 memset(aqm, 0, sizeof(aqm)); 548 549 /* 550 * We work on full longs, as we can only exclude the leftover 551 * bits in non-inverse order. The leftover is all zeros. 552 */ 553 if (!bitmap_and(apm, matrix_mdev->matrix.apm, 554 lstdev->matrix.apm, AP_DEVICES)) 555 continue; 556 557 if (!bitmap_and(aqm, matrix_mdev->matrix.aqm, 558 lstdev->matrix.aqm, AP_DOMAINS)) 559 continue; 560 561 return -EADDRINUSE; 562 } 563 564 return 0; 565 } 566 567 /** 568 * assign_adapter_store 569 * 570 * @dev: the matrix device 571 * @attr: the mediated matrix device's assign_adapter attribute 572 * @buf: a buffer containing the AP adapter number (APID) to 573 * be assigned 574 * @count: the number of bytes in @buf 575 * 576 * Parses the APID from @buf and sets the corresponding bit in the mediated 577 * matrix device's APM. 578 * 579 * Returns the number of bytes processed if the APID is valid; otherwise, 580 * returns one of the following errors: 581 * 582 * 1. -EINVAL 583 * The APID is not a valid number 584 * 585 * 2. -ENODEV 586 * The APID exceeds the maximum value configured for the system 587 * 588 * 3. -EADDRNOTAVAIL 589 * An APQN derived from the cross product of the APID being assigned 590 * and the APQIs previously assigned is not bound to the vfio_ap device 591 * driver; or, if no APQIs have yet been assigned, the APID is not 592 * contained in an APQN bound to the vfio_ap device driver. 593 * 594 * 4. 
/**
 * assign_adapter_store
 *
 * @dev: the matrix device
 * @attr: the mediated matrix device's assign_adapter attribute
 * @buf: a buffer containing the AP adapter number (APID) to
 *	 be assigned
 * @count: the number of bytes in @buf
 *
 * Parses the APID from @buf and sets the corresponding bit in the mediated
 * matrix device's APM.
 *
 * Returns the number of bytes processed if the APID is valid; otherwise,
 * returns one of the following errors:
 *
 *	1. -EINVAL
 *	   The APID is not a valid number
 *
 *	2. -ENODEV
 *	   The APID exceeds the maximum value configured for the system
 *
 *	3. -EADDRNOTAVAIL
 *	   An APQN derived from the cross product of the APID being assigned
 *	   and the APQIs previously assigned is not bound to the vfio_ap device
 *	   driver; or, if no APQIs have yet been assigned, the APID is not
 *	   contained in an APQN bound to the vfio_ap device driver.
 *
 *	4. -EADDRINUSE
 *	   An APQN derived from the cross product of the APID being assigned
 *	   and the APQIs previously assigned is being used by another mediated
 *	   matrix device
 */
static ssize_t assign_adapter_store(struct device *dev,
				    struct device_attribute *attr,
				    const char *buf, size_t count)
{
	int ret;
	unsigned long apid;
	struct mdev_device *mdev = mdev_from_dev(dev);
	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);

	/* If the guest is running, disallow assignment of adapter */
	if (matrix_mdev->kvm)
		return -EBUSY;

	ret = kstrtoul(buf, 0, &apid);
	if (ret)
		return ret;

	if (apid > matrix_mdev->matrix.apm_max)
		return -ENODEV;

	/*
	 * Set the bit in the AP mask (APM) corresponding to the AP adapter
	 * number (APID). The bits in the mask, from most significant to least
	 * significant bit, correspond to APIDs 0-255.
	 */
	mutex_lock(&matrix_dev->lock);

	ret = vfio_ap_mdev_verify_queues_reserved_for_apid(matrix_mdev, apid);
	if (ret)
		goto done;

	set_bit_inv(apid, matrix_mdev->matrix.apm);

	ret = vfio_ap_mdev_verify_no_sharing(matrix_mdev);
	if (ret)
		goto share_err;

	ret = count;
	goto done;

share_err:
	clear_bit_inv(apid, matrix_mdev->matrix.apm);
done:
	mutex_unlock(&matrix_dev->lock);

	return ret;
}
static DEVICE_ATTR_WO(assign_adapter);
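/*
 * Reader's note on the *_inv bitmap helpers used throughout: the AP masks
 * follow the architected MSB-first layout, so set_bit_inv(4, apm) above sets
 * the bit for APID 4 counted from the most significant bit of the mask; this
 * is the same layout that kvm_arch_crypto_set_masks() later copies into the
 * guest's CRYCB.
 */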
658 * 659 * Returns the number of bytes processed if the APID is valid; otherwise, 660 * returns one of the following errors: 661 * -EINVAL if the APID is not a number 662 * -ENODEV if the APID it exceeds the maximum value configured for the 663 * system 664 */ 665 static ssize_t unassign_adapter_store(struct device *dev, 666 struct device_attribute *attr, 667 const char *buf, size_t count) 668 { 669 int ret; 670 unsigned long apid; 671 struct mdev_device *mdev = mdev_from_dev(dev); 672 struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev); 673 674 /* If the guest is running, disallow un-assignment of adapter */ 675 if (matrix_mdev->kvm) 676 return -EBUSY; 677 678 ret = kstrtoul(buf, 0, &apid); 679 if (ret) 680 return ret; 681 682 if (apid > matrix_mdev->matrix.apm_max) 683 return -ENODEV; 684 685 mutex_lock(&matrix_dev->lock); 686 clear_bit_inv((unsigned long)apid, matrix_mdev->matrix.apm); 687 mutex_unlock(&matrix_dev->lock); 688 689 return count; 690 } 691 static DEVICE_ATTR_WO(unassign_adapter); 692 693 static int 694 vfio_ap_mdev_verify_queues_reserved_for_apqi(struct ap_matrix_mdev *matrix_mdev, 695 unsigned long apqi) 696 { 697 int ret; 698 unsigned long apid; 699 unsigned long nbits = matrix_mdev->matrix.apm_max + 1; 700 701 if (find_first_bit_inv(matrix_mdev->matrix.apm, nbits) >= nbits) 702 return vfio_ap_verify_queue_reserved(NULL, &apqi); 703 704 for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, nbits) { 705 ret = vfio_ap_verify_queue_reserved(&apid, &apqi); 706 if (ret) 707 return ret; 708 } 709 710 return 0; 711 } 712 713 /** 714 * assign_domain_store 715 * 716 * @dev: the matrix device 717 * @attr: the mediated matrix device's assign_domain attribute 718 * @buf: a buffer containing the AP queue index (APQI) of the domain to 719 * be assigned 720 * @count: the number of bytes in @buf 721 * 722 * Parses the APQI from @buf and sets the corresponding bit in the mediated 723 * matrix device's AQM. 724 * 725 * Returns the number of bytes processed if the APQI is valid; otherwise returns 726 * one of the following errors: 727 * 728 * 1. -EINVAL 729 * The APQI is not a valid number 730 * 731 * 2. -ENODEV 732 * The APQI exceeds the maximum value configured for the system 733 * 734 * 3. -EADDRNOTAVAIL 735 * An APQN derived from the cross product of the APQI being assigned 736 * and the APIDs previously assigned is not bound to the vfio_ap device 737 * driver; or, if no APIDs have yet been assigned, the APQI is not 738 * contained in an APQN bound to the vfio_ap device driver. 739 * 740 * 4. 
/**
 * assign_domain_store
 *
 * @dev: the matrix device
 * @attr: the mediated matrix device's assign_domain attribute
 * @buf: a buffer containing the AP queue index (APQI) of the domain to
 *	 be assigned
 * @count: the number of bytes in @buf
 *
 * Parses the APQI from @buf and sets the corresponding bit in the mediated
 * matrix device's AQM.
 *
 * Returns the number of bytes processed if the APQI is valid; otherwise,
 * returns one of the following errors:
 *
 *	1. -EINVAL
 *	   The APQI is not a valid number
 *
 *	2. -ENODEV
 *	   The APQI exceeds the maximum value configured for the system
 *
 *	3. -EADDRNOTAVAIL
 *	   An APQN derived from the cross product of the APQI being assigned
 *	   and the APIDs previously assigned is not bound to the vfio_ap device
 *	   driver; or, if no APIDs have yet been assigned, the APQI is not
 *	   contained in an APQN bound to the vfio_ap device driver.
 *
 *	4. -EADDRINUSE
 *	   An APQN derived from the cross product of the APQI being assigned
 *	   and the APIDs previously assigned is being used by another mediated
 *	   matrix device
 */
static ssize_t assign_domain_store(struct device *dev,
				   struct device_attribute *attr,
				   const char *buf, size_t count)
{
	int ret;
	unsigned long apqi;
	struct mdev_device *mdev = mdev_from_dev(dev);
	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
	unsigned long max_apqi = matrix_mdev->matrix.aqm_max;

	/* If the guest is running, disallow assignment of domain */
	if (matrix_mdev->kvm)
		return -EBUSY;

	ret = kstrtoul(buf, 0, &apqi);
	if (ret)
		return ret;
	if (apqi > max_apqi)
		return -ENODEV;

	mutex_lock(&matrix_dev->lock);

	ret = vfio_ap_mdev_verify_queues_reserved_for_apqi(matrix_mdev, apqi);
	if (ret)
		goto done;

	set_bit_inv(apqi, matrix_mdev->matrix.aqm);

	ret = vfio_ap_mdev_verify_no_sharing(matrix_mdev);
	if (ret)
		goto share_err;

	ret = count;
	goto done;

share_err:
	clear_bit_inv(apqi, matrix_mdev->matrix.aqm);
done:
	mutex_unlock(&matrix_dev->lock);

	return ret;
}
static DEVICE_ATTR_WO(assign_domain);

/**
 * unassign_domain_store
 *
 * @dev: the matrix device
 * @attr: the mediated matrix device's unassign_domain attribute
 * @buf: a buffer containing the AP queue index (APQI) of the domain to
 *	 be unassigned
 * @count: the number of bytes in @buf
 *
 * Parses the APQI from @buf and clears the corresponding bit in the
 * mediated matrix device's AQM.
 *
 * Returns the number of bytes processed if the APQI is valid; otherwise,
 * returns one of the following errors:
 *	-EINVAL if the APQI is not a number
 *	-ENODEV if the APQI exceeds the maximum value configured for the system
 */
static ssize_t unassign_domain_store(struct device *dev,
				     struct device_attribute *attr,
				     const char *buf, size_t count)
{
	int ret;
	unsigned long apqi;
	struct mdev_device *mdev = mdev_from_dev(dev);
	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);

	/* If the guest is running, disallow unassignment of the domain */
	if (matrix_mdev->kvm)
		return -EBUSY;

	ret = kstrtoul(buf, 0, &apqi);
	if (ret)
		return ret;

	if (apqi > matrix_mdev->matrix.aqm_max)
		return -ENODEV;

	mutex_lock(&matrix_dev->lock);
	clear_bit_inv((unsigned long)apqi, matrix_mdev->matrix.aqm);
	mutex_unlock(&matrix_dev->lock);

	return count;
}
static DEVICE_ATTR_WO(unassign_domain);
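/*
 * Reader's note: the AQM managed above covers usage domains (domains whose
 * AP queues the guest may submit work to); the ADM managed below covers
 * control domains (domains to which the guest may direct AP control
 * functions). The two sets are assigned independently.
 */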
/**
 * assign_control_domain_store
 *
 * @dev: the matrix device
 * @attr: the mediated matrix device's assign_control_domain attribute
 * @buf: a buffer containing the domain ID to be assigned
 * @count: the number of bytes in @buf
 *
 * Parses the domain ID from @buf and sets the corresponding bit in the
 * mediated matrix device's ADM.
 *
 * Returns the number of bytes processed if the domain ID is valid; otherwise,
 * returns one of the following errors:
 *	-EINVAL if the ID is not a number
 *	-ENODEV if the ID exceeds the maximum value configured for the system
 */
static ssize_t assign_control_domain_store(struct device *dev,
					   struct device_attribute *attr,
					   const char *buf, size_t count)
{
	int ret;
	unsigned long id;
	struct mdev_device *mdev = mdev_from_dev(dev);
	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);

	/* If the guest is running, disallow assignment of control domain */
	if (matrix_mdev->kvm)
		return -EBUSY;

	ret = kstrtoul(buf, 0, &id);
	if (ret)
		return ret;

	if (id > matrix_mdev->matrix.adm_max)
		return -ENODEV;

	/* Set the bit in the ADM (bitmask) corresponding to the AP control
	 * domain number (id). The bits in the mask, from most significant to
	 * least significant, correspond to IDs 0 up to one less than the
	 * number of control domains that can be assigned.
	 */
	mutex_lock(&matrix_dev->lock);
	set_bit_inv(id, matrix_mdev->matrix.adm);
	mutex_unlock(&matrix_dev->lock);

	return count;
}
static DEVICE_ATTR_WO(assign_control_domain);

/**
 * unassign_control_domain_store
 *
 * @dev: the matrix device
 * @attr: the mediated matrix device's unassign_control_domain attribute
 * @buf: a buffer containing the domain ID to be unassigned
 * @count: the number of bytes in @buf
 *
 * Parses the domain ID from @buf and clears the corresponding bit in the
 * mediated matrix device's ADM.
 *
 * Returns the number of bytes processed if the domain ID is valid; otherwise,
 * returns one of the following errors:
 *	-EINVAL if the ID is not a number
 *	-ENODEV if the ID exceeds the maximum value configured for the system
 */
static ssize_t unassign_control_domain_store(struct device *dev,
					     struct device_attribute *attr,
					     const char *buf, size_t count)
{
	int ret;
	unsigned long domid;
	struct mdev_device *mdev = mdev_from_dev(dev);
	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
	unsigned long max_domid = matrix_mdev->matrix.adm_max;

	/* If the guest is running, disallow unassignment of control domain */
	if (matrix_mdev->kvm)
		return -EBUSY;

	ret = kstrtoul(buf, 0, &domid);
	if (ret)
		return ret;
	if (domid > max_domid)
		return -ENODEV;

	mutex_lock(&matrix_dev->lock);
	clear_bit_inv(domid, matrix_mdev->matrix.adm);
	mutex_unlock(&matrix_dev->lock);

	return count;
}
static DEVICE_ATTR_WO(unassign_control_domain);

static ssize_t control_domains_show(struct device *dev,
				    struct device_attribute *dev_attr,
				    char *buf)
{
	unsigned long id;
	int nchars = 0;
	int n;
	char *bufpos = buf;
	struct mdev_device *mdev = mdev_from_dev(dev);
	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
	unsigned long max_domid = matrix_mdev->matrix.adm_max;

	mutex_lock(&matrix_dev->lock);
	for_each_set_bit_inv(id, matrix_mdev->matrix.adm, max_domid + 1) {
		n = sprintf(bufpos, "%04lx\n", id);
		bufpos += n;
		nchars += n;
	}
	mutex_unlock(&matrix_dev->lock);

	return nchars;
}
static DEVICE_ATTR_RO(control_domains);
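/*
 * Sample output of the matrix attribute below (illustrative assignments):
 * with APM = {5} and AQM = {4, 0xab} the file reads
 *
 *	05.0004
 *	05.00ab
 *
 * Adapters without any assigned domain print as "xx." and domains without
 * any assigned adapter as ".xxxx".
 */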
static ssize_t matrix_show(struct device *dev, struct device_attribute *attr,
			   char *buf)
{
	struct mdev_device *mdev = mdev_from_dev(dev);
	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
	char *bufpos = buf;
	unsigned long apid;
	unsigned long apqi;
	unsigned long apid1;
	unsigned long apqi1;
	unsigned long napm_bits = matrix_mdev->matrix.apm_max + 1;
	unsigned long naqm_bits = matrix_mdev->matrix.aqm_max + 1;
	int nchars = 0;
	int n;

	apid1 = find_first_bit_inv(matrix_mdev->matrix.apm, napm_bits);
	apqi1 = find_first_bit_inv(matrix_mdev->matrix.aqm, naqm_bits);

	mutex_lock(&matrix_dev->lock);

	if ((apid1 < napm_bits) && (apqi1 < naqm_bits)) {
		for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, napm_bits) {
			for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm,
					     naqm_bits) {
				n = sprintf(bufpos, "%02lx.%04lx\n", apid,
					    apqi);
				bufpos += n;
				nchars += n;
			}
		}
	} else if (apid1 < napm_bits) {
		for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, napm_bits) {
			n = sprintf(bufpos, "%02lx.\n", apid);
			bufpos += n;
			nchars += n;
		}
	} else if (apqi1 < naqm_bits) {
		for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, naqm_bits) {
			n = sprintf(bufpos, ".%04lx\n", apqi);
			bufpos += n;
			nchars += n;
		}
	}

	mutex_unlock(&matrix_dev->lock);

	return nchars;
}
static DEVICE_ATTR_RO(matrix);

static struct attribute *vfio_ap_mdev_attrs[] = {
	&dev_attr_assign_adapter.attr,
	&dev_attr_unassign_adapter.attr,
	&dev_attr_assign_domain.attr,
	&dev_attr_unassign_domain.attr,
	&dev_attr_assign_control_domain.attr,
	&dev_attr_unassign_control_domain.attr,
	&dev_attr_control_domains.attr,
	&dev_attr_matrix.attr,
	NULL,
};

static struct attribute_group vfio_ap_mdev_attr_group = {
	.attrs = vfio_ap_mdev_attrs
};

static const struct attribute_group *vfio_ap_mdev_attr_groups[] = {
	&vfio_ap_mdev_attr_group,
	NULL
};

/**
 * vfio_ap_mdev_set_kvm
 *
 * @matrix_mdev: a mediated matrix device
 * @kvm: reference to KVM instance
 *
 * Verifies no other mediated matrix device has @kvm and sets a reference to
 * it in @matrix_mdev->kvm.
 *
 * Returns 0 if no other mediated matrix device has a reference to @kvm;
 * otherwise, returns -EPERM.
 */
static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev,
				struct kvm *kvm)
{
	struct ap_matrix_mdev *m;

	mutex_lock(&matrix_dev->lock);

	list_for_each_entry(m, &matrix_dev->mdev_list, node) {
		if ((m != matrix_mdev) && (m->kvm == kvm)) {
			mutex_unlock(&matrix_dev->lock);
			return -EPERM;
		}
	}

	matrix_mdev->kvm = kvm;
	kvm_get_kvm(kvm);
	kvm->arch.crypto.pqap_hook = &matrix_mdev->pqap_hook;
	mutex_unlock(&matrix_dev->lock);

	return 0;
}
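/*
 * Reader's note: vfio_ap_mdev_set_kvm() is driven by the VFIO group notifier
 * below; userspace (typically QEMU, via the KVM-VFIO pseudo device) adding
 * the mdev's group to KVM is what delivers VFIO_GROUP_NOTIFY_SET_KVM and
 * thereby wires pqap_hook into kvm->arch.crypto.
 */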
/*
 * vfio_ap_mdev_iommu_notifier: IOMMU notifier callback
 *
 * @nb: The notifier block
 * @action: Action to be taken
 * @data: data associated with the request
 *
 * For an UNMAP request, unpins the guest IOVA (the NIB guest address we
 * pinned before). Other requests are ignored.
 */
static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
				       unsigned long action, void *data)
{
	struct ap_matrix_mdev *matrix_mdev;

	matrix_mdev = container_of(nb, struct ap_matrix_mdev, iommu_notifier);

	if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
		struct vfio_iommu_type1_dma_unmap *unmap = data;
		unsigned long g_pfn = unmap->iova >> PAGE_SHIFT;

		vfio_unpin_pages(mdev_dev(matrix_mdev->mdev), &g_pfn, 1);
		return NOTIFY_OK;
	}

	return NOTIFY_DONE;
}

static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
				       unsigned long action, void *data)
{
	int ret;
	struct ap_matrix_mdev *matrix_mdev;

	if (action != VFIO_GROUP_NOTIFY_SET_KVM)
		return NOTIFY_OK;

	matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier);

	if (!data) {
		matrix_mdev->kvm = NULL;
		return NOTIFY_OK;
	}

	ret = vfio_ap_mdev_set_kvm(matrix_mdev, data);
	if (ret)
		return NOTIFY_DONE;

	/* If there is no CRYCB pointer, then we can't copy the masks */
	if (!matrix_mdev->kvm->arch.crypto.crycbd)
		return NOTIFY_DONE;

	kvm_arch_crypto_set_masks(matrix_mdev->kvm, matrix_mdev->matrix.apm,
				  matrix_mdev->matrix.aqm,
				  matrix_mdev->matrix.adm);

	return NOTIFY_OK;
}

static void vfio_ap_irq_disable_apqn(int apqn)
{
	struct device *dev;
	struct vfio_ap_queue *q;

	dev = driver_find_device(&matrix_dev->vfio_ap_drv->driver, NULL,
				 &apqn, match_apqn);
	if (dev) {
		q = dev_get_drvdata(dev);
		vfio_ap_irq_disable(q);
		put_device(dev);
	}
}

int vfio_ap_mdev_reset_queue(unsigned int apid, unsigned int apqi,
			     unsigned int retry)
{
	struct ap_queue_status status;
	int retry2 = 2;
	int apqn = AP_MKQID(apid, apqi);

	do {
		status = ap_zapq(apqn);
		switch (status.response_code) {
		case AP_RESPONSE_NORMAL:
			while (!status.queue_empty && retry2--) {
				msleep(20);
				status = ap_tapq(apqn, NULL);
			}
			WARN_ON_ONCE(retry2 <= 0);
			return 0;
		case AP_RESPONSE_RESET_IN_PROGRESS:
		case AP_RESPONSE_BUSY:
			msleep(20);
			break;
		default:
			/* things are really broken, give up */
			return -EIO;
		}
	} while (retry--);

	return -EBUSY;
}
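/*
 * Reader's note: even a NORMAL response to PQAP/ZAPQ only means the zeroize
 * and reset was accepted; the queue may still be draining, which is why
 * vfio_ap_mdev_reset_queue() above keeps polling with TAPQ until queue_empty
 * is reported (or its retries run out and the WARN fires).
 */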
1177 */ 1178 if (ret) 1179 rc = ret; 1180 vfio_ap_irq_disable_apqn(AP_MKQID(apid, apqi)); 1181 } 1182 } 1183 1184 return rc; 1185 } 1186 1187 static int vfio_ap_mdev_open(struct mdev_device *mdev) 1188 { 1189 struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev); 1190 unsigned long events; 1191 int ret; 1192 1193 1194 if (!try_module_get(THIS_MODULE)) 1195 return -ENODEV; 1196 1197 matrix_mdev->group_notifier.notifier_call = vfio_ap_mdev_group_notifier; 1198 events = VFIO_GROUP_NOTIFY_SET_KVM; 1199 1200 ret = vfio_register_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY, 1201 &events, &matrix_mdev->group_notifier); 1202 if (ret) { 1203 module_put(THIS_MODULE); 1204 return ret; 1205 } 1206 1207 matrix_mdev->iommu_notifier.notifier_call = vfio_ap_mdev_iommu_notifier; 1208 events = VFIO_IOMMU_NOTIFY_DMA_UNMAP; 1209 ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, 1210 &events, &matrix_mdev->iommu_notifier); 1211 if (!ret) 1212 return ret; 1213 1214 vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY, 1215 &matrix_mdev->group_notifier); 1216 module_put(THIS_MODULE); 1217 return ret; 1218 } 1219 1220 static void vfio_ap_mdev_release(struct mdev_device *mdev) 1221 { 1222 struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev); 1223 1224 mutex_lock(&matrix_dev->lock); 1225 if (matrix_mdev->kvm) { 1226 kvm_arch_crypto_clear_masks(matrix_mdev->kvm); 1227 matrix_mdev->kvm->arch.crypto.pqap_hook = NULL; 1228 vfio_ap_mdev_reset_queues(mdev); 1229 kvm_put_kvm(matrix_mdev->kvm); 1230 matrix_mdev->kvm = NULL; 1231 } 1232 mutex_unlock(&matrix_dev->lock); 1233 1234 vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, 1235 &matrix_mdev->iommu_notifier); 1236 vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY, 1237 &matrix_mdev->group_notifier); 1238 module_put(THIS_MODULE); 1239 } 1240 1241 static int vfio_ap_mdev_get_device_info(unsigned long arg) 1242 { 1243 unsigned long minsz; 1244 struct vfio_device_info info; 1245 1246 minsz = offsetofend(struct vfio_device_info, num_irqs); 1247 1248 if (copy_from_user(&info, (void __user *)arg, minsz)) 1249 return -EFAULT; 1250 1251 if (info.argsz < minsz) 1252 return -EINVAL; 1253 1254 info.flags = VFIO_DEVICE_FLAGS_AP | VFIO_DEVICE_FLAGS_RESET; 1255 info.num_regions = 0; 1256 info.num_irqs = 0; 1257 1258 return copy_to_user((void __user *)arg, &info, minsz); 1259 } 1260 1261 static ssize_t vfio_ap_mdev_ioctl(struct mdev_device *mdev, 1262 unsigned int cmd, unsigned long arg) 1263 { 1264 int ret; 1265 1266 mutex_lock(&matrix_dev->lock); 1267 switch (cmd) { 1268 case VFIO_DEVICE_GET_INFO: 1269 ret = vfio_ap_mdev_get_device_info(arg); 1270 break; 1271 case VFIO_DEVICE_RESET: 1272 ret = vfio_ap_mdev_reset_queues(mdev); 1273 break; 1274 default: 1275 ret = -EOPNOTSUPP; 1276 break; 1277 } 1278 mutex_unlock(&matrix_dev->lock); 1279 1280 return ret; 1281 } 1282 1283 static const struct mdev_parent_ops vfio_ap_matrix_ops = { 1284 .owner = THIS_MODULE, 1285 .supported_type_groups = vfio_ap_mdev_type_groups, 1286 .mdev_attr_groups = vfio_ap_mdev_attr_groups, 1287 .create = vfio_ap_mdev_create, 1288 .remove = vfio_ap_mdev_remove, 1289 .open = vfio_ap_mdev_open, 1290 .release = vfio_ap_mdev_release, 1291 .ioctl = vfio_ap_mdev_ioctl, 1292 }; 1293 1294 int vfio_ap_mdev_register(void) 1295 { 1296 atomic_set(&matrix_dev->available_instances, MAX_ZDEV_ENTRIES_EXT); 1297 1298 return mdev_register_device(&matrix_dev->device, &vfio_ap_matrix_ops); 1299 } 1300 1301 void vfio_ap_mdev_unregister(void) 1302 { 1303 
void vfio_ap_mdev_unregister(void)
{
	mdev_unregister_device(&matrix_dev->device);
}