// SPDX-License-Identifier: GPL-2.0+
/*
 * Adjunct processor matrix VFIO device driver callbacks.
 *
 * Copyright IBM Corp. 2018
 *
 * Author(s): Tony Krowiak <akrowiak@linux.ibm.com>
 *	      Halil Pasic <pasic@linux.ibm.com>
 *	      Pierre Morel <pmorel@linux.ibm.com>
 */
#include <linux/string.h>
#include <linux/vfio.h>
#include <linux/device.h>
#include <linux/list.h>
#include <linux/ctype.h>
#include <linux/bitops.h>
#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/uuid.h>
#include <asm/kvm.h>
#include <asm/zcrypt.h>

#include "vfio_ap_private.h"
#include "vfio_ap_debug.h"

#define VFIO_AP_MDEV_TYPE_HWVIRT "passthrough"
#define VFIO_AP_MDEV_NAME_HWVIRT "VFIO AP Passthrough Device"

static int vfio_ap_mdev_reset_queues(struct ap_matrix_mdev *matrix_mdev);
static struct vfio_ap_queue *vfio_ap_find_queue(int apqn);
static const struct vfio_device_ops vfio_ap_matrix_dev_ops;

static int match_apqn(struct device *dev, const void *data)
{
	struct vfio_ap_queue *q = dev_get_drvdata(dev);

	return (q->apqn == *(int *)(data)) ? 1 : 0;
}

/**
 * vfio_ap_get_queue - retrieve a queue with a specific APQN from a list
 * @matrix_mdev: the associated mediated matrix device
 * @apqn: The queue APQN
 *
 * Retrieve a queue with a specific APQN from the list of the
 * devices of the vfio_ap_drv.
 * Verify that the APID and the APQI are set in the matrix.
 *
 * Return: the pointer to the associated vfio_ap_queue
 */
static struct vfio_ap_queue *vfio_ap_get_queue(
					struct ap_matrix_mdev *matrix_mdev,
					int apqn)
{
	struct vfio_ap_queue *q;

	if (!test_bit_inv(AP_QID_CARD(apqn), matrix_mdev->matrix.apm))
		return NULL;
	if (!test_bit_inv(AP_QID_QUEUE(apqn), matrix_mdev->matrix.aqm))
		return NULL;

	q = vfio_ap_find_queue(apqn);
	if (q)
		q->matrix_mdev = matrix_mdev;

	return q;
}

/**
 * vfio_ap_wait_for_irqclear - clears the IR bit or gives up after 5 tries
 * @apqn: The AP Queue number
 *
 * Checks the IRQ bit for the status of this APQN using ap_tapq.
 * Returns if the ap_tapq function succeeded and the bit is clear.
 * Returns if the ap_tapq function failed with an invalid, deconfigured, or
 * checkstopped AP.
 * Otherwise retries up to 5 times after waiting 20ms.
 */
static void vfio_ap_wait_for_irqclear(int apqn)
{
	struct ap_queue_status status;
	int retry = 5;

	do {
		status = ap_tapq(apqn, NULL);
		switch (status.response_code) {
		case AP_RESPONSE_NORMAL:
		case AP_RESPONSE_RESET_IN_PROGRESS:
			if (!status.irq_enabled)
				return;
			fallthrough;
		case AP_RESPONSE_BUSY:
			msleep(20);
			break;
		case AP_RESPONSE_Q_NOT_AVAIL:
		case AP_RESPONSE_DECONFIGURED:
		case AP_RESPONSE_CHECKSTOPPED:
		default:
			WARN_ONCE(1, "%s: tapq rc %02x: %04x\n", __func__,
				  status.response_code, apqn);
			return;
		}
	} while (--retry);

	WARN_ONCE(1, "%s: tapq rc %02x: %04x could not clear IR bit\n",
		  __func__, status.response_code, apqn);
}

/**
 * vfio_ap_free_aqic_resources - free vfio_ap_queue resources
 * @q: The vfio_ap_queue
 *
 * Unregisters the ISC in the GIB when the saved ISC is not invalid.
 * Unpins the guest's page holding the NIB when it exists.
 * Resets the saved_pfn and saved_isc to invalid values.
 */
static void vfio_ap_free_aqic_resources(struct vfio_ap_queue *q)
{
	if (!q)
		return;
	if (q->saved_isc != VFIO_AP_ISC_INVALID &&
	    !WARN_ON(!(q->matrix_mdev && q->matrix_mdev->kvm))) {
		kvm_s390_gisc_unregister(q->matrix_mdev->kvm, q->saved_isc);
		q->saved_isc = VFIO_AP_ISC_INVALID;
	}
	if (q->saved_pfn && !WARN_ON(!q->matrix_mdev)) {
		vfio_unpin_pages(&q->matrix_mdev->vdev, &q->saved_pfn, 1);
		q->saved_pfn = 0;
	}
}

/**
 * vfio_ap_irq_disable - disables and clears an ap_queue interrupt
 * @q: The vfio_ap_queue
 *
 * Uses ap_aqic to disable the interruption; if that succeeds, a reset is in
 * progress, or an IRQ-disable command has already been issued, calls
 * vfio_ap_wait_for_irqclear() to wait for the IRQ bit to clear, then calls
 * vfio_ap_free_aqic_resources() to free the resources associated with the
 * AP interrupt handling.
 *
 * In the case the AP is busy, or a reset is in progress,
 * retries after 20ms, up to 5 times.
 *
 * Returns if the ap_aqic function failed with an invalid, deconfigured, or
 * checkstopped AP.
 *
 * Return: &struct ap_queue_status
 */
static struct ap_queue_status vfio_ap_irq_disable(struct vfio_ap_queue *q)
{
	struct ap_qirq_ctrl aqic_gisa = {};
	struct ap_queue_status status;
	int retries = 5;

	do {
		status = ap_aqic(q->apqn, aqic_gisa, NULL);
		switch (status.response_code) {
		case AP_RESPONSE_OTHERWISE_CHANGED:
		case AP_RESPONSE_NORMAL:
			vfio_ap_wait_for_irqclear(q->apqn);
			goto end_free;
		case AP_RESPONSE_RESET_IN_PROGRESS:
		case AP_RESPONSE_BUSY:
			msleep(20);
			break;
		case AP_RESPONSE_Q_NOT_AVAIL:
		case AP_RESPONSE_DECONFIGURED:
		case AP_RESPONSE_CHECKSTOPPED:
		case AP_RESPONSE_INVALID_ADDRESS:
		default:
			/* All cases in default mean AP not operational */
			WARN_ONCE(1, "%s: ap_aqic status %d\n", __func__,
				  status.response_code);
			goto end_free;
		}
	} while (retries--);

	WARN_ONCE(1, "%s: ap_aqic status %d\n", __func__,
		  status.response_code);
end_free:
	vfio_ap_free_aqic_resources(q);
	q->matrix_mdev = NULL;
	return status;
}

/**
 * vfio_ap_validate_nib - validate a notification indicator byte (nib) address.
 *
 * @vcpu: the object representing the vcpu executing the PQAP(AQIC) instruction.
 * @nib: the location for storing the nib address.
 * @g_pfn: the location for storing the page frame number of the page containing
 *	   the nib.
 *
 * When the PQAP(AQIC) instruction is executed, general register 2 contains the
 * address of the notification indicator byte (nib) used for IRQ notification.
 * This function parses the nib from gr2 and calculates the page frame
 * number for the guest of the page containing the nib. The values are
 * stored in @nib and @g_pfn respectively.
 *
 * The g_pfn of the nib is then validated to ensure the nib address is valid.
 *
 * Return: returns zero if the nib address is valid; otherwise, returns
 *	   -EINVAL.
 */
static int vfio_ap_validate_nib(struct kvm_vcpu *vcpu, unsigned long *nib,
				unsigned long *g_pfn)
{
	*nib = vcpu->run->s.regs.gprs[2];
	*g_pfn = *nib >> PAGE_SHIFT;

	if (kvm_is_error_hva(gfn_to_hva(vcpu->kvm, *g_pfn)))
		return -EINVAL;

	return 0;
}

/**
 * vfio_ap_irq_enable - Enable Interruption for an APQN
 *
 * @q:	  the vfio_ap_queue holding AQIC parameters
 * @isc:  the guest ISC to register with the GIB interface
 * @vcpu: the vcpu object containing the registers specifying the parameters
 *	  passed to the PQAP(AQIC) instruction.
 *
 * Pin the NIB saved in *q
 * Register the guest ISC to GIB interface and retrieve the
 * host ISC to issue the host side PQAP/AQIC
 *
 * Response.status may be set to AP_RESPONSE_INVALID_ADDRESS in case the
 * vfio_pin_pages failed.
 *
 * Otherwise return the ap_queue_status returned by the ap_aqic();
 * all retry handling will be done by the guest.
 *
 * Return: &struct ap_queue_status
 */
static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q,
						 int isc,
						 struct kvm_vcpu *vcpu)
{
	unsigned long nib;
	struct ap_qirq_ctrl aqic_gisa = {};
	struct ap_queue_status status = {};
	struct kvm_s390_gisa *gisa;
	int nisc;
	struct kvm *kvm;
	unsigned long h_nib, g_pfn, h_pfn;
	int ret;

	/* Verify that the notification indicator byte address is valid */
	if (vfio_ap_validate_nib(vcpu, &nib, &g_pfn)) {
		VFIO_AP_DBF_WARN("%s: invalid NIB address: nib=%#lx, g_pfn=%#lx, apqn=%#04x\n",
				 __func__, nib, g_pfn, q->apqn);

		status.response_code = AP_RESPONSE_INVALID_ADDRESS;
		return status;
	}

	ret = vfio_pin_pages(&q->matrix_mdev->vdev, &g_pfn, 1,
			     IOMMU_READ | IOMMU_WRITE, &h_pfn);
	switch (ret) {
	case 1:
		break;
	default:
		VFIO_AP_DBF_WARN("%s: vfio_pin_pages failed: rc=%d, "
				 "nib=%#lx, g_pfn=%#lx, apqn=%#04x\n",
				 __func__, ret, nib, g_pfn, q->apqn);

		status.response_code = AP_RESPONSE_INVALID_ADDRESS;
		return status;
	}

	kvm = q->matrix_mdev->kvm;
	gisa = kvm->arch.gisa_int.origin;

	h_nib = (h_pfn << PAGE_SHIFT) | (nib & ~PAGE_MASK);
	aqic_gisa.gisc = isc;

	nisc = kvm_s390_gisc_register(kvm, isc);
	if (nisc < 0) {
		VFIO_AP_DBF_WARN("%s: gisc registration failed: nisc=%d, isc=%d, apqn=%#04x\n",
				 __func__, nisc, isc, q->apqn);

		status.response_code = AP_RESPONSE_INVALID_GISA;
		return status;
	}

	aqic_gisa.isc = nisc;
	aqic_gisa.ir = 1;
	aqic_gisa.gisa = (uint64_t)gisa >> 4;

	status = ap_aqic(q->apqn, aqic_gisa, (void *)h_nib);
	switch (status.response_code) {
	case AP_RESPONSE_NORMAL:
		/* See if we did clear older IRQ configuration */
		vfio_ap_free_aqic_resources(q);
		q->saved_pfn = g_pfn;
		q->saved_isc = isc;
		break;
	case AP_RESPONSE_OTHERWISE_CHANGED:
		/* We could not modify IRQ settings: clear new configuration */
		vfio_unpin_pages(&q->matrix_mdev->vdev, &g_pfn, 1);
		kvm_s390_gisc_unregister(kvm, isc);
		break;
	default:
		pr_warn("%s: apqn %04x: response: %02x\n", __func__, q->apqn,
			status.response_code);
		vfio_ap_irq_disable(q);
		break;
	}

	if (status.response_code != AP_RESPONSE_NORMAL) {
		VFIO_AP_DBF_WARN("%s: PQAP(AQIC) failed with status=%#02x: "
				 "zone=%#x, ir=%#x, gisc=%#x, f=%#x, "
				 "gisa=%#x, isc=%#x, apqn=%#04x\n",
				 __func__, status.response_code,
				 aqic_gisa.zone, aqic_gisa.ir, aqic_gisa.gisc,
				 aqic_gisa.gf, aqic_gisa.gisa, aqic_gisa.isc,
				 q->apqn);
	}

	return status;
}

/**
 * vfio_ap_le_guid_to_be_uuid - convert a little endian guid array into an array
 *				of big endian elements that can be passed by
 *				value to an s390dbf sprintf event function to
 *				format a UUID string.
 *
 * @guid: the object containing the little endian guid
 * @uuid: a six-element array of long values that can be passed by value as
 *	  arguments for a formatting string specifying a UUID.
 *
 * The S390 Debug Feature (s390dbf) allows the use of "%s" in the sprintf
 * event functions only if the memory for the passed string remains available
 * as long as the debug feature exists. Since a mediated device can be removed
 * at any time, its name cannot be used because %s passes the reference to the
 * string in memory and the reference will go stale once the device is removed.
 *
 * The s390dbf string formatting function allows a maximum of 9 arguments for a
 * message to be displayed in the 'sprintf' view. In order to use the bytes
 * comprising the mediated device's UUID to display the mediated device name,
 * they will have to be converted into an array whose elements can be passed by
 * value to sprintf. For example:
 *
 * guid array: { 83, 78, 17, 62, bb, f1, f0, 47, 91, 4d, 32, a2, 2e, 3a, 88, 04 }
 * mdev name: 62177883-f1bb-47f0-914d-32a22e3a8804
 * array returned: { 62177883, f1bb, 47f0, 914d, 32a2, 2e3a8804 }
 * formatting string: "%08lx-%04lx-%04lx-%04lx-%02lx%04lx"
 */
static void vfio_ap_le_guid_to_be_uuid(guid_t *guid, unsigned long *uuid)
{
	/*
	 * The input guid is ordered in little endian, so it needs to be
	 * reordered for displaying a UUID as a string. This specifies the
	 * guid indices in proper order.
	 */
	uuid[0] = le32_to_cpup((__le32 *)guid);
	uuid[1] = le16_to_cpup((__le16 *)&guid->b[4]);
	uuid[2] = le16_to_cpup((__le16 *)&guid->b[6]);
	uuid[3] = *((__u16 *)&guid->b[8]);
	uuid[4] = *((__u16 *)&guid->b[10]);
	uuid[5] = *((__u32 *)&guid->b[12]);
}

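/*
 * Illustrative use of the conversion above (a sketch mirroring the call site
 * in handle_pqap() below, not a separate API): the six-element array is
 * passed by value to an s390dbf event macro, so no reference to the mdev's
 * name needs to outlive the device.
 *
 *	unsigned long uuid[6];
 *
 *	vfio_ap_le_guid_to_be_uuid(&matrix_mdev->mdev->uuid, uuid);
 *	VFIO_AP_DBF_WARN("mdev %08lx-%04lx-%04lx-%04lx-%04lx%08lx not in use\n",
 *			 uuid[0], uuid[1], uuid[2], uuid[3], uuid[4], uuid[5]);
 */
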
/**
 * handle_pqap - PQAP instruction callback
 *
 * @vcpu: The vcpu on which we received the PQAP instruction
 *
 * Get the general register contents to initialize internal variables.
 * REG[0]: APQN
 * REG[1]: IR and ISC
 * REG[2]: NIB
 *
 * Response.status may be set to one of the following response codes:
 * - AP_RESPONSE_Q_NOT_AVAIL: if the queue is not available
 * - AP_RESPONSE_DECONFIGURED: if the queue is not configured
 * - AP_RESPONSE_NORMAL (0): in case of success
 *   Check vfio_ap_irq_enable() and vfio_ap_irq_disable() for other possible RC.
 * We take the matrix_dev lock to ensure serialization on queues and
 * mediated device access.
 *
 * Return: 0 if we could handle the request inside KVM.
 * Otherwise, returns -EOPNOTSUPP to let QEMU handle the fault.
 */
static int handle_pqap(struct kvm_vcpu *vcpu)
{
	uint64_t status;
	uint16_t apqn;
	unsigned long uuid[6];
	struct vfio_ap_queue *q;
	struct ap_queue_status qstatus = {
			       .response_code = AP_RESPONSE_Q_NOT_AVAIL, };
	struct ap_matrix_mdev *matrix_mdev;

	apqn = vcpu->run->s.regs.gprs[0] & 0xffff;

	/* If we do not use the AIV facility just go to userland */
	if (!(vcpu->arch.sie_block->eca & ECA_AIV)) {
		VFIO_AP_DBF_WARN("%s: AIV facility not installed: apqn=0x%04x, eca=0x%04x\n",
				 __func__, apqn, vcpu->arch.sie_block->eca);

		return -EOPNOTSUPP;
	}

	mutex_lock(&matrix_dev->lock);
	if (!vcpu->kvm->arch.crypto.pqap_hook) {
		VFIO_AP_DBF_WARN("%s: PQAP(AQIC) hook not registered with the vfio_ap driver: apqn=0x%04x\n",
				 __func__, apqn);
		goto out_unlock;
	}

	matrix_mdev = container_of(vcpu->kvm->arch.crypto.pqap_hook,
				   struct ap_matrix_mdev, pqap_hook);

	/* If there is no guest using the mdev, there is nothing to do */
	if (!matrix_mdev->kvm) {
		vfio_ap_le_guid_to_be_uuid(&matrix_mdev->mdev->uuid, uuid);
		VFIO_AP_DBF_WARN("%s: mdev %08lx-%04lx-%04lx-%04lx-%04lx%08lx not in use: apqn=0x%04x\n",
				 __func__, uuid[0], uuid[1], uuid[2],
				 uuid[3], uuid[4], uuid[5], apqn);
		goto out_unlock;
	}

	q = vfio_ap_get_queue(matrix_mdev, apqn);
	if (!q) {
		VFIO_AP_DBF_WARN("%s: Queue %02x.%04x not bound to the vfio_ap driver\n",
				 __func__, AP_QID_CARD(apqn),
				 AP_QID_QUEUE(apqn));
		goto out_unlock;
	}

	status = vcpu->run->s.regs.gprs[1];

	/* If IR bit(16) is set we enable the interrupt */
	if ((status >> (63 - 16)) & 0x01)
		qstatus = vfio_ap_irq_enable(q, status & 0x07, vcpu);
	else
		qstatus = vfio_ap_irq_disable(q);

out_unlock:
	memcpy(&vcpu->run->s.regs.gprs[1], &qstatus, sizeof(qstatus));
	vcpu->run->s.regs.gprs[1] >>= 32;
	mutex_unlock(&matrix_dev->lock);
	return 0;
}

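/*
 * Note on the call path: when a guest executes PQAP(AQIC), KVM's instruction
 * intercept handler (outside this file) calls through the pointer published
 * in kvm->arch.crypto.pqap_hook. vfio_ap_mdev_set_kvm() below points that at
 * matrix_mdev->pqap_hook, which vfio_ap_mdev_probe() initializes to
 * handle_pqap(). handle_pqap() takes matrix_dev->lock itself to serialize
 * against queue and mediated device changes.
 */
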
static void vfio_ap_matrix_init(struct ap_config_info *info,
				struct ap_matrix *matrix)
{
	matrix->apm_max = info->apxa ? info->Na : 63;
	matrix->aqm_max = info->apxa ? info->Nd : 15;
	matrix->adm_max = info->apxa ? info->Nd : 15;
}

static int vfio_ap_mdev_probe(struct mdev_device *mdev)
{
	struct ap_matrix_mdev *matrix_mdev;
	int ret;

	if ((atomic_dec_if_positive(&matrix_dev->available_instances) < 0))
		return -EPERM;

	matrix_mdev = kzalloc(sizeof(*matrix_mdev), GFP_KERNEL);
	if (!matrix_mdev) {
		ret = -ENOMEM;
		goto err_dec_available;
	}
	vfio_init_group_dev(&matrix_mdev->vdev, &mdev->dev,
			    &vfio_ap_matrix_dev_ops);

	matrix_mdev->mdev = mdev;
	vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->matrix);
	matrix_mdev->pqap_hook = handle_pqap;
	mutex_lock(&matrix_dev->lock);
	list_add(&matrix_mdev->node, &matrix_dev->mdev_list);
	mutex_unlock(&matrix_dev->lock);

	ret = vfio_register_emulated_iommu_dev(&matrix_mdev->vdev);
	if (ret)
		goto err_list;
	dev_set_drvdata(&mdev->dev, matrix_mdev);
	return 0;

err_list:
	mutex_lock(&matrix_dev->lock);
	list_del(&matrix_mdev->node);
	mutex_unlock(&matrix_dev->lock);
	vfio_uninit_group_dev(&matrix_mdev->vdev);
	kfree(matrix_mdev);
err_dec_available:
	atomic_inc(&matrix_dev->available_instances);
	return ret;
}

static void vfio_ap_mdev_remove(struct mdev_device *mdev)
{
	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(&mdev->dev);

	vfio_unregister_group_dev(&matrix_mdev->vdev);

	mutex_lock(&matrix_dev->lock);
	vfio_ap_mdev_reset_queues(matrix_mdev);
	list_del(&matrix_mdev->node);
	mutex_unlock(&matrix_dev->lock);
	vfio_uninit_group_dev(&matrix_mdev->vdev);
	kfree(matrix_mdev);
	atomic_inc(&matrix_dev->available_instances);
}

static ssize_t name_show(struct mdev_type *mtype,
			 struct mdev_type_attribute *attr, char *buf)
{
	return sprintf(buf, "%s\n", VFIO_AP_MDEV_NAME_HWVIRT);
}

static MDEV_TYPE_ATTR_RO(name);

static ssize_t available_instances_show(struct mdev_type *mtype,
					struct mdev_type_attribute *attr,
					char *buf)
{
	return sprintf(buf, "%d\n",
		       atomic_read(&matrix_dev->available_instances));
}

static MDEV_TYPE_ATTR_RO(available_instances);

static ssize_t device_api_show(struct mdev_type *mtype,
			       struct mdev_type_attribute *attr, char *buf)
{
	return sprintf(buf, "%s\n", VFIO_DEVICE_API_AP_STRING);
}

static MDEV_TYPE_ATTR_RO(device_api);

static struct attribute *vfio_ap_mdev_type_attrs[] = {
	&mdev_type_attr_name.attr,
	&mdev_type_attr_device_api.attr,
	&mdev_type_attr_available_instances.attr,
	NULL,
};

static struct attribute_group vfio_ap_mdev_hwvirt_type_group = {
	.name = VFIO_AP_MDEV_TYPE_HWVIRT,
	.attrs = vfio_ap_mdev_type_attrs,
};

static struct attribute_group *vfio_ap_mdev_type_groups[] = {
	&vfio_ap_mdev_hwvirt_type_group,
	NULL,
};

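/*
 * Illustrative mdev lifecycle from userspace (a sketch; the sysfs paths
 * follow the standard mdev layout, with the type name derived from the
 * driver name and VFIO_AP_MDEV_TYPE_HWVIRT):
 *
 *	uuid=$(uuidgen)
 *	echo $uuid > /sys/devices/vfio_ap/matrix/mdev_supported_types/\
 *	vfio_ap-passthrough/create
 *
 * Writing to "create" ends up in vfio_ap_mdev_probe() above; writing 1 to
 * the mdev's "remove" attribute ends up in vfio_ap_mdev_remove().
 */
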
struct vfio_ap_queue_reserved {
	unsigned long *apid;
	unsigned long *apqi;
	bool reserved;
};

/**
 * vfio_ap_has_queue - determines if the AP queue contains the target in @data
 *
 * @dev: an AP queue device
 * @data: a struct vfio_ap_queue_reserved reference
 *
 * Flags whether the AP queue device (@dev) has a queue ID containing the APQN,
 * apid or apqi specified in @data:
 *
 * - If @data contains both an apid and apqi value, then @data will be flagged
 *   as reserved if the APID and APQI fields for the AP queue device match
 *
 * - If @data contains only an apid value, @data will be flagged as
 *   reserved if the APID field in the AP queue device matches
 *
 * - If @data contains only an apqi value, @data will be flagged as
 *   reserved if the APQI field in the AP queue device matches
 *
 * Return: 0 to indicate the function succeeded. Returns -EINVAL if
 * @data does not contain either an apid or apqi.
 */
static int vfio_ap_has_queue(struct device *dev, void *data)
{
	struct vfio_ap_queue_reserved *qres = data;
	struct ap_queue *ap_queue = to_ap_queue(dev);
	ap_qid_t qid;
	unsigned long id;

	if (qres->apid && qres->apqi) {
		qid = AP_MKQID(*qres->apid, *qres->apqi);
		if (qid == ap_queue->qid)
			qres->reserved = true;
	} else if (qres->apid && !qres->apqi) {
		id = AP_QID_CARD(ap_queue->qid);
		if (id == *qres->apid)
			qres->reserved = true;
	} else if (!qres->apid && qres->apqi) {
		id = AP_QID_QUEUE(ap_queue->qid);
		if (id == *qres->apqi)
			qres->reserved = true;
	} else {
		return -EINVAL;
	}

	return 0;
}

/**
 * vfio_ap_verify_queue_reserved - verifies that the AP queue containing
 *				   @apid or @apqi is reserved
 *
 * @apid: an AP adapter ID
 * @apqi: an AP queue index
 *
 * Verifies that the AP queue with @apid/@apqi is reserved by the VFIO AP device
 * driver according to the following rules:
 *
 * - If both @apid and @apqi are not NULL, then there must be an AP queue
 *   device bound to the vfio_ap driver with the APQN identified by @apid and
 *   @apqi
 *
 * - If only @apid is not NULL, then there must be an AP queue device bound
 *   to the vfio_ap driver with an APQN containing @apid
 *
 * - If only @apqi is not NULL, then there must be an AP queue device bound
 *   to the vfio_ap driver with an APQN containing @apqi
 *
 * Return: 0 if the AP queue is reserved; otherwise, returns -EADDRNOTAVAIL.
 */
static int vfio_ap_verify_queue_reserved(unsigned long *apid,
					 unsigned long *apqi)
{
	int ret;
	struct vfio_ap_queue_reserved qres;

	qres.apid = apid;
	qres.apqi = apqi;
	qres.reserved = false;

	ret = driver_for_each_device(&matrix_dev->vfio_ap_drv->driver, NULL,
				     &qres, vfio_ap_has_queue);
	if (ret)
		return ret;

	if (qres.reserved)
		return 0;

	return -EADDRNOTAVAIL;
}

static int
vfio_ap_mdev_verify_queues_reserved_for_apid(struct ap_matrix_mdev *matrix_mdev,
					     unsigned long apid)
{
	int ret;
	unsigned long apqi;
	unsigned long nbits = matrix_mdev->matrix.aqm_max + 1;

	if (find_first_bit_inv(matrix_mdev->matrix.aqm, nbits) >= nbits)
		return vfio_ap_verify_queue_reserved(&apid, NULL);

	for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, nbits) {
		ret = vfio_ap_verify_queue_reserved(&apid, &apqi);
		if (ret)
			return ret;
	}

	return 0;
}

/**
 * vfio_ap_mdev_verify_no_sharing - verifies that the AP matrix is not shared
 *
 * @matrix_mdev: the mediated matrix device
 *
 * Verifies that the APQNs derived from the cross product of the AP adapter IDs
 * and AP queue indexes comprising the AP matrix are not configured for another
 * mediated device. AP queue sharing is not allowed.
 *
 * Return: 0 if the APQNs are not shared; otherwise returns -EADDRINUSE.
 */
static int vfio_ap_mdev_verify_no_sharing(struct ap_matrix_mdev *matrix_mdev)
{
	struct ap_matrix_mdev *lstdev;
	DECLARE_BITMAP(apm, AP_DEVICES);
	DECLARE_BITMAP(aqm, AP_DOMAINS);

	list_for_each_entry(lstdev, &matrix_dev->mdev_list, node) {
		if (matrix_mdev == lstdev)
			continue;

		memset(apm, 0, sizeof(apm));
		memset(aqm, 0, sizeof(aqm));

		/*
		 * We work on full longs, as we can only exclude the leftover
		 * bits in non-inverse order. The leftover is all zeros.
		 */
		if (!bitmap_and(apm, matrix_mdev->matrix.apm,
				lstdev->matrix.apm, AP_DEVICES))
			continue;

		if (!bitmap_and(aqm, matrix_mdev->matrix.aqm,
				lstdev->matrix.aqm, AP_DOMAINS))
			continue;

		return -EADDRINUSE;
	}

	return 0;
}

/**
 * assign_adapter_store - parses the APID from @buf and sets the
 * corresponding bit in the mediated matrix device's APM
 *
 * @dev:	the matrix device
 * @attr:	the mediated matrix device's assign_adapter attribute
 * @buf:	a buffer containing the AP adapter number (APID) to
 *		be assigned
 * @count:	the number of bytes in @buf
 *
 * Return: the number of bytes processed if the APID is valid; otherwise,
 * returns one of the following errors:
 *
 *	1. -EINVAL
 *	   The APID is not a valid number
 *
 *	2. -ENODEV
 *	   The APID exceeds the maximum value configured for the system
 *
 *	3. -EADDRNOTAVAIL
 *	   An APQN derived from the cross product of the APID being assigned
 *	   and the APQIs previously assigned is not bound to the vfio_ap device
 *	   driver; or, if no APQIs have yet been assigned, the APID is not
 *	   contained in an APQN bound to the vfio_ap device driver.
 *
 *	4. -EADDRINUSE
 *	   An APQN derived from the cross product of the APID being assigned
 *	   and the APQIs previously assigned is being used by another mediated
 *	   matrix device
 */
static ssize_t assign_adapter_store(struct device *dev,
				    struct device_attribute *attr,
				    const char *buf, size_t count)
{
	int ret;
	unsigned long apid;
	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);

	mutex_lock(&matrix_dev->lock);

	/* If the KVM guest is running, disallow assignment of adapter */
	if (matrix_mdev->kvm) {
		ret = -EBUSY;
		goto done;
	}

	ret = kstrtoul(buf, 0, &apid);
	if (ret)
		goto done;

	if (apid > matrix_mdev->matrix.apm_max) {
		ret = -ENODEV;
		goto done;
	}

	/*
	 * Set the bit in the AP mask (APM) corresponding to the AP adapter
	 * number (APID). The bits in the mask, from most significant to least
	 * significant bit, correspond to APIDs 0-255.
	 */
	ret = vfio_ap_mdev_verify_queues_reserved_for_apid(matrix_mdev, apid);
	if (ret)
		goto done;

	set_bit_inv(apid, matrix_mdev->matrix.apm);

	ret = vfio_ap_mdev_verify_no_sharing(matrix_mdev);
	if (ret)
		goto share_err;

	ret = count;
	goto done;

share_err:
	clear_bit_inv(apid, matrix_mdev->matrix.apm);
done:
	mutex_unlock(&matrix_dev->lock);

	return ret;
}
static DEVICE_ATTR_WO(assign_adapter);

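/*
 * Illustrative sysfs usage (the mdev UUID and APID are examples): assigning
 * adapter 0x05 sets bit 5, in inverse bit order, of the mdev's APM.
 *
 *	echo 0x05 > /sys/devices/vfio_ap/matrix/$uuid/assign_adapter
 *
 * The write fails with -EBUSY while a KVM guest is using the mdev.
 */
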
/**
 * unassign_adapter_store - parses the APID from @buf and clears the
 * corresponding bit in the mediated matrix device's APM
 *
 * @dev:	the matrix device
 * @attr:	the mediated matrix device's unassign_adapter attribute
 * @buf:	a buffer containing the adapter number (APID) to be unassigned
 * @count:	the number of bytes in @buf
 *
 * Return: the number of bytes processed if the APID is valid; otherwise,
 * returns one of the following errors:
 *	-EINVAL if the APID is not a number
 *	-ENODEV if the APID exceeds the maximum value configured for the
 *		system
 */
static ssize_t unassign_adapter_store(struct device *dev,
				      struct device_attribute *attr,
				      const char *buf, size_t count)
{
	int ret;
	unsigned long apid;
	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);

	mutex_lock(&matrix_dev->lock);

	/* If the KVM guest is running, disallow unassignment of adapter */
	if (matrix_mdev->kvm) {
		ret = -EBUSY;
		goto done;
	}

	ret = kstrtoul(buf, 0, &apid);
	if (ret)
		goto done;

	if (apid > matrix_mdev->matrix.apm_max) {
		ret = -ENODEV;
		goto done;
	}

	clear_bit_inv((unsigned long)apid, matrix_mdev->matrix.apm);
	ret = count;
done:
	mutex_unlock(&matrix_dev->lock);
	return ret;
}
static DEVICE_ATTR_WO(unassign_adapter);

static int
vfio_ap_mdev_verify_queues_reserved_for_apqi(struct ap_matrix_mdev *matrix_mdev,
					     unsigned long apqi)
{
	int ret;
	unsigned long apid;
	unsigned long nbits = matrix_mdev->matrix.apm_max + 1;

	if (find_first_bit_inv(matrix_mdev->matrix.apm, nbits) >= nbits)
		return vfio_ap_verify_queue_reserved(NULL, &apqi);

	for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, nbits) {
		ret = vfio_ap_verify_queue_reserved(&apid, &apqi);
		if (ret)
			return ret;
	}

	return 0;
}

/**
 * assign_domain_store - parses the APQI from @buf and sets the
 * corresponding bit in the mediated matrix device's AQM
 *
 * @dev:	the matrix device
 * @attr:	the mediated matrix device's assign_domain attribute
 * @buf:	a buffer containing the AP queue index (APQI) of the domain to
 *		be assigned
 * @count:	the number of bytes in @buf
 *
 * Return: the number of bytes processed if the APQI is valid; otherwise returns
 * one of the following errors:
 *
 *	1. -EINVAL
 *	   The APQI is not a valid number
 *
 *	2. -ENODEV
 *	   The APQI exceeds the maximum value configured for the system
 *
 *	3. -EADDRNOTAVAIL
 *	   An APQN derived from the cross product of the APQI being assigned
 *	   and the APIDs previously assigned is not bound to the vfio_ap device
 *	   driver; or, if no APIDs have yet been assigned, the APQI is not
 *	   contained in an APQN bound to the vfio_ap device driver.
 *
 *	4. -EADDRINUSE
 *	   An APQN derived from the cross product of the APQI being assigned
 *	   and the APIDs previously assigned is being used by another mediated
 *	   matrix device
 */
static ssize_t assign_domain_store(struct device *dev,
				   struct device_attribute *attr,
				   const char *buf, size_t count)
{
	int ret;
	unsigned long apqi;
	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
	unsigned long max_apqi = matrix_mdev->matrix.aqm_max;

	mutex_lock(&matrix_dev->lock);

	/* If the KVM guest is running, disallow assignment of domain */
	if (matrix_mdev->kvm) {
		ret = -EBUSY;
		goto done;
	}

	ret = kstrtoul(buf, 0, &apqi);
	if (ret)
		goto done;
	if (apqi > max_apqi) {
		ret = -ENODEV;
		goto done;
	}

	ret = vfio_ap_mdev_verify_queues_reserved_for_apqi(matrix_mdev, apqi);
	if (ret)
		goto done;

	set_bit_inv(apqi, matrix_mdev->matrix.aqm);

	ret = vfio_ap_mdev_verify_no_sharing(matrix_mdev);
	if (ret)
		goto share_err;

	ret = count;
	goto done;

share_err:
	clear_bit_inv(apqi, matrix_mdev->matrix.aqm);
done:
	mutex_unlock(&matrix_dev->lock);

	return ret;
}
static DEVICE_ATTR_WO(assign_domain);

/**
 * unassign_domain_store - parses the APQI from @buf and clears the
 * corresponding bit in the mediated matrix device's AQM
 *
 * @dev:	the matrix device
 * @attr:	the mediated matrix device's unassign_domain attribute
 * @buf:	a buffer containing the AP queue index (APQI) of the domain to
 *		be unassigned
 * @count:	the number of bytes in @buf
 *
 * Return: the number of bytes processed if the APQI is valid; otherwise,
 * returns one of the following errors:
 *	-EINVAL if the APQI is not a number
 *	-ENODEV if the APQI exceeds the maximum value configured for the system
 */
static ssize_t unassign_domain_store(struct device *dev,
				     struct device_attribute *attr,
				     const char *buf, size_t count)
{
	int ret;
	unsigned long apqi;
	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);

	mutex_lock(&matrix_dev->lock);

	/* If the KVM guest is running, disallow unassignment of domain */
	if (matrix_mdev->kvm) {
		ret = -EBUSY;
		goto done;
	}

	ret = kstrtoul(buf, 0, &apqi);
	if (ret)
		goto done;

	if (apqi > matrix_mdev->matrix.aqm_max) {
		ret = -ENODEV;
		goto done;
	}

	clear_bit_inv((unsigned long)apqi, matrix_mdev->matrix.aqm);
	ret = count;

done:
	mutex_unlock(&matrix_dev->lock);
	return ret;
}
static DEVICE_ATTR_WO(unassign_domain);

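/*
 * Illustrative sysfs usage (values are examples): the guest is granted the
 * queues at the cross product of the assigned adapters and domains, so with
 * adapter 0x05 assigned as in the example above, assigning domain 0x0047
 * grants APQN 05.0047.
 *
 *	echo 0x47 > /sys/devices/vfio_ap/matrix/$uuid/assign_domain
 */
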
/**
 * assign_control_domain_store - parses the domain ID from @buf and sets
 * the corresponding bit in the mediated matrix device's ADM
 *
 * @dev:	the matrix device
 * @attr:	the mediated matrix device's assign_control_domain attribute
 * @buf:	a buffer containing the domain ID to be assigned
 * @count:	the number of bytes in @buf
 *
 * Return: the number of bytes processed if the domain ID is valid; otherwise,
 * returns one of the following errors:
 *	-EINVAL if the ID is not a number
 *	-ENODEV if the ID exceeds the maximum value configured for the system
 */
static ssize_t assign_control_domain_store(struct device *dev,
					   struct device_attribute *attr,
					   const char *buf, size_t count)
{
	int ret;
	unsigned long id;
	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);

	mutex_lock(&matrix_dev->lock);

	/* If the KVM guest is running, disallow assignment of control domain */
	if (matrix_mdev->kvm) {
		ret = -EBUSY;
		goto done;
	}

	ret = kstrtoul(buf, 0, &id);
	if (ret)
		goto done;

	if (id > matrix_mdev->matrix.adm_max) {
		ret = -ENODEV;
		goto done;
	}

	/*
	 * Set the bit in the ADM (bitmask) corresponding to the AP control
	 * domain number (id). The bits in the mask, from most significant to
	 * least significant, correspond to IDs 0 up to one less than the
	 * number of control domains that can be assigned.
	 */
	set_bit_inv(id, matrix_mdev->matrix.adm);
	ret = count;
done:
	mutex_unlock(&matrix_dev->lock);
	return ret;
}
static DEVICE_ATTR_WO(assign_control_domain);

/**
 * unassign_control_domain_store - parses the domain ID from @buf and
 * clears the corresponding bit in the mediated matrix device's ADM
 *
 * @dev:	the matrix device
 * @attr:	the mediated matrix device's unassign_control_domain attribute
 * @buf:	a buffer containing the domain ID to be unassigned
 * @count:	the number of bytes in @buf
 *
 * Return: the number of bytes processed if the domain ID is valid; otherwise,
 * returns one of the following errors:
 *	-EINVAL if the ID is not a number
 *	-ENODEV if the ID exceeds the maximum value configured for the system
 */
static ssize_t unassign_control_domain_store(struct device *dev,
					     struct device_attribute *attr,
					     const char *buf, size_t count)
{
	int ret;
	unsigned long domid;
	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
	unsigned long max_domid = matrix_mdev->matrix.adm_max;

	mutex_lock(&matrix_dev->lock);

	/* If a KVM guest is running, disallow unassignment of control domain */
	if (matrix_mdev->kvm) {
		ret = -EBUSY;
		goto done;
	}

	ret = kstrtoul(buf, 0, &domid);
	if (ret)
		goto done;
	if (domid > max_domid) {
		ret = -ENODEV;
		goto done;
	}

	clear_bit_inv(domid, matrix_mdev->matrix.adm);
	ret = count;
done:
	mutex_unlock(&matrix_dev->lock);
	return ret;
}
static DEVICE_ATTR_WO(unassign_control_domain);

static ssize_t control_domains_show(struct device *dev,
				    struct device_attribute *dev_attr,
				    char *buf)
{
	unsigned long id;
	int nchars = 0;
	int n;
	char *bufpos = buf;
	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
	unsigned long max_domid = matrix_mdev->matrix.adm_max;

	mutex_lock(&matrix_dev->lock);
	for_each_set_bit_inv(id, matrix_mdev->matrix.adm, max_domid + 1) {
		n = sprintf(bufpos, "%04lx\n", id);
		bufpos += n;
		nchars += n;
	}
	mutex_unlock(&matrix_dev->lock);

	return nchars;
}
static DEVICE_ATTR_RO(control_domains);

static ssize_t matrix_show(struct device *dev, struct device_attribute *attr,
			   char *buf)
{
	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
	char *bufpos = buf;
	unsigned long apid;
	unsigned long apqi;
	unsigned long apid1;
	unsigned long apqi1;
	unsigned long napm_bits = matrix_mdev->matrix.apm_max + 1;
	unsigned long naqm_bits = matrix_mdev->matrix.aqm_max + 1;
	int nchars = 0;
	int n;

	apid1 = find_first_bit_inv(matrix_mdev->matrix.apm, napm_bits);
	apqi1 = find_first_bit_inv(matrix_mdev->matrix.aqm, naqm_bits);

	mutex_lock(&matrix_dev->lock);

	if ((apid1 < napm_bits) && (apqi1 < naqm_bits)) {
		for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, napm_bits) {
			for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm,
					     naqm_bits) {
				n = sprintf(bufpos, "%02lx.%04lx\n", apid,
					    apqi);
				bufpos += n;
				nchars += n;
			}
		}
	} else if (apid1 < napm_bits) {
		for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, napm_bits) {
			n = sprintf(bufpos, "%02lx.\n", apid);
			bufpos += n;
			nchars += n;
		}
	} else if (apqi1 < naqm_bits) {
		for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, naqm_bits) {
			n = sprintf(bufpos, ".%04lx\n", apqi);
			bufpos += n;
			nchars += n;
		}
	}

	mutex_unlock(&matrix_dev->lock);

	return nchars;
}
static DEVICE_ATTR_RO(matrix);

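/*
 * Illustrative output (values are examples): with adapter 0x05 and domains
 * 0x0004 and 0x0047 assigned, reading the matrix attribute produces one APQN
 * per line in the "%02lx.%04lx" format used above:
 *
 *	$ cat /sys/devices/vfio_ap/matrix/$uuid/matrix
 *	05.0004
 *	05.0047
 */
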
static struct attribute *vfio_ap_mdev_attrs[] = {
	&dev_attr_assign_adapter.attr,
	&dev_attr_unassign_adapter.attr,
	&dev_attr_assign_domain.attr,
	&dev_attr_unassign_domain.attr,
	&dev_attr_assign_control_domain.attr,
	&dev_attr_unassign_control_domain.attr,
	&dev_attr_control_domains.attr,
	&dev_attr_matrix.attr,
	NULL,
};

static struct attribute_group vfio_ap_mdev_attr_group = {
	.attrs = vfio_ap_mdev_attrs
};

static const struct attribute_group *vfio_ap_mdev_attr_groups[] = {
	&vfio_ap_mdev_attr_group,
	NULL
};

/**
 * vfio_ap_mdev_set_kvm - sets all data for @matrix_mdev that are needed
 * to manage AP resources for the guest whose state is represented by @kvm
 *
 * @matrix_mdev: a mediated matrix device
 * @kvm: reference to KVM instance
 *
 * Return: 0 if no other mediated matrix device has a reference to @kvm;
 * otherwise, returns -EPERM.
 */
static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev,
				struct kvm *kvm)
{
	struct ap_matrix_mdev *m;

	if (kvm->arch.crypto.crycbd) {
		down_write(&kvm->arch.crypto.pqap_hook_rwsem);
		kvm->arch.crypto.pqap_hook = &matrix_mdev->pqap_hook;
		up_write(&kvm->arch.crypto.pqap_hook_rwsem);

		mutex_lock(&kvm->lock);
		mutex_lock(&matrix_dev->lock);

		list_for_each_entry(m, &matrix_dev->mdev_list, node) {
			if (m != matrix_mdev && m->kvm == kvm) {
				mutex_unlock(&kvm->lock);
				mutex_unlock(&matrix_dev->lock);
				return -EPERM;
			}
		}

		kvm_get_kvm(kvm);
		matrix_mdev->kvm = kvm;
		kvm_arch_crypto_set_masks(kvm,
					  matrix_mdev->matrix.apm,
					  matrix_mdev->matrix.aqm,
					  matrix_mdev->matrix.adm);

		mutex_unlock(&kvm->lock);
		mutex_unlock(&matrix_dev->lock);
	}

	return 0;
}

/**
 * vfio_ap_mdev_iommu_notifier - IOMMU notifier callback
 *
 * @nb: The notifier block
 * @action: Action to be taken
 * @data: data associated with the request
 *
 * For an UNMAP request, unpin the guest IOVA (the NIB guest address we
 * pinned before). Other requests are ignored.
 *
 * Return: for an UNMAP request, NOTIFY_OK; otherwise NOTIFY_DONE.
 */
static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
				       unsigned long action, void *data)
{
	struct ap_matrix_mdev *matrix_mdev;

	matrix_mdev = container_of(nb, struct ap_matrix_mdev, iommu_notifier);

	if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
		struct vfio_iommu_type1_dma_unmap *unmap = data;
		unsigned long g_pfn = unmap->iova >> PAGE_SHIFT;

		vfio_unpin_pages(&matrix_mdev->vdev, &g_pfn, 1);
		return NOTIFY_OK;
	}

	return NOTIFY_DONE;
}

/**
 * vfio_ap_mdev_unset_kvm - performs clean-up of resources no longer needed
 * by @matrix_mdev.
 *
 * @matrix_mdev: a matrix mediated device
 */
static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev)
{
	struct kvm *kvm = matrix_mdev->kvm;

	if (kvm && kvm->arch.crypto.crycbd) {
		down_write(&kvm->arch.crypto.pqap_hook_rwsem);
		kvm->arch.crypto.pqap_hook = NULL;
		up_write(&kvm->arch.crypto.pqap_hook_rwsem);

		mutex_lock(&kvm->lock);
		mutex_lock(&matrix_dev->lock);

		kvm_arch_crypto_clear_masks(kvm);
		vfio_ap_mdev_reset_queues(matrix_mdev);
		kvm_put_kvm(kvm);
		matrix_mdev->kvm = NULL;

		mutex_unlock(&kvm->lock);
		mutex_unlock(&matrix_dev->lock);
	}
}

static struct vfio_ap_queue *vfio_ap_find_queue(int apqn)
{
	struct device *dev;
	struct vfio_ap_queue *q = NULL;

	dev = driver_find_device(&matrix_dev->vfio_ap_drv->driver, NULL,
				 &apqn, match_apqn);
	if (dev) {
		q = dev_get_drvdata(dev);
		put_device(dev);
	}

	return q;
}

int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q,
			     unsigned int retry)
{
	struct ap_queue_status status;
	int ret;
	int retry2 = 2;

	if (!q)
		return 0;

retry_zapq:
	status = ap_zapq(q->apqn);
	switch (status.response_code) {
	case AP_RESPONSE_NORMAL:
		ret = 0;
		break;
	case AP_RESPONSE_RESET_IN_PROGRESS:
		if (retry--) {
			msleep(20);
			goto retry_zapq;
		}
		ret = -EBUSY;
		break;
	case AP_RESPONSE_Q_NOT_AVAIL:
	case AP_RESPONSE_DECONFIGURED:
	case AP_RESPONSE_CHECKSTOPPED:
		WARN_ON_ONCE(status.irq_enabled);
		ret = -EBUSY;
		goto free_resources;
	default:
		/* things are really broken, give up */
		WARN(true, "PQAP/ZAPQ completed with invalid rc (%x)\n",
		     status.response_code);
		return -EIO;
	}

	/* wait for the reset to take effect */
	while (retry2--) {
		if (status.queue_empty && !status.irq_enabled)
			break;
		msleep(20);
		status = ap_tapq(q->apqn, NULL);
	}
	WARN_ON_ONCE(retry2 <= 0);

free_resources:
	vfio_ap_free_aqic_resources(q);

	return ret;
}

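/**
 * vfio_ap_mdev_reset_queues - reset all queues assigned to a mediated device
 *
 * @matrix_mdev: the mediated matrix device whose queues shall be reset
 *
 * Resets each queue in the cross product of the mdev's APM and AQM. A queue
 * that turns out to be busy or not operational does not stop the loop; the
 * remaining queues are still reset.
 *
 * Return: 0 if all resets succeeded; otherwise, the last nonzero value
 * returned by vfio_ap_mdev_reset_queue().
 */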
static int vfio_ap_mdev_reset_queues(struct ap_matrix_mdev *matrix_mdev)
{
	int ret;
	int rc = 0;
	unsigned long apid, apqi;
	struct vfio_ap_queue *q;

	for_each_set_bit_inv(apid, matrix_mdev->matrix.apm,
			     matrix_mdev->matrix.apm_max + 1) {
		for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm,
				     matrix_mdev->matrix.aqm_max + 1) {
			q = vfio_ap_find_queue(AP_MKQID(apid, apqi));
			ret = vfio_ap_mdev_reset_queue(q, 1);
			/*
			 * Regardless of whether a queue turns out to be busy,
			 * or is not operational, we need to continue resetting
			 * the remaining queues.
			 */
			if (ret)
				rc = ret;
		}
	}

	return rc;
}

static int vfio_ap_mdev_open_device(struct vfio_device *vdev)
{
	struct ap_matrix_mdev *matrix_mdev =
		container_of(vdev, struct ap_matrix_mdev, vdev);
	unsigned long events;
	int ret;

	if (!vdev->kvm)
		return -EINVAL;

	ret = vfio_ap_mdev_set_kvm(matrix_mdev, vdev->kvm);
	if (ret)
		return ret;

	matrix_mdev->iommu_notifier.notifier_call = vfio_ap_mdev_iommu_notifier;
	events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
	ret = vfio_register_notifier(vdev, VFIO_IOMMU_NOTIFY, &events,
				     &matrix_mdev->iommu_notifier);
	if (ret)
		goto err_kvm;
	return 0;

err_kvm:
	vfio_ap_mdev_unset_kvm(matrix_mdev);
	return ret;
}

static void vfio_ap_mdev_close_device(struct vfio_device *vdev)
{
	struct ap_matrix_mdev *matrix_mdev =
		container_of(vdev, struct ap_matrix_mdev, vdev);

	vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY,
				 &matrix_mdev->iommu_notifier);
	vfio_ap_mdev_unset_kvm(matrix_mdev);
}

static int vfio_ap_mdev_get_device_info(unsigned long arg)
{
	unsigned long minsz;
	struct vfio_device_info info;

	minsz = offsetofend(struct vfio_device_info, num_irqs);

	if (copy_from_user(&info, (void __user *)arg, minsz))
		return -EFAULT;

	if (info.argsz < minsz)
		return -EINVAL;

	info.flags = VFIO_DEVICE_FLAGS_AP | VFIO_DEVICE_FLAGS_RESET;
	info.num_regions = 0;
	info.num_irqs = 0;

	return copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0;
}

static ssize_t vfio_ap_mdev_ioctl(struct vfio_device *vdev,
				  unsigned int cmd, unsigned long arg)
{
	struct ap_matrix_mdev *matrix_mdev =
		container_of(vdev, struct ap_matrix_mdev, vdev);
	int ret;

	mutex_lock(&matrix_dev->lock);
	switch (cmd) {
	case VFIO_DEVICE_GET_INFO:
		ret = vfio_ap_mdev_get_device_info(arg);
		break;
	case VFIO_DEVICE_RESET:
		ret = vfio_ap_mdev_reset_queues(matrix_mdev);
		break;
	default:
		ret = -EOPNOTSUPP;
		break;
	}
	mutex_unlock(&matrix_dev->lock);

	return ret;
}

static const struct vfio_device_ops vfio_ap_matrix_dev_ops = {
	.open_device = vfio_ap_mdev_open_device,
	.close_device = vfio_ap_mdev_close_device,
	.ioctl = vfio_ap_mdev_ioctl,
};

static struct mdev_driver vfio_ap_matrix_driver = {
	.driver = {
		.name = "vfio_ap_mdev",
		.owner = THIS_MODULE,
		.mod_name = KBUILD_MODNAME,
		.dev_groups = vfio_ap_mdev_attr_groups,
	},
	.probe = vfio_ap_mdev_probe,
	.remove = vfio_ap_mdev_remove,
	.supported_type_groups = vfio_ap_mdev_type_groups,
};

int vfio_ap_mdev_register(void)
{
	int ret;

	atomic_set(&matrix_dev->available_instances, MAX_ZDEV_ENTRIES_EXT);

	ret = mdev_register_driver(&vfio_ap_matrix_driver);
	if (ret)
		return ret;

	ret = mdev_register_device(&matrix_dev->device, &vfio_ap_matrix_driver);
	if (ret)
		goto err_driver;
	return 0;

err_driver:
	mdev_unregister_driver(&vfio_ap_matrix_driver);
	return ret;
}

void vfio_ap_mdev_unregister(void)
{
	mdev_unregister_device(&matrix_dev->device);
	mdev_unregister_driver(&vfio_ap_matrix_driver);
}