1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * Adjunct processor matrix VFIO device driver callbacks. 4 * 5 * Copyright IBM Corp. 2018 6 * 7 * Author(s): Tony Krowiak <akrowiak@linux.ibm.com> 8 * Halil Pasic <pasic@linux.ibm.com> 9 * Pierre Morel <pmorel@linux.ibm.com> 10 */ 11 #include <linux/string.h> 12 #include <linux/vfio.h> 13 #include <linux/device.h> 14 #include <linux/list.h> 15 #include <linux/ctype.h> 16 #include <linux/bitops.h> 17 #include <linux/kvm_host.h> 18 #include <linux/module.h> 19 #include <linux/uuid.h> 20 #include <asm/kvm.h> 21 #include <asm/zcrypt.h> 22 23 #include "vfio_ap_private.h" 24 #include "vfio_ap_debug.h" 25 26 #define VFIO_AP_MDEV_TYPE_HWVIRT "passthrough" 27 #define VFIO_AP_MDEV_NAME_HWVIRT "VFIO AP Passthrough Device" 28 29 static int vfio_ap_mdev_reset_queues(struct ap_matrix_mdev *matrix_mdev); 30 static struct vfio_ap_queue *vfio_ap_find_queue(int apqn); 31 static const struct vfio_device_ops vfio_ap_matrix_dev_ops; 32 33 static int match_apqn(struct device *dev, const void *data) 34 { 35 struct vfio_ap_queue *q = dev_get_drvdata(dev); 36 37 return (q->apqn == *(int *)(data)) ? 1 : 0; 38 } 39 40 /** 41 * vfio_ap_get_queue - retrieve a queue with a specific APQN from a list 42 * @matrix_mdev: the associated mediated matrix 43 * @apqn: The queue APQN 44 * 45 * Retrieve a queue with a specific APQN from the list of the 46 * devices of the vfio_ap_drv. 47 * Verify that the APID and the APQI are set in the matrix. 48 * 49 * Return: the pointer to the associated vfio_ap_queue 50 */ 51 static struct vfio_ap_queue *vfio_ap_get_queue( 52 struct ap_matrix_mdev *matrix_mdev, 53 int apqn) 54 { 55 struct vfio_ap_queue *q; 56 57 if (!test_bit_inv(AP_QID_CARD(apqn), matrix_mdev->matrix.apm)) 58 return NULL; 59 if (!test_bit_inv(AP_QID_QUEUE(apqn), matrix_mdev->matrix.aqm)) 60 return NULL; 61 62 q = vfio_ap_find_queue(apqn); 63 if (q) 64 q->matrix_mdev = matrix_mdev; 65 66 return q; 67 } 68 69 /** 70 * vfio_ap_wait_for_irqclear - clears the IR bit or gives up after 5 tries 71 * @apqn: The AP Queue number 72 * 73 * Checks the IRQ bit for the status of this APQN using ap_tapq. 74 * Returns if the ap_tapq function succeeded and the bit is clear. 75 * Returns if ap_tapq function failed with invalid, deconfigured or 76 * checkstopped AP. 77 * Otherwise retries up to 5 times after waiting 20ms. 78 */ 79 static void vfio_ap_wait_for_irqclear(int apqn) 80 { 81 struct ap_queue_status status; 82 int retry = 5; 83 84 do { 85 status = ap_tapq(apqn, NULL); 86 switch (status.response_code) { 87 case AP_RESPONSE_NORMAL: 88 case AP_RESPONSE_RESET_IN_PROGRESS: 89 if (!status.irq_enabled) 90 return; 91 fallthrough; 92 case AP_RESPONSE_BUSY: 93 msleep(20); 94 break; 95 case AP_RESPONSE_Q_NOT_AVAIL: 96 case AP_RESPONSE_DECONFIGURED: 97 case AP_RESPONSE_CHECKSTOPPED: 98 default: 99 WARN_ONCE(1, "%s: tapq rc %02x: %04x\n", __func__, 100 status.response_code, apqn); 101 return; 102 } 103 } while (--retry); 104 105 WARN_ONCE(1, "%s: tapq rc %02x: %04x could not clear IR bit\n", 106 __func__, status.response_code, apqn); 107 } 108 109 /** 110 * vfio_ap_free_aqic_resources - free vfio_ap_queue resources 111 * @q: The vfio_ap_queue 112 * 113 * Unregisters the ISC in the GIB when the saved ISC not invalid. 114 * Unpins the guest's page holding the NIB when it exists. 115 * Resets the saved_pfn and saved_isc to invalid values. 116 */ 117 static void vfio_ap_free_aqic_resources(struct vfio_ap_queue *q) 118 { 119 if (!q) 120 return; 121 if (q->saved_isc != VFIO_AP_ISC_INVALID && 122 !WARN_ON(!(q->matrix_mdev && q->matrix_mdev->kvm))) { 123 kvm_s390_gisc_unregister(q->matrix_mdev->kvm, q->saved_isc); 124 q->saved_isc = VFIO_AP_ISC_INVALID; 125 } 126 if (q->saved_pfn && !WARN_ON(!q->matrix_mdev)) { 127 vfio_unpin_pages(mdev_dev(q->matrix_mdev->mdev), 128 &q->saved_pfn, 1); 129 q->saved_pfn = 0; 130 } 131 } 132 133 /** 134 * vfio_ap_irq_disable - disables and clears an ap_queue interrupt 135 * @q: The vfio_ap_queue 136 * 137 * Uses ap_aqic to disable the interruption and in case of success, reset 138 * in progress or IRQ disable command already proceeded: calls 139 * vfio_ap_wait_for_irqclear() to check for the IRQ bit to be clear 140 * and calls vfio_ap_free_aqic_resources() to free the resources associated 141 * with the AP interrupt handling. 142 * 143 * In the case the AP is busy, or a reset is in progress, 144 * retries after 20ms, up to 5 times. 145 * 146 * Returns if ap_aqic function failed with invalid, deconfigured or 147 * checkstopped AP. 148 * 149 * Return: &struct ap_queue_status 150 */ 151 static struct ap_queue_status vfio_ap_irq_disable(struct vfio_ap_queue *q) 152 { 153 struct ap_qirq_ctrl aqic_gisa = {}; 154 struct ap_queue_status status; 155 int retries = 5; 156 157 do { 158 status = ap_aqic(q->apqn, aqic_gisa, NULL); 159 switch (status.response_code) { 160 case AP_RESPONSE_OTHERWISE_CHANGED: 161 case AP_RESPONSE_NORMAL: 162 vfio_ap_wait_for_irqclear(q->apqn); 163 goto end_free; 164 case AP_RESPONSE_RESET_IN_PROGRESS: 165 case AP_RESPONSE_BUSY: 166 msleep(20); 167 break; 168 case AP_RESPONSE_Q_NOT_AVAIL: 169 case AP_RESPONSE_DECONFIGURED: 170 case AP_RESPONSE_CHECKSTOPPED: 171 case AP_RESPONSE_INVALID_ADDRESS: 172 default: 173 /* All cases in default means AP not operational */ 174 WARN_ONCE(1, "%s: ap_aqic status %d\n", __func__, 175 status.response_code); 176 goto end_free; 177 } 178 } while (retries--); 179 180 WARN_ONCE(1, "%s: ap_aqic status %d\n", __func__, 181 status.response_code); 182 end_free: 183 vfio_ap_free_aqic_resources(q); 184 q->matrix_mdev = NULL; 185 return status; 186 } 187 188 /** 189 * vfio_ap_validate_nib - validate a notification indicator byte (nib) address. 190 * 191 * @vcpu: the object representing the vcpu executing the PQAP(AQIC) instruction. 192 * @nib: the location for storing the nib address. 193 * @g_pfn: the location for storing the page frame number of the page containing 194 * the nib. 195 * 196 * When the PQAP(AQIC) instruction is executed, general register 2 contains the 197 * address of the notification indicator byte (nib) used for IRQ notification. 198 * This function parses the nib from gr2 and calculates the page frame 199 * number for the guest of the page containing the nib. The values are 200 * stored in @nib and @g_pfn respectively. 201 * 202 * The g_pfn of the nib is then validated to ensure the nib address is valid. 203 * 204 * Return: returns zero if the nib address is a valid; otherwise, returns 205 * -EINVAL. 206 */ 207 static int vfio_ap_validate_nib(struct kvm_vcpu *vcpu, unsigned long *nib, 208 unsigned long *g_pfn) 209 { 210 *nib = vcpu->run->s.regs.gprs[2]; 211 *g_pfn = *nib >> PAGE_SHIFT; 212 213 if (kvm_is_error_hva(gfn_to_hva(vcpu->kvm, *g_pfn))) 214 return -EINVAL; 215 216 return 0; 217 } 218 219 /** 220 * vfio_ap_irq_enable - Enable Interruption for a APQN 221 * 222 * @q: the vfio_ap_queue holding AQIC parameters 223 * @isc: the guest ISC to register with the GIB interface 224 * @vcpu: the vcpu object containing the registers specifying the parameters 225 * passed to the PQAP(AQIC) instruction. 226 * 227 * Pin the NIB saved in *q 228 * Register the guest ISC to GIB interface and retrieve the 229 * host ISC to issue the host side PQAP/AQIC 230 * 231 * Response.status may be set to AP_RESPONSE_INVALID_ADDRESS in case the 232 * vfio_pin_pages failed. 233 * 234 * Otherwise return the ap_queue_status returned by the ap_aqic(), 235 * all retry handling will be done by the guest. 236 * 237 * Return: &struct ap_queue_status 238 */ 239 static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q, 240 int isc, 241 struct kvm_vcpu *vcpu) 242 { 243 unsigned long nib; 244 struct ap_qirq_ctrl aqic_gisa = {}; 245 struct ap_queue_status status = {}; 246 struct kvm_s390_gisa *gisa; 247 int nisc; 248 struct kvm *kvm; 249 unsigned long h_nib, g_pfn, h_pfn; 250 int ret; 251 252 /* Verify that the notification indicator byte address is valid */ 253 if (vfio_ap_validate_nib(vcpu, &nib, &g_pfn)) { 254 VFIO_AP_DBF_WARN("%s: invalid NIB address: nib=%#lx, g_pfn=%#lx, apqn=%#04x\n", 255 __func__, nib, g_pfn, q->apqn); 256 257 status.response_code = AP_RESPONSE_INVALID_ADDRESS; 258 return status; 259 } 260 261 ret = vfio_pin_pages(mdev_dev(q->matrix_mdev->mdev), &g_pfn, 1, 262 IOMMU_READ | IOMMU_WRITE, &h_pfn); 263 switch (ret) { 264 case 1: 265 break; 266 default: 267 VFIO_AP_DBF_WARN("%s: vfio_pin_pages failed: rc=%d," 268 "nib=%#lx, g_pfn=%#lx, apqn=%#04x\n", 269 __func__, ret, nib, g_pfn, q->apqn); 270 271 status.response_code = AP_RESPONSE_INVALID_ADDRESS; 272 return status; 273 } 274 275 kvm = q->matrix_mdev->kvm; 276 gisa = kvm->arch.gisa_int.origin; 277 278 h_nib = (h_pfn << PAGE_SHIFT) | (nib & ~PAGE_MASK); 279 aqic_gisa.gisc = isc; 280 281 nisc = kvm_s390_gisc_register(kvm, isc); 282 if (nisc < 0) { 283 VFIO_AP_DBF_WARN("%s: gisc registration failed: nisc=%d, isc=%d, apqn=%#04x\n", 284 __func__, nisc, isc, q->apqn); 285 286 status.response_code = AP_RESPONSE_INVALID_GISA; 287 return status; 288 } 289 290 aqic_gisa.isc = nisc; 291 aqic_gisa.ir = 1; 292 aqic_gisa.gisa = (uint64_t)gisa >> 4; 293 294 status = ap_aqic(q->apqn, aqic_gisa, (void *)h_nib); 295 switch (status.response_code) { 296 case AP_RESPONSE_NORMAL: 297 /* See if we did clear older IRQ configuration */ 298 vfio_ap_free_aqic_resources(q); 299 q->saved_pfn = g_pfn; 300 q->saved_isc = isc; 301 break; 302 case AP_RESPONSE_OTHERWISE_CHANGED: 303 /* We could not modify IRQ setings: clear new configuration */ 304 vfio_unpin_pages(mdev_dev(q->matrix_mdev->mdev), &g_pfn, 1); 305 kvm_s390_gisc_unregister(kvm, isc); 306 break; 307 default: 308 pr_warn("%s: apqn %04x: response: %02x\n", __func__, q->apqn, 309 status.response_code); 310 vfio_ap_irq_disable(q); 311 break; 312 } 313 314 if (status.response_code != AP_RESPONSE_NORMAL) { 315 VFIO_AP_DBF_WARN("%s: PQAP(AQIC) failed with status=%#02x: " 316 "zone=%#x, ir=%#x, gisc=%#x, f=%#x," 317 "gisa=%#x, isc=%#x, apqn=%#04x\n", 318 __func__, status.response_code, 319 aqic_gisa.zone, aqic_gisa.ir, aqic_gisa.gisc, 320 aqic_gisa.gf, aqic_gisa.gisa, aqic_gisa.isc, 321 q->apqn); 322 } 323 324 return status; 325 } 326 327 /** 328 * vfio_ap_le_guid_to_be_uuid - convert a little endian guid array into an array 329 * of big endian elements that can be passed by 330 * value to an s390dbf sprintf event function to 331 * format a UUID string. 332 * 333 * @guid: the object containing the little endian guid 334 * @uuid: a six-element array of long values that can be passed by value as 335 * arguments for a formatting string specifying a UUID. 336 * 337 * The S390 Debug Feature (s390dbf) allows the use of "%s" in the sprintf 338 * event functions if the memory for the passed string is available as long as 339 * the debug feature exists. Since a mediated device can be removed at any 340 * time, it's name can not be used because %s passes the reference to the string 341 * in memory and the reference will go stale once the device is removed . 342 * 343 * The s390dbf string formatting function allows a maximum of 9 arguments for a 344 * message to be displayed in the 'sprintf' view. In order to use the bytes 345 * comprising the mediated device's UUID to display the mediated device name, 346 * they will have to be converted into an array whose elements can be passed by 347 * value to sprintf. For example: 348 * 349 * guid array: { 83, 78, 17, 62, bb, f1, f0, 47, 91, 4d, 32, a2, 2e, 3a, 88, 04 } 350 * mdev name: 62177883-f1bb-47f0-914d-32a22e3a8804 351 * array returned: { 62177883, f1bb, 47f0, 914d, 32a2, 2e3a8804 } 352 * formatting string: "%08lx-%04lx-%04lx-%04lx-%02lx%04lx" 353 */ 354 static void vfio_ap_le_guid_to_be_uuid(guid_t *guid, unsigned long *uuid) 355 { 356 /* 357 * The input guid is ordered in little endian, so it needs to be 358 * reordered for displaying a UUID as a string. This specifies the 359 * guid indices in proper order. 360 */ 361 uuid[0] = le32_to_cpup((__le32 *)guid); 362 uuid[1] = le16_to_cpup((__le16 *)&guid->b[4]); 363 uuid[2] = le16_to_cpup((__le16 *)&guid->b[6]); 364 uuid[3] = *((__u16 *)&guid->b[8]); 365 uuid[4] = *((__u16 *)&guid->b[10]); 366 uuid[5] = *((__u32 *)&guid->b[12]); 367 } 368 369 /** 370 * handle_pqap - PQAP instruction callback 371 * 372 * @vcpu: The vcpu on which we received the PQAP instruction 373 * 374 * Get the general register contents to initialize internal variables. 375 * REG[0]: APQN 376 * REG[1]: IR and ISC 377 * REG[2]: NIB 378 * 379 * Response.status may be set to following Response Code: 380 * - AP_RESPONSE_Q_NOT_AVAIL: if the queue is not available 381 * - AP_RESPONSE_DECONFIGURED: if the queue is not configured 382 * - AP_RESPONSE_NORMAL (0) : in case of successs 383 * Check vfio_ap_setirq() and vfio_ap_clrirq() for other possible RC. 384 * We take the matrix_dev lock to ensure serialization on queues and 385 * mediated device access. 386 * 387 * Return: 0 if we could handle the request inside KVM. 388 * Otherwise, returns -EOPNOTSUPP to let QEMU handle the fault. 389 */ 390 static int handle_pqap(struct kvm_vcpu *vcpu) 391 { 392 uint64_t status; 393 uint16_t apqn; 394 unsigned long uuid[6]; 395 struct vfio_ap_queue *q; 396 struct ap_queue_status qstatus = { 397 .response_code = AP_RESPONSE_Q_NOT_AVAIL, }; 398 struct ap_matrix_mdev *matrix_mdev; 399 400 apqn = vcpu->run->s.regs.gprs[0] & 0xffff; 401 402 /* If we do not use the AIV facility just go to userland */ 403 if (!(vcpu->arch.sie_block->eca & ECA_AIV)) { 404 VFIO_AP_DBF_WARN("%s: AIV facility not installed: apqn=0x%04x, eca=0x%04x\n", 405 __func__, apqn, vcpu->arch.sie_block->eca); 406 407 return -EOPNOTSUPP; 408 } 409 410 mutex_lock(&matrix_dev->lock); 411 if (!vcpu->kvm->arch.crypto.pqap_hook) { 412 VFIO_AP_DBF_WARN("%s: PQAP(AQIC) hook not registered with the vfio_ap driver: apqn=0x%04x\n", 413 __func__, apqn); 414 goto out_unlock; 415 } 416 417 matrix_mdev = container_of(vcpu->kvm->arch.crypto.pqap_hook, 418 struct ap_matrix_mdev, pqap_hook); 419 420 /* If the there is no guest using the mdev, there is nothing to do */ 421 if (!matrix_mdev->kvm) { 422 vfio_ap_le_guid_to_be_uuid(&matrix_mdev->mdev->uuid, uuid); 423 VFIO_AP_DBF_WARN("%s: mdev %08lx-%04lx-%04lx-%04lx-%04lx%08lx not in use: apqn=0x%04x\n", 424 __func__, uuid[0], uuid[1], uuid[2], 425 uuid[3], uuid[4], uuid[5], apqn); 426 goto out_unlock; 427 } 428 429 q = vfio_ap_get_queue(matrix_mdev, apqn); 430 if (!q) { 431 VFIO_AP_DBF_WARN("%s: Queue %02x.%04x not bound to the vfio_ap driver\n", 432 __func__, AP_QID_CARD(apqn), 433 AP_QID_QUEUE(apqn)); 434 goto out_unlock; 435 } 436 437 status = vcpu->run->s.regs.gprs[1]; 438 439 /* If IR bit(16) is set we enable the interrupt */ 440 if ((status >> (63 - 16)) & 0x01) 441 qstatus = vfio_ap_irq_enable(q, status & 0x07, vcpu); 442 else 443 qstatus = vfio_ap_irq_disable(q); 444 445 out_unlock: 446 memcpy(&vcpu->run->s.regs.gprs[1], &qstatus, sizeof(qstatus)); 447 vcpu->run->s.regs.gprs[1] >>= 32; 448 mutex_unlock(&matrix_dev->lock); 449 return 0; 450 } 451 452 static void vfio_ap_matrix_init(struct ap_config_info *info, 453 struct ap_matrix *matrix) 454 { 455 matrix->apm_max = info->apxa ? info->Na : 63; 456 matrix->aqm_max = info->apxa ? info->Nd : 15; 457 matrix->adm_max = info->apxa ? info->Nd : 15; 458 } 459 460 static int vfio_ap_mdev_probe(struct mdev_device *mdev) 461 { 462 struct ap_matrix_mdev *matrix_mdev; 463 int ret; 464 465 if ((atomic_dec_if_positive(&matrix_dev->available_instances) < 0)) 466 return -EPERM; 467 468 matrix_mdev = kzalloc(sizeof(*matrix_mdev), GFP_KERNEL); 469 if (!matrix_mdev) { 470 ret = -ENOMEM; 471 goto err_dec_available; 472 } 473 vfio_init_group_dev(&matrix_mdev->vdev, &mdev->dev, 474 &vfio_ap_matrix_dev_ops); 475 476 matrix_mdev->mdev = mdev; 477 vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->matrix); 478 matrix_mdev->pqap_hook = handle_pqap; 479 mutex_lock(&matrix_dev->lock); 480 list_add(&matrix_mdev->node, &matrix_dev->mdev_list); 481 mutex_unlock(&matrix_dev->lock); 482 483 ret = vfio_register_emulated_iommu_dev(&matrix_mdev->vdev); 484 if (ret) 485 goto err_list; 486 dev_set_drvdata(&mdev->dev, matrix_mdev); 487 return 0; 488 489 err_list: 490 mutex_lock(&matrix_dev->lock); 491 list_del(&matrix_mdev->node); 492 mutex_unlock(&matrix_dev->lock); 493 vfio_uninit_group_dev(&matrix_mdev->vdev); 494 kfree(matrix_mdev); 495 err_dec_available: 496 atomic_inc(&matrix_dev->available_instances); 497 return ret; 498 } 499 500 static void vfio_ap_mdev_remove(struct mdev_device *mdev) 501 { 502 struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(&mdev->dev); 503 504 vfio_unregister_group_dev(&matrix_mdev->vdev); 505 506 mutex_lock(&matrix_dev->lock); 507 vfio_ap_mdev_reset_queues(matrix_mdev); 508 list_del(&matrix_mdev->node); 509 mutex_unlock(&matrix_dev->lock); 510 vfio_uninit_group_dev(&matrix_mdev->vdev); 511 kfree(matrix_mdev); 512 atomic_inc(&matrix_dev->available_instances); 513 } 514 515 static ssize_t name_show(struct mdev_type *mtype, 516 struct mdev_type_attribute *attr, char *buf) 517 { 518 return sprintf(buf, "%s\n", VFIO_AP_MDEV_NAME_HWVIRT); 519 } 520 521 static MDEV_TYPE_ATTR_RO(name); 522 523 static ssize_t available_instances_show(struct mdev_type *mtype, 524 struct mdev_type_attribute *attr, 525 char *buf) 526 { 527 return sprintf(buf, "%d\n", 528 atomic_read(&matrix_dev->available_instances)); 529 } 530 531 static MDEV_TYPE_ATTR_RO(available_instances); 532 533 static ssize_t device_api_show(struct mdev_type *mtype, 534 struct mdev_type_attribute *attr, char *buf) 535 { 536 return sprintf(buf, "%s\n", VFIO_DEVICE_API_AP_STRING); 537 } 538 539 static MDEV_TYPE_ATTR_RO(device_api); 540 541 static struct attribute *vfio_ap_mdev_type_attrs[] = { 542 &mdev_type_attr_name.attr, 543 &mdev_type_attr_device_api.attr, 544 &mdev_type_attr_available_instances.attr, 545 NULL, 546 }; 547 548 static struct attribute_group vfio_ap_mdev_hwvirt_type_group = { 549 .name = VFIO_AP_MDEV_TYPE_HWVIRT, 550 .attrs = vfio_ap_mdev_type_attrs, 551 }; 552 553 static struct attribute_group *vfio_ap_mdev_type_groups[] = { 554 &vfio_ap_mdev_hwvirt_type_group, 555 NULL, 556 }; 557 558 struct vfio_ap_queue_reserved { 559 unsigned long *apid; 560 unsigned long *apqi; 561 bool reserved; 562 }; 563 564 /** 565 * vfio_ap_has_queue - determines if the AP queue containing the target in @data 566 * 567 * @dev: an AP queue device 568 * @data: a struct vfio_ap_queue_reserved reference 569 * 570 * Flags whether the AP queue device (@dev) has a queue ID containing the APQN, 571 * apid or apqi specified in @data: 572 * 573 * - If @data contains both an apid and apqi value, then @data will be flagged 574 * as reserved if the APID and APQI fields for the AP queue device matches 575 * 576 * - If @data contains only an apid value, @data will be flagged as 577 * reserved if the APID field in the AP queue device matches 578 * 579 * - If @data contains only an apqi value, @data will be flagged as 580 * reserved if the APQI field in the AP queue device matches 581 * 582 * Return: 0 to indicate the input to function succeeded. Returns -EINVAL if 583 * @data does not contain either an apid or apqi. 584 */ 585 static int vfio_ap_has_queue(struct device *dev, void *data) 586 { 587 struct vfio_ap_queue_reserved *qres = data; 588 struct ap_queue *ap_queue = to_ap_queue(dev); 589 ap_qid_t qid; 590 unsigned long id; 591 592 if (qres->apid && qres->apqi) { 593 qid = AP_MKQID(*qres->apid, *qres->apqi); 594 if (qid == ap_queue->qid) 595 qres->reserved = true; 596 } else if (qres->apid && !qres->apqi) { 597 id = AP_QID_CARD(ap_queue->qid); 598 if (id == *qres->apid) 599 qres->reserved = true; 600 } else if (!qres->apid && qres->apqi) { 601 id = AP_QID_QUEUE(ap_queue->qid); 602 if (id == *qres->apqi) 603 qres->reserved = true; 604 } else { 605 return -EINVAL; 606 } 607 608 return 0; 609 } 610 611 /** 612 * vfio_ap_verify_queue_reserved - verifies that the AP queue containing 613 * @apid or @aqpi is reserved 614 * 615 * @apid: an AP adapter ID 616 * @apqi: an AP queue index 617 * 618 * Verifies that the AP queue with @apid/@apqi is reserved by the VFIO AP device 619 * driver according to the following rules: 620 * 621 * - If both @apid and @apqi are not NULL, then there must be an AP queue 622 * device bound to the vfio_ap driver with the APQN identified by @apid and 623 * @apqi 624 * 625 * - If only @apid is not NULL, then there must be an AP queue device bound 626 * to the vfio_ap driver with an APQN containing @apid 627 * 628 * - If only @apqi is not NULL, then there must be an AP queue device bound 629 * to the vfio_ap driver with an APQN containing @apqi 630 * 631 * Return: 0 if the AP queue is reserved; otherwise, returns -EADDRNOTAVAIL. 632 */ 633 static int vfio_ap_verify_queue_reserved(unsigned long *apid, 634 unsigned long *apqi) 635 { 636 int ret; 637 struct vfio_ap_queue_reserved qres; 638 639 qres.apid = apid; 640 qres.apqi = apqi; 641 qres.reserved = false; 642 643 ret = driver_for_each_device(&matrix_dev->vfio_ap_drv->driver, NULL, 644 &qres, vfio_ap_has_queue); 645 if (ret) 646 return ret; 647 648 if (qres.reserved) 649 return 0; 650 651 return -EADDRNOTAVAIL; 652 } 653 654 static int 655 vfio_ap_mdev_verify_queues_reserved_for_apid(struct ap_matrix_mdev *matrix_mdev, 656 unsigned long apid) 657 { 658 int ret; 659 unsigned long apqi; 660 unsigned long nbits = matrix_mdev->matrix.aqm_max + 1; 661 662 if (find_first_bit_inv(matrix_mdev->matrix.aqm, nbits) >= nbits) 663 return vfio_ap_verify_queue_reserved(&apid, NULL); 664 665 for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, nbits) { 666 ret = vfio_ap_verify_queue_reserved(&apid, &apqi); 667 if (ret) 668 return ret; 669 } 670 671 return 0; 672 } 673 674 /** 675 * vfio_ap_mdev_verify_no_sharing - verifies that the AP matrix is not configured 676 * 677 * @matrix_mdev: the mediated matrix device 678 * 679 * Verifies that the APQNs derived from the cross product of the AP adapter IDs 680 * and AP queue indexes comprising the AP matrix are not configured for another 681 * mediated device. AP queue sharing is not allowed. 682 * 683 * Return: 0 if the APQNs are not shared; otherwise returns -EADDRINUSE. 684 */ 685 static int vfio_ap_mdev_verify_no_sharing(struct ap_matrix_mdev *matrix_mdev) 686 { 687 struct ap_matrix_mdev *lstdev; 688 DECLARE_BITMAP(apm, AP_DEVICES); 689 DECLARE_BITMAP(aqm, AP_DOMAINS); 690 691 list_for_each_entry(lstdev, &matrix_dev->mdev_list, node) { 692 if (matrix_mdev == lstdev) 693 continue; 694 695 memset(apm, 0, sizeof(apm)); 696 memset(aqm, 0, sizeof(aqm)); 697 698 /* 699 * We work on full longs, as we can only exclude the leftover 700 * bits in non-inverse order. The leftover is all zeros. 701 */ 702 if (!bitmap_and(apm, matrix_mdev->matrix.apm, 703 lstdev->matrix.apm, AP_DEVICES)) 704 continue; 705 706 if (!bitmap_and(aqm, matrix_mdev->matrix.aqm, 707 lstdev->matrix.aqm, AP_DOMAINS)) 708 continue; 709 710 return -EADDRINUSE; 711 } 712 713 return 0; 714 } 715 716 /** 717 * assign_adapter_store - parses the APID from @buf and sets the 718 * corresponding bit in the mediated matrix device's APM 719 * 720 * @dev: the matrix device 721 * @attr: the mediated matrix device's assign_adapter attribute 722 * @buf: a buffer containing the AP adapter number (APID) to 723 * be assigned 724 * @count: the number of bytes in @buf 725 * 726 * Return: the number of bytes processed if the APID is valid; otherwise, 727 * returns one of the following errors: 728 * 729 * 1. -EINVAL 730 * The APID is not a valid number 731 * 732 * 2. -ENODEV 733 * The APID exceeds the maximum value configured for the system 734 * 735 * 3. -EADDRNOTAVAIL 736 * An APQN derived from the cross product of the APID being assigned 737 * and the APQIs previously assigned is not bound to the vfio_ap device 738 * driver; or, if no APQIs have yet been assigned, the APID is not 739 * contained in an APQN bound to the vfio_ap device driver. 740 * 741 * 4. -EADDRINUSE 742 * An APQN derived from the cross product of the APID being assigned 743 * and the APQIs previously assigned is being used by another mediated 744 * matrix device 745 */ 746 static ssize_t assign_adapter_store(struct device *dev, 747 struct device_attribute *attr, 748 const char *buf, size_t count) 749 { 750 int ret; 751 unsigned long apid; 752 struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); 753 754 mutex_lock(&matrix_dev->lock); 755 756 /* If the KVM guest is running, disallow assignment of adapter */ 757 if (matrix_mdev->kvm) { 758 ret = -EBUSY; 759 goto done; 760 } 761 762 ret = kstrtoul(buf, 0, &apid); 763 if (ret) 764 goto done; 765 766 if (apid > matrix_mdev->matrix.apm_max) { 767 ret = -ENODEV; 768 goto done; 769 } 770 771 /* 772 * Set the bit in the AP mask (APM) corresponding to the AP adapter 773 * number (APID). The bits in the mask, from most significant to least 774 * significant bit, correspond to APIDs 0-255. 775 */ 776 ret = vfio_ap_mdev_verify_queues_reserved_for_apid(matrix_mdev, apid); 777 if (ret) 778 goto done; 779 780 set_bit_inv(apid, matrix_mdev->matrix.apm); 781 782 ret = vfio_ap_mdev_verify_no_sharing(matrix_mdev); 783 if (ret) 784 goto share_err; 785 786 ret = count; 787 goto done; 788 789 share_err: 790 clear_bit_inv(apid, matrix_mdev->matrix.apm); 791 done: 792 mutex_unlock(&matrix_dev->lock); 793 794 return ret; 795 } 796 static DEVICE_ATTR_WO(assign_adapter); 797 798 /** 799 * unassign_adapter_store - parses the APID from @buf and clears the 800 * corresponding bit in the mediated matrix device's APM 801 * 802 * @dev: the matrix device 803 * @attr: the mediated matrix device's unassign_adapter attribute 804 * @buf: a buffer containing the adapter number (APID) to be unassigned 805 * @count: the number of bytes in @buf 806 * 807 * Return: the number of bytes processed if the APID is valid; otherwise, 808 * returns one of the following errors: 809 * -EINVAL if the APID is not a number 810 * -ENODEV if the APID it exceeds the maximum value configured for the 811 * system 812 */ 813 static ssize_t unassign_adapter_store(struct device *dev, 814 struct device_attribute *attr, 815 const char *buf, size_t count) 816 { 817 int ret; 818 unsigned long apid; 819 struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); 820 821 mutex_lock(&matrix_dev->lock); 822 823 /* If the KVM guest is running, disallow unassignment of adapter */ 824 if (matrix_mdev->kvm) { 825 ret = -EBUSY; 826 goto done; 827 } 828 829 ret = kstrtoul(buf, 0, &apid); 830 if (ret) 831 goto done; 832 833 if (apid > matrix_mdev->matrix.apm_max) { 834 ret = -ENODEV; 835 goto done; 836 } 837 838 clear_bit_inv((unsigned long)apid, matrix_mdev->matrix.apm); 839 ret = count; 840 done: 841 mutex_unlock(&matrix_dev->lock); 842 return ret; 843 } 844 static DEVICE_ATTR_WO(unassign_adapter); 845 846 static int 847 vfio_ap_mdev_verify_queues_reserved_for_apqi(struct ap_matrix_mdev *matrix_mdev, 848 unsigned long apqi) 849 { 850 int ret; 851 unsigned long apid; 852 unsigned long nbits = matrix_mdev->matrix.apm_max + 1; 853 854 if (find_first_bit_inv(matrix_mdev->matrix.apm, nbits) >= nbits) 855 return vfio_ap_verify_queue_reserved(NULL, &apqi); 856 857 for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, nbits) { 858 ret = vfio_ap_verify_queue_reserved(&apid, &apqi); 859 if (ret) 860 return ret; 861 } 862 863 return 0; 864 } 865 866 /** 867 * assign_domain_store - parses the APQI from @buf and sets the 868 * corresponding bit in the mediated matrix device's AQM 869 * 870 * @dev: the matrix device 871 * @attr: the mediated matrix device's assign_domain attribute 872 * @buf: a buffer containing the AP queue index (APQI) of the domain to 873 * be assigned 874 * @count: the number of bytes in @buf 875 * 876 * Return: the number of bytes processed if the APQI is valid; otherwise returns 877 * one of the following errors: 878 * 879 * 1. -EINVAL 880 * The APQI is not a valid number 881 * 882 * 2. -ENODEV 883 * The APQI exceeds the maximum value configured for the system 884 * 885 * 3. -EADDRNOTAVAIL 886 * An APQN derived from the cross product of the APQI being assigned 887 * and the APIDs previously assigned is not bound to the vfio_ap device 888 * driver; or, if no APIDs have yet been assigned, the APQI is not 889 * contained in an APQN bound to the vfio_ap device driver. 890 * 891 * 4. -EADDRINUSE 892 * An APQN derived from the cross product of the APQI being assigned 893 * and the APIDs previously assigned is being used by another mediated 894 * matrix device 895 */ 896 static ssize_t assign_domain_store(struct device *dev, 897 struct device_attribute *attr, 898 const char *buf, size_t count) 899 { 900 int ret; 901 unsigned long apqi; 902 struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); 903 unsigned long max_apqi = matrix_mdev->matrix.aqm_max; 904 905 mutex_lock(&matrix_dev->lock); 906 907 /* If the KVM guest is running, disallow assignment of domain */ 908 if (matrix_mdev->kvm) { 909 ret = -EBUSY; 910 goto done; 911 } 912 913 ret = kstrtoul(buf, 0, &apqi); 914 if (ret) 915 goto done; 916 if (apqi > max_apqi) { 917 ret = -ENODEV; 918 goto done; 919 } 920 921 ret = vfio_ap_mdev_verify_queues_reserved_for_apqi(matrix_mdev, apqi); 922 if (ret) 923 goto done; 924 925 set_bit_inv(apqi, matrix_mdev->matrix.aqm); 926 927 ret = vfio_ap_mdev_verify_no_sharing(matrix_mdev); 928 if (ret) 929 goto share_err; 930 931 ret = count; 932 goto done; 933 934 share_err: 935 clear_bit_inv(apqi, matrix_mdev->matrix.aqm); 936 done: 937 mutex_unlock(&matrix_dev->lock); 938 939 return ret; 940 } 941 static DEVICE_ATTR_WO(assign_domain); 942 943 944 /** 945 * unassign_domain_store - parses the APQI from @buf and clears the 946 * corresponding bit in the mediated matrix device's AQM 947 * 948 * @dev: the matrix device 949 * @attr: the mediated matrix device's unassign_domain attribute 950 * @buf: a buffer containing the AP queue index (APQI) of the domain to 951 * be unassigned 952 * @count: the number of bytes in @buf 953 * 954 * Return: the number of bytes processed if the APQI is valid; otherwise, 955 * returns one of the following errors: 956 * -EINVAL if the APQI is not a number 957 * -ENODEV if the APQI exceeds the maximum value configured for the system 958 */ 959 static ssize_t unassign_domain_store(struct device *dev, 960 struct device_attribute *attr, 961 const char *buf, size_t count) 962 { 963 int ret; 964 unsigned long apqi; 965 struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); 966 967 mutex_lock(&matrix_dev->lock); 968 969 /* If the KVM guest is running, disallow unassignment of domain */ 970 if (matrix_mdev->kvm) { 971 ret = -EBUSY; 972 goto done; 973 } 974 975 ret = kstrtoul(buf, 0, &apqi); 976 if (ret) 977 goto done; 978 979 if (apqi > matrix_mdev->matrix.aqm_max) { 980 ret = -ENODEV; 981 goto done; 982 } 983 984 clear_bit_inv((unsigned long)apqi, matrix_mdev->matrix.aqm); 985 ret = count; 986 987 done: 988 mutex_unlock(&matrix_dev->lock); 989 return ret; 990 } 991 static DEVICE_ATTR_WO(unassign_domain); 992 993 /** 994 * assign_control_domain_store - parses the domain ID from @buf and sets 995 * the corresponding bit in the mediated matrix device's ADM 996 * 997 * @dev: the matrix device 998 * @attr: the mediated matrix device's assign_control_domain attribute 999 * @buf: a buffer containing the domain ID to be assigned 1000 * @count: the number of bytes in @buf 1001 * 1002 * Return: the number of bytes processed if the domain ID is valid; otherwise, 1003 * returns one of the following errors: 1004 * -EINVAL if the ID is not a number 1005 * -ENODEV if the ID exceeds the maximum value configured for the system 1006 */ 1007 static ssize_t assign_control_domain_store(struct device *dev, 1008 struct device_attribute *attr, 1009 const char *buf, size_t count) 1010 { 1011 int ret; 1012 unsigned long id; 1013 struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); 1014 1015 mutex_lock(&matrix_dev->lock); 1016 1017 /* If the KVM guest is running, disallow assignment of control domain */ 1018 if (matrix_mdev->kvm) { 1019 ret = -EBUSY; 1020 goto done; 1021 } 1022 1023 ret = kstrtoul(buf, 0, &id); 1024 if (ret) 1025 goto done; 1026 1027 if (id > matrix_mdev->matrix.adm_max) { 1028 ret = -ENODEV; 1029 goto done; 1030 } 1031 1032 /* Set the bit in the ADM (bitmask) corresponding to the AP control 1033 * domain number (id). The bits in the mask, from most significant to 1034 * least significant, correspond to IDs 0 up to the one less than the 1035 * number of control domains that can be assigned. 1036 */ 1037 set_bit_inv(id, matrix_mdev->matrix.adm); 1038 ret = count; 1039 done: 1040 mutex_unlock(&matrix_dev->lock); 1041 return ret; 1042 } 1043 static DEVICE_ATTR_WO(assign_control_domain); 1044 1045 /** 1046 * unassign_control_domain_store - parses the domain ID from @buf and 1047 * clears the corresponding bit in the mediated matrix device's ADM 1048 * 1049 * @dev: the matrix device 1050 * @attr: the mediated matrix device's unassign_control_domain attribute 1051 * @buf: a buffer containing the domain ID to be unassigned 1052 * @count: the number of bytes in @buf 1053 * 1054 * Return: the number of bytes processed if the domain ID is valid; otherwise, 1055 * returns one of the following errors: 1056 * -EINVAL if the ID is not a number 1057 * -ENODEV if the ID exceeds the maximum value configured for the system 1058 */ 1059 static ssize_t unassign_control_domain_store(struct device *dev, 1060 struct device_attribute *attr, 1061 const char *buf, size_t count) 1062 { 1063 int ret; 1064 unsigned long domid; 1065 struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); 1066 unsigned long max_domid = matrix_mdev->matrix.adm_max; 1067 1068 mutex_lock(&matrix_dev->lock); 1069 1070 /* If a KVM guest is running, disallow unassignment of control domain */ 1071 if (matrix_mdev->kvm) { 1072 ret = -EBUSY; 1073 goto done; 1074 } 1075 1076 ret = kstrtoul(buf, 0, &domid); 1077 if (ret) 1078 goto done; 1079 if (domid > max_domid) { 1080 ret = -ENODEV; 1081 goto done; 1082 } 1083 1084 clear_bit_inv(domid, matrix_mdev->matrix.adm); 1085 ret = count; 1086 done: 1087 mutex_unlock(&matrix_dev->lock); 1088 return ret; 1089 } 1090 static DEVICE_ATTR_WO(unassign_control_domain); 1091 1092 static ssize_t control_domains_show(struct device *dev, 1093 struct device_attribute *dev_attr, 1094 char *buf) 1095 { 1096 unsigned long id; 1097 int nchars = 0; 1098 int n; 1099 char *bufpos = buf; 1100 struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); 1101 unsigned long max_domid = matrix_mdev->matrix.adm_max; 1102 1103 mutex_lock(&matrix_dev->lock); 1104 for_each_set_bit_inv(id, matrix_mdev->matrix.adm, max_domid + 1) { 1105 n = sprintf(bufpos, "%04lx\n", id); 1106 bufpos += n; 1107 nchars += n; 1108 } 1109 mutex_unlock(&matrix_dev->lock); 1110 1111 return nchars; 1112 } 1113 static DEVICE_ATTR_RO(control_domains); 1114 1115 static ssize_t matrix_show(struct device *dev, struct device_attribute *attr, 1116 char *buf) 1117 { 1118 struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); 1119 char *bufpos = buf; 1120 unsigned long apid; 1121 unsigned long apqi; 1122 unsigned long apid1; 1123 unsigned long apqi1; 1124 unsigned long napm_bits = matrix_mdev->matrix.apm_max + 1; 1125 unsigned long naqm_bits = matrix_mdev->matrix.aqm_max + 1; 1126 int nchars = 0; 1127 int n; 1128 1129 apid1 = find_first_bit_inv(matrix_mdev->matrix.apm, napm_bits); 1130 apqi1 = find_first_bit_inv(matrix_mdev->matrix.aqm, naqm_bits); 1131 1132 mutex_lock(&matrix_dev->lock); 1133 1134 if ((apid1 < napm_bits) && (apqi1 < naqm_bits)) { 1135 for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, napm_bits) { 1136 for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, 1137 naqm_bits) { 1138 n = sprintf(bufpos, "%02lx.%04lx\n", apid, 1139 apqi); 1140 bufpos += n; 1141 nchars += n; 1142 } 1143 } 1144 } else if (apid1 < napm_bits) { 1145 for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, napm_bits) { 1146 n = sprintf(bufpos, "%02lx.\n", apid); 1147 bufpos += n; 1148 nchars += n; 1149 } 1150 } else if (apqi1 < naqm_bits) { 1151 for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, naqm_bits) { 1152 n = sprintf(bufpos, ".%04lx\n", apqi); 1153 bufpos += n; 1154 nchars += n; 1155 } 1156 } 1157 1158 mutex_unlock(&matrix_dev->lock); 1159 1160 return nchars; 1161 } 1162 static DEVICE_ATTR_RO(matrix); 1163 1164 static struct attribute *vfio_ap_mdev_attrs[] = { 1165 &dev_attr_assign_adapter.attr, 1166 &dev_attr_unassign_adapter.attr, 1167 &dev_attr_assign_domain.attr, 1168 &dev_attr_unassign_domain.attr, 1169 &dev_attr_assign_control_domain.attr, 1170 &dev_attr_unassign_control_domain.attr, 1171 &dev_attr_control_domains.attr, 1172 &dev_attr_matrix.attr, 1173 NULL, 1174 }; 1175 1176 static struct attribute_group vfio_ap_mdev_attr_group = { 1177 .attrs = vfio_ap_mdev_attrs 1178 }; 1179 1180 static const struct attribute_group *vfio_ap_mdev_attr_groups[] = { 1181 &vfio_ap_mdev_attr_group, 1182 NULL 1183 }; 1184 1185 /** 1186 * vfio_ap_mdev_set_kvm - sets all data for @matrix_mdev that are needed 1187 * to manage AP resources for the guest whose state is represented by @kvm 1188 * 1189 * @matrix_mdev: a mediated matrix device 1190 * @kvm: reference to KVM instance 1191 * 1192 * Note: The matrix_dev->lock must be taken prior to calling 1193 * this function; however, the lock will be temporarily released while the 1194 * guest's AP configuration is set to avoid a potential lockdep splat. 1195 * The kvm->lock is taken to set the guest's AP configuration which, under 1196 * certain circumstances, will result in a circular lock dependency if this is 1197 * done under the @matrix_mdev->lock. 1198 * 1199 * Return: 0 if no other mediated matrix device has a reference to @kvm; 1200 * otherwise, returns an -EPERM. 1201 */ 1202 static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev, 1203 struct kvm *kvm) 1204 { 1205 struct ap_matrix_mdev *m; 1206 1207 if (kvm->arch.crypto.crycbd) { 1208 down_write(&kvm->arch.crypto.pqap_hook_rwsem); 1209 kvm->arch.crypto.pqap_hook = &matrix_mdev->pqap_hook; 1210 up_write(&kvm->arch.crypto.pqap_hook_rwsem); 1211 1212 mutex_lock(&kvm->lock); 1213 mutex_lock(&matrix_dev->lock); 1214 1215 list_for_each_entry(m, &matrix_dev->mdev_list, node) { 1216 if (m != matrix_mdev && m->kvm == kvm) { 1217 mutex_unlock(&kvm->lock); 1218 mutex_unlock(&matrix_dev->lock); 1219 return -EPERM; 1220 } 1221 } 1222 1223 kvm_get_kvm(kvm); 1224 matrix_mdev->kvm = kvm; 1225 kvm_arch_crypto_set_masks(kvm, 1226 matrix_mdev->matrix.apm, 1227 matrix_mdev->matrix.aqm, 1228 matrix_mdev->matrix.adm); 1229 1230 mutex_unlock(&kvm->lock); 1231 mutex_unlock(&matrix_dev->lock); 1232 } 1233 1234 return 0; 1235 } 1236 1237 /** 1238 * vfio_ap_mdev_iommu_notifier - IOMMU notifier callback 1239 * 1240 * @nb: The notifier block 1241 * @action: Action to be taken 1242 * @data: data associated with the request 1243 * 1244 * For an UNMAP request, unpin the guest IOVA (the NIB guest address we 1245 * pinned before). Other requests are ignored. 1246 * 1247 * Return: for an UNMAP request, NOFITY_OK; otherwise NOTIFY_DONE. 1248 */ 1249 static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb, 1250 unsigned long action, void *data) 1251 { 1252 struct ap_matrix_mdev *matrix_mdev; 1253 1254 matrix_mdev = container_of(nb, struct ap_matrix_mdev, iommu_notifier); 1255 1256 if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) { 1257 struct vfio_iommu_type1_dma_unmap *unmap = data; 1258 unsigned long g_pfn = unmap->iova >> PAGE_SHIFT; 1259 1260 vfio_unpin_pages(mdev_dev(matrix_mdev->mdev), &g_pfn, 1); 1261 return NOTIFY_OK; 1262 } 1263 1264 return NOTIFY_DONE; 1265 } 1266 1267 /** 1268 * vfio_ap_mdev_unset_kvm - performs clean-up of resources no longer needed 1269 * by @matrix_mdev. 1270 * 1271 * @matrix_mdev: a matrix mediated device 1272 * @kvm: the pointer to the kvm structure being unset. 1273 * 1274 * Note: The matrix_dev->lock must be taken prior to calling 1275 * this function; however, the lock will be temporarily released while the 1276 * guest's AP configuration is cleared to avoid a potential lockdep splat. 1277 * The kvm->lock is taken to clear the guest's AP configuration which, under 1278 * certain circumstances, will result in a circular lock dependency if this is 1279 * done under the @matrix_mdev->lock. 1280 */ 1281 static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev, 1282 struct kvm *kvm) 1283 { 1284 if (kvm && kvm->arch.crypto.crycbd) { 1285 down_write(&kvm->arch.crypto.pqap_hook_rwsem); 1286 kvm->arch.crypto.pqap_hook = NULL; 1287 up_write(&kvm->arch.crypto.pqap_hook_rwsem); 1288 1289 mutex_lock(&kvm->lock); 1290 mutex_lock(&matrix_dev->lock); 1291 1292 kvm_arch_crypto_clear_masks(kvm); 1293 vfio_ap_mdev_reset_queues(matrix_mdev); 1294 kvm_put_kvm(kvm); 1295 matrix_mdev->kvm = NULL; 1296 1297 mutex_unlock(&kvm->lock); 1298 mutex_unlock(&matrix_dev->lock); 1299 } 1300 } 1301 1302 static int vfio_ap_mdev_group_notifier(struct notifier_block *nb, 1303 unsigned long action, void *data) 1304 { 1305 int notify_rc = NOTIFY_OK; 1306 struct ap_matrix_mdev *matrix_mdev; 1307 1308 if (action != VFIO_GROUP_NOTIFY_SET_KVM) 1309 return NOTIFY_OK; 1310 1311 matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier); 1312 1313 if (!data) 1314 vfio_ap_mdev_unset_kvm(matrix_mdev, matrix_mdev->kvm); 1315 else if (vfio_ap_mdev_set_kvm(matrix_mdev, data)) 1316 notify_rc = NOTIFY_DONE; 1317 1318 return notify_rc; 1319 } 1320 1321 static struct vfio_ap_queue *vfio_ap_find_queue(int apqn) 1322 { 1323 struct device *dev; 1324 struct vfio_ap_queue *q = NULL; 1325 1326 dev = driver_find_device(&matrix_dev->vfio_ap_drv->driver, NULL, 1327 &apqn, match_apqn); 1328 if (dev) { 1329 q = dev_get_drvdata(dev); 1330 put_device(dev); 1331 } 1332 1333 return q; 1334 } 1335 1336 int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q, 1337 unsigned int retry) 1338 { 1339 struct ap_queue_status status; 1340 int ret; 1341 int retry2 = 2; 1342 1343 if (!q) 1344 return 0; 1345 1346 retry_zapq: 1347 status = ap_zapq(q->apqn); 1348 switch (status.response_code) { 1349 case AP_RESPONSE_NORMAL: 1350 ret = 0; 1351 break; 1352 case AP_RESPONSE_RESET_IN_PROGRESS: 1353 if (retry--) { 1354 msleep(20); 1355 goto retry_zapq; 1356 } 1357 ret = -EBUSY; 1358 break; 1359 case AP_RESPONSE_Q_NOT_AVAIL: 1360 case AP_RESPONSE_DECONFIGURED: 1361 case AP_RESPONSE_CHECKSTOPPED: 1362 WARN_ON_ONCE(status.irq_enabled); 1363 ret = -EBUSY; 1364 goto free_resources; 1365 default: 1366 /* things are really broken, give up */ 1367 WARN(true, "PQAP/ZAPQ completed with invalid rc (%x)\n", 1368 status.response_code); 1369 return -EIO; 1370 } 1371 1372 /* wait for the reset to take effect */ 1373 while (retry2--) { 1374 if (status.queue_empty && !status.irq_enabled) 1375 break; 1376 msleep(20); 1377 status = ap_tapq(q->apqn, NULL); 1378 } 1379 WARN_ON_ONCE(retry2 <= 0); 1380 1381 free_resources: 1382 vfio_ap_free_aqic_resources(q); 1383 1384 return ret; 1385 } 1386 1387 static int vfio_ap_mdev_reset_queues(struct ap_matrix_mdev *matrix_mdev) 1388 { 1389 int ret; 1390 int rc = 0; 1391 unsigned long apid, apqi; 1392 struct vfio_ap_queue *q; 1393 1394 for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, 1395 matrix_mdev->matrix.apm_max + 1) { 1396 for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, 1397 matrix_mdev->matrix.aqm_max + 1) { 1398 q = vfio_ap_find_queue(AP_MKQID(apid, apqi)); 1399 ret = vfio_ap_mdev_reset_queue(q, 1); 1400 /* 1401 * Regardless whether a queue turns out to be busy, or 1402 * is not operational, we need to continue resetting 1403 * the remaining queues. 1404 */ 1405 if (ret) 1406 rc = ret; 1407 } 1408 } 1409 1410 return rc; 1411 } 1412 1413 static int vfio_ap_mdev_open_device(struct vfio_device *vdev) 1414 { 1415 struct ap_matrix_mdev *matrix_mdev = 1416 container_of(vdev, struct ap_matrix_mdev, vdev); 1417 unsigned long events; 1418 int ret; 1419 1420 matrix_mdev->group_notifier.notifier_call = vfio_ap_mdev_group_notifier; 1421 events = VFIO_GROUP_NOTIFY_SET_KVM; 1422 1423 ret = vfio_register_notifier(vdev->dev, VFIO_GROUP_NOTIFY, 1424 &events, &matrix_mdev->group_notifier); 1425 if (ret) 1426 return ret; 1427 1428 matrix_mdev->iommu_notifier.notifier_call = vfio_ap_mdev_iommu_notifier; 1429 events = VFIO_IOMMU_NOTIFY_DMA_UNMAP; 1430 ret = vfio_register_notifier(vdev->dev, VFIO_IOMMU_NOTIFY, 1431 &events, &matrix_mdev->iommu_notifier); 1432 if (ret) 1433 goto out_unregister_group; 1434 return 0; 1435 1436 out_unregister_group: 1437 vfio_unregister_notifier(vdev->dev, VFIO_GROUP_NOTIFY, 1438 &matrix_mdev->group_notifier); 1439 return ret; 1440 } 1441 1442 static void vfio_ap_mdev_close_device(struct vfio_device *vdev) 1443 { 1444 struct ap_matrix_mdev *matrix_mdev = 1445 container_of(vdev, struct ap_matrix_mdev, vdev); 1446 1447 vfio_unregister_notifier(vdev->dev, VFIO_IOMMU_NOTIFY, 1448 &matrix_mdev->iommu_notifier); 1449 vfio_unregister_notifier(vdev->dev, VFIO_GROUP_NOTIFY, 1450 &matrix_mdev->group_notifier); 1451 vfio_ap_mdev_unset_kvm(matrix_mdev, matrix_mdev->kvm); 1452 } 1453 1454 static int vfio_ap_mdev_get_device_info(unsigned long arg) 1455 { 1456 unsigned long minsz; 1457 struct vfio_device_info info; 1458 1459 minsz = offsetofend(struct vfio_device_info, num_irqs); 1460 1461 if (copy_from_user(&info, (void __user *)arg, minsz)) 1462 return -EFAULT; 1463 1464 if (info.argsz < minsz) 1465 return -EINVAL; 1466 1467 info.flags = VFIO_DEVICE_FLAGS_AP | VFIO_DEVICE_FLAGS_RESET; 1468 info.num_regions = 0; 1469 info.num_irqs = 0; 1470 1471 return copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0; 1472 } 1473 1474 static ssize_t vfio_ap_mdev_ioctl(struct vfio_device *vdev, 1475 unsigned int cmd, unsigned long arg) 1476 { 1477 struct ap_matrix_mdev *matrix_mdev = 1478 container_of(vdev, struct ap_matrix_mdev, vdev); 1479 int ret; 1480 1481 mutex_lock(&matrix_dev->lock); 1482 switch (cmd) { 1483 case VFIO_DEVICE_GET_INFO: 1484 ret = vfio_ap_mdev_get_device_info(arg); 1485 break; 1486 case VFIO_DEVICE_RESET: 1487 ret = vfio_ap_mdev_reset_queues(matrix_mdev); 1488 break; 1489 default: 1490 ret = -EOPNOTSUPP; 1491 break; 1492 } 1493 mutex_unlock(&matrix_dev->lock); 1494 1495 return ret; 1496 } 1497 1498 static const struct vfio_device_ops vfio_ap_matrix_dev_ops = { 1499 .open_device = vfio_ap_mdev_open_device, 1500 .close_device = vfio_ap_mdev_close_device, 1501 .ioctl = vfio_ap_mdev_ioctl, 1502 }; 1503 1504 static struct mdev_driver vfio_ap_matrix_driver = { 1505 .driver = { 1506 .name = "vfio_ap_mdev", 1507 .owner = THIS_MODULE, 1508 .mod_name = KBUILD_MODNAME, 1509 .dev_groups = vfio_ap_mdev_attr_groups, 1510 }, 1511 .probe = vfio_ap_mdev_probe, 1512 .remove = vfio_ap_mdev_remove, 1513 }; 1514 1515 static const struct mdev_parent_ops vfio_ap_matrix_ops = { 1516 .owner = THIS_MODULE, 1517 .device_driver = &vfio_ap_matrix_driver, 1518 .supported_type_groups = vfio_ap_mdev_type_groups, 1519 }; 1520 1521 int vfio_ap_mdev_register(void) 1522 { 1523 int ret; 1524 1525 atomic_set(&matrix_dev->available_instances, MAX_ZDEV_ENTRIES_EXT); 1526 1527 ret = mdev_register_driver(&vfio_ap_matrix_driver); 1528 if (ret) 1529 return ret; 1530 1531 ret = mdev_register_device(&matrix_dev->device, &vfio_ap_matrix_ops); 1532 if (ret) 1533 goto err_driver; 1534 return 0; 1535 1536 err_driver: 1537 mdev_unregister_driver(&vfio_ap_matrix_driver); 1538 return ret; 1539 } 1540 1541 void vfio_ap_mdev_unregister(void) 1542 { 1543 mdev_unregister_device(&matrix_dev->device); 1544 mdev_unregister_driver(&vfio_ap_matrix_driver); 1545 } 1546