1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * Adjunct processor matrix VFIO device driver callbacks.
4  *
5  * Copyright IBM Corp. 2018
6  *
7  * Author(s): Tony Krowiak <akrowiak@linux.ibm.com>
8  *	      Halil Pasic <pasic@linux.ibm.com>
9  *	      Pierre Morel <pmorel@linux.ibm.com>
10  */
11 #include <linux/string.h>
12 #include <linux/vfio.h>
13 #include <linux/device.h>
14 #include <linux/list.h>
15 #include <linux/ctype.h>
16 #include <linux/bitops.h>
17 #include <linux/kvm_host.h>
18 #include <linux/module.h>
19 #include <linux/uuid.h>
20 #include <asm/kvm.h>
21 #include <asm/zcrypt.h>
22 
23 #include "vfio_ap_private.h"
24 #include "vfio_ap_debug.h"
25 
26 #define VFIO_AP_MDEV_TYPE_HWVIRT "passthrough"
27 #define VFIO_AP_MDEV_NAME_HWVIRT "VFIO AP Passthrough Device"
28 
29 static int vfio_ap_mdev_reset_queues(struct ap_matrix_mdev *matrix_mdev);
30 static struct vfio_ap_queue *vfio_ap_find_queue(int apqn);
31 static const struct vfio_device_ops vfio_ap_matrix_dev_ops;
32 
33 static int match_apqn(struct device *dev, const void *data)
34 {
35 	struct vfio_ap_queue *q = dev_get_drvdata(dev);
36 
37 	return (q->apqn == *(int *)(data)) ? 1 : 0;
38 }
39 
40 /**
41  * vfio_ap_get_queue - retrieve a queue with a specific APQN from a list
42  * @matrix_mdev: the associated mediated matrix
43  * @apqn: The queue APQN
44  *
45  * Retrieve a queue with a specific APQN from the list of the
46  * devices of the vfio_ap_drv.
47  * Verify that the APID and the APQI are set in the matrix.
48  *
49  * Return: the pointer to the associated vfio_ap_queue
50  */
51 static struct vfio_ap_queue *vfio_ap_get_queue(
52 					struct ap_matrix_mdev *matrix_mdev,
53 					int apqn)
54 {
55 	struct vfio_ap_queue *q;
56 
57 	if (!test_bit_inv(AP_QID_CARD(apqn), matrix_mdev->matrix.apm))
58 		return NULL;
59 	if (!test_bit_inv(AP_QID_QUEUE(apqn), matrix_mdev->matrix.aqm))
60 		return NULL;
61 
62 	q = vfio_ap_find_queue(apqn);
63 	if (q)
64 		q->matrix_mdev = matrix_mdev;
65 
66 	return q;
67 }
68 
69 /**
70  * vfio_ap_wait_for_irqclear - clears the IR bit or gives up after 5 tries
71  * @apqn: The AP Queue number
72  *
73  * Checks the IRQ bit for the status of this APQN using ap_tapq.
74  * Returns if the ap_tapq function succeeded and the bit is clear.
75  * Returns if ap_tapq function failed with invalid, deconfigured or
76  * checkstopped AP.
77  * Otherwise retries up to 5 times after waiting 20ms.
78  */
static void vfio_ap_wait_for_irqclear(int apqn)
{
	struct ap_queue_status status;
	int retry = 5;

	do {
		/* Query the queue status to inspect the irq_enabled bit */
		status = ap_tapq(apqn, NULL);
		switch (status.response_code) {
		case AP_RESPONSE_NORMAL:
		case AP_RESPONSE_RESET_IN_PROGRESS:
			if (!status.irq_enabled)
				return;	/* IR bit is clear; we are done */
			fallthrough;
		case AP_RESPONSE_BUSY:
			/* Give the queue time to settle before retrying */
			msleep(20);
			break;
		case AP_RESPONSE_Q_NOT_AVAIL:
		case AP_RESPONSE_DECONFIGURED:
		case AP_RESPONSE_CHECKSTOPPED:
		default:
			/* AP not operational: further waiting is pointless */
			WARN_ONCE(1, "%s: tapq rc %02x: %04x\n", __func__,
				  status.response_code, apqn);
			return;
		}
	} while (--retry);

	/* Retries exhausted with the IR bit still set */
	WARN_ONCE(1, "%s: tapq rc %02x: %04x could not clear IR bit\n",
		  __func__, status.response_code, apqn);
}
108 
109 /**
110  * vfio_ap_free_aqic_resources - free vfio_ap_queue resources
111  * @q: The vfio_ap_queue
112  *
113  * Unregisters the ISC in the GIB when the saved ISC not invalid.
114  * Unpins the guest's page holding the NIB when it exists.
115  * Resets the saved_iova and saved_isc to invalid values.
116  */
static void vfio_ap_free_aqic_resources(struct vfio_ap_queue *q)
{
	if (!q)
		return;
	/* Unregister the guest ISC from the GIB if one was registered */
	if (q->saved_isc != VFIO_AP_ISC_INVALID &&
	    !WARN_ON(!(q->matrix_mdev && q->matrix_mdev->kvm))) {
		kvm_s390_gisc_unregister(q->matrix_mdev->kvm, q->saved_isc);
		q->saved_isc = VFIO_AP_ISC_INVALID;
	}
	/* Unpin the guest page holding the NIB if one was pinned */
	if (q->saved_iova && !WARN_ON(!q->matrix_mdev)) {
		vfio_unpin_pages(&q->matrix_mdev->vdev, q->saved_iova, 1);
		q->saved_iova = 0;
	}
}
131 
132 /**
133  * vfio_ap_irq_disable - disables and clears an ap_queue interrupt
134  * @q: The vfio_ap_queue
135  *
136  * Uses ap_aqic to disable the interruption and in case of success, reset
137  * in progress or IRQ disable command already proceeded: calls
138  * vfio_ap_wait_for_irqclear() to check for the IRQ bit to be clear
139  * and calls vfio_ap_free_aqic_resources() to free the resources associated
140  * with the AP interrupt handling.
141  *
142  * In the case the AP is busy, or a reset is in progress,
143  * retries after 20ms, up to 5 times.
144  *
145  * Returns if ap_aqic function failed with invalid, deconfigured or
146  * checkstopped AP.
147  *
148  * Return: &struct ap_queue_status
149  */
static struct ap_queue_status vfio_ap_irq_disable(struct vfio_ap_queue *q)
{
	struct ap_qirq_ctrl aqic_gisa = {};	/* all-zero ctrl disables IRQ */
	struct ap_queue_status status;
	int retries = 5;

	do {
		status = ap_aqic(q->apqn, aqic_gisa, 0);
		switch (status.response_code) {
		case AP_RESPONSE_OTHERWISE_CHANGED:
		case AP_RESPONSE_NORMAL:
			/* Disable accepted; wait for the IR bit to clear */
			vfio_ap_wait_for_irqclear(q->apqn);
			goto end_free;
		case AP_RESPONSE_RESET_IN_PROGRESS:
		case AP_RESPONSE_BUSY:
			/* Transient states: back off briefly and retry */
			msleep(20);
			break;
		case AP_RESPONSE_Q_NOT_AVAIL:
		case AP_RESPONSE_DECONFIGURED:
		case AP_RESPONSE_CHECKSTOPPED:
		case AP_RESPONSE_INVALID_ADDRESS:
		default:
			/* All cases in default means AP not operational */
			WARN_ONCE(1, "%s: ap_aqic status %d\n", __func__,
				  status.response_code);
			goto end_free;
		}
	} while (retries--);

	/* Retries exhausted while the queue stayed busy/resetting */
	WARN_ONCE(1, "%s: ap_aqic status %d\n", __func__,
		  status.response_code);
end_free:
	/*
	 * Release the ISC registration and the pinned NIB page regardless
	 * of the outcome, and detach the queue from the mediated device.
	 */
	vfio_ap_free_aqic_resources(q);
	q->matrix_mdev = NULL;
	return status;
}
186 
187 /**
188  * vfio_ap_validate_nib - validate a notification indicator byte (nib) address.
189  *
190  * @vcpu: the object representing the vcpu executing the PQAP(AQIC) instruction.
191  * @nib: the location for storing the nib address.
192  *
193  * When the PQAP(AQIC) instruction is executed, general register 2 contains the
194  * address of the notification indicator byte (nib) used for IRQ notification.
195  * This function parses and validates the nib from gr2.
196  *
197  * Return: returns zero if the nib address is a valid; otherwise, returns
198  *	   -EINVAL.
199  */
200 static int vfio_ap_validate_nib(struct kvm_vcpu *vcpu, dma_addr_t *nib)
201 {
202 	*nib = vcpu->run->s.regs.gprs[2];
203 
204 	if (kvm_is_error_hva(gfn_to_hva(vcpu->kvm, *nib >> PAGE_SHIFT)))
205 		return -EINVAL;
206 
207 	return 0;
208 }
209 
210 /**
211  * vfio_ap_irq_enable - Enable Interruption for a APQN
212  *
213  * @q:	 the vfio_ap_queue holding AQIC parameters
214  * @isc: the guest ISC to register with the GIB interface
215  * @vcpu: the vcpu object containing the registers specifying the parameters
216  *	  passed to the PQAP(AQIC) instruction.
217  *
218  * Pin the NIB saved in *q
219  * Register the guest ISC to GIB interface and retrieve the
220  * host ISC to issue the host side PQAP/AQIC
221  *
222  * Response.status may be set to AP_RESPONSE_INVALID_ADDRESS in case the
223  * vfio_pin_pages failed.
224  *
225  * Otherwise return the ap_queue_status returned by the ap_aqic(),
226  * all retry handling will be done by the guest.
227  *
228  * Return: &struct ap_queue_status
229  */
230 static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q,
231 						 int isc,
232 						 struct kvm_vcpu *vcpu)
233 {
234 	struct ap_qirq_ctrl aqic_gisa = {};
235 	struct ap_queue_status status = {};
236 	struct kvm_s390_gisa *gisa;
237 	struct page *h_page;
238 	int nisc;
239 	struct kvm *kvm;
240 	phys_addr_t h_nib;
241 	dma_addr_t nib;
242 	int ret;
243 
244 	/* Verify that the notification indicator byte address is valid */
245 	if (vfio_ap_validate_nib(vcpu, &nib)) {
246 		VFIO_AP_DBF_WARN("%s: invalid NIB address: nib=%pad, apqn=%#04x\n",
247 				 __func__, &nib, q->apqn);
248 
249 		status.response_code = AP_RESPONSE_INVALID_ADDRESS;
250 		return status;
251 	}
252 
253 	ret = vfio_pin_pages(&q->matrix_mdev->vdev, nib, 1,
254 			     IOMMU_READ | IOMMU_WRITE, &h_page);
255 	switch (ret) {
256 	case 1:
257 		break;
258 	default:
259 		VFIO_AP_DBF_WARN("%s: vfio_pin_pages failed: rc=%d,"
260 				 "nib=%pad, apqn=%#04x\n",
261 				 __func__, ret, &nib, q->apqn);
262 
263 		status.response_code = AP_RESPONSE_INVALID_ADDRESS;
264 		return status;
265 	}
266 
267 	kvm = q->matrix_mdev->kvm;
268 	gisa = kvm->arch.gisa_int.origin;
269 
270 	h_nib = page_to_phys(h_page) | (nib & ~PAGE_MASK);
271 	aqic_gisa.gisc = isc;
272 
273 	nisc = kvm_s390_gisc_register(kvm, isc);
274 	if (nisc < 0) {
275 		VFIO_AP_DBF_WARN("%s: gisc registration failed: nisc=%d, isc=%d, apqn=%#04x\n",
276 				 __func__, nisc, isc, q->apqn);
277 
278 		status.response_code = AP_RESPONSE_INVALID_GISA;
279 		return status;
280 	}
281 
282 	aqic_gisa.isc = nisc;
283 	aqic_gisa.ir = 1;
284 	aqic_gisa.gisa = (uint64_t)gisa >> 4;
285 
286 	status = ap_aqic(q->apqn, aqic_gisa, h_nib);
287 	switch (status.response_code) {
288 	case AP_RESPONSE_NORMAL:
289 		/* See if we did clear older IRQ configuration */
290 		vfio_ap_free_aqic_resources(q);
291 		q->saved_iova = nib;
292 		q->saved_isc = isc;
293 		break;
294 	case AP_RESPONSE_OTHERWISE_CHANGED:
295 		/* We could not modify IRQ setings: clear new configuration */
296 		vfio_unpin_pages(&q->matrix_mdev->vdev, nib, 1);
297 		kvm_s390_gisc_unregister(kvm, isc);
298 		break;
299 	default:
300 		pr_warn("%s: apqn %04x: response: %02x\n", __func__, q->apqn,
301 			status.response_code);
302 		vfio_ap_irq_disable(q);
303 		break;
304 	}
305 
306 	if (status.response_code != AP_RESPONSE_NORMAL) {
307 		VFIO_AP_DBF_WARN("%s: PQAP(AQIC) failed with status=%#02x: "
308 				 "zone=%#x, ir=%#x, gisc=%#x, f=%#x,"
309 				 "gisa=%#x, isc=%#x, apqn=%#04x\n",
310 				 __func__, status.response_code,
311 				 aqic_gisa.zone, aqic_gisa.ir, aqic_gisa.gisc,
312 				 aqic_gisa.gf, aqic_gisa.gisa, aqic_gisa.isc,
313 				 q->apqn);
314 	}
315 
316 	return status;
317 }
318 
319 /**
320  * vfio_ap_le_guid_to_be_uuid - convert a little endian guid array into an array
321  *				of big endian elements that can be passed by
322  *				value to an s390dbf sprintf event function to
323  *				format a UUID string.
324  *
325  * @guid: the object containing the little endian guid
326  * @uuid: a six-element array of long values that can be passed by value as
327  *	  arguments for a formatting string specifying a UUID.
328  *
329  * The S390 Debug Feature (s390dbf) allows the use of "%s" in the sprintf
330  * event functions if the memory for the passed string is available as long as
331  * the debug feature exists. Since a mediated device can be removed at any
 * time, its name cannot be used because %s passes a reference to the string
 * in memory, and the reference will go stale once the device is removed.
334  *
335  * The s390dbf string formatting function allows a maximum of 9 arguments for a
336  * message to be displayed in the 'sprintf' view. In order to use the bytes
337  * comprising the mediated device's UUID to display the mediated device name,
338  * they will have to be converted into an array whose elements can be passed by
339  * value to sprintf. For example:
340  *
341  * guid array: { 83, 78, 17, 62, bb, f1, f0, 47, 91, 4d, 32, a2, 2e, 3a, 88, 04 }
342  * mdev name: 62177883-f1bb-47f0-914d-32a22e3a8804
343  * array returned: { 62177883, f1bb, 47f0, 914d, 32a2, 2e3a8804 }
344  * formatting string: "%08lx-%04lx-%04lx-%04lx-%02lx%04lx"
345  */
static void vfio_ap_le_guid_to_be_uuid(guid_t *guid, unsigned long *uuid)
{
	/*
	 * The input guid is ordered in little endian, so it needs to be
	 * reordered for displaying a UUID as a string. This specifies the
	 * guid indices in proper order.
	 */
	uuid[0] = le32_to_cpup((__le32 *)guid);
	uuid[1] = le16_to_cpup((__le16 *)&guid->b[4]);
	uuid[2] = le16_to_cpup((__le16 *)&guid->b[6]);
	/*
	 * The trailing bytes of the guid are already stored in display
	 * order, so they are read with native loads (big-endian on s390)
	 * rather than converted from little endian.
	 */
	uuid[3] = *((__u16 *)&guid->b[8]);
	uuid[4] = *((__u16 *)&guid->b[10]);
	uuid[5] = *((__u32 *)&guid->b[12]);
}
360 
361 /**
362  * handle_pqap - PQAP instruction callback
363  *
364  * @vcpu: The vcpu on which we received the PQAP instruction
365  *
366  * Get the general register contents to initialize internal variables.
367  * REG[0]: APQN
368  * REG[1]: IR and ISC
369  * REG[2]: NIB
370  *
371  * Response.status may be set to following Response Code:
372  * - AP_RESPONSE_Q_NOT_AVAIL: if the queue is not available
373  * - AP_RESPONSE_DECONFIGURED: if the queue is not configured
 * - AP_RESPONSE_NORMAL (0) : in case of success
375  *   Check vfio_ap_setirq() and vfio_ap_clrirq() for other possible RC.
376  * We take the matrix_dev lock to ensure serialization on queues and
377  * mediated device access.
378  *
379  * Return: 0 if we could handle the request inside KVM.
380  * Otherwise, returns -EOPNOTSUPP to let QEMU handle the fault.
381  */
static int handle_pqap(struct kvm_vcpu *vcpu)
{
	uint64_t status;
	uint16_t apqn;
	unsigned long uuid[6];
	struct vfio_ap_queue *q;
	/* Default reply if we bail out before reaching the queue */
	struct ap_queue_status qstatus = {
			       .response_code = AP_RESPONSE_Q_NOT_AVAIL, };
	struct ap_matrix_mdev *matrix_mdev;

	/* GR0 bits 48-63 hold the APQN targeted by the instruction */
	apqn = vcpu->run->s.regs.gprs[0] & 0xffff;

	/* If we do not use the AIV facility just go to userland */
	if (!(vcpu->arch.sie_block->eca & ECA_AIV)) {
		VFIO_AP_DBF_WARN("%s: AIV facility not installed: apqn=0x%04x, eca=0x%04x\n",
				 __func__, apqn, vcpu->arch.sie_block->eca);

		return -EOPNOTSUPP;
	}

	/* Serialize against mdev configuration and open/release */
	mutex_lock(&matrix_dev->lock);
	if (!vcpu->kvm->arch.crypto.pqap_hook) {
		VFIO_AP_DBF_WARN("%s: PQAP(AQIC) hook not registered with the vfio_ap driver: apqn=0x%04x\n",
				 __func__, apqn);
		goto out_unlock;
	}

	matrix_mdev = container_of(vcpu->kvm->arch.crypto.pqap_hook,
				   struct ap_matrix_mdev, pqap_hook);

	/* If there is no guest using the mdev, there is nothing to do */
	if (!matrix_mdev->kvm) {
		vfio_ap_le_guid_to_be_uuid(&matrix_mdev->mdev->uuid, uuid);
		VFIO_AP_DBF_WARN("%s: mdev %08lx-%04lx-%04lx-%04lx-%04lx%08lx not in use: apqn=0x%04x\n",
				 __func__, uuid[0],  uuid[1], uuid[2],
				 uuid[3], uuid[4], uuid[5], apqn);
		goto out_unlock;
	}

	q = vfio_ap_get_queue(matrix_mdev, apqn);
	if (!q) {
		VFIO_AP_DBF_WARN("%s: Queue %02x.%04x not bound to the vfio_ap driver\n",
				 __func__, AP_QID_CARD(apqn),
				 AP_QID_QUEUE(apqn));
		goto out_unlock;
	}

	status = vcpu->run->s.regs.gprs[1];

	/* If IR bit(16) is set we enable the interrupt */
	if ((status >> (63 - 16)) & 0x01)
		qstatus = vfio_ap_irq_enable(q, status & 0x07, vcpu);
	else
		qstatus = vfio_ap_irq_disable(q);

out_unlock:
	/* Return the AP queue status to the guest in the high word of GR1 */
	memcpy(&vcpu->run->s.regs.gprs[1], &qstatus, sizeof(qstatus));
	vcpu->run->s.regs.gprs[1] >>= 32;
	mutex_unlock(&matrix_dev->lock);
	return 0;
}
443 
444 static void vfio_ap_matrix_init(struct ap_config_info *info,
445 				struct ap_matrix *matrix)
446 {
447 	matrix->apm_max = info->apxa ? info->Na : 63;
448 	matrix->aqm_max = info->apxa ? info->Nd : 15;
449 	matrix->adm_max = info->apxa ? info->Nd : 15;
450 }
451 
/* Create and register the vfio device state backing a new matrix mdev. */
static int vfio_ap_mdev_probe(struct mdev_device *mdev)
{
	struct ap_matrix_mdev *matrix_mdev;
	int ret;

	/* Atomically claim one of the limited mdev instance slots */
	if ((atomic_dec_if_positive(&matrix_dev->available_instances) < 0))
		return -EPERM;

	matrix_mdev = kzalloc(sizeof(*matrix_mdev), GFP_KERNEL);
	if (!matrix_mdev) {
		ret = -ENOMEM;
		goto err_dec_available;
	}
	vfio_init_group_dev(&matrix_mdev->vdev, &mdev->dev,
			    &vfio_ap_matrix_dev_ops);

	matrix_mdev->mdev = mdev;
	vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->matrix);
	/* Install the PQAP(AQIC) intercept handler for this mdev */
	matrix_mdev->pqap_hook = handle_pqap;
	mutex_lock(&matrix_dev->lock);
	list_add(&matrix_mdev->node, &matrix_dev->mdev_list);
	mutex_unlock(&matrix_dev->lock);

	ret = vfio_register_emulated_iommu_dev(&matrix_mdev->vdev);
	if (ret)
		goto err_list;
	dev_set_drvdata(&mdev->dev, matrix_mdev);
	return 0;

err_list:
	/* Unwind in reverse order: list, group dev, allocation, slot */
	mutex_lock(&matrix_dev->lock);
	list_del(&matrix_mdev->node);
	mutex_unlock(&matrix_dev->lock);
	vfio_uninit_group_dev(&matrix_mdev->vdev);
	kfree(matrix_mdev);
err_dec_available:
	atomic_inc(&matrix_dev->available_instances);
	return ret;
}
491 
/* Tear down a matrix mdev: mirror of vfio_ap_mdev_probe(). */
static void vfio_ap_mdev_remove(struct mdev_device *mdev)
{
	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(&mdev->dev);

	/* Unregister first so no new opens can race the teardown below */
	vfio_unregister_group_dev(&matrix_mdev->vdev);

	mutex_lock(&matrix_dev->lock);
	vfio_ap_mdev_reset_queues(matrix_mdev);
	list_del(&matrix_mdev->node);
	mutex_unlock(&matrix_dev->lock);
	vfio_uninit_group_dev(&matrix_mdev->vdev);
	kfree(matrix_mdev);
	/* Release the instance slot claimed at probe time */
	atomic_inc(&matrix_dev->available_instances);
}
506 
507 static ssize_t name_show(struct mdev_type *mtype,
508 			 struct mdev_type_attribute *attr, char *buf)
509 {
510 	return sprintf(buf, "%s\n", VFIO_AP_MDEV_NAME_HWVIRT);
511 }
512 
513 static MDEV_TYPE_ATTR_RO(name);
514 
515 static ssize_t available_instances_show(struct mdev_type *mtype,
516 					struct mdev_type_attribute *attr,
517 					char *buf)
518 {
519 	return sprintf(buf, "%d\n",
520 		       atomic_read(&matrix_dev->available_instances));
521 }
522 
523 static MDEV_TYPE_ATTR_RO(available_instances);
524 
525 static ssize_t device_api_show(struct mdev_type *mtype,
526 			       struct mdev_type_attribute *attr, char *buf)
527 {
528 	return sprintf(buf, "%s\n", VFIO_DEVICE_API_AP_STRING);
529 }
530 
531 static MDEV_TYPE_ATTR_RO(device_api);
532 
/* sysfs attributes exposed for the mdev type directory. */
static struct attribute *vfio_ap_mdev_type_attrs[] = {
	&mdev_type_attr_name.attr,
	&mdev_type_attr_device_api.attr,
	&mdev_type_attr_available_instances.attr,
	NULL,
};

/* The single supported mdev type: full AP passthrough. */
static struct attribute_group vfio_ap_mdev_hwvirt_type_group = {
	.name = VFIO_AP_MDEV_TYPE_HWVIRT,
	.attrs = vfio_ap_mdev_type_attrs,
};

/* NULL-terminated list of type groups registered with the mdev core. */
static struct attribute_group *vfio_ap_mdev_type_groups[] = {
	&vfio_ap_mdev_hwvirt_type_group,
	NULL,
};
549 
/*
 * Parameter block for vfio_ap_has_queue(): identifies the APID and/or APQI
 * to search for; the callback sets @reserved on a match.
 */
struct vfio_ap_queue_reserved {
	unsigned long *apid;	/* adapter ID to match, or NULL */
	unsigned long *apqi;	/* queue index to match, or NULL */
	bool reserved;		/* out: true if a matching queue was found */
};
555 
556 /**
 * vfio_ap_has_queue - determines if an AP queue device matches the target in @data
558  *
559  * @dev: an AP queue device
560  * @data: a struct vfio_ap_queue_reserved reference
561  *
562  * Flags whether the AP queue device (@dev) has a queue ID containing the APQN,
563  * apid or apqi specified in @data:
564  *
565  * - If @data contains both an apid and apqi value, then @data will be flagged
566  *   as reserved if the APID and APQI fields for the AP queue device matches
567  *
568  * - If @data contains only an apid value, @data will be flagged as
569  *   reserved if the APID field in the AP queue device matches
570  *
571  * - If @data contains only an apqi value, @data will be flagged as
572  *   reserved if the APQI field in the AP queue device matches
573  *
574  * Return: 0 to indicate the input to function succeeded. Returns -EINVAL if
575  * @data does not contain either an apid or apqi.
576  */
577 static int vfio_ap_has_queue(struct device *dev, void *data)
578 {
579 	struct vfio_ap_queue_reserved *qres = data;
580 	struct ap_queue *ap_queue = to_ap_queue(dev);
581 	ap_qid_t qid;
582 	unsigned long id;
583 
584 	if (qres->apid && qres->apqi) {
585 		qid = AP_MKQID(*qres->apid, *qres->apqi);
586 		if (qid == ap_queue->qid)
587 			qres->reserved = true;
588 	} else if (qres->apid && !qres->apqi) {
589 		id = AP_QID_CARD(ap_queue->qid);
590 		if (id == *qres->apid)
591 			qres->reserved = true;
592 	} else if (!qres->apid && qres->apqi) {
593 		id = AP_QID_QUEUE(ap_queue->qid);
594 		if (id == *qres->apqi)
595 			qres->reserved = true;
596 	} else {
597 		return -EINVAL;
598 	}
599 
600 	return 0;
601 }
602 
603 /**
604  * vfio_ap_verify_queue_reserved - verifies that the AP queue containing
 * @apid or @apqi is reserved
606  *
607  * @apid: an AP adapter ID
608  * @apqi: an AP queue index
609  *
610  * Verifies that the AP queue with @apid/@apqi is reserved by the VFIO AP device
611  * driver according to the following rules:
612  *
613  * - If both @apid and @apqi are not NULL, then there must be an AP queue
614  *   device bound to the vfio_ap driver with the APQN identified by @apid and
615  *   @apqi
616  *
617  * - If only @apid is not NULL, then there must be an AP queue device bound
618  *   to the vfio_ap driver with an APQN containing @apid
619  *
620  * - If only @apqi is not NULL, then there must be an AP queue device bound
621  *   to the vfio_ap driver with an APQN containing @apqi
622  *
623  * Return: 0 if the AP queue is reserved; otherwise, returns -EADDRNOTAVAIL.
624  */
625 static int vfio_ap_verify_queue_reserved(unsigned long *apid,
626 					 unsigned long *apqi)
627 {
628 	int ret;
629 	struct vfio_ap_queue_reserved qres;
630 
631 	qres.apid = apid;
632 	qres.apqi = apqi;
633 	qres.reserved = false;
634 
635 	ret = driver_for_each_device(&matrix_dev->vfio_ap_drv->driver, NULL,
636 				     &qres, vfio_ap_has_queue);
637 	if (ret)
638 		return ret;
639 
640 	if (qres.reserved)
641 		return 0;
642 
643 	return -EADDRNOTAVAIL;
644 }
645 
/*
 * Verify that every APQN formed from @apid and the mdev's assigned APQIs is
 * bound to the vfio_ap driver; if no APQIs are assigned yet, verify that
 * some bound queue contains @apid.
 */
static int
vfio_ap_mdev_verify_queues_reserved_for_apid(struct ap_matrix_mdev *matrix_mdev,
					     unsigned long apid)
{
	int ret;
	unsigned long apqi;
	unsigned long nbits = matrix_mdev->matrix.aqm_max + 1;

	/* No domains assigned yet: check the adapter alone */
	if (find_first_bit_inv(matrix_mdev->matrix.aqm, nbits) >= nbits)
		return vfio_ap_verify_queue_reserved(&apid, NULL);

	for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, nbits) {
		ret = vfio_ap_verify_queue_reserved(&apid, &apqi);
		if (ret)
			return ret;
	}

	return 0;
}
665 
666 /**
 * vfio_ap_mdev_verify_no_sharing - verifies that the mdev's AP matrix is not shared
668  *
669  * @matrix_mdev: the mediated matrix device
670  *
671  * Verifies that the APQNs derived from the cross product of the AP adapter IDs
672  * and AP queue indexes comprising the AP matrix are not configured for another
673  * mediated device. AP queue sharing is not allowed.
674  *
675  * Return: 0 if the APQNs are not shared; otherwise returns -EADDRINUSE.
676  */
static int vfio_ap_mdev_verify_no_sharing(struct ap_matrix_mdev *matrix_mdev)
{
	struct ap_matrix_mdev *lstdev;
	DECLARE_BITMAP(apm, AP_DEVICES);
	DECLARE_BITMAP(aqm, AP_DOMAINS);

	list_for_each_entry(lstdev, &matrix_dev->mdev_list, node) {
		/* Skip the mdev being verified */
		if (matrix_mdev == lstdev)
			continue;

		memset(apm, 0, sizeof(apm));
		memset(aqm, 0, sizeof(aqm));

		/*
		 * We work on full longs, as we can only exclude the leftover
		 * bits in non-inverse order. The leftover is all zeros.
		 */
		if (!bitmap_and(apm, matrix_mdev->matrix.apm,
				lstdev->matrix.apm, AP_DEVICES))
			continue;

		if (!bitmap_and(aqm, matrix_mdev->matrix.aqm,
				lstdev->matrix.aqm, AP_DOMAINS))
			continue;

		/* Both an adapter and a domain overlap: an APQN is shared */
		return -EADDRINUSE;
	}

	return 0;
}
707 
708 /**
709  * assign_adapter_store - parses the APID from @buf and sets the
710  * corresponding bit in the mediated matrix device's APM
711  *
712  * @dev:	the matrix device
713  * @attr:	the mediated matrix device's assign_adapter attribute
714  * @buf:	a buffer containing the AP adapter number (APID) to
715  *		be assigned
716  * @count:	the number of bytes in @buf
717  *
718  * Return: the number of bytes processed if the APID is valid; otherwise,
719  * returns one of the following errors:
720  *
721  *	1. -EINVAL
722  *	   The APID is not a valid number
723  *
724  *	2. -ENODEV
725  *	   The APID exceeds the maximum value configured for the system
726  *
727  *	3. -EADDRNOTAVAIL
728  *	   An APQN derived from the cross product of the APID being assigned
729  *	   and the APQIs previously assigned is not bound to the vfio_ap device
730  *	   driver; or, if no APQIs have yet been assigned, the APID is not
731  *	   contained in an APQN bound to the vfio_ap device driver.
732  *
733  *	4. -EADDRINUSE
734  *	   An APQN derived from the cross product of the APID being assigned
735  *	   and the APQIs previously assigned is being used by another mediated
736  *	   matrix device
737  */
static ssize_t assign_adapter_store(struct device *dev,
				    struct device_attribute *attr,
				    const char *buf, size_t count)
{
	int ret;
	unsigned long apid;
	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);

	/* The lock serializes against concurrent (un)assignment and open */
	mutex_lock(&matrix_dev->lock);

	/* If the KVM guest is running, disallow assignment of adapter */
	if (matrix_mdev->kvm) {
		ret = -EBUSY;
		goto done;
	}

	ret = kstrtoul(buf, 0, &apid);
	if (ret)
		goto done;

	if (apid > matrix_mdev->matrix.apm_max) {
		ret = -ENODEV;
		goto done;
	}

	/*
	 * Set the bit in the AP mask (APM) corresponding to the AP adapter
	 * number (APID). The bits in the mask, from most significant to least
	 * significant bit, correspond to APIDs 0-255.
	 */
	ret = vfio_ap_mdev_verify_queues_reserved_for_apid(matrix_mdev, apid);
	if (ret)
		goto done;

	set_bit_inv(apid, matrix_mdev->matrix.apm);

	/* Verify under the same lock that no other mdev claims the APQNs */
	ret = vfio_ap_mdev_verify_no_sharing(matrix_mdev);
	if (ret)
		goto share_err;

	ret = count;
	goto done;

share_err:
	/* Roll back the assignment made above */
	clear_bit_inv(apid, matrix_mdev->matrix.apm);
done:
	mutex_unlock(&matrix_dev->lock);

	return ret;
}
788 static DEVICE_ATTR_WO(assign_adapter);
789 
790 /**
791  * unassign_adapter_store - parses the APID from @buf and clears the
792  * corresponding bit in the mediated matrix device's APM
793  *
794  * @dev:	the matrix device
795  * @attr:	the mediated matrix device's unassign_adapter attribute
796  * @buf:	a buffer containing the adapter number (APID) to be unassigned
797  * @count:	the number of bytes in @buf
798  *
799  * Return: the number of bytes processed if the APID is valid; otherwise,
800  * returns one of the following errors:
801  *	-EINVAL if the APID is not a number
 *	-ENODEV if the APID exceeds the maximum value configured for the
803  *		system
804  */
805 static ssize_t unassign_adapter_store(struct device *dev,
806 				      struct device_attribute *attr,
807 				      const char *buf, size_t count)
808 {
809 	int ret;
810 	unsigned long apid;
811 	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
812 
813 	mutex_lock(&matrix_dev->lock);
814 
815 	/* If the KVM guest is running, disallow unassignment of adapter */
816 	if (matrix_mdev->kvm) {
817 		ret = -EBUSY;
818 		goto done;
819 	}
820 
821 	ret = kstrtoul(buf, 0, &apid);
822 	if (ret)
823 		goto done;
824 
825 	if (apid > matrix_mdev->matrix.apm_max) {
826 		ret = -ENODEV;
827 		goto done;
828 	}
829 
830 	clear_bit_inv((unsigned long)apid, matrix_mdev->matrix.apm);
831 	ret = count;
832 done:
833 	mutex_unlock(&matrix_dev->lock);
834 	return ret;
835 }
836 static DEVICE_ATTR_WO(unassign_adapter);
837 
/*
 * Verify that every APQN formed from @apqi and the mdev's assigned APIDs is
 * bound to the vfio_ap driver; if no APIDs are assigned yet, verify that
 * some bound queue contains @apqi.
 */
static int
vfio_ap_mdev_verify_queues_reserved_for_apqi(struct ap_matrix_mdev *matrix_mdev,
					     unsigned long apqi)
{
	int ret;
	unsigned long apid;
	unsigned long nbits = matrix_mdev->matrix.apm_max + 1;

	/* No adapters assigned yet: check the domain alone */
	if (find_first_bit_inv(matrix_mdev->matrix.apm, nbits) >= nbits)
		return vfio_ap_verify_queue_reserved(NULL, &apqi);

	for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, nbits) {
		ret = vfio_ap_verify_queue_reserved(&apid, &apqi);
		if (ret)
			return ret;
	}

	return 0;
}
857 
858 /**
859  * assign_domain_store - parses the APQI from @buf and sets the
860  * corresponding bit in the mediated matrix device's AQM
861  *
862  * @dev:	the matrix device
863  * @attr:	the mediated matrix device's assign_domain attribute
864  * @buf:	a buffer containing the AP queue index (APQI) of the domain to
865  *		be assigned
866  * @count:	the number of bytes in @buf
867  *
868  * Return: the number of bytes processed if the APQI is valid; otherwise returns
869  * one of the following errors:
870  *
871  *	1. -EINVAL
872  *	   The APQI is not a valid number
873  *
874  *	2. -ENODEV
875  *	   The APQI exceeds the maximum value configured for the system
876  *
877  *	3. -EADDRNOTAVAIL
878  *	   An APQN derived from the cross product of the APQI being assigned
879  *	   and the APIDs previously assigned is not bound to the vfio_ap device
880  *	   driver; or, if no APIDs have yet been assigned, the APQI is not
881  *	   contained in an APQN bound to the vfio_ap device driver.
882  *
883  *	4. -EADDRINUSE
884  *	   An APQN derived from the cross product of the APQI being assigned
885  *	   and the APIDs previously assigned is being used by another mediated
886  *	   matrix device
887  */
static ssize_t assign_domain_store(struct device *dev,
				   struct device_attribute *attr,
				   const char *buf, size_t count)
{
	int ret;
	unsigned long apqi;
	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
	unsigned long max_apqi = matrix_mdev->matrix.aqm_max;

	/* The lock serializes against concurrent (un)assignment and open */
	mutex_lock(&matrix_dev->lock);

	/* If the KVM guest is running, disallow assignment of domain */
	if (matrix_mdev->kvm) {
		ret = -EBUSY;
		goto done;
	}

	ret = kstrtoul(buf, 0, &apqi);
	if (ret)
		goto done;
	if (apqi > max_apqi) {
		ret = -ENODEV;
		goto done;
	}

	ret = vfio_ap_mdev_verify_queues_reserved_for_apqi(matrix_mdev, apqi);
	if (ret)
		goto done;

	/* Set the domain's bit in the inverse-ordered AP queue mask */
	set_bit_inv(apqi, matrix_mdev->matrix.aqm);

	/* Verify under the same lock that no other mdev claims the APQNs */
	ret = vfio_ap_mdev_verify_no_sharing(matrix_mdev);
	if (ret)
		goto share_err;

	ret = count;
	goto done;

share_err:
	/* Roll back the assignment made above */
	clear_bit_inv(apqi, matrix_mdev->matrix.aqm);
done:
	mutex_unlock(&matrix_dev->lock);

	return ret;
}
933 static DEVICE_ATTR_WO(assign_domain);
934 
935 
936 /**
937  * unassign_domain_store - parses the APQI from @buf and clears the
938  * corresponding bit in the mediated matrix device's AQM
939  *
940  * @dev:	the matrix device
941  * @attr:	the mediated matrix device's unassign_domain attribute
942  * @buf:	a buffer containing the AP queue index (APQI) of the domain to
943  *		be unassigned
944  * @count:	the number of bytes in @buf
945  *
946  * Return: the number of bytes processed if the APQI is valid; otherwise,
947  * returns one of the following errors:
948  *	-EINVAL if the APQI is not a number
949  *	-ENODEV if the APQI exceeds the maximum value configured for the system
950  */
951 static ssize_t unassign_domain_store(struct device *dev,
952 				     struct device_attribute *attr,
953 				     const char *buf, size_t count)
954 {
955 	int ret;
956 	unsigned long apqi;
957 	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
958 
959 	mutex_lock(&matrix_dev->lock);
960 
961 	/* If the KVM guest is running, disallow unassignment of domain */
962 	if (matrix_mdev->kvm) {
963 		ret = -EBUSY;
964 		goto done;
965 	}
966 
967 	ret = kstrtoul(buf, 0, &apqi);
968 	if (ret)
969 		goto done;
970 
971 	if (apqi > matrix_mdev->matrix.aqm_max) {
972 		ret = -ENODEV;
973 		goto done;
974 	}
975 
976 	clear_bit_inv((unsigned long)apqi, matrix_mdev->matrix.aqm);
977 	ret = count;
978 
979 done:
980 	mutex_unlock(&matrix_dev->lock);
981 	return ret;
982 }
983 static DEVICE_ATTR_WO(unassign_domain);
984 
985 /**
986  * assign_control_domain_store - parses the domain ID from @buf and sets
987  * the corresponding bit in the mediated matrix device's ADM
988  *
989  * @dev:	the matrix device
990  * @attr:	the mediated matrix device's assign_control_domain attribute
991  * @buf:	a buffer containing the domain ID to be assigned
992  * @count:	the number of bytes in @buf
993  *
994  * Return: the number of bytes processed if the domain ID is valid; otherwise,
995  * returns one of the following errors:
996  *	-EINVAL if the ID is not a number
997  *	-ENODEV if the ID exceeds the maximum value configured for the system
998  */
999 static ssize_t assign_control_domain_store(struct device *dev,
1000 					   struct device_attribute *attr,
1001 					   const char *buf, size_t count)
1002 {
1003 	int ret;
1004 	unsigned long id;
1005 	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
1006 
1007 	mutex_lock(&matrix_dev->lock);
1008 
1009 	/* If the KVM guest is running, disallow assignment of control domain */
1010 	if (matrix_mdev->kvm) {
1011 		ret = -EBUSY;
1012 		goto done;
1013 	}
1014 
1015 	ret = kstrtoul(buf, 0, &id);
1016 	if (ret)
1017 		goto done;
1018 
1019 	if (id > matrix_mdev->matrix.adm_max) {
1020 		ret = -ENODEV;
1021 		goto done;
1022 	}
1023 
1024 	/* Set the bit in the ADM (bitmask) corresponding to the AP control
1025 	 * domain number (id). The bits in the mask, from most significant to
1026 	 * least significant, correspond to IDs 0 up to the one less than the
1027 	 * number of control domains that can be assigned.
1028 	 */
1029 	set_bit_inv(id, matrix_mdev->matrix.adm);
1030 	ret = count;
1031 done:
1032 	mutex_unlock(&matrix_dev->lock);
1033 	return ret;
1034 }
1035 static DEVICE_ATTR_WO(assign_control_domain);
1036 
1037 /**
1038  * unassign_control_domain_store - parses the domain ID from @buf and
1039  * clears the corresponding bit in the mediated matrix device's ADM
1040  *
1041  * @dev:	the matrix device
1042  * @attr:	the mediated matrix device's unassign_control_domain attribute
1043  * @buf:	a buffer containing the domain ID to be unassigned
1044  * @count:	the number of bytes in @buf
1045  *
1046  * Return: the number of bytes processed if the domain ID is valid; otherwise,
1047  * returns one of the following errors:
1048  *	-EINVAL if the ID is not a number
1049  *	-ENODEV if the ID exceeds the maximum value configured for the system
1050  */
1051 static ssize_t unassign_control_domain_store(struct device *dev,
1052 					     struct device_attribute *attr,
1053 					     const char *buf, size_t count)
1054 {
1055 	int ret;
1056 	unsigned long domid;
1057 	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
1058 	unsigned long max_domid =  matrix_mdev->matrix.adm_max;
1059 
1060 	mutex_lock(&matrix_dev->lock);
1061 
1062 	/* If a KVM guest is running, disallow unassignment of control domain */
1063 	if (matrix_mdev->kvm) {
1064 		ret = -EBUSY;
1065 		goto done;
1066 	}
1067 
1068 	ret = kstrtoul(buf, 0, &domid);
1069 	if (ret)
1070 		goto done;
1071 	if (domid > max_domid) {
1072 		ret = -ENODEV;
1073 		goto done;
1074 	}
1075 
1076 	clear_bit_inv(domid, matrix_mdev->matrix.adm);
1077 	ret = count;
1078 done:
1079 	mutex_unlock(&matrix_dev->lock);
1080 	return ret;
1081 }
1082 static DEVICE_ATTR_WO(unassign_control_domain);
1083 
1084 static ssize_t control_domains_show(struct device *dev,
1085 				    struct device_attribute *dev_attr,
1086 				    char *buf)
1087 {
1088 	unsigned long id;
1089 	int nchars = 0;
1090 	int n;
1091 	char *bufpos = buf;
1092 	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
1093 	unsigned long max_domid = matrix_mdev->matrix.adm_max;
1094 
1095 	mutex_lock(&matrix_dev->lock);
1096 	for_each_set_bit_inv(id, matrix_mdev->matrix.adm, max_domid + 1) {
1097 		n = sprintf(bufpos, "%04lx\n", id);
1098 		bufpos += n;
1099 		nchars += n;
1100 	}
1101 	mutex_unlock(&matrix_dev->lock);
1102 
1103 	return nchars;
1104 }
1105 static DEVICE_ATTR_RO(control_domains);
1106 
1107 static ssize_t matrix_show(struct device *dev, struct device_attribute *attr,
1108 			   char *buf)
1109 {
1110 	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
1111 	char *bufpos = buf;
1112 	unsigned long apid;
1113 	unsigned long apqi;
1114 	unsigned long apid1;
1115 	unsigned long apqi1;
1116 	unsigned long napm_bits = matrix_mdev->matrix.apm_max + 1;
1117 	unsigned long naqm_bits = matrix_mdev->matrix.aqm_max + 1;
1118 	int nchars = 0;
1119 	int n;
1120 
1121 	apid1 = find_first_bit_inv(matrix_mdev->matrix.apm, napm_bits);
1122 	apqi1 = find_first_bit_inv(matrix_mdev->matrix.aqm, naqm_bits);
1123 
1124 	mutex_lock(&matrix_dev->lock);
1125 
1126 	if ((apid1 < napm_bits) && (apqi1 < naqm_bits)) {
1127 		for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, napm_bits) {
1128 			for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm,
1129 					     naqm_bits) {
1130 				n = sprintf(bufpos, "%02lx.%04lx\n", apid,
1131 					    apqi);
1132 				bufpos += n;
1133 				nchars += n;
1134 			}
1135 		}
1136 	} else if (apid1 < napm_bits) {
1137 		for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, napm_bits) {
1138 			n = sprintf(bufpos, "%02lx.\n", apid);
1139 			bufpos += n;
1140 			nchars += n;
1141 		}
1142 	} else if (apqi1 < naqm_bits) {
1143 		for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, naqm_bits) {
1144 			n = sprintf(bufpos, ".%04lx\n", apqi);
1145 			bufpos += n;
1146 			nchars += n;
1147 		}
1148 	}
1149 
1150 	mutex_unlock(&matrix_dev->lock);
1151 
1152 	return nchars;
1153 }
1154 static DEVICE_ATTR_RO(matrix);
1155 
/* sysfs attributes exported for each mediated matrix device. */
static struct attribute *vfio_ap_mdev_attrs[] = {
	&dev_attr_assign_adapter.attr,
	&dev_attr_unassign_adapter.attr,
	&dev_attr_assign_domain.attr,
	&dev_attr_unassign_domain.attr,
	&dev_attr_assign_control_domain.attr,
	&dev_attr_unassign_control_domain.attr,
	&dev_attr_control_domains.attr,
	&dev_attr_matrix.attr,
	NULL,
};

static struct attribute_group vfio_ap_mdev_attr_group = {
	.attrs = vfio_ap_mdev_attrs
};

/* NULL-terminated list of attribute groups registered via the mdev driver. */
static const struct attribute_group *vfio_ap_mdev_attr_groups[] = {
	&vfio_ap_mdev_attr_group,
	NULL
};
1176 
1177 /**
1178  * vfio_ap_mdev_set_kvm - sets all data for @matrix_mdev that are needed
1179  * to manage AP resources for the guest whose state is represented by @kvm
1180  *
1181  * @matrix_mdev: a mediated matrix device
1182  * @kvm: reference to KVM instance
1183  *
1184  * Return: 0 if no other mediated matrix device has a reference to @kvm;
1185  * otherwise, returns an -EPERM.
1186  */
1187 static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev,
1188 				struct kvm *kvm)
1189 {
1190 	struct ap_matrix_mdev *m;
1191 
1192 	if (kvm->arch.crypto.crycbd) {
1193 		down_write(&kvm->arch.crypto.pqap_hook_rwsem);
1194 		kvm->arch.crypto.pqap_hook = &matrix_mdev->pqap_hook;
1195 		up_write(&kvm->arch.crypto.pqap_hook_rwsem);
1196 
1197 		mutex_lock(&kvm->lock);
1198 		mutex_lock(&matrix_dev->lock);
1199 
1200 		list_for_each_entry(m, &matrix_dev->mdev_list, node) {
1201 			if (m != matrix_mdev && m->kvm == kvm) {
1202 				mutex_unlock(&kvm->lock);
1203 				mutex_unlock(&matrix_dev->lock);
1204 				return -EPERM;
1205 			}
1206 		}
1207 
1208 		kvm_get_kvm(kvm);
1209 		matrix_mdev->kvm = kvm;
1210 		kvm_arch_crypto_set_masks(kvm,
1211 					  matrix_mdev->matrix.apm,
1212 					  matrix_mdev->matrix.aqm,
1213 					  matrix_mdev->matrix.adm);
1214 
1215 		mutex_unlock(&kvm->lock);
1216 		mutex_unlock(&matrix_dev->lock);
1217 	}
1218 
1219 	return 0;
1220 }
1221 
/*
 * vfio_ap_mdev_dma_unmap - VFIO callback invoked when an iova range is
 * unmapped from the guest.
 *
 * Unpins exactly one page at @iova; @length is ignored.
 * NOTE(review): this assumes the driver never pins more than a single page
 * per mapping (e.g. the notification-indicator byte page) — confirm against
 * the pinning code elsewhere in this file.
 */
static void vfio_ap_mdev_dma_unmap(struct vfio_device *vdev, u64 iova,
				   u64 length)
{
	struct ap_matrix_mdev *matrix_mdev =
		container_of(vdev, struct ap_matrix_mdev, vdev);

	vfio_unpin_pages(&matrix_mdev->vdev, iova, 1);
}
1230 
1231 /**
1232  * vfio_ap_mdev_unset_kvm - performs clean-up of resources no longer needed
1233  * by @matrix_mdev.
1234  *
1235  * @matrix_mdev: a matrix mediated device
1236  */
1237 static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev)
1238 {
1239 	struct kvm *kvm = matrix_mdev->kvm;
1240 
1241 	if (kvm && kvm->arch.crypto.crycbd) {
1242 		down_write(&kvm->arch.crypto.pqap_hook_rwsem);
1243 		kvm->arch.crypto.pqap_hook = NULL;
1244 		up_write(&kvm->arch.crypto.pqap_hook_rwsem);
1245 
1246 		mutex_lock(&kvm->lock);
1247 		mutex_lock(&matrix_dev->lock);
1248 
1249 		kvm_arch_crypto_clear_masks(kvm);
1250 		vfio_ap_mdev_reset_queues(matrix_mdev);
1251 		kvm_put_kvm(kvm);
1252 		matrix_mdev->kvm = NULL;
1253 
1254 		mutex_unlock(&kvm->lock);
1255 		mutex_unlock(&matrix_dev->lock);
1256 	}
1257 }
1258 
1259 static struct vfio_ap_queue *vfio_ap_find_queue(int apqn)
1260 {
1261 	struct device *dev;
1262 	struct vfio_ap_queue *q = NULL;
1263 
1264 	dev = driver_find_device(&matrix_dev->vfio_ap_drv->driver, NULL,
1265 				 &apqn, match_apqn);
1266 	if (dev) {
1267 		q = dev_get_drvdata(dev);
1268 		put_device(dev);
1269 	}
1270 
1271 	return q;
1272 }
1273 
/*
 * vfio_ap_mdev_reset_queue - issue a PQAP/ZAPQ to reset the queue @q,
 * retrying up to @retry times (20ms apart) while a reset is in progress,
 * then free the queue's AQIC (interrupt) resources.
 *
 * Returns 0 on success, -EBUSY if the reset could not complete or the queue
 * is not available, or -EIO if ZAPQ returned an unexpected response code.
 */
int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q,
			     unsigned int retry)
{
	struct ap_queue_status status;
	int ret;
	int retry2 = 2;

	/* A NULL queue means there is nothing to reset. */
	if (!q)
		return 0;

retry_zapq:
	status = ap_zapq(q->apqn);
	switch (status.response_code) {
	case AP_RESPONSE_NORMAL:
		ret = 0;
		break;
	case AP_RESPONSE_RESET_IN_PROGRESS:
		/* Retry up to @retry times, 20ms apart, then give up. */
		if (retry--) {
			msleep(20);
			goto retry_zapq;
		}
		ret = -EBUSY;
		break;
	case AP_RESPONSE_Q_NOT_AVAIL:
	case AP_RESPONSE_DECONFIGURED:
	case AP_RESPONSE_CHECKSTOPPED:
		/*
		 * The queue is not usable; no reset can complete, but the
		 * interrupt resources are still freed below. An unavailable
		 * queue should not have interrupts enabled.
		 */
		WARN_ON_ONCE(status.irq_enabled);
		ret = -EBUSY;
		goto free_resources;
	default:
		/* things are really broken, give up */
		/*
		 * NOTE(review): this path returns without freeing the AQIC
		 * resources — confirm that is intentional.
		 */
		WARN(true, "PQAP/ZAPQ completed with invalid rc (%x)\n",
		     status.response_code);
		return -EIO;
	}

	/* wait for the reset to take effect */
	while (retry2--) {
		if (status.queue_empty && !status.irq_enabled)
			break;
		msleep(20);
		status = ap_tapq(q->apqn, NULL);
	}
	WARN_ON_ONCE(retry2 <= 0);

free_resources:
	vfio_ap_free_aqic_resources(q);

	return ret;
}
1324 
1325 static int vfio_ap_mdev_reset_queues(struct ap_matrix_mdev *matrix_mdev)
1326 {
1327 	int ret;
1328 	int rc = 0;
1329 	unsigned long apid, apqi;
1330 	struct vfio_ap_queue *q;
1331 
1332 	for_each_set_bit_inv(apid, matrix_mdev->matrix.apm,
1333 			     matrix_mdev->matrix.apm_max + 1) {
1334 		for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm,
1335 				     matrix_mdev->matrix.aqm_max + 1) {
1336 			q = vfio_ap_find_queue(AP_MKQID(apid, apqi));
1337 			ret = vfio_ap_mdev_reset_queue(q, 1);
1338 			/*
1339 			 * Regardless whether a queue turns out to be busy, or
1340 			 * is not operational, we need to continue resetting
1341 			 * the remaining queues.
1342 			 */
1343 			if (ret)
1344 				rc = ret;
1345 		}
1346 	}
1347 
1348 	return rc;
1349 }
1350 
1351 static int vfio_ap_mdev_open_device(struct vfio_device *vdev)
1352 {
1353 	struct ap_matrix_mdev *matrix_mdev =
1354 		container_of(vdev, struct ap_matrix_mdev, vdev);
1355 
1356 	if (!vdev->kvm)
1357 		return -EINVAL;
1358 
1359 	return vfio_ap_mdev_set_kvm(matrix_mdev, vdev->kvm);
1360 }
1361 
1362 static void vfio_ap_mdev_close_device(struct vfio_device *vdev)
1363 {
1364 	struct ap_matrix_mdev *matrix_mdev =
1365 		container_of(vdev, struct ap_matrix_mdev, vdev);
1366 
1367 	vfio_ap_mdev_unset_kvm(matrix_mdev);
1368 }
1369 
1370 static int vfio_ap_mdev_get_device_info(unsigned long arg)
1371 {
1372 	unsigned long minsz;
1373 	struct vfio_device_info info;
1374 
1375 	minsz = offsetofend(struct vfio_device_info, num_irqs);
1376 
1377 	if (copy_from_user(&info, (void __user *)arg, minsz))
1378 		return -EFAULT;
1379 
1380 	if (info.argsz < minsz)
1381 		return -EINVAL;
1382 
1383 	info.flags = VFIO_DEVICE_FLAGS_AP | VFIO_DEVICE_FLAGS_RESET;
1384 	info.num_regions = 0;
1385 	info.num_irqs = 0;
1386 
1387 	return copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0;
1388 }
1389 
1390 static ssize_t vfio_ap_mdev_ioctl(struct vfio_device *vdev,
1391 				    unsigned int cmd, unsigned long arg)
1392 {
1393 	struct ap_matrix_mdev *matrix_mdev =
1394 		container_of(vdev, struct ap_matrix_mdev, vdev);
1395 	int ret;
1396 
1397 	mutex_lock(&matrix_dev->lock);
1398 	switch (cmd) {
1399 	case VFIO_DEVICE_GET_INFO:
1400 		ret = vfio_ap_mdev_get_device_info(arg);
1401 		break;
1402 	case VFIO_DEVICE_RESET:
1403 		ret = vfio_ap_mdev_reset_queues(matrix_mdev);
1404 		break;
1405 	default:
1406 		ret = -EOPNOTSUPP;
1407 		break;
1408 	}
1409 	mutex_unlock(&matrix_dev->lock);
1410 
1411 	return ret;
1412 }
1413 
/* VFIO device callbacks for the mediated matrix device. */
static const struct vfio_device_ops vfio_ap_matrix_dev_ops = {
	.open_device = vfio_ap_mdev_open_device,
	.close_device = vfio_ap_mdev_close_device,
	.ioctl = vfio_ap_mdev_ioctl,
	.dma_unmap = vfio_ap_mdev_dma_unmap,
};

/* mdev driver registered with the mediated-device core. */
static struct mdev_driver vfio_ap_matrix_driver = {
	.driver = {
		.name = "vfio_ap_mdev",
		.owner = THIS_MODULE,
		.mod_name = KBUILD_MODNAME,
		.dev_groups = vfio_ap_mdev_attr_groups,
	},
	.probe = vfio_ap_mdev_probe,
	.remove = vfio_ap_mdev_remove,
	.supported_type_groups = vfio_ap_mdev_type_groups,
};
1432 
1433 int vfio_ap_mdev_register(void)
1434 {
1435 	int ret;
1436 
1437 	atomic_set(&matrix_dev->available_instances, MAX_ZDEV_ENTRIES_EXT);
1438 
1439 	ret = mdev_register_driver(&vfio_ap_matrix_driver);
1440 	if (ret)
1441 		return ret;
1442 
1443 	ret = mdev_register_device(&matrix_dev->device, &vfio_ap_matrix_driver);
1444 	if (ret)
1445 		goto err_driver;
1446 	return 0;
1447 
1448 err_driver:
1449 	mdev_unregister_driver(&vfio_ap_matrix_driver);
1450 	return ret;
1451 }
1452 
void vfio_ap_mdev_unregister(void)
{
	/* Unregister the device before the driver (reverse of registration). */
	mdev_unregister_device(&matrix_dev->device);
	mdev_unregister_driver(&vfio_ap_matrix_driver);
}
1458