1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * Adjunct processor matrix VFIO device driver callbacks.
4  *
5  * Copyright IBM Corp. 2018
6  *
7  * Author(s): Tony Krowiak <akrowiak@linux.ibm.com>
8  *	      Halil Pasic <pasic@linux.ibm.com>
9  *	      Pierre Morel <pmorel@linux.ibm.com>
10  */
11 #include <linux/string.h>
12 #include <linux/vfio.h>
13 #include <linux/device.h>
14 #include <linux/list.h>
15 #include <linux/ctype.h>
16 #include <linux/bitops.h>
17 #include <linux/kvm_host.h>
18 #include <linux/module.h>
19 #include <asm/kvm.h>
20 #include <asm/zcrypt.h>
21 
22 #include "vfio_ap_private.h"
23 
24 #define VFIO_AP_MDEV_TYPE_HWVIRT "passthrough"
25 #define VFIO_AP_MDEV_NAME_HWVIRT "VFIO AP Passthrough Device"
26 
27 static int vfio_ap_mdev_reset_queues(struct ap_matrix_mdev *matrix_mdev);
28 static struct vfio_ap_queue *vfio_ap_find_queue(int apqn);
29 static const struct vfio_device_ops vfio_ap_matrix_dev_ops;
30 
31 static int match_apqn(struct device *dev, const void *data)
32 {
33 	struct vfio_ap_queue *q = dev_get_drvdata(dev);
34 
35 	return (q->apqn == *(int *)(data)) ? 1 : 0;
36 }
37 
38 /**
39  * vfio_ap_get_queue - retrieve a queue with a specific APQN from a list
40  * @matrix_mdev: the associated mediated matrix
41  * @apqn: The queue APQN
42  *
43  * Retrieve a queue with a specific APQN from the list of the
44  * devices of the vfio_ap_drv.
45  * Verify that the APID and the APQI are set in the matrix.
46  *
47  * Return: the pointer to the associated vfio_ap_queue
48  */
49 static struct vfio_ap_queue *vfio_ap_get_queue(
50 					struct ap_matrix_mdev *matrix_mdev,
51 					int apqn)
52 {
53 	struct vfio_ap_queue *q;
54 
55 	if (!test_bit_inv(AP_QID_CARD(apqn), matrix_mdev->matrix.apm))
56 		return NULL;
57 	if (!test_bit_inv(AP_QID_QUEUE(apqn), matrix_mdev->matrix.aqm))
58 		return NULL;
59 
60 	q = vfio_ap_find_queue(apqn);
61 	if (q)
62 		q->matrix_mdev = matrix_mdev;
63 
64 	return q;
65 }
66 
67 /**
68  * vfio_ap_wait_for_irqclear - clears the IR bit or gives up after 5 tries
69  * @apqn: The AP Queue number
70  *
71  * Checks the IRQ bit for the status of this APQN using ap_tapq.
72  * Returns if the ap_tapq function succeeded and the bit is clear.
73  * Returns if ap_tapq function failed with invalid, deconfigured or
74  * checkstopped AP.
75  * Otherwise retries up to 5 times after waiting 20ms.
76  */
77 static void vfio_ap_wait_for_irqclear(int apqn)
78 {
79 	struct ap_queue_status status;
80 	int retry = 5;
81 
82 	do {
83 		status = ap_tapq(apqn, NULL);
84 		switch (status.response_code) {
85 		case AP_RESPONSE_NORMAL:
86 		case AP_RESPONSE_RESET_IN_PROGRESS:
87 			if (!status.irq_enabled)
88 				return;
89 			fallthrough;
90 		case AP_RESPONSE_BUSY:
91 			msleep(20);
92 			break;
93 		case AP_RESPONSE_Q_NOT_AVAIL:
94 		case AP_RESPONSE_DECONFIGURED:
95 		case AP_RESPONSE_CHECKSTOPPED:
96 		default:
97 			WARN_ONCE(1, "%s: tapq rc %02x: %04x\n", __func__,
98 				  status.response_code, apqn);
99 			return;
100 		}
101 	} while (--retry);
102 
103 	WARN_ONCE(1, "%s: tapq rc %02x: %04x could not clear IR bit\n",
104 		  __func__, status.response_code, apqn);
105 }
106 
107 /**
108  * vfio_ap_free_aqic_resources - free vfio_ap_queue resources
109  * @q: The vfio_ap_queue
110  *
111  * Unregisters the ISC in the GIB when the saved ISC not invalid.
112  * Unpins the guest's page holding the NIB when it exists.
113  * Resets the saved_pfn and saved_isc to invalid values.
114  */
115 static void vfio_ap_free_aqic_resources(struct vfio_ap_queue *q)
116 {
117 	if (!q)
118 		return;
119 	if (q->saved_isc != VFIO_AP_ISC_INVALID &&
120 	    !WARN_ON(!(q->matrix_mdev && q->matrix_mdev->kvm))) {
121 		kvm_s390_gisc_unregister(q->matrix_mdev->kvm, q->saved_isc);
122 		q->saved_isc = VFIO_AP_ISC_INVALID;
123 	}
124 	if (q->saved_pfn && !WARN_ON(!q->matrix_mdev)) {
125 		vfio_unpin_pages(mdev_dev(q->matrix_mdev->mdev),
126 				 &q->saved_pfn, 1);
127 		q->saved_pfn = 0;
128 	}
129 }
130 
131 /**
132  * vfio_ap_irq_disable - disables and clears an ap_queue interrupt
133  * @q: The vfio_ap_queue
134  *
135  * Uses ap_aqic to disable the interruption and in case of success, reset
136  * in progress or IRQ disable command already proceeded: calls
137  * vfio_ap_wait_for_irqclear() to check for the IRQ bit to be clear
138  * and calls vfio_ap_free_aqic_resources() to free the resources associated
139  * with the AP interrupt handling.
140  *
141  * In the case the AP is busy, or a reset is in progress,
142  * retries after 20ms, up to 5 times.
143  *
144  * Returns if ap_aqic function failed with invalid, deconfigured or
145  * checkstopped AP.
146  *
147  * Return: &struct ap_queue_status
148  */
149 static struct ap_queue_status vfio_ap_irq_disable(struct vfio_ap_queue *q)
150 {
151 	struct ap_qirq_ctrl aqic_gisa = {};
152 	struct ap_queue_status status;
153 	int retries = 5;
154 
155 	do {
156 		status = ap_aqic(q->apqn, aqic_gisa, NULL);
157 		switch (status.response_code) {
158 		case AP_RESPONSE_OTHERWISE_CHANGED:
159 		case AP_RESPONSE_NORMAL:
160 			vfio_ap_wait_for_irqclear(q->apqn);
161 			goto end_free;
162 		case AP_RESPONSE_RESET_IN_PROGRESS:
163 		case AP_RESPONSE_BUSY:
164 			msleep(20);
165 			break;
166 		case AP_RESPONSE_Q_NOT_AVAIL:
167 		case AP_RESPONSE_DECONFIGURED:
168 		case AP_RESPONSE_CHECKSTOPPED:
169 		case AP_RESPONSE_INVALID_ADDRESS:
170 		default:
171 			/* All cases in default means AP not operational */
172 			WARN_ONCE(1, "%s: ap_aqic status %d\n", __func__,
173 				  status.response_code);
174 			goto end_free;
175 		}
176 	} while (retries--);
177 
178 	WARN_ONCE(1, "%s: ap_aqic status %d\n", __func__,
179 		  status.response_code);
180 end_free:
181 	vfio_ap_free_aqic_resources(q);
182 	q->matrix_mdev = NULL;
183 	return status;
184 }
185 
186 /**
187  * vfio_ap_irq_enable - Enable Interruption for a APQN
188  *
189  * @q:	 the vfio_ap_queue holding AQIC parameters
190  * @isc: the guest ISC to register with the GIB interface
191  * @nib: the notification indicator byte to pin.
192  *
193  * Pin the NIB saved in *q
194  * Register the guest ISC to GIB interface and retrieve the
195  * host ISC to issue the host side PQAP/AQIC
196  *
197  * Response.status may be set to AP_RESPONSE_INVALID_ADDRESS in case the
198  * vfio_pin_pages failed.
199  *
200  * Otherwise return the ap_queue_status returned by the ap_aqic(),
201  * all retry handling will be done by the guest.
202  *
203  * Return: &struct ap_queue_status
204  */
205 static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q,
206 						 int isc,
207 						 unsigned long nib)
208 {
209 	struct ap_qirq_ctrl aqic_gisa = {};
210 	struct ap_queue_status status = {};
211 	struct kvm_s390_gisa *gisa;
212 	struct kvm *kvm;
213 	unsigned long h_nib, g_pfn, h_pfn;
214 	int ret;
215 
216 	g_pfn = nib >> PAGE_SHIFT;
217 	ret = vfio_pin_pages(mdev_dev(q->matrix_mdev->mdev), &g_pfn, 1,
218 			     IOMMU_READ | IOMMU_WRITE, &h_pfn);
219 	switch (ret) {
220 	case 1:
221 		break;
222 	default:
223 		status.response_code = AP_RESPONSE_INVALID_ADDRESS;
224 		return status;
225 	}
226 
227 	kvm = q->matrix_mdev->kvm;
228 	gisa = kvm->arch.gisa_int.origin;
229 
230 	h_nib = (h_pfn << PAGE_SHIFT) | (nib & ~PAGE_MASK);
231 	aqic_gisa.gisc = isc;
232 	aqic_gisa.isc = kvm_s390_gisc_register(kvm, isc);
233 	aqic_gisa.ir = 1;
234 	aqic_gisa.gisa = (uint64_t)gisa >> 4;
235 
236 	status = ap_aqic(q->apqn, aqic_gisa, (void *)h_nib);
237 	switch (status.response_code) {
238 	case AP_RESPONSE_NORMAL:
239 		/* See if we did clear older IRQ configuration */
240 		vfio_ap_free_aqic_resources(q);
241 		q->saved_pfn = g_pfn;
242 		q->saved_isc = isc;
243 		break;
244 	case AP_RESPONSE_OTHERWISE_CHANGED:
245 		/* We could not modify IRQ setings: clear new configuration */
246 		vfio_unpin_pages(mdev_dev(q->matrix_mdev->mdev), &g_pfn, 1);
247 		kvm_s390_gisc_unregister(kvm, isc);
248 		break;
249 	default:
250 		pr_warn("%s: apqn %04x: response: %02x\n", __func__, q->apqn,
251 			status.response_code);
252 		vfio_ap_irq_disable(q);
253 		break;
254 	}
255 
256 	return status;
257 }
258 
259 /**
260  * handle_pqap - PQAP instruction callback
261  *
262  * @vcpu: The vcpu on which we received the PQAP instruction
263  *
264  * Get the general register contents to initialize internal variables.
265  * REG[0]: APQN
266  * REG[1]: IR and ISC
267  * REG[2]: NIB
268  *
269  * Response.status may be set to following Response Code:
270  * - AP_RESPONSE_Q_NOT_AVAIL: if the queue is not available
271  * - AP_RESPONSE_DECONFIGURED: if the queue is not configured
272  * - AP_RESPONSE_NORMAL (0) : in case of successs
273  *   Check vfio_ap_setirq() and vfio_ap_clrirq() for other possible RC.
274  * We take the matrix_dev lock to ensure serialization on queues and
275  * mediated device access.
276  *
277  * Return: 0 if we could handle the request inside KVM.
278  * Otherwise, returns -EOPNOTSUPP to let QEMU handle the fault.
279  */
280 static int handle_pqap(struct kvm_vcpu *vcpu)
281 {
282 	uint64_t status;
283 	uint16_t apqn;
284 	struct vfio_ap_queue *q;
285 	struct ap_queue_status qstatus = {
286 			       .response_code = AP_RESPONSE_Q_NOT_AVAIL, };
287 	struct ap_matrix_mdev *matrix_mdev;
288 
289 	/* If we do not use the AIV facility just go to userland */
290 	if (!(vcpu->arch.sie_block->eca & ECA_AIV))
291 		return -EOPNOTSUPP;
292 
293 	apqn = vcpu->run->s.regs.gprs[0] & 0xffff;
294 	mutex_lock(&matrix_dev->lock);
295 
296 	if (!vcpu->kvm->arch.crypto.pqap_hook)
297 		goto out_unlock;
298 	matrix_mdev = container_of(vcpu->kvm->arch.crypto.pqap_hook,
299 				   struct ap_matrix_mdev, pqap_hook);
300 
301 	/* If the there is no guest using the mdev, there is nothing to do */
302 	if (!matrix_mdev->kvm)
303 		goto out_unlock;
304 
305 	q = vfio_ap_get_queue(matrix_mdev, apqn);
306 	if (!q)
307 		goto out_unlock;
308 
309 	status = vcpu->run->s.regs.gprs[1];
310 
311 	/* If IR bit(16) is set we enable the interrupt */
312 	if ((status >> (63 - 16)) & 0x01)
313 		qstatus = vfio_ap_irq_enable(q, status & 0x07,
314 					     vcpu->run->s.regs.gprs[2]);
315 	else
316 		qstatus = vfio_ap_irq_disable(q);
317 
318 out_unlock:
319 	memcpy(&vcpu->run->s.regs.gprs[1], &qstatus, sizeof(qstatus));
320 	vcpu->run->s.regs.gprs[1] >>= 32;
321 	mutex_unlock(&matrix_dev->lock);
322 	return 0;
323 }
324 
325 static void vfio_ap_matrix_init(struct ap_config_info *info,
326 				struct ap_matrix *matrix)
327 {
328 	matrix->apm_max = info->apxa ? info->Na : 63;
329 	matrix->aqm_max = info->apxa ? info->Nd : 15;
330 	matrix->adm_max = info->apxa ? info->Nd : 15;
331 }
332 
333 static int vfio_ap_mdev_probe(struct mdev_device *mdev)
334 {
335 	struct ap_matrix_mdev *matrix_mdev;
336 	int ret;
337 
338 	if ((atomic_dec_if_positive(&matrix_dev->available_instances) < 0))
339 		return -EPERM;
340 
341 	matrix_mdev = kzalloc(sizeof(*matrix_mdev), GFP_KERNEL);
342 	if (!matrix_mdev) {
343 		ret = -ENOMEM;
344 		goto err_dec_available;
345 	}
346 	vfio_init_group_dev(&matrix_mdev->vdev, &mdev->dev,
347 			    &vfio_ap_matrix_dev_ops);
348 
349 	matrix_mdev->mdev = mdev;
350 	vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->matrix);
351 	matrix_mdev->pqap_hook = handle_pqap;
352 	mutex_lock(&matrix_dev->lock);
353 	list_add(&matrix_mdev->node, &matrix_dev->mdev_list);
354 	mutex_unlock(&matrix_dev->lock);
355 
356 	ret = vfio_register_emulated_iommu_dev(&matrix_mdev->vdev);
357 	if (ret)
358 		goto err_list;
359 	dev_set_drvdata(&mdev->dev, matrix_mdev);
360 	return 0;
361 
362 err_list:
363 	mutex_lock(&matrix_dev->lock);
364 	list_del(&matrix_mdev->node);
365 	mutex_unlock(&matrix_dev->lock);
366 	vfio_uninit_group_dev(&matrix_mdev->vdev);
367 	kfree(matrix_mdev);
368 err_dec_available:
369 	atomic_inc(&matrix_dev->available_instances);
370 	return ret;
371 }
372 
373 static void vfio_ap_mdev_remove(struct mdev_device *mdev)
374 {
375 	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(&mdev->dev);
376 
377 	vfio_unregister_group_dev(&matrix_mdev->vdev);
378 
379 	mutex_lock(&matrix_dev->lock);
380 	vfio_ap_mdev_reset_queues(matrix_mdev);
381 	list_del(&matrix_mdev->node);
382 	mutex_unlock(&matrix_dev->lock);
383 	vfio_uninit_group_dev(&matrix_mdev->vdev);
384 	kfree(matrix_mdev);
385 	atomic_inc(&matrix_dev->available_instances);
386 }
387 
388 static ssize_t name_show(struct mdev_type *mtype,
389 			 struct mdev_type_attribute *attr, char *buf)
390 {
391 	return sprintf(buf, "%s\n", VFIO_AP_MDEV_NAME_HWVIRT);
392 }
393 
394 static MDEV_TYPE_ATTR_RO(name);
395 
396 static ssize_t available_instances_show(struct mdev_type *mtype,
397 					struct mdev_type_attribute *attr,
398 					char *buf)
399 {
400 	return sprintf(buf, "%d\n",
401 		       atomic_read(&matrix_dev->available_instances));
402 }
403 
404 static MDEV_TYPE_ATTR_RO(available_instances);
405 
406 static ssize_t device_api_show(struct mdev_type *mtype,
407 			       struct mdev_type_attribute *attr, char *buf)
408 {
409 	return sprintf(buf, "%s\n", VFIO_DEVICE_API_AP_STRING);
410 }
411 
412 static MDEV_TYPE_ATTR_RO(device_api);
413 
414 static struct attribute *vfio_ap_mdev_type_attrs[] = {
415 	&mdev_type_attr_name.attr,
416 	&mdev_type_attr_device_api.attr,
417 	&mdev_type_attr_available_instances.attr,
418 	NULL,
419 };
420 
421 static struct attribute_group vfio_ap_mdev_hwvirt_type_group = {
422 	.name = VFIO_AP_MDEV_TYPE_HWVIRT,
423 	.attrs = vfio_ap_mdev_type_attrs,
424 };
425 
426 static struct attribute_group *vfio_ap_mdev_type_groups[] = {
427 	&vfio_ap_mdev_hwvirt_type_group,
428 	NULL,
429 };
430 
/*
 * struct vfio_ap_queue_reserved - search request handed to vfio_ap_has_queue()
 *
 * At least one of @apid and @apqi has to be non-NULL.
 */
struct vfio_ap_queue_reserved {
	/* adapter ID to look for, or NULL to match any adapter */
	unsigned long *apid;
	/* queue index to look for, or NULL to match any queue index */
	unsigned long *apqi;
	/* set to true once a matching AP queue device has been seen */
	bool reserved;
};
436 
437 /**
438  * vfio_ap_has_queue - determines if the AP queue containing the target in @data
439  *
440  * @dev: an AP queue device
441  * @data: a struct vfio_ap_queue_reserved reference
442  *
443  * Flags whether the AP queue device (@dev) has a queue ID containing the APQN,
444  * apid or apqi specified in @data:
445  *
446  * - If @data contains both an apid and apqi value, then @data will be flagged
447  *   as reserved if the APID and APQI fields for the AP queue device matches
448  *
449  * - If @data contains only an apid value, @data will be flagged as
450  *   reserved if the APID field in the AP queue device matches
451  *
452  * - If @data contains only an apqi value, @data will be flagged as
453  *   reserved if the APQI field in the AP queue device matches
454  *
455  * Return: 0 to indicate the input to function succeeded. Returns -EINVAL if
456  * @data does not contain either an apid or apqi.
457  */
458 static int vfio_ap_has_queue(struct device *dev, void *data)
459 {
460 	struct vfio_ap_queue_reserved *qres = data;
461 	struct ap_queue *ap_queue = to_ap_queue(dev);
462 	ap_qid_t qid;
463 	unsigned long id;
464 
465 	if (qres->apid && qres->apqi) {
466 		qid = AP_MKQID(*qres->apid, *qres->apqi);
467 		if (qid == ap_queue->qid)
468 			qres->reserved = true;
469 	} else if (qres->apid && !qres->apqi) {
470 		id = AP_QID_CARD(ap_queue->qid);
471 		if (id == *qres->apid)
472 			qres->reserved = true;
473 	} else if (!qres->apid && qres->apqi) {
474 		id = AP_QID_QUEUE(ap_queue->qid);
475 		if (id == *qres->apqi)
476 			qres->reserved = true;
477 	} else {
478 		return -EINVAL;
479 	}
480 
481 	return 0;
482 }
483 
484 /**
485  * vfio_ap_verify_queue_reserved - verifies that the AP queue containing
486  * @apid or @aqpi is reserved
487  *
488  * @apid: an AP adapter ID
489  * @apqi: an AP queue index
490  *
491  * Verifies that the AP queue with @apid/@apqi is reserved by the VFIO AP device
492  * driver according to the following rules:
493  *
494  * - If both @apid and @apqi are not NULL, then there must be an AP queue
495  *   device bound to the vfio_ap driver with the APQN identified by @apid and
496  *   @apqi
497  *
498  * - If only @apid is not NULL, then there must be an AP queue device bound
499  *   to the vfio_ap driver with an APQN containing @apid
500  *
501  * - If only @apqi is not NULL, then there must be an AP queue device bound
502  *   to the vfio_ap driver with an APQN containing @apqi
503  *
504  * Return: 0 if the AP queue is reserved; otherwise, returns -EADDRNOTAVAIL.
505  */
506 static int vfio_ap_verify_queue_reserved(unsigned long *apid,
507 					 unsigned long *apqi)
508 {
509 	int ret;
510 	struct vfio_ap_queue_reserved qres;
511 
512 	qres.apid = apid;
513 	qres.apqi = apqi;
514 	qres.reserved = false;
515 
516 	ret = driver_for_each_device(&matrix_dev->vfio_ap_drv->driver, NULL,
517 				     &qres, vfio_ap_has_queue);
518 	if (ret)
519 		return ret;
520 
521 	if (qres.reserved)
522 		return 0;
523 
524 	return -EADDRNOTAVAIL;
525 }
526 
527 static int
528 vfio_ap_mdev_verify_queues_reserved_for_apid(struct ap_matrix_mdev *matrix_mdev,
529 					     unsigned long apid)
530 {
531 	int ret;
532 	unsigned long apqi;
533 	unsigned long nbits = matrix_mdev->matrix.aqm_max + 1;
534 
535 	if (find_first_bit_inv(matrix_mdev->matrix.aqm, nbits) >= nbits)
536 		return vfio_ap_verify_queue_reserved(&apid, NULL);
537 
538 	for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, nbits) {
539 		ret = vfio_ap_verify_queue_reserved(&apid, &apqi);
540 		if (ret)
541 			return ret;
542 	}
543 
544 	return 0;
545 }
546 
547 /**
548  * vfio_ap_mdev_verify_no_sharing - verifies that the AP matrix is not configured
549  *
550  * @matrix_mdev: the mediated matrix device
551  *
552  * Verifies that the APQNs derived from the cross product of the AP adapter IDs
553  * and AP queue indexes comprising the AP matrix are not configured for another
554  * mediated device. AP queue sharing is not allowed.
555  *
556  * Return: 0 if the APQNs are not shared; otherwise returns -EADDRINUSE.
557  */
558 static int vfio_ap_mdev_verify_no_sharing(struct ap_matrix_mdev *matrix_mdev)
559 {
560 	struct ap_matrix_mdev *lstdev;
561 	DECLARE_BITMAP(apm, AP_DEVICES);
562 	DECLARE_BITMAP(aqm, AP_DOMAINS);
563 
564 	list_for_each_entry(lstdev, &matrix_dev->mdev_list, node) {
565 		if (matrix_mdev == lstdev)
566 			continue;
567 
568 		memset(apm, 0, sizeof(apm));
569 		memset(aqm, 0, sizeof(aqm));
570 
571 		/*
572 		 * We work on full longs, as we can only exclude the leftover
573 		 * bits in non-inverse order. The leftover is all zeros.
574 		 */
575 		if (!bitmap_and(apm, matrix_mdev->matrix.apm,
576 				lstdev->matrix.apm, AP_DEVICES))
577 			continue;
578 
579 		if (!bitmap_and(aqm, matrix_mdev->matrix.aqm,
580 				lstdev->matrix.aqm, AP_DOMAINS))
581 			continue;
582 
583 		return -EADDRINUSE;
584 	}
585 
586 	return 0;
587 }
588 
589 /**
590  * assign_adapter_store - parses the APID from @buf and sets the
591  * corresponding bit in the mediated matrix device's APM
592  *
593  * @dev:	the matrix device
594  * @attr:	the mediated matrix device's assign_adapter attribute
595  * @buf:	a buffer containing the AP adapter number (APID) to
596  *		be assigned
597  * @count:	the number of bytes in @buf
598  *
599  * Return: the number of bytes processed if the APID is valid; otherwise,
600  * returns one of the following errors:
601  *
602  *	1. -EINVAL
603  *	   The APID is not a valid number
604  *
605  *	2. -ENODEV
606  *	   The APID exceeds the maximum value configured for the system
607  *
608  *	3. -EADDRNOTAVAIL
609  *	   An APQN derived from the cross product of the APID being assigned
610  *	   and the APQIs previously assigned is not bound to the vfio_ap device
611  *	   driver; or, if no APQIs have yet been assigned, the APID is not
612  *	   contained in an APQN bound to the vfio_ap device driver.
613  *
614  *	4. -EADDRINUSE
615  *	   An APQN derived from the cross product of the APID being assigned
616  *	   and the APQIs previously assigned is being used by another mediated
617  *	   matrix device
618  */
619 static ssize_t assign_adapter_store(struct device *dev,
620 				    struct device_attribute *attr,
621 				    const char *buf, size_t count)
622 {
623 	int ret;
624 	unsigned long apid;
625 	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
626 
627 	mutex_lock(&matrix_dev->lock);
628 
629 	/* If the KVM guest is running, disallow assignment of adapter */
630 	if (matrix_mdev->kvm) {
631 		ret = -EBUSY;
632 		goto done;
633 	}
634 
635 	ret = kstrtoul(buf, 0, &apid);
636 	if (ret)
637 		goto done;
638 
639 	if (apid > matrix_mdev->matrix.apm_max) {
640 		ret = -ENODEV;
641 		goto done;
642 	}
643 
644 	/*
645 	 * Set the bit in the AP mask (APM) corresponding to the AP adapter
646 	 * number (APID). The bits in the mask, from most significant to least
647 	 * significant bit, correspond to APIDs 0-255.
648 	 */
649 	ret = vfio_ap_mdev_verify_queues_reserved_for_apid(matrix_mdev, apid);
650 	if (ret)
651 		goto done;
652 
653 	set_bit_inv(apid, matrix_mdev->matrix.apm);
654 
655 	ret = vfio_ap_mdev_verify_no_sharing(matrix_mdev);
656 	if (ret)
657 		goto share_err;
658 
659 	ret = count;
660 	goto done;
661 
662 share_err:
663 	clear_bit_inv(apid, matrix_mdev->matrix.apm);
664 done:
665 	mutex_unlock(&matrix_dev->lock);
666 
667 	return ret;
668 }
669 static DEVICE_ATTR_WO(assign_adapter);
670 
671 /**
672  * unassign_adapter_store - parses the APID from @buf and clears the
673  * corresponding bit in the mediated matrix device's APM
674  *
675  * @dev:	the matrix device
676  * @attr:	the mediated matrix device's unassign_adapter attribute
677  * @buf:	a buffer containing the adapter number (APID) to be unassigned
678  * @count:	the number of bytes in @buf
679  *
680  * Return: the number of bytes processed if the APID is valid; otherwise,
681  * returns one of the following errors:
682  *	-EINVAL if the APID is not a number
683  *	-ENODEV if the APID it exceeds the maximum value configured for the
684  *		system
685  */
686 static ssize_t unassign_adapter_store(struct device *dev,
687 				      struct device_attribute *attr,
688 				      const char *buf, size_t count)
689 {
690 	int ret;
691 	unsigned long apid;
692 	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
693 
694 	mutex_lock(&matrix_dev->lock);
695 
696 	/* If the KVM guest is running, disallow unassignment of adapter */
697 	if (matrix_mdev->kvm) {
698 		ret = -EBUSY;
699 		goto done;
700 	}
701 
702 	ret = kstrtoul(buf, 0, &apid);
703 	if (ret)
704 		goto done;
705 
706 	if (apid > matrix_mdev->matrix.apm_max) {
707 		ret = -ENODEV;
708 		goto done;
709 	}
710 
711 	clear_bit_inv((unsigned long)apid, matrix_mdev->matrix.apm);
712 	ret = count;
713 done:
714 	mutex_unlock(&matrix_dev->lock);
715 	return ret;
716 }
717 static DEVICE_ATTR_WO(unassign_adapter);
718 
719 static int
720 vfio_ap_mdev_verify_queues_reserved_for_apqi(struct ap_matrix_mdev *matrix_mdev,
721 					     unsigned long apqi)
722 {
723 	int ret;
724 	unsigned long apid;
725 	unsigned long nbits = matrix_mdev->matrix.apm_max + 1;
726 
727 	if (find_first_bit_inv(matrix_mdev->matrix.apm, nbits) >= nbits)
728 		return vfio_ap_verify_queue_reserved(NULL, &apqi);
729 
730 	for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, nbits) {
731 		ret = vfio_ap_verify_queue_reserved(&apid, &apqi);
732 		if (ret)
733 			return ret;
734 	}
735 
736 	return 0;
737 }
738 
739 /**
740  * assign_domain_store - parses the APQI from @buf and sets the
741  * corresponding bit in the mediated matrix device's AQM
742  *
743  * @dev:	the matrix device
744  * @attr:	the mediated matrix device's assign_domain attribute
745  * @buf:	a buffer containing the AP queue index (APQI) of the domain to
746  *		be assigned
747  * @count:	the number of bytes in @buf
748  *
749  * Return: the number of bytes processed if the APQI is valid; otherwise returns
750  * one of the following errors:
751  *
752  *	1. -EINVAL
753  *	   The APQI is not a valid number
754  *
755  *	2. -ENODEV
756  *	   The APQI exceeds the maximum value configured for the system
757  *
758  *	3. -EADDRNOTAVAIL
759  *	   An APQN derived from the cross product of the APQI being assigned
760  *	   and the APIDs previously assigned is not bound to the vfio_ap device
761  *	   driver; or, if no APIDs have yet been assigned, the APQI is not
762  *	   contained in an APQN bound to the vfio_ap device driver.
763  *
764  *	4. -EADDRINUSE
765  *	   An APQN derived from the cross product of the APQI being assigned
766  *	   and the APIDs previously assigned is being used by another mediated
767  *	   matrix device
768  */
769 static ssize_t assign_domain_store(struct device *dev,
770 				   struct device_attribute *attr,
771 				   const char *buf, size_t count)
772 {
773 	int ret;
774 	unsigned long apqi;
775 	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
776 	unsigned long max_apqi = matrix_mdev->matrix.aqm_max;
777 
778 	mutex_lock(&matrix_dev->lock);
779 
780 	/* If the KVM guest is running, disallow assignment of domain */
781 	if (matrix_mdev->kvm) {
782 		ret = -EBUSY;
783 		goto done;
784 	}
785 
786 	ret = kstrtoul(buf, 0, &apqi);
787 	if (ret)
788 		goto done;
789 	if (apqi > max_apqi) {
790 		ret = -ENODEV;
791 		goto done;
792 	}
793 
794 	ret = vfio_ap_mdev_verify_queues_reserved_for_apqi(matrix_mdev, apqi);
795 	if (ret)
796 		goto done;
797 
798 	set_bit_inv(apqi, matrix_mdev->matrix.aqm);
799 
800 	ret = vfio_ap_mdev_verify_no_sharing(matrix_mdev);
801 	if (ret)
802 		goto share_err;
803 
804 	ret = count;
805 	goto done;
806 
807 share_err:
808 	clear_bit_inv(apqi, matrix_mdev->matrix.aqm);
809 done:
810 	mutex_unlock(&matrix_dev->lock);
811 
812 	return ret;
813 }
814 static DEVICE_ATTR_WO(assign_domain);
815 
816 
817 /**
818  * unassign_domain_store - parses the APQI from @buf and clears the
819  * corresponding bit in the mediated matrix device's AQM
820  *
821  * @dev:	the matrix device
822  * @attr:	the mediated matrix device's unassign_domain attribute
823  * @buf:	a buffer containing the AP queue index (APQI) of the domain to
824  *		be unassigned
825  * @count:	the number of bytes in @buf
826  *
827  * Return: the number of bytes processed if the APQI is valid; otherwise,
828  * returns one of the following errors:
829  *	-EINVAL if the APQI is not a number
830  *	-ENODEV if the APQI exceeds the maximum value configured for the system
831  */
832 static ssize_t unassign_domain_store(struct device *dev,
833 				     struct device_attribute *attr,
834 				     const char *buf, size_t count)
835 {
836 	int ret;
837 	unsigned long apqi;
838 	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
839 
840 	mutex_lock(&matrix_dev->lock);
841 
842 	/* If the KVM guest is running, disallow unassignment of domain */
843 	if (matrix_mdev->kvm) {
844 		ret = -EBUSY;
845 		goto done;
846 	}
847 
848 	ret = kstrtoul(buf, 0, &apqi);
849 	if (ret)
850 		goto done;
851 
852 	if (apqi > matrix_mdev->matrix.aqm_max) {
853 		ret = -ENODEV;
854 		goto done;
855 	}
856 
857 	clear_bit_inv((unsigned long)apqi, matrix_mdev->matrix.aqm);
858 	ret = count;
859 
860 done:
861 	mutex_unlock(&matrix_dev->lock);
862 	return ret;
863 }
864 static DEVICE_ATTR_WO(unassign_domain);
865 
866 /**
867  * assign_control_domain_store - parses the domain ID from @buf and sets
868  * the corresponding bit in the mediated matrix device's ADM
869  *
870  * @dev:	the matrix device
871  * @attr:	the mediated matrix device's assign_control_domain attribute
872  * @buf:	a buffer containing the domain ID to be assigned
873  * @count:	the number of bytes in @buf
874  *
875  * Return: the number of bytes processed if the domain ID is valid; otherwise,
876  * returns one of the following errors:
877  *	-EINVAL if the ID is not a number
878  *	-ENODEV if the ID exceeds the maximum value configured for the system
879  */
880 static ssize_t assign_control_domain_store(struct device *dev,
881 					   struct device_attribute *attr,
882 					   const char *buf, size_t count)
883 {
884 	int ret;
885 	unsigned long id;
886 	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
887 
888 	mutex_lock(&matrix_dev->lock);
889 
890 	/* If the KVM guest is running, disallow assignment of control domain */
891 	if (matrix_mdev->kvm) {
892 		ret = -EBUSY;
893 		goto done;
894 	}
895 
896 	ret = kstrtoul(buf, 0, &id);
897 	if (ret)
898 		goto done;
899 
900 	if (id > matrix_mdev->matrix.adm_max) {
901 		ret = -ENODEV;
902 		goto done;
903 	}
904 
905 	/* Set the bit in the ADM (bitmask) corresponding to the AP control
906 	 * domain number (id). The bits in the mask, from most significant to
907 	 * least significant, correspond to IDs 0 up to the one less than the
908 	 * number of control domains that can be assigned.
909 	 */
910 	set_bit_inv(id, matrix_mdev->matrix.adm);
911 	ret = count;
912 done:
913 	mutex_unlock(&matrix_dev->lock);
914 	return ret;
915 }
916 static DEVICE_ATTR_WO(assign_control_domain);
917 
918 /**
919  * unassign_control_domain_store - parses the domain ID from @buf and
920  * clears the corresponding bit in the mediated matrix device's ADM
921  *
922  * @dev:	the matrix device
923  * @attr:	the mediated matrix device's unassign_control_domain attribute
924  * @buf:	a buffer containing the domain ID to be unassigned
925  * @count:	the number of bytes in @buf
926  *
927  * Return: the number of bytes processed if the domain ID is valid; otherwise,
928  * returns one of the following errors:
929  *	-EINVAL if the ID is not a number
930  *	-ENODEV if the ID exceeds the maximum value configured for the system
931  */
932 static ssize_t unassign_control_domain_store(struct device *dev,
933 					     struct device_attribute *attr,
934 					     const char *buf, size_t count)
935 {
936 	int ret;
937 	unsigned long domid;
938 	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
939 	unsigned long max_domid =  matrix_mdev->matrix.adm_max;
940 
941 	mutex_lock(&matrix_dev->lock);
942 
943 	/* If a KVM guest is running, disallow unassignment of control domain */
944 	if (matrix_mdev->kvm) {
945 		ret = -EBUSY;
946 		goto done;
947 	}
948 
949 	ret = kstrtoul(buf, 0, &domid);
950 	if (ret)
951 		goto done;
952 	if (domid > max_domid) {
953 		ret = -ENODEV;
954 		goto done;
955 	}
956 
957 	clear_bit_inv(domid, matrix_mdev->matrix.adm);
958 	ret = count;
959 done:
960 	mutex_unlock(&matrix_dev->lock);
961 	return ret;
962 }
963 static DEVICE_ATTR_WO(unassign_control_domain);
964 
965 static ssize_t control_domains_show(struct device *dev,
966 				    struct device_attribute *dev_attr,
967 				    char *buf)
968 {
969 	unsigned long id;
970 	int nchars = 0;
971 	int n;
972 	char *bufpos = buf;
973 	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
974 	unsigned long max_domid = matrix_mdev->matrix.adm_max;
975 
976 	mutex_lock(&matrix_dev->lock);
977 	for_each_set_bit_inv(id, matrix_mdev->matrix.adm, max_domid + 1) {
978 		n = sprintf(bufpos, "%04lx\n", id);
979 		bufpos += n;
980 		nchars += n;
981 	}
982 	mutex_unlock(&matrix_dev->lock);
983 
984 	return nchars;
985 }
986 static DEVICE_ATTR_RO(control_domains);
987 
988 static ssize_t matrix_show(struct device *dev, struct device_attribute *attr,
989 			   char *buf)
990 {
991 	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
992 	char *bufpos = buf;
993 	unsigned long apid;
994 	unsigned long apqi;
995 	unsigned long apid1;
996 	unsigned long apqi1;
997 	unsigned long napm_bits = matrix_mdev->matrix.apm_max + 1;
998 	unsigned long naqm_bits = matrix_mdev->matrix.aqm_max + 1;
999 	int nchars = 0;
1000 	int n;
1001 
1002 	apid1 = find_first_bit_inv(matrix_mdev->matrix.apm, napm_bits);
1003 	apqi1 = find_first_bit_inv(matrix_mdev->matrix.aqm, naqm_bits);
1004 
1005 	mutex_lock(&matrix_dev->lock);
1006 
1007 	if ((apid1 < napm_bits) && (apqi1 < naqm_bits)) {
1008 		for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, napm_bits) {
1009 			for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm,
1010 					     naqm_bits) {
1011 				n = sprintf(bufpos, "%02lx.%04lx\n", apid,
1012 					    apqi);
1013 				bufpos += n;
1014 				nchars += n;
1015 			}
1016 		}
1017 	} else if (apid1 < napm_bits) {
1018 		for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, napm_bits) {
1019 			n = sprintf(bufpos, "%02lx.\n", apid);
1020 			bufpos += n;
1021 			nchars += n;
1022 		}
1023 	} else if (apqi1 < naqm_bits) {
1024 		for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, naqm_bits) {
1025 			n = sprintf(bufpos, ".%04lx\n", apqi);
1026 			bufpos += n;
1027 			nchars += n;
1028 		}
1029 	}
1030 
1031 	mutex_unlock(&matrix_dev->lock);
1032 
1033 	return nchars;
1034 }
1035 static DEVICE_ATTR_RO(matrix);
1036 
/*
 * Device attributes of a mediated matrix device: write-only attributes to
 * assign/unassign adapters, domains and control domains, and read-only
 * attributes listing the control domains and the matrix. The list is
 * NULL-terminated.
 */
static struct attribute *vfio_ap_mdev_attrs[] = {
	&dev_attr_assign_adapter.attr,
	&dev_attr_unassign_adapter.attr,
	&dev_attr_assign_domain.attr,
	&dev_attr_unassign_domain.attr,
	&dev_attr_assign_control_domain.attr,
	&dev_attr_unassign_control_domain.attr,
	&dev_attr_control_domains.attr,
	&dev_attr_matrix.attr,
	NULL,
};

/* Unnamed attribute group wrapping the attributes above. */
static struct attribute_group vfio_ap_mdev_attr_group = {
	.attrs = vfio_ap_mdev_attrs
};

/*
 * NULL-terminated list of attribute groups; installed as the .dev_groups of
 * vfio_ap_matrix_driver.
 */
static const struct attribute_group *vfio_ap_mdev_attr_groups[] = {
	&vfio_ap_mdev_attr_group,
	NULL
};
1057 
1058 /**
1059  * vfio_ap_mdev_set_kvm - sets all data for @matrix_mdev that are needed
1060  * to manage AP resources for the guest whose state is represented by @kvm
1061  *
1062  * @matrix_mdev: a mediated matrix device
1063  * @kvm: reference to KVM instance
1064  *
1065  * Note: The matrix_dev->lock must be taken prior to calling
1066  * this function; however, the lock will be temporarily released while the
1067  * guest's AP configuration is set to avoid a potential lockdep splat.
1068  * The kvm->lock is taken to set the guest's AP configuration which, under
1069  * certain circumstances, will result in a circular lock dependency if this is
1070  * done under the @matrix_mdev->lock.
1071  *
1072  * Return: 0 if no other mediated matrix device has a reference to @kvm;
1073  * otherwise, returns an -EPERM.
1074  */
1075 static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev,
1076 				struct kvm *kvm)
1077 {
1078 	struct ap_matrix_mdev *m;
1079 
1080 	if (kvm->arch.crypto.crycbd) {
1081 		down_write(&kvm->arch.crypto.pqap_hook_rwsem);
1082 		kvm->arch.crypto.pqap_hook = &matrix_mdev->pqap_hook;
1083 		up_write(&kvm->arch.crypto.pqap_hook_rwsem);
1084 
1085 		mutex_lock(&kvm->lock);
1086 		mutex_lock(&matrix_dev->lock);
1087 
1088 		list_for_each_entry(m, &matrix_dev->mdev_list, node) {
1089 			if (m != matrix_mdev && m->kvm == kvm) {
1090 				mutex_unlock(&kvm->lock);
1091 				mutex_unlock(&matrix_dev->lock);
1092 				return -EPERM;
1093 			}
1094 		}
1095 
1096 		kvm_get_kvm(kvm);
1097 		matrix_mdev->kvm = kvm;
1098 		kvm_arch_crypto_set_masks(kvm,
1099 					  matrix_mdev->matrix.apm,
1100 					  matrix_mdev->matrix.aqm,
1101 					  matrix_mdev->matrix.adm);
1102 
1103 		mutex_unlock(&kvm->lock);
1104 		mutex_unlock(&matrix_dev->lock);
1105 	}
1106 
1107 	return 0;
1108 }
1109 
1110 /**
1111  * vfio_ap_mdev_iommu_notifier - IOMMU notifier callback
1112  *
1113  * @nb: The notifier block
1114  * @action: Action to be taken
1115  * @data: data associated with the request
1116  *
1117  * For an UNMAP request, unpin the guest IOVA (the NIB guest address we
1118  * pinned before). Other requests are ignored.
1119  *
1120  * Return: for an UNMAP request, NOFITY_OK; otherwise NOTIFY_DONE.
1121  */
1122 static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
1123 				       unsigned long action, void *data)
1124 {
1125 	struct ap_matrix_mdev *matrix_mdev;
1126 
1127 	matrix_mdev = container_of(nb, struct ap_matrix_mdev, iommu_notifier);
1128 
1129 	if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
1130 		struct vfio_iommu_type1_dma_unmap *unmap = data;
1131 		unsigned long g_pfn = unmap->iova >> PAGE_SHIFT;
1132 
1133 		vfio_unpin_pages(mdev_dev(matrix_mdev->mdev), &g_pfn, 1);
1134 		return NOTIFY_OK;
1135 	}
1136 
1137 	return NOTIFY_DONE;
1138 }
1139 
1140 /**
1141  * vfio_ap_mdev_unset_kvm - performs clean-up of resources no longer needed
1142  * by @matrix_mdev.
1143  *
1144  * @matrix_mdev: a matrix mediated device
1145  * @kvm: the pointer to the kvm structure being unset.
1146  *
1147  * Note: The matrix_dev->lock must be taken prior to calling
1148  * this function; however, the lock will be temporarily released while the
1149  * guest's AP configuration is cleared to avoid a potential lockdep splat.
1150  * The kvm->lock is taken to clear the guest's AP configuration which, under
1151  * certain circumstances, will result in a circular lock dependency if this is
1152  * done under the @matrix_mdev->lock.
1153  */
1154 static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev,
1155 				   struct kvm *kvm)
1156 {
1157 	if (kvm && kvm->arch.crypto.crycbd) {
1158 		down_write(&kvm->arch.crypto.pqap_hook_rwsem);
1159 		kvm->arch.crypto.pqap_hook = NULL;
1160 		up_write(&kvm->arch.crypto.pqap_hook_rwsem);
1161 
1162 		mutex_lock(&kvm->lock);
1163 		mutex_lock(&matrix_dev->lock);
1164 
1165 		kvm_arch_crypto_clear_masks(kvm);
1166 		vfio_ap_mdev_reset_queues(matrix_mdev);
1167 		kvm_put_kvm(kvm);
1168 		matrix_mdev->kvm = NULL;
1169 
1170 		mutex_unlock(&kvm->lock);
1171 		mutex_unlock(&matrix_dev->lock);
1172 	}
1173 }
1174 
1175 static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
1176 				       unsigned long action, void *data)
1177 {
1178 	int notify_rc = NOTIFY_OK;
1179 	struct ap_matrix_mdev *matrix_mdev;
1180 
1181 	if (action != VFIO_GROUP_NOTIFY_SET_KVM)
1182 		return NOTIFY_OK;
1183 
1184 	matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier);
1185 
1186 	if (!data)
1187 		vfio_ap_mdev_unset_kvm(matrix_mdev, matrix_mdev->kvm);
1188 	else if (vfio_ap_mdev_set_kvm(matrix_mdev, data))
1189 		notify_rc = NOTIFY_DONE;
1190 
1191 	return notify_rc;
1192 }
1193 
1194 static struct vfio_ap_queue *vfio_ap_find_queue(int apqn)
1195 {
1196 	struct device *dev;
1197 	struct vfio_ap_queue *q = NULL;
1198 
1199 	dev = driver_find_device(&matrix_dev->vfio_ap_drv->driver, NULL,
1200 				 &apqn, match_apqn);
1201 	if (dev) {
1202 		q = dev_get_drvdata(dev);
1203 		put_device(dev);
1204 	}
1205 
1206 	return q;
1207 }
1208 
1209 int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q,
1210 			     unsigned int retry)
1211 {
1212 	struct ap_queue_status status;
1213 	int ret;
1214 	int retry2 = 2;
1215 
1216 	if (!q)
1217 		return 0;
1218 
1219 retry_zapq:
1220 	status = ap_zapq(q->apqn);
1221 	switch (status.response_code) {
1222 	case AP_RESPONSE_NORMAL:
1223 		ret = 0;
1224 		break;
1225 	case AP_RESPONSE_RESET_IN_PROGRESS:
1226 		if (retry--) {
1227 			msleep(20);
1228 			goto retry_zapq;
1229 		}
1230 		ret = -EBUSY;
1231 		break;
1232 	case AP_RESPONSE_Q_NOT_AVAIL:
1233 	case AP_RESPONSE_DECONFIGURED:
1234 	case AP_RESPONSE_CHECKSTOPPED:
1235 		WARN_ON_ONCE(status.irq_enabled);
1236 		ret = -EBUSY;
1237 		goto free_resources;
1238 	default:
1239 		/* things are really broken, give up */
1240 		WARN(true, "PQAP/ZAPQ completed with invalid rc (%x)\n",
1241 		     status.response_code);
1242 		return -EIO;
1243 	}
1244 
1245 	/* wait for the reset to take effect */
1246 	while (retry2--) {
1247 		if (status.queue_empty && !status.irq_enabled)
1248 			break;
1249 		msleep(20);
1250 		status = ap_tapq(q->apqn, NULL);
1251 	}
1252 	WARN_ON_ONCE(retry2 <= 0);
1253 
1254 free_resources:
1255 	vfio_ap_free_aqic_resources(q);
1256 
1257 	return ret;
1258 }
1259 
1260 static int vfio_ap_mdev_reset_queues(struct ap_matrix_mdev *matrix_mdev)
1261 {
1262 	int ret;
1263 	int rc = 0;
1264 	unsigned long apid, apqi;
1265 	struct vfio_ap_queue *q;
1266 
1267 	for_each_set_bit_inv(apid, matrix_mdev->matrix.apm,
1268 			     matrix_mdev->matrix.apm_max + 1) {
1269 		for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm,
1270 				     matrix_mdev->matrix.aqm_max + 1) {
1271 			q = vfio_ap_find_queue(AP_MKQID(apid, apqi));
1272 			ret = vfio_ap_mdev_reset_queue(q, 1);
1273 			/*
1274 			 * Regardless whether a queue turns out to be busy, or
1275 			 * is not operational, we need to continue resetting
1276 			 * the remaining queues.
1277 			 */
1278 			if (ret)
1279 				rc = ret;
1280 		}
1281 	}
1282 
1283 	return rc;
1284 }
1285 
1286 static int vfio_ap_mdev_open_device(struct vfio_device *vdev)
1287 {
1288 	struct ap_matrix_mdev *matrix_mdev =
1289 		container_of(vdev, struct ap_matrix_mdev, vdev);
1290 	unsigned long events;
1291 	int ret;
1292 
1293 	matrix_mdev->group_notifier.notifier_call = vfio_ap_mdev_group_notifier;
1294 	events = VFIO_GROUP_NOTIFY_SET_KVM;
1295 
1296 	ret = vfio_register_notifier(vdev->dev, VFIO_GROUP_NOTIFY,
1297 				     &events, &matrix_mdev->group_notifier);
1298 	if (ret)
1299 		return ret;
1300 
1301 	matrix_mdev->iommu_notifier.notifier_call = vfio_ap_mdev_iommu_notifier;
1302 	events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
1303 	ret = vfio_register_notifier(vdev->dev, VFIO_IOMMU_NOTIFY,
1304 				     &events, &matrix_mdev->iommu_notifier);
1305 	if (ret)
1306 		goto out_unregister_group;
1307 	return 0;
1308 
1309 out_unregister_group:
1310 	vfio_unregister_notifier(vdev->dev, VFIO_GROUP_NOTIFY,
1311 				 &matrix_mdev->group_notifier);
1312 	return ret;
1313 }
1314 
1315 static void vfio_ap_mdev_close_device(struct vfio_device *vdev)
1316 {
1317 	struct ap_matrix_mdev *matrix_mdev =
1318 		container_of(vdev, struct ap_matrix_mdev, vdev);
1319 
1320 	vfio_unregister_notifier(vdev->dev, VFIO_IOMMU_NOTIFY,
1321 				 &matrix_mdev->iommu_notifier);
1322 	vfio_unregister_notifier(vdev->dev, VFIO_GROUP_NOTIFY,
1323 				 &matrix_mdev->group_notifier);
1324 	vfio_ap_mdev_unset_kvm(matrix_mdev, matrix_mdev->kvm);
1325 }
1326 
1327 static int vfio_ap_mdev_get_device_info(unsigned long arg)
1328 {
1329 	unsigned long minsz;
1330 	struct vfio_device_info info;
1331 
1332 	minsz = offsetofend(struct vfio_device_info, num_irqs);
1333 
1334 	if (copy_from_user(&info, (void __user *)arg, minsz))
1335 		return -EFAULT;
1336 
1337 	if (info.argsz < minsz)
1338 		return -EINVAL;
1339 
1340 	info.flags = VFIO_DEVICE_FLAGS_AP | VFIO_DEVICE_FLAGS_RESET;
1341 	info.num_regions = 0;
1342 	info.num_irqs = 0;
1343 
1344 	return copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0;
1345 }
1346 
1347 static ssize_t vfio_ap_mdev_ioctl(struct vfio_device *vdev,
1348 				    unsigned int cmd, unsigned long arg)
1349 {
1350 	struct ap_matrix_mdev *matrix_mdev =
1351 		container_of(vdev, struct ap_matrix_mdev, vdev);
1352 	int ret;
1353 
1354 	mutex_lock(&matrix_dev->lock);
1355 	switch (cmd) {
1356 	case VFIO_DEVICE_GET_INFO:
1357 		ret = vfio_ap_mdev_get_device_info(arg);
1358 		break;
1359 	case VFIO_DEVICE_RESET:
1360 		ret = vfio_ap_mdev_reset_queues(matrix_mdev);
1361 		break;
1362 	default:
1363 		ret = -EOPNOTSUPP;
1364 		break;
1365 	}
1366 	mutex_unlock(&matrix_dev->lock);
1367 
1368 	return ret;
1369 }
1370 
/* VFIO device callbacks: open/close and ioctl handling for a matrix mdev. */
static const struct vfio_device_ops vfio_ap_matrix_dev_ops = {
	.open_device = vfio_ap_mdev_open_device,
	.close_device = vfio_ap_mdev_close_device,
	.ioctl = vfio_ap_mdev_ioctl,
};

/*
 * The mdev driver bound to mediated matrix devices. Its dev_groups expose
 * the sysfs attributes collected in vfio_ap_mdev_attr_groups; the probe and
 * remove callbacks are defined outside this section of the file.
 */
static struct mdev_driver vfio_ap_matrix_driver = {
	.driver = {
		.name = "vfio_ap_mdev",
		.owner = THIS_MODULE,
		.mod_name = KBUILD_MODNAME,
		.dev_groups = vfio_ap_mdev_attr_groups,
	},
	.probe = vfio_ap_mdev_probe,
	.remove = vfio_ap_mdev_remove,
};

/*
 * Parent operations handed to mdev_register_device(): ties the parent
 * device to the driver above and to the supported mdev type groups.
 */
static const struct mdev_parent_ops vfio_ap_matrix_ops = {
	.owner			= THIS_MODULE,
	.device_driver		= &vfio_ap_matrix_driver,
	.supported_type_groups	= vfio_ap_mdev_type_groups,
};
1393 
1394 int vfio_ap_mdev_register(void)
1395 {
1396 	int ret;
1397 
1398 	atomic_set(&matrix_dev->available_instances, MAX_ZDEV_ENTRIES_EXT);
1399 
1400 	ret = mdev_register_driver(&vfio_ap_matrix_driver);
1401 	if (ret)
1402 		return ret;
1403 
1404 	ret = mdev_register_device(&matrix_dev->device, &vfio_ap_matrix_ops);
1405 	if (ret)
1406 		goto err_driver;
1407 	return 0;
1408 
1409 err_driver:
1410 	mdev_unregister_driver(&vfio_ap_matrix_driver);
1411 	return ret;
1412 }
1413 
/**
 * vfio_ap_mdev_unregister - undo vfio_ap_mdev_register()
 *
 * Unregisters the mdev parent device first and the mdev driver second,
 * i.e. in the reverse order of their registration.
 */
void vfio_ap_mdev_unregister(void)
{
	mdev_unregister_device(&matrix_dev->device);
	mdev_unregister_driver(&vfio_ap_matrix_driver);
}
1419