xref: /openbmc/linux/drivers/dma/idxd/submit.c (revision 7930d85535751bc8b05c6731c6b79d874671f13c)
1d1dfe5b8SDave Jiang // SPDX-License-Identifier: GPL-2.0
2d1dfe5b8SDave Jiang /* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
3d1dfe5b8SDave Jiang #include <linux/init.h>
4d1dfe5b8SDave Jiang #include <linux/kernel.h>
5d1dfe5b8SDave Jiang #include <linux/module.h>
6d1dfe5b8SDave Jiang #include <linux/pci.h>
7d1dfe5b8SDave Jiang #include <uapi/linux/idxd.h>
8d1dfe5b8SDave Jiang #include "idxd.h"
9d1dfe5b8SDave Jiang #include "registers.h"
10d1dfe5b8SDave Jiang 
110705107fSDave Jiang static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu)
12d1dfe5b8SDave Jiang {
13d1dfe5b8SDave Jiang 	struct idxd_desc *desc;
148e50d392SDave Jiang 	struct idxd_device *idxd = wq->idxd;
15d1dfe5b8SDave Jiang 
16d1dfe5b8SDave Jiang 	desc = wq->descs[idx];
17d1dfe5b8SDave Jiang 	memset(desc->hw, 0, sizeof(struct dsa_hw_desc));
18435b512dSDave Jiang 	memset(desc->completion, 0, idxd->data->compl_size);
190705107fSDave Jiang 	desc->cpu = cpu;
208e50d392SDave Jiang 
218e50d392SDave Jiang 	if (device_pasid_enabled(idxd))
228e50d392SDave Jiang 		desc->hw->pasid = idxd->pasid;
238e50d392SDave Jiang 
24d1dfe5b8SDave Jiang 	return desc;
25d1dfe5b8SDave Jiang }
26d1dfe5b8SDave Jiang 
270705107fSDave Jiang struct idxd_desc *idxd_alloc_desc(struct idxd_wq *wq, enum idxd_op_type optype)
280705107fSDave Jiang {
290705107fSDave Jiang 	int cpu, idx;
300705107fSDave Jiang 	struct idxd_device *idxd = wq->idxd;
310705107fSDave Jiang 	DEFINE_SBQ_WAIT(wait);
320705107fSDave Jiang 	struct sbq_wait_state *ws;
330705107fSDave Jiang 	struct sbitmap_queue *sbq;
340705107fSDave Jiang 
350705107fSDave Jiang 	if (idxd->state != IDXD_DEV_ENABLED)
360705107fSDave Jiang 		return ERR_PTR(-EIO);
370705107fSDave Jiang 
380705107fSDave Jiang 	sbq = &wq->sbq;
390705107fSDave Jiang 	idx = sbitmap_queue_get(sbq, &cpu);
400705107fSDave Jiang 	if (idx < 0) {
410705107fSDave Jiang 		if (optype == IDXD_OP_NONBLOCK)
420705107fSDave Jiang 			return ERR_PTR(-EAGAIN);
430705107fSDave Jiang 	} else {
440705107fSDave Jiang 		return __get_desc(wq, idx, cpu);
450705107fSDave Jiang 	}
460705107fSDave Jiang 
470705107fSDave Jiang 	ws = &sbq->ws[0];
480705107fSDave Jiang 	for (;;) {
490705107fSDave Jiang 		sbitmap_prepare_to_wait(sbq, ws, &wait, TASK_INTERRUPTIBLE);
500705107fSDave Jiang 		if (signal_pending_state(TASK_INTERRUPTIBLE, current))
510705107fSDave Jiang 			break;
520705107fSDave Jiang 		idx = sbitmap_queue_get(sbq, &cpu);
53673d812dSDave Jiang 		if (idx >= 0)
540705107fSDave Jiang 			break;
550705107fSDave Jiang 		schedule();
560705107fSDave Jiang 	}
570705107fSDave Jiang 
580705107fSDave Jiang 	sbitmap_finish_wait(sbq, ws, &wait);
590705107fSDave Jiang 	if (idx < 0)
600705107fSDave Jiang 		return ERR_PTR(-EAGAIN);
610705107fSDave Jiang 
620705107fSDave Jiang 	return __get_desc(wq, idx, cpu);
630705107fSDave Jiang }
640705107fSDave Jiang 
65d1dfe5b8SDave Jiang void idxd_free_desc(struct idxd_wq *wq, struct idxd_desc *desc)
66d1dfe5b8SDave Jiang {
670705107fSDave Jiang 	int cpu = desc->cpu;
68d1dfe5b8SDave Jiang 
690705107fSDave Jiang 	desc->cpu = -1;
700705107fSDave Jiang 	sbitmap_queue_clear(&wq->sbq, desc->id, cpu);
71d1dfe5b8SDave Jiang }
72d1dfe5b8SDave Jiang 
736b4b87f2SDave Jiang static struct idxd_desc *list_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
746b4b87f2SDave Jiang 					 struct idxd_desc *desc)
756b4b87f2SDave Jiang {
766b4b87f2SDave Jiang 	struct idxd_desc *d, *n;
776b4b87f2SDave Jiang 
786b4b87f2SDave Jiang 	lockdep_assert_held(&ie->list_lock);
796b4b87f2SDave Jiang 	list_for_each_entry_safe(d, n, &ie->work_list, list) {
806b4b87f2SDave Jiang 		if (d == desc) {
816b4b87f2SDave Jiang 			list_del(&d->list);
826b4b87f2SDave Jiang 			return d;
836b4b87f2SDave Jiang 		}
846b4b87f2SDave Jiang 	}
856b4b87f2SDave Jiang 
866b4b87f2SDave Jiang 	/*
876b4b87f2SDave Jiang 	 * At this point, the desc needs to be aborted is held by the completion
886b4b87f2SDave Jiang 	 * handler where it has taken it off the pending list but has not added to the
896b4b87f2SDave Jiang 	 * work list. It will be cleaned up by the interrupt handler when it sees the
906b4b87f2SDave Jiang 	 * IDXD_COMP_DESC_ABORT for completion status.
916b4b87f2SDave Jiang 	 */
926b4b87f2SDave Jiang 	return NULL;
936b4b87f2SDave Jiang }
946b4b87f2SDave Jiang 
956b4b87f2SDave Jiang static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
966b4b87f2SDave Jiang 			     struct idxd_desc *desc)
976b4b87f2SDave Jiang {
986b4b87f2SDave Jiang 	struct idxd_desc *d, *t, *found = NULL;
996b4b87f2SDave Jiang 	struct llist_node *head;
1006b4b87f2SDave Jiang 
1016b4b87f2SDave Jiang 	desc->completion->status = IDXD_COMP_DESC_ABORT;
1026b4b87f2SDave Jiang 	/*
1036b4b87f2SDave Jiang 	 * Grab the list lock so it will block the irq thread handler. This allows the
1046b4b87f2SDave Jiang 	 * abort code to locate the descriptor need to be aborted.
1056b4b87f2SDave Jiang 	 */
1069fce3b3aSDave Jiang 	spin_lock(&ie->list_lock);
1076b4b87f2SDave Jiang 	head = llist_del_all(&ie->pending_llist);
1086b4b87f2SDave Jiang 	if (head) {
1096b4b87f2SDave Jiang 		llist_for_each_entry_safe(d, t, head, llnode) {
1106b4b87f2SDave Jiang 			if (d == desc) {
1116b4b87f2SDave Jiang 				found = desc;
1126b4b87f2SDave Jiang 				continue;
1136b4b87f2SDave Jiang 			}
1146b4b87f2SDave Jiang 			list_add_tail(&desc->list, &ie->work_list);
1156b4b87f2SDave Jiang 		}
1166b4b87f2SDave Jiang 	}
1176b4b87f2SDave Jiang 
1186b4b87f2SDave Jiang 	if (!found)
1196b4b87f2SDave Jiang 		found = list_abort_desc(wq, ie, desc);
1209fce3b3aSDave Jiang 	spin_unlock(&ie->list_lock);
1216b4b87f2SDave Jiang 
1226b4b87f2SDave Jiang 	if (found)
1235d78abb6SDave Jiang 		idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, false);
1246b4b87f2SDave Jiang }
1256b4b87f2SDave Jiang 
126*7930d855SDave Jiang /*
127*7930d855SDave Jiang  * ENQCMDS typically fail when the WQ is inactive or busy. On host submission, the driver
128*7930d855SDave Jiang  * has better control of number of descriptors being submitted to a shared wq by limiting
129*7930d855SDave Jiang  * the number of driver allocated descriptors to the wq size. However, when the swq is
130*7930d855SDave Jiang  * exported to a guest kernel, it may be shared with multiple guest kernels. This means
131*7930d855SDave Jiang  * the likelihood of getting busy returned on the swq when submitting goes significantly up.
132*7930d855SDave Jiang  * Having a tunable retry mechanism allows the driver to keep trying for a bit before giving
133*7930d855SDave Jiang  * up. The sysfs knob can be tuned by the system administrator.
134*7930d855SDave Jiang  */
135*7930d855SDave Jiang int idxd_enqcmds(struct idxd_wq *wq, void __iomem *portal, const void *desc)
136*7930d855SDave Jiang {
137*7930d855SDave Jiang 	int rc, retries = 0;
138*7930d855SDave Jiang 
139*7930d855SDave Jiang 	do {
140*7930d855SDave Jiang 		rc = enqcmds(portal, desc);
141*7930d855SDave Jiang 		if (rc == 0)
142*7930d855SDave Jiang 			break;
143*7930d855SDave Jiang 		cpu_relax();
144*7930d855SDave Jiang 	} while (retries++ < wq->enqcmds_retries);
145*7930d855SDave Jiang 
146*7930d855SDave Jiang 	return rc;
147*7930d855SDave Jiang }
148*7930d855SDave Jiang 
149d1dfe5b8SDave Jiang int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
150d1dfe5b8SDave Jiang {
151d1dfe5b8SDave Jiang 	struct idxd_device *idxd = wq->idxd;
1526b4b87f2SDave Jiang 	struct idxd_irq_entry *ie = NULL;
15356fc39f5SDave Jiang 	u32 desc_flags = desc->hw->flags;
15442d279f9SDave Jiang 	void __iomem *portal;
1558e50d392SDave Jiang 	int rc;
156d1dfe5b8SDave Jiang 
1575d78abb6SDave Jiang 	if (idxd->state != IDXD_DEV_ENABLED)
158d1dfe5b8SDave Jiang 		return -EIO;
159d1dfe5b8SDave Jiang 
16056fc39f5SDave Jiang 	if (!percpu_ref_tryget_live(&wq->wq_active)) {
16156fc39f5SDave Jiang 		wait_for_completion(&wq->wq_resurrect);
1625d78abb6SDave Jiang 		if (!percpu_ref_tryget_live(&wq->wq_active))
16393a40a6dSDave Jiang 			return -ENXIO;
16456fc39f5SDave Jiang 	}
16593a40a6dSDave Jiang 
166a9c17152SDave Jiang 	portal = idxd_wq_portal_addr(wq);
1678e50d392SDave Jiang 
168d1dfe5b8SDave Jiang 	/*
1698e50d392SDave Jiang 	 * The wmb() flushes writes to coherent DMA data before
1708e50d392SDave Jiang 	 * possibly triggering a DMA read. The wmb() is necessary
1718e50d392SDave Jiang 	 * even on UP because the recipient is a device.
172d1dfe5b8SDave Jiang 	 */
173d1dfe5b8SDave Jiang 	wmb();
1746b4b87f2SDave Jiang 
1756b4b87f2SDave Jiang 	/*
1766b4b87f2SDave Jiang 	 * Pending the descriptor to the lockless list for the irq_entry
1776b4b87f2SDave Jiang 	 * that we designated the descriptor to.
1786b4b87f2SDave Jiang 	 */
17956fc39f5SDave Jiang 	if (desc_flags & IDXD_OP_FLAG_RCI) {
1808b67426eSDave Jiang 		ie = wq->ie;
181eb0cf33aSDave Jiang 		if (ie->int_handle == INVALID_INT_HANDLE)
182eb0cf33aSDave Jiang 			desc->hw->int_handle = ie->id;
183eb0cf33aSDave Jiang 		else
184eb0cf33aSDave Jiang 			desc->hw->int_handle = ie->int_handle;
185eb0cf33aSDave Jiang 
1866b4b87f2SDave Jiang 		llist_add(&desc->llnode, &ie->pending_llist);
1876b4b87f2SDave Jiang 	}
1886b4b87f2SDave Jiang 
1898e50d392SDave Jiang 	if (wq_dedicated(wq)) {
19042d279f9SDave Jiang 		iosubmit_cmds512(portal, desc->hw, 1);
1918e50d392SDave Jiang 	} else {
192*7930d855SDave Jiang 		rc = idxd_enqcmds(wq, portal, desc->hw);
193ac24a2dcSDave Jiang 		if (rc < 0) {
194ac24a2dcSDave Jiang 			percpu_ref_put(&wq->wq_active);
1950b030f54SDave Jiang 			/* abort operation frees the descriptor */
1966b4b87f2SDave Jiang 			if (ie)
1976b4b87f2SDave Jiang 				llist_abort_desc(wq, ie, desc);
1988e50d392SDave Jiang 			return rc;
1998e50d392SDave Jiang 		}
200ac24a2dcSDave Jiang 	}
201d1dfe5b8SDave Jiang 
20293a40a6dSDave Jiang 	percpu_ref_put(&wq->wq_active);
203d1dfe5b8SDave Jiang 	return 0;
204d1dfe5b8SDave Jiang }
205