1d1dfe5b8SDave Jiang // SPDX-License-Identifier: GPL-2.0
2d1dfe5b8SDave Jiang /* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
3d1dfe5b8SDave Jiang #include <linux/init.h>
4d1dfe5b8SDave Jiang #include <linux/kernel.h>
5d1dfe5b8SDave Jiang #include <linux/module.h>
6d1dfe5b8SDave Jiang #include <linux/pci.h>
7d1dfe5b8SDave Jiang #include <uapi/linux/idxd.h>
8d1dfe5b8SDave Jiang #include "idxd.h"
9d1dfe5b8SDave Jiang #include "registers.h"
10d1dfe5b8SDave Jiang
__get_desc(struct idxd_wq * wq,int idx,int cpu)110705107fSDave Jiang static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu)
12d1dfe5b8SDave Jiang {
13d1dfe5b8SDave Jiang struct idxd_desc *desc;
148e50d392SDave Jiang struct idxd_device *idxd = wq->idxd;
15d1dfe5b8SDave Jiang
16d1dfe5b8SDave Jiang desc = wq->descs[idx];
17d1dfe5b8SDave Jiang memset(desc->hw, 0, sizeof(struct dsa_hw_desc));
18435b512dSDave Jiang memset(desc->completion, 0, idxd->data->compl_size);
190705107fSDave Jiang desc->cpu = cpu;
208e50d392SDave Jiang
218e50d392SDave Jiang if (device_pasid_enabled(idxd))
228e50d392SDave Jiang desc->hw->pasid = idxd->pasid;
238e50d392SDave Jiang
24d1dfe5b8SDave Jiang return desc;
25d1dfe5b8SDave Jiang }
26d1dfe5b8SDave Jiang
idxd_alloc_desc(struct idxd_wq * wq,enum idxd_op_type optype)270705107fSDave Jiang struct idxd_desc *idxd_alloc_desc(struct idxd_wq *wq, enum idxd_op_type optype)
280705107fSDave Jiang {
290705107fSDave Jiang int cpu, idx;
300705107fSDave Jiang struct idxd_device *idxd = wq->idxd;
310705107fSDave Jiang DEFINE_SBQ_WAIT(wait);
320705107fSDave Jiang struct sbq_wait_state *ws;
330705107fSDave Jiang struct sbitmap_queue *sbq;
340705107fSDave Jiang
350705107fSDave Jiang if (idxd->state != IDXD_DEV_ENABLED)
360705107fSDave Jiang return ERR_PTR(-EIO);
370705107fSDave Jiang
380705107fSDave Jiang sbq = &wq->sbq;
390705107fSDave Jiang idx = sbitmap_queue_get(sbq, &cpu);
400705107fSDave Jiang if (idx < 0) {
410705107fSDave Jiang if (optype == IDXD_OP_NONBLOCK)
420705107fSDave Jiang return ERR_PTR(-EAGAIN);
430705107fSDave Jiang } else {
440705107fSDave Jiang return __get_desc(wq, idx, cpu);
450705107fSDave Jiang }
460705107fSDave Jiang
470705107fSDave Jiang ws = &sbq->ws[0];
480705107fSDave Jiang for (;;) {
490705107fSDave Jiang sbitmap_prepare_to_wait(sbq, ws, &wait, TASK_INTERRUPTIBLE);
500705107fSDave Jiang if (signal_pending_state(TASK_INTERRUPTIBLE, current))
510705107fSDave Jiang break;
520705107fSDave Jiang idx = sbitmap_queue_get(sbq, &cpu);
53673d812dSDave Jiang if (idx >= 0)
540705107fSDave Jiang break;
550705107fSDave Jiang schedule();
560705107fSDave Jiang }
570705107fSDave Jiang
580705107fSDave Jiang sbitmap_finish_wait(sbq, ws, &wait);
590705107fSDave Jiang if (idx < 0)
600705107fSDave Jiang return ERR_PTR(-EAGAIN);
610705107fSDave Jiang
620705107fSDave Jiang return __get_desc(wq, idx, cpu);
630705107fSDave Jiang }
640705107fSDave Jiang
idxd_free_desc(struct idxd_wq * wq,struct idxd_desc * desc)65d1dfe5b8SDave Jiang void idxd_free_desc(struct idxd_wq *wq, struct idxd_desc *desc)
66d1dfe5b8SDave Jiang {
670705107fSDave Jiang int cpu = desc->cpu;
68d1dfe5b8SDave Jiang
690705107fSDave Jiang desc->cpu = -1;
700705107fSDave Jiang sbitmap_queue_clear(&wq->sbq, desc->id, cpu);
71d1dfe5b8SDave Jiang }
72d1dfe5b8SDave Jiang
list_abort_desc(struct idxd_wq * wq,struct idxd_irq_entry * ie,struct idxd_desc * desc)736b4b87f2SDave Jiang static struct idxd_desc *list_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
746b4b87f2SDave Jiang struct idxd_desc *desc)
756b4b87f2SDave Jiang {
766b4b87f2SDave Jiang struct idxd_desc *d, *n;
776b4b87f2SDave Jiang
786b4b87f2SDave Jiang lockdep_assert_held(&ie->list_lock);
796b4b87f2SDave Jiang list_for_each_entry_safe(d, n, &ie->work_list, list) {
806b4b87f2SDave Jiang if (d == desc) {
816b4b87f2SDave Jiang list_del(&d->list);
826b4b87f2SDave Jiang return d;
836b4b87f2SDave Jiang }
846b4b87f2SDave Jiang }
856b4b87f2SDave Jiang
866b4b87f2SDave Jiang /*
876b4b87f2SDave Jiang * At this point, the desc needs to be aborted is held by the completion
886b4b87f2SDave Jiang * handler where it has taken it off the pending list but has not added to the
896b4b87f2SDave Jiang * work list. It will be cleaned up by the interrupt handler when it sees the
906b4b87f2SDave Jiang * IDXD_COMP_DESC_ABORT for completion status.
916b4b87f2SDave Jiang */
926b4b87f2SDave Jiang return NULL;
936b4b87f2SDave Jiang }
946b4b87f2SDave Jiang
llist_abort_desc(struct idxd_wq * wq,struct idxd_irq_entry * ie,struct idxd_desc * desc)956b4b87f2SDave Jiang static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
966b4b87f2SDave Jiang struct idxd_desc *desc)
976b4b87f2SDave Jiang {
986b4b87f2SDave Jiang struct idxd_desc *d, *t, *found = NULL;
996b4b87f2SDave Jiang struct llist_node *head;
1008affd8a4SDave Jiang LIST_HEAD(flist);
1016b4b87f2SDave Jiang
1026b4b87f2SDave Jiang desc->completion->status = IDXD_COMP_DESC_ABORT;
1036b4b87f2SDave Jiang /*
1046b4b87f2SDave Jiang * Grab the list lock so it will block the irq thread handler. This allows the
1056b4b87f2SDave Jiang * abort code to locate the descriptor need to be aborted.
1066b4b87f2SDave Jiang */
1079fce3b3aSDave Jiang spin_lock(&ie->list_lock);
1086b4b87f2SDave Jiang head = llist_del_all(&ie->pending_llist);
1096b4b87f2SDave Jiang if (head) {
1106b4b87f2SDave Jiang llist_for_each_entry_safe(d, t, head, llnode) {
1116b4b87f2SDave Jiang if (d == desc) {
1126b4b87f2SDave Jiang found = desc;
1136b4b87f2SDave Jiang continue;
1146b4b87f2SDave Jiang }
1158affd8a4SDave Jiang
1168affd8a4SDave Jiang if (d->completion->status)
1178affd8a4SDave Jiang list_add_tail(&d->list, &flist);
1188affd8a4SDave Jiang else
1198affd8a4SDave Jiang list_add_tail(&d->list, &ie->work_list);
1206b4b87f2SDave Jiang }
1216b4b87f2SDave Jiang }
1226b4b87f2SDave Jiang
1236b4b87f2SDave Jiang if (!found)
1246b4b87f2SDave Jiang found = list_abort_desc(wq, ie, desc);
1259fce3b3aSDave Jiang spin_unlock(&ie->list_lock);
1266b4b87f2SDave Jiang
1276b4b87f2SDave Jiang if (found)
1285d78abb6SDave Jiang idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, false);
1298affd8a4SDave Jiang
1308affd8a4SDave Jiang /*
1315cb664fbSVinod Koul * completing the descriptor will return desc to allocator and
1325cb664fbSVinod Koul * the desc can be acquired by a different process and the
1335cb664fbSVinod Koul * desc->list can be modified. Delete desc from list so the
1345cb664fbSVinod Koul * list trasversing does not get corrupted by the other process.
1358affd8a4SDave Jiang */
1368affd8a4SDave Jiang list_for_each_entry_safe(d, t, &flist, list) {
1378affd8a4SDave Jiang list_del_init(&d->list);
1385cb664fbSVinod Koul idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, true);
1398affd8a4SDave Jiang }
1406b4b87f2SDave Jiang }
1416b4b87f2SDave Jiang
1427930d855SDave Jiang /*
1437930d855SDave Jiang * ENQCMDS typically fail when the WQ is inactive or busy. On host submission, the driver
1447930d855SDave Jiang * has better control of number of descriptors being submitted to a shared wq by limiting
1457930d855SDave Jiang * the number of driver allocated descriptors to the wq size. However, when the swq is
1467930d855SDave Jiang * exported to a guest kernel, it may be shared with multiple guest kernels. This means
1477930d855SDave Jiang * the likelihood of getting busy returned on the swq when submitting goes significantly up.
1487930d855SDave Jiang * Having a tunable retry mechanism allows the driver to keep trying for a bit before giving
1497930d855SDave Jiang * up. The sysfs knob can be tuned by the system administrator.
1507930d855SDave Jiang */
idxd_enqcmds(struct idxd_wq * wq,void __iomem * portal,const void * desc)1517930d855SDave Jiang int idxd_enqcmds(struct idxd_wq *wq, void __iomem *portal, const void *desc)
1527930d855SDave Jiang {
153bc3452cdSDave Jiang unsigned int retries = wq->enqcmds_retries;
1545d9d16e5SDave Jiang int rc;
1557930d855SDave Jiang
1567930d855SDave Jiang do {
1577930d855SDave Jiang rc = enqcmds(portal, desc);
1587930d855SDave Jiang if (rc == 0)
1597930d855SDave Jiang break;
1607930d855SDave Jiang cpu_relax();
161bc3452cdSDave Jiang } while (retries--);
1627930d855SDave Jiang
1637930d855SDave Jiang return rc;
1647930d855SDave Jiang }
1657930d855SDave Jiang
idxd_submit_desc(struct idxd_wq * wq,struct idxd_desc * desc)166d1dfe5b8SDave Jiang int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
167d1dfe5b8SDave Jiang {
168d1dfe5b8SDave Jiang struct idxd_device *idxd = wq->idxd;
1696b4b87f2SDave Jiang struct idxd_irq_entry *ie = NULL;
17056fc39f5SDave Jiang u32 desc_flags = desc->hw->flags;
17142d279f9SDave Jiang void __iomem *portal;
1728e50d392SDave Jiang int rc;
173d1dfe5b8SDave Jiang
1745d78abb6SDave Jiang if (idxd->state != IDXD_DEV_ENABLED)
175d1dfe5b8SDave Jiang return -EIO;
176d1dfe5b8SDave Jiang
17756fc39f5SDave Jiang if (!percpu_ref_tryget_live(&wq->wq_active)) {
17856fc39f5SDave Jiang wait_for_completion(&wq->wq_resurrect);
1795d78abb6SDave Jiang if (!percpu_ref_tryget_live(&wq->wq_active))
18093a40a6dSDave Jiang return -ENXIO;
18156fc39f5SDave Jiang }
18293a40a6dSDave Jiang
183a9c17152SDave Jiang portal = idxd_wq_portal_addr(wq);
1848e50d392SDave Jiang
185d1dfe5b8SDave Jiang /*
1866b4b87f2SDave Jiang * Pending the descriptor to the lockless list for the irq_entry
1876b4b87f2SDave Jiang * that we designated the descriptor to.
1886b4b87f2SDave Jiang */
18956fc39f5SDave Jiang if (desc_flags & IDXD_OP_FLAG_RCI) {
190ec0d6423SDave Jiang ie = &wq->ie;
191eb0cf33aSDave Jiang desc->hw->int_handle = ie->int_handle;
1926b4b87f2SDave Jiang llist_add(&desc->llnode, &ie->pending_llist);
1936b4b87f2SDave Jiang }
1946b4b87f2SDave Jiang
195*7734bb38SGuanjun /*
196*7734bb38SGuanjun * The wmb() flushes writes to coherent DMA data before
197*7734bb38SGuanjun * possibly triggering a DMA read. The wmb() is necessary
198*7734bb38SGuanjun * even on UP because the recipient is a device.
199*7734bb38SGuanjun */
200*7734bb38SGuanjun wmb();
201*7734bb38SGuanjun
2028e50d392SDave Jiang if (wq_dedicated(wq)) {
20342d279f9SDave Jiang iosubmit_cmds512(portal, desc->hw, 1);
2048e50d392SDave Jiang } else {
2057930d855SDave Jiang rc = idxd_enqcmds(wq, portal, desc->hw);
206ac24a2dcSDave Jiang if (rc < 0) {
207ac24a2dcSDave Jiang percpu_ref_put(&wq->wq_active);
2080b030f54SDave Jiang /* abort operation frees the descriptor */
2096b4b87f2SDave Jiang if (ie)
2106b4b87f2SDave Jiang llist_abort_desc(wq, ie, desc);
2118e50d392SDave Jiang return rc;
2128e50d392SDave Jiang }
213ac24a2dcSDave Jiang }
214d1dfe5b8SDave Jiang
21593a40a6dSDave Jiang percpu_ref_put(&wq->wq_active);
216d1dfe5b8SDave Jiang return 0;
217d1dfe5b8SDave Jiang }
218