1d1dfe5b8SDave Jiang // SPDX-License-Identifier: GPL-2.0 2d1dfe5b8SDave Jiang /* Copyright(c) 2019 Intel Corporation. All rights rsvd. */ 3d1dfe5b8SDave Jiang #include <linux/init.h> 4d1dfe5b8SDave Jiang #include <linux/kernel.h> 5d1dfe5b8SDave Jiang #include <linux/module.h> 6d1dfe5b8SDave Jiang #include <linux/pci.h> 7d1dfe5b8SDave Jiang #include <uapi/linux/idxd.h> 8d1dfe5b8SDave Jiang #include "idxd.h" 9d1dfe5b8SDave Jiang #include "registers.h" 10d1dfe5b8SDave Jiang 110705107fSDave Jiang static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu) 12d1dfe5b8SDave Jiang { 13d1dfe5b8SDave Jiang struct idxd_desc *desc; 148e50d392SDave Jiang struct idxd_device *idxd = wq->idxd; 15d1dfe5b8SDave Jiang 16d1dfe5b8SDave Jiang desc = wq->descs[idx]; 17d1dfe5b8SDave Jiang memset(desc->hw, 0, sizeof(struct dsa_hw_desc)); 18435b512dSDave Jiang memset(desc->completion, 0, idxd->data->compl_size); 190705107fSDave Jiang desc->cpu = cpu; 208e50d392SDave Jiang 218e50d392SDave Jiang if (device_pasid_enabled(idxd)) 228e50d392SDave Jiang desc->hw->pasid = idxd->pasid; 238e50d392SDave Jiang 24d1dfe5b8SDave Jiang return desc; 25d1dfe5b8SDave Jiang } 26d1dfe5b8SDave Jiang 270705107fSDave Jiang struct idxd_desc *idxd_alloc_desc(struct idxd_wq *wq, enum idxd_op_type optype) 280705107fSDave Jiang { 290705107fSDave Jiang int cpu, idx; 300705107fSDave Jiang struct idxd_device *idxd = wq->idxd; 310705107fSDave Jiang DEFINE_SBQ_WAIT(wait); 320705107fSDave Jiang struct sbq_wait_state *ws; 330705107fSDave Jiang struct sbitmap_queue *sbq; 340705107fSDave Jiang 350705107fSDave Jiang if (idxd->state != IDXD_DEV_ENABLED) 360705107fSDave Jiang return ERR_PTR(-EIO); 370705107fSDave Jiang 380705107fSDave Jiang sbq = &wq->sbq; 390705107fSDave Jiang idx = sbitmap_queue_get(sbq, &cpu); 400705107fSDave Jiang if (idx < 0) { 410705107fSDave Jiang if (optype == IDXD_OP_NONBLOCK) 420705107fSDave Jiang return ERR_PTR(-EAGAIN); 430705107fSDave Jiang } else { 440705107fSDave Jiang return __get_desc(wq, idx, cpu); 450705107fSDave Jiang } 460705107fSDave Jiang 470705107fSDave Jiang ws = &sbq->ws[0]; 480705107fSDave Jiang for (;;) { 490705107fSDave Jiang sbitmap_prepare_to_wait(sbq, ws, &wait, TASK_INTERRUPTIBLE); 500705107fSDave Jiang if (signal_pending_state(TASK_INTERRUPTIBLE, current)) 510705107fSDave Jiang break; 520705107fSDave Jiang idx = sbitmap_queue_get(sbq, &cpu); 53673d812dSDave Jiang if (idx >= 0) 540705107fSDave Jiang break; 550705107fSDave Jiang schedule(); 560705107fSDave Jiang } 570705107fSDave Jiang 580705107fSDave Jiang sbitmap_finish_wait(sbq, ws, &wait); 590705107fSDave Jiang if (idx < 0) 600705107fSDave Jiang return ERR_PTR(-EAGAIN); 610705107fSDave Jiang 620705107fSDave Jiang return __get_desc(wq, idx, cpu); 630705107fSDave Jiang } 640705107fSDave Jiang 65d1dfe5b8SDave Jiang void idxd_free_desc(struct idxd_wq *wq, struct idxd_desc *desc) 66d1dfe5b8SDave Jiang { 670705107fSDave Jiang int cpu = desc->cpu; 68d1dfe5b8SDave Jiang 690705107fSDave Jiang desc->cpu = -1; 700705107fSDave Jiang sbitmap_queue_clear(&wq->sbq, desc->id, cpu); 71d1dfe5b8SDave Jiang } 72d1dfe5b8SDave Jiang 736b4b87f2SDave Jiang static struct idxd_desc *list_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie, 746b4b87f2SDave Jiang struct idxd_desc *desc) 756b4b87f2SDave Jiang { 766b4b87f2SDave Jiang struct idxd_desc *d, *n; 776b4b87f2SDave Jiang 786b4b87f2SDave Jiang lockdep_assert_held(&ie->list_lock); 796b4b87f2SDave Jiang list_for_each_entry_safe(d, n, &ie->work_list, list) { 806b4b87f2SDave Jiang if (d == desc) { 816b4b87f2SDave Jiang list_del(&d->list); 826b4b87f2SDave Jiang return d; 836b4b87f2SDave Jiang } 846b4b87f2SDave Jiang } 856b4b87f2SDave Jiang 866b4b87f2SDave Jiang /* 876b4b87f2SDave Jiang * At this point, the desc needs to be aborted is held by the completion 886b4b87f2SDave Jiang * handler where it has taken it off the pending list but has not added to the 896b4b87f2SDave Jiang * work list. It will be cleaned up by the interrupt handler when it sees the 906b4b87f2SDave Jiang * IDXD_COMP_DESC_ABORT for completion status. 916b4b87f2SDave Jiang */ 926b4b87f2SDave Jiang return NULL; 936b4b87f2SDave Jiang } 946b4b87f2SDave Jiang 956b4b87f2SDave Jiang static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie, 966b4b87f2SDave Jiang struct idxd_desc *desc) 976b4b87f2SDave Jiang { 986b4b87f2SDave Jiang struct idxd_desc *d, *t, *found = NULL; 996b4b87f2SDave Jiang struct llist_node *head; 1008affd8a4SDave Jiang LIST_HEAD(flist); 1016b4b87f2SDave Jiang 1026b4b87f2SDave Jiang desc->completion->status = IDXD_COMP_DESC_ABORT; 1036b4b87f2SDave Jiang /* 1046b4b87f2SDave Jiang * Grab the list lock so it will block the irq thread handler. This allows the 1056b4b87f2SDave Jiang * abort code to locate the descriptor need to be aborted. 1066b4b87f2SDave Jiang */ 1079fce3b3aSDave Jiang spin_lock(&ie->list_lock); 1086b4b87f2SDave Jiang head = llist_del_all(&ie->pending_llist); 1096b4b87f2SDave Jiang if (head) { 1106b4b87f2SDave Jiang llist_for_each_entry_safe(d, t, head, llnode) { 1116b4b87f2SDave Jiang if (d == desc) { 1126b4b87f2SDave Jiang found = desc; 1136b4b87f2SDave Jiang continue; 1146b4b87f2SDave Jiang } 1158affd8a4SDave Jiang 1168affd8a4SDave Jiang if (d->completion->status) 1178affd8a4SDave Jiang list_add_tail(&d->list, &flist); 1188affd8a4SDave Jiang else 1198affd8a4SDave Jiang list_add_tail(&d->list, &ie->work_list); 1206b4b87f2SDave Jiang } 1216b4b87f2SDave Jiang } 1226b4b87f2SDave Jiang 1236b4b87f2SDave Jiang if (!found) 1246b4b87f2SDave Jiang found = list_abort_desc(wq, ie, desc); 1259fce3b3aSDave Jiang spin_unlock(&ie->list_lock); 1266b4b87f2SDave Jiang 1276b4b87f2SDave Jiang if (found) 1285d78abb6SDave Jiang idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, false); 1298affd8a4SDave Jiang 1308affd8a4SDave Jiang /* 131*5cb664fbSVinod Koul * completing the descriptor will return desc to allocator and 132*5cb664fbSVinod Koul * the desc can be acquired by a different process and the 133*5cb664fbSVinod Koul * desc->list can be modified. Delete desc from list so the 134*5cb664fbSVinod Koul * list trasversing does not get corrupted by the other process. 1358affd8a4SDave Jiang */ 1368affd8a4SDave Jiang list_for_each_entry_safe(d, t, &flist, list) { 1378affd8a4SDave Jiang list_del_init(&d->list); 138*5cb664fbSVinod Koul idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, true); 1398affd8a4SDave Jiang } 1406b4b87f2SDave Jiang } 1416b4b87f2SDave Jiang 1427930d855SDave Jiang /* 1437930d855SDave Jiang * ENQCMDS typically fail when the WQ is inactive or busy. On host submission, the driver 1447930d855SDave Jiang * has better control of number of descriptors being submitted to a shared wq by limiting 1457930d855SDave Jiang * the number of driver allocated descriptors to the wq size. However, when the swq is 1467930d855SDave Jiang * exported to a guest kernel, it may be shared with multiple guest kernels. This means 1477930d855SDave Jiang * the likelihood of getting busy returned on the swq when submitting goes significantly up. 1487930d855SDave Jiang * Having a tunable retry mechanism allows the driver to keep trying for a bit before giving 1497930d855SDave Jiang * up. The sysfs knob can be tuned by the system administrator. 1507930d855SDave Jiang */ 1517930d855SDave Jiang int idxd_enqcmds(struct idxd_wq *wq, void __iomem *portal, const void *desc) 1527930d855SDave Jiang { 1537930d855SDave Jiang int rc, retries = 0; 1547930d855SDave Jiang 1557930d855SDave Jiang do { 1567930d855SDave Jiang rc = enqcmds(portal, desc); 1577930d855SDave Jiang if (rc == 0) 1587930d855SDave Jiang break; 1597930d855SDave Jiang cpu_relax(); 1607930d855SDave Jiang } while (retries++ < wq->enqcmds_retries); 1617930d855SDave Jiang 1627930d855SDave Jiang return rc; 1637930d855SDave Jiang } 1647930d855SDave Jiang 165d1dfe5b8SDave Jiang int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) 166d1dfe5b8SDave Jiang { 167d1dfe5b8SDave Jiang struct idxd_device *idxd = wq->idxd; 1686b4b87f2SDave Jiang struct idxd_irq_entry *ie = NULL; 16956fc39f5SDave Jiang u32 desc_flags = desc->hw->flags; 17042d279f9SDave Jiang void __iomem *portal; 1718e50d392SDave Jiang int rc; 172d1dfe5b8SDave Jiang 1735d78abb6SDave Jiang if (idxd->state != IDXD_DEV_ENABLED) 174d1dfe5b8SDave Jiang return -EIO; 175d1dfe5b8SDave Jiang 17656fc39f5SDave Jiang if (!percpu_ref_tryget_live(&wq->wq_active)) { 17756fc39f5SDave Jiang wait_for_completion(&wq->wq_resurrect); 1785d78abb6SDave Jiang if (!percpu_ref_tryget_live(&wq->wq_active)) 17993a40a6dSDave Jiang return -ENXIO; 18056fc39f5SDave Jiang } 18193a40a6dSDave Jiang 182a9c17152SDave Jiang portal = idxd_wq_portal_addr(wq); 1838e50d392SDave Jiang 184d1dfe5b8SDave Jiang /* 1858e50d392SDave Jiang * The wmb() flushes writes to coherent DMA data before 1868e50d392SDave Jiang * possibly triggering a DMA read. The wmb() is necessary 1878e50d392SDave Jiang * even on UP because the recipient is a device. 188d1dfe5b8SDave Jiang */ 189d1dfe5b8SDave Jiang wmb(); 1906b4b87f2SDave Jiang 1916b4b87f2SDave Jiang /* 1926b4b87f2SDave Jiang * Pending the descriptor to the lockless list for the irq_entry 1936b4b87f2SDave Jiang * that we designated the descriptor to. 1946b4b87f2SDave Jiang */ 19556fc39f5SDave Jiang if (desc_flags & IDXD_OP_FLAG_RCI) { 1968b67426eSDave Jiang ie = wq->ie; 197eb0cf33aSDave Jiang if (ie->int_handle == INVALID_INT_HANDLE) 198eb0cf33aSDave Jiang desc->hw->int_handle = ie->id; 199eb0cf33aSDave Jiang else 200eb0cf33aSDave Jiang desc->hw->int_handle = ie->int_handle; 201eb0cf33aSDave Jiang 2026b4b87f2SDave Jiang llist_add(&desc->llnode, &ie->pending_llist); 2036b4b87f2SDave Jiang } 2046b4b87f2SDave Jiang 2058e50d392SDave Jiang if (wq_dedicated(wq)) { 20642d279f9SDave Jiang iosubmit_cmds512(portal, desc->hw, 1); 2078e50d392SDave Jiang } else { 2087930d855SDave Jiang rc = idxd_enqcmds(wq, portal, desc->hw); 209ac24a2dcSDave Jiang if (rc < 0) { 210ac24a2dcSDave Jiang percpu_ref_put(&wq->wq_active); 2110b030f54SDave Jiang /* abort operation frees the descriptor */ 2126b4b87f2SDave Jiang if (ie) 2136b4b87f2SDave Jiang llist_abort_desc(wq, ie, desc); 2148e50d392SDave Jiang return rc; 2158e50d392SDave Jiang } 216ac24a2dcSDave Jiang } 217d1dfe5b8SDave Jiang 21893a40a6dSDave Jiang percpu_ref_put(&wq->wq_active); 219d1dfe5b8SDave Jiang return 0; 220d1dfe5b8SDave Jiang } 221