1d1dfe5b8SDave Jiang // SPDX-License-Identifier: GPL-2.0 2d1dfe5b8SDave Jiang /* Copyright(c) 2019 Intel Corporation. All rights rsvd. */ 3d1dfe5b8SDave Jiang #include <linux/init.h> 4d1dfe5b8SDave Jiang #include <linux/kernel.h> 5d1dfe5b8SDave Jiang #include <linux/module.h> 6d1dfe5b8SDave Jiang #include <linux/pci.h> 7d1dfe5b8SDave Jiang #include <uapi/linux/idxd.h> 8d1dfe5b8SDave Jiang #include "idxd.h" 9d1dfe5b8SDave Jiang #include "registers.h" 10d1dfe5b8SDave Jiang 110705107fSDave Jiang static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu) 12d1dfe5b8SDave Jiang { 13d1dfe5b8SDave Jiang struct idxd_desc *desc; 148e50d392SDave Jiang struct idxd_device *idxd = wq->idxd; 15d1dfe5b8SDave Jiang 16d1dfe5b8SDave Jiang desc = wq->descs[idx]; 17d1dfe5b8SDave Jiang memset(desc->hw, 0, sizeof(struct dsa_hw_desc)); 18435b512dSDave Jiang memset(desc->completion, 0, idxd->data->compl_size); 190705107fSDave Jiang desc->cpu = cpu; 208e50d392SDave Jiang 218e50d392SDave Jiang if (device_pasid_enabled(idxd)) 228e50d392SDave Jiang desc->hw->pasid = idxd->pasid; 238e50d392SDave Jiang 24d1dfe5b8SDave Jiang return desc; 25d1dfe5b8SDave Jiang } 26d1dfe5b8SDave Jiang 270705107fSDave Jiang struct idxd_desc *idxd_alloc_desc(struct idxd_wq *wq, enum idxd_op_type optype) 280705107fSDave Jiang { 290705107fSDave Jiang int cpu, idx; 300705107fSDave Jiang struct idxd_device *idxd = wq->idxd; 310705107fSDave Jiang DEFINE_SBQ_WAIT(wait); 320705107fSDave Jiang struct sbq_wait_state *ws; 330705107fSDave Jiang struct sbitmap_queue *sbq; 340705107fSDave Jiang 350705107fSDave Jiang if (idxd->state != IDXD_DEV_ENABLED) 360705107fSDave Jiang return ERR_PTR(-EIO); 370705107fSDave Jiang 380705107fSDave Jiang sbq = &wq->sbq; 390705107fSDave Jiang idx = sbitmap_queue_get(sbq, &cpu); 400705107fSDave Jiang if (idx < 0) { 410705107fSDave Jiang if (optype == IDXD_OP_NONBLOCK) 420705107fSDave Jiang return ERR_PTR(-EAGAIN); 430705107fSDave Jiang } else { 440705107fSDave Jiang return __get_desc(wq, idx, cpu); 450705107fSDave Jiang } 460705107fSDave Jiang 470705107fSDave Jiang ws = &sbq->ws[0]; 480705107fSDave Jiang for (;;) { 490705107fSDave Jiang sbitmap_prepare_to_wait(sbq, ws, &wait, TASK_INTERRUPTIBLE); 500705107fSDave Jiang if (signal_pending_state(TASK_INTERRUPTIBLE, current)) 510705107fSDave Jiang break; 520705107fSDave Jiang idx = sbitmap_queue_get(sbq, &cpu); 53673d812dSDave Jiang if (idx >= 0) 540705107fSDave Jiang break; 550705107fSDave Jiang schedule(); 560705107fSDave Jiang } 570705107fSDave Jiang 580705107fSDave Jiang sbitmap_finish_wait(sbq, ws, &wait); 590705107fSDave Jiang if (idx < 0) 600705107fSDave Jiang return ERR_PTR(-EAGAIN); 610705107fSDave Jiang 620705107fSDave Jiang return __get_desc(wq, idx, cpu); 630705107fSDave Jiang } 640705107fSDave Jiang 65d1dfe5b8SDave Jiang void idxd_free_desc(struct idxd_wq *wq, struct idxd_desc *desc) 66d1dfe5b8SDave Jiang { 670705107fSDave Jiang int cpu = desc->cpu; 68d1dfe5b8SDave Jiang 690705107fSDave Jiang desc->cpu = -1; 700705107fSDave Jiang sbitmap_queue_clear(&wq->sbq, desc->id, cpu); 71d1dfe5b8SDave Jiang } 72d1dfe5b8SDave Jiang 736b4b87f2SDave Jiang static struct idxd_desc *list_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie, 746b4b87f2SDave Jiang struct idxd_desc *desc) 756b4b87f2SDave Jiang { 766b4b87f2SDave Jiang struct idxd_desc *d, *n; 776b4b87f2SDave Jiang 786b4b87f2SDave Jiang lockdep_assert_held(&ie->list_lock); 796b4b87f2SDave Jiang list_for_each_entry_safe(d, n, &ie->work_list, list) { 806b4b87f2SDave Jiang if (d == desc) { 816b4b87f2SDave Jiang list_del(&d->list); 826b4b87f2SDave Jiang return d; 836b4b87f2SDave Jiang } 846b4b87f2SDave Jiang } 856b4b87f2SDave Jiang 866b4b87f2SDave Jiang /* 876b4b87f2SDave Jiang * At this point, the desc needs to be aborted is held by the completion 886b4b87f2SDave Jiang * handler where it has taken it off the pending list but has not added to the 896b4b87f2SDave Jiang * work list. It will be cleaned up by the interrupt handler when it sees the 906b4b87f2SDave Jiang * IDXD_COMP_DESC_ABORT for completion status. 916b4b87f2SDave Jiang */ 926b4b87f2SDave Jiang return NULL; 936b4b87f2SDave Jiang } 946b4b87f2SDave Jiang 956b4b87f2SDave Jiang static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie, 966b4b87f2SDave Jiang struct idxd_desc *desc) 976b4b87f2SDave Jiang { 986b4b87f2SDave Jiang struct idxd_desc *d, *t, *found = NULL; 996b4b87f2SDave Jiang struct llist_node *head; 1006b4b87f2SDave Jiang 1016b4b87f2SDave Jiang desc->completion->status = IDXD_COMP_DESC_ABORT; 1026b4b87f2SDave Jiang /* 1036b4b87f2SDave Jiang * Grab the list lock so it will block the irq thread handler. This allows the 1046b4b87f2SDave Jiang * abort code to locate the descriptor need to be aborted. 1056b4b87f2SDave Jiang */ 1069fce3b3aSDave Jiang spin_lock(&ie->list_lock); 1076b4b87f2SDave Jiang head = llist_del_all(&ie->pending_llist); 1086b4b87f2SDave Jiang if (head) { 1096b4b87f2SDave Jiang llist_for_each_entry_safe(d, t, head, llnode) { 1106b4b87f2SDave Jiang if (d == desc) { 1116b4b87f2SDave Jiang found = desc; 1126b4b87f2SDave Jiang continue; 1136b4b87f2SDave Jiang } 1146b4b87f2SDave Jiang list_add_tail(&desc->list, &ie->work_list); 1156b4b87f2SDave Jiang } 1166b4b87f2SDave Jiang } 1176b4b87f2SDave Jiang 1186b4b87f2SDave Jiang if (!found) 1196b4b87f2SDave Jiang found = list_abort_desc(wq, ie, desc); 1209fce3b3aSDave Jiang spin_unlock(&ie->list_lock); 1216b4b87f2SDave Jiang 1226b4b87f2SDave Jiang if (found) 1235d78abb6SDave Jiang idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, false); 1246b4b87f2SDave Jiang } 1256b4b87f2SDave Jiang 126*7930d855SDave Jiang /* 127*7930d855SDave Jiang * ENQCMDS typically fail when the WQ is inactive or busy. On host submission, the driver 128*7930d855SDave Jiang * has better control of number of descriptors being submitted to a shared wq by limiting 129*7930d855SDave Jiang * the number of driver allocated descriptors to the wq size. However, when the swq is 130*7930d855SDave Jiang * exported to a guest kernel, it may be shared with multiple guest kernels. This means 131*7930d855SDave Jiang * the likelihood of getting busy returned on the swq when submitting goes significantly up. 132*7930d855SDave Jiang * Having a tunable retry mechanism allows the driver to keep trying for a bit before giving 133*7930d855SDave Jiang * up. The sysfs knob can be tuned by the system administrator. 134*7930d855SDave Jiang */ 135*7930d855SDave Jiang int idxd_enqcmds(struct idxd_wq *wq, void __iomem *portal, const void *desc) 136*7930d855SDave Jiang { 137*7930d855SDave Jiang int rc, retries = 0; 138*7930d855SDave Jiang 139*7930d855SDave Jiang do { 140*7930d855SDave Jiang rc = enqcmds(portal, desc); 141*7930d855SDave Jiang if (rc == 0) 142*7930d855SDave Jiang break; 143*7930d855SDave Jiang cpu_relax(); 144*7930d855SDave Jiang } while (retries++ < wq->enqcmds_retries); 145*7930d855SDave Jiang 146*7930d855SDave Jiang return rc; 147*7930d855SDave Jiang } 148*7930d855SDave Jiang 149d1dfe5b8SDave Jiang int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) 150d1dfe5b8SDave Jiang { 151d1dfe5b8SDave Jiang struct idxd_device *idxd = wq->idxd; 1526b4b87f2SDave Jiang struct idxd_irq_entry *ie = NULL; 15356fc39f5SDave Jiang u32 desc_flags = desc->hw->flags; 15442d279f9SDave Jiang void __iomem *portal; 1558e50d392SDave Jiang int rc; 156d1dfe5b8SDave Jiang 1575d78abb6SDave Jiang if (idxd->state != IDXD_DEV_ENABLED) 158d1dfe5b8SDave Jiang return -EIO; 159d1dfe5b8SDave Jiang 16056fc39f5SDave Jiang if (!percpu_ref_tryget_live(&wq->wq_active)) { 16156fc39f5SDave Jiang wait_for_completion(&wq->wq_resurrect); 1625d78abb6SDave Jiang if (!percpu_ref_tryget_live(&wq->wq_active)) 16393a40a6dSDave Jiang return -ENXIO; 16456fc39f5SDave Jiang } 16593a40a6dSDave Jiang 166a9c17152SDave Jiang portal = idxd_wq_portal_addr(wq); 1678e50d392SDave Jiang 168d1dfe5b8SDave Jiang /* 1698e50d392SDave Jiang * The wmb() flushes writes to coherent DMA data before 1708e50d392SDave Jiang * possibly triggering a DMA read. The wmb() is necessary 1718e50d392SDave Jiang * even on UP because the recipient is a device. 172d1dfe5b8SDave Jiang */ 173d1dfe5b8SDave Jiang wmb(); 1746b4b87f2SDave Jiang 1756b4b87f2SDave Jiang /* 1766b4b87f2SDave Jiang * Pending the descriptor to the lockless list for the irq_entry 1776b4b87f2SDave Jiang * that we designated the descriptor to. 1786b4b87f2SDave Jiang */ 17956fc39f5SDave Jiang if (desc_flags & IDXD_OP_FLAG_RCI) { 1808b67426eSDave Jiang ie = wq->ie; 181eb0cf33aSDave Jiang if (ie->int_handle == INVALID_INT_HANDLE) 182eb0cf33aSDave Jiang desc->hw->int_handle = ie->id; 183eb0cf33aSDave Jiang else 184eb0cf33aSDave Jiang desc->hw->int_handle = ie->int_handle; 185eb0cf33aSDave Jiang 1866b4b87f2SDave Jiang llist_add(&desc->llnode, &ie->pending_llist); 1876b4b87f2SDave Jiang } 1886b4b87f2SDave Jiang 1898e50d392SDave Jiang if (wq_dedicated(wq)) { 19042d279f9SDave Jiang iosubmit_cmds512(portal, desc->hw, 1); 1918e50d392SDave Jiang } else { 192*7930d855SDave Jiang rc = idxd_enqcmds(wq, portal, desc->hw); 193ac24a2dcSDave Jiang if (rc < 0) { 194ac24a2dcSDave Jiang percpu_ref_put(&wq->wq_active); 1950b030f54SDave Jiang /* abort operation frees the descriptor */ 1966b4b87f2SDave Jiang if (ie) 1976b4b87f2SDave Jiang llist_abort_desc(wq, ie, desc); 1988e50d392SDave Jiang return rc; 1998e50d392SDave Jiang } 200ac24a2dcSDave Jiang } 201d1dfe5b8SDave Jiang 20293a40a6dSDave Jiang percpu_ref_put(&wq->wq_active); 203d1dfe5b8SDave Jiang return 0; 204d1dfe5b8SDave Jiang } 205