xref: /openbmc/linux/drivers/dma/idxd/irq.c (revision 758071a3)
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/dmaengine.h>
#include <linux/delay.h>
#include <linux/iommu.h>
#include <linux/sched/mm.h>
#include <uapi/linux/idxd.h>
#include "../dmaengine.h"
#include "idxd.h"
#include "registers.h"

enum irq_work_type {
	IRQ_WORK_NORMAL = 0,
	IRQ_WORK_PROCESS_FAULT,
};

struct idxd_resubmit {
	struct work_struct work;
	struct idxd_desc *desc;
};

struct idxd_int_handle_revoke {
	struct work_struct work;
	struct idxd_device *idxd;
};

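/*
 * Halt-recovery work: reset the device, reapply the saved configuration,
 * re-enable the device and then each work queue that was enabled before the
 * halt. If any of the device-level steps fail, the device state is cleared
 * instead.
 */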
static void idxd_device_reinit(struct work_struct *work)
{
	struct idxd_device *idxd = container_of(work, struct idxd_device, work);
	struct device *dev = &idxd->pdev->dev;
	int rc, i;

	idxd_device_reset(idxd);
	rc = idxd_device_config(idxd);
	if (rc < 0)
		goto out;

	rc = idxd_device_enable(idxd);
	if (rc < 0)
		goto out;

	for (i = 0; i < idxd->max_wqs; i++) {
		if (test_bit(i, idxd->wq_enable_map)) {
			struct idxd_wq *wq = idxd->wqs[i];

			rc = idxd_wq_enable(wq);
			if (rc < 0) {
				clear_bit(i, idxd->wq_enable_map);
				dev_warn(dev, "Unable to re-enable wq %s\n",
					 dev_name(wq_confdev(wq)));
			}
		}
	}

	return;

 out:
	idxd_device_clear_state(idxd);
}

/*
 * This function sends a drain descriptor for the interrupt handle. The drain
 * ensures that all descriptors with this interrupt handle are flushed and the
 * drain interrupt will allow the cleanup of the outstanding descriptors.
 */
static void idxd_int_handle_revoke_drain(struct idxd_irq_entry *ie)
{
	struct idxd_wq *wq = ie_to_wq(ie);
	struct idxd_device *idxd = wq->idxd;
	struct device *dev = &idxd->pdev->dev;
	struct dsa_hw_desc desc = {};
	void __iomem *portal;
	int rc;

	/* Issue a simple drain operation with interrupt but no completion record */
	desc.flags = IDXD_OP_FLAG_RCI;
	desc.opcode = DSA_OPCODE_DRAIN;
	desc.priv = 1;

	if (ie->pasid != IOMMU_PASID_INVALID)
		desc.pasid = ie->pasid;
	desc.int_handle = ie->int_handle;
	portal = idxd_wq_portal_addr(wq);

	/*
	 * The wmb() makes sure that the descriptor is all there before we
	 * issue.
	 */
	wmb();
	if (wq_dedicated(wq)) {
		iosubmit_cmds512(portal, &desc, 1);
	} else {
		rc = idxd_enqcmds(wq, portal, &desc);
		/* This should not fail unless hardware failed. */
		if (rc < 0)
			dev_warn(dev, "Failed to submit drain desc on wq %d\n", wq->id);
	}
}

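/*
 * Fold the lockless pending list into the work list, pick out every
 * descriptor that completed with an invalid interrupt handle status, and
 * complete those descriptors with abort status outside of the list lock.
 */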
static void idxd_abort_invalid_int_handle_descs(struct idxd_irq_entry *ie)
{
	LIST_HEAD(flist);
	struct idxd_desc *d, *t;
	struct llist_node *head;

	spin_lock(&ie->list_lock);
	head = llist_del_all(&ie->pending_llist);
	if (head) {
		llist_for_each_entry_safe(d, t, head, llnode)
			list_add_tail(&d->list, &ie->work_list);
	}

	list_for_each_entry_safe(d, t, &ie->work_list, list) {
		if (d->completion->status == DSA_COMP_INT_HANDLE_INVAL)
			list_move_tail(&d->list, &flist);
	}
	spin_unlock(&ie->list_lock);

	list_for_each_entry_safe(d, t, &flist, list) {
		list_del(&d->list);
		idxd_dma_complete_txd(d, IDXD_COMPLETE_ABORT, true);
	}
}

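/*
 * Work item run when the device reports that the interrupt handles have been
 * revoked. Each in-use handle is refreshed; kernel work queues are paused
 * around the swap so that no descriptor is submitted with a stale handle.
 */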
static void idxd_int_handle_revoke(struct work_struct *work)
{
	struct idxd_int_handle_revoke *revoke =
		container_of(work, struct idxd_int_handle_revoke, work);
	struct idxd_device *idxd = revoke->idxd;
	struct pci_dev *pdev = idxd->pdev;
	struct device *dev = &pdev->dev;
	int i, new_handle, rc;

	if (!idxd->request_int_handles) {
		kfree(revoke);
		dev_warn(dev, "Unexpected int handle refresh interrupt.\n");
		return;
	}

	/*
	 * The loop attempts to acquire a new interrupt handle for all interrupt
	 * vectors that support a handle. If a new interrupt handle is acquired and the
	 * wq is kernel type, the driver will kill the percpu_ref to pause all
	 * ongoing descriptor submissions. The interrupt handle is then changed.
	 * After the change, the percpu_ref is revived and all the pending submissions
	 * are woken to try again. A drain is sent for the interrupt handle
	 * at the end to make sure all invalid int handle descriptors are processed.
	 */
	for (i = 1; i < idxd->irq_cnt; i++) {
		struct idxd_irq_entry *ie = idxd_get_ie(idxd, i);
		struct idxd_wq *wq = ie_to_wq(ie);

		if (ie->int_handle == INVALID_INT_HANDLE)
			continue;

		rc = idxd_device_request_int_handle(idxd, i, &new_handle, IDXD_IRQ_MSIX);
		if (rc < 0) {
			dev_warn(dev, "get int handle %d failed: %d\n", i, rc);
			/*
			 * Failed to acquire new interrupt handle. Kill the WQ
			 * and release all the pending submitters. The submitters will
			 * get error return code and handle appropriately.
			 */
			ie->int_handle = INVALID_INT_HANDLE;
			idxd_wq_quiesce(wq);
			idxd_abort_invalid_int_handle_descs(ie);
			continue;
		}

		/* No change in interrupt handle, nothing needs to be done */
		if (ie->int_handle == new_handle)
			continue;

		if (wq->state != IDXD_WQ_ENABLED || wq->type != IDXD_WQT_KERNEL) {
			/*
			 * All the MSIX interrupts are allocated at once during probe.
			 * Therefore we need to update all interrupts even if the WQ
			 * isn't supporting interrupt operations.
			 */
			ie->int_handle = new_handle;
			continue;
		}

		mutex_lock(&wq->wq_lock);
		reinit_completion(&wq->wq_resurrect);

		/* Kill percpu_ref to pause additional descriptor submissions */
		percpu_ref_kill(&wq->wq_active);

		/* Wait for all submitters to quiesce before we change the interrupt handle */
		wait_for_completion(&wq->wq_dead);

		ie->int_handle = new_handle;

		/* Revive percpu ref and wake up all the waiting submitters */
		percpu_ref_reinit(&wq->wq_active);
		complete_all(&wq->wq_resurrect);
		mutex_unlock(&wq->wq_lock);

		/*
		 * The delay here is to wait for all possible MOVDIR64B that
		 * were issued before percpu_ref_kill() happened to have
		 * reached the PCIe domain before the drain is issued. The driver
		 * needs to ensure that the drain descriptor issued does not pass
		 * all the other issued descriptors that contain the invalid
		 * interrupt handle in order to ensure that the drain descriptor
		 * interrupt will allow the cleanup of all the descriptors with
		 * invalid interrupt handle.
		 */
		if (wq_dedicated(wq))
			udelay(100);
		idxd_int_handle_revoke_drain(ie);
	}
	kfree(revoke);
}

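/*
 * Deferred handling of an event log entry that requires a completion record
 * to be written back to the faulting user address. This runs from a work
 * queue so that the copy to the user address space can be done in process
 * context.
 */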
static void idxd_evl_fault_work(struct work_struct *work)
{
	struct idxd_evl_fault *fault = container_of(work, struct idxd_evl_fault, work);
	struct idxd_wq *wq = fault->wq;
	struct idxd_device *idxd = wq->idxd;
	struct device *dev = &idxd->pdev->dev;
	struct idxd_evl *evl = idxd->evl;
	struct __evl_entry *entry_head = fault->entry;
	void *cr = (void *)entry_head + idxd->data->evl_cr_off;
	int cr_size = idxd->data->compl_size;
	u8 *status = (u8 *)cr + idxd->data->cr_status_off;
	u8 *result = (u8 *)cr + idxd->data->cr_result_off;
	int copied, copy_size;
	bool *bf;

	switch (fault->status) {
	case DSA_COMP_CRA_XLAT:
		if (entry_head->batch && entry_head->first_err_in_batch)
			evl->batch_fail[entry_head->batch_id] = false;

		copy_size = cr_size;
		idxd_user_counter_increment(wq, entry_head->pasid, COUNTER_FAULTS);
		break;
	case DSA_COMP_BATCH_EVL_ERR:
		bf = &evl->batch_fail[entry_head->batch_id];

		copy_size = entry_head->rcr || *bf ? cr_size : 0;
		if (*bf) {
			if (*status == DSA_COMP_SUCCESS)
				*status = DSA_COMP_BATCH_FAIL;
			*result = 1;
			*bf = false;
		}
		idxd_user_counter_increment(wq, entry_head->pasid, COUNTER_FAULTS);
		break;
	case DSA_COMP_DRAIN_EVL:
		copy_size = cr_size;
		break;
	default:
		copy_size = 0;
		dev_dbg_ratelimited(dev, "Unrecognized error code: %#x\n", fault->status);
		break;
	}

	/* Nothing to copy; still free the fault entry below to avoid leaking it */
	if (copy_size == 0)
		goto out;

	/*
	 * Copy the completion record to fault_addr in the user address space
	 * that is found by wq and PASID.
	 */
	copied = idxd_copy_cr(wq, entry_head->pasid, entry_head->fault_addr,
			      cr, copy_size);
	/*
	 * The task that triggered the page fault is currently unknown,
	 * because multiple threads may share the user address space or the
	 * task may have exited already before this fault.
	 * So if the copy fails, SIGSEGV cannot be sent to the task.
	 * Just print an error for the failure. The user application
	 * waiting for the completion record will time out on this
	 * failure.
	 */
	switch (fault->status) {
	case DSA_COMP_CRA_XLAT:
		if (copied != copy_size) {
			idxd_user_counter_increment(wq, entry_head->pasid, COUNTER_FAULT_FAILS);
			dev_dbg_ratelimited(dev, "Failed to write to completion record: (%d:%d)\n",
					    copy_size, copied);
			if (entry_head->batch)
				evl->batch_fail[entry_head->batch_id] = true;
		}
		break;
	case DSA_COMP_BATCH_EVL_ERR:
		if (copied != copy_size) {
			idxd_user_counter_increment(wq, entry_head->pasid, COUNTER_FAULT_FAILS);
			dev_dbg_ratelimited(dev, "Failed to write to batch completion record: (%d:%d)\n",
					    copy_size, copied);
		}
		break;
	case DSA_COMP_DRAIN_EVL:
		if (copied != copy_size)
			dev_dbg_ratelimited(dev, "Failed to write to drain completion record: (%d:%d)\n",
					    copy_size, copied);
		break;
	}

out:
	kmem_cache_free(idxd->evl_cache, fault);
}

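/*
 * Handle a single event log entry. Entries already flagged in evl->bmap are
 * simply acknowledged; completion record page faults and related errors are
 * handed off to a work item, and anything else is reported as a device error.
 */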
static void process_evl_entry(struct idxd_device *idxd,
			      struct __evl_entry *entry_head, unsigned int index)
{
	struct device *dev = &idxd->pdev->dev;
	struct idxd_evl *evl = idxd->evl;
	u8 status;

	if (test_bit(index, evl->bmap)) {
		clear_bit(index, evl->bmap);
	} else {
		status = DSA_COMP_STATUS(entry_head->error);

		if (status == DSA_COMP_CRA_XLAT || status == DSA_COMP_DRAIN_EVL ||
		    status == DSA_COMP_BATCH_EVL_ERR) {
			struct idxd_evl_fault *fault;
			int ent_size = evl_ent_size(idxd);

			if (entry_head->rci)
				dev_dbg(dev, "Completion Int Req set, ignoring!\n");

			if (!entry_head->rcr && status == DSA_COMP_DRAIN_EVL)
				return;

			fault = kmem_cache_alloc(idxd->evl_cache, GFP_ATOMIC);
			if (fault) {
				struct idxd_wq *wq = idxd->wqs[entry_head->wq_idx];

				fault->wq = wq;
				fault->status = status;
				memcpy(&fault->entry, entry_head, ent_size);
				INIT_WORK(&fault->work, idxd_evl_fault_work);
				queue_work(wq->wq, &fault->work);
			} else {
				dev_warn(dev, "Failed to service fault work.\n");
			}
		} else {
			dev_warn_ratelimited(dev, "Device error %#x operation: %#x fault addr: %#llx\n",
					     status, entry_head->operation,
					     entry_head->fault_addr);
		}
	}
}

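/*
 * Drain the event log ring: clear the interrupt pending bit, then walk the
 * entries from head to tail and write the updated head back to the device.
 */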
static void process_evl_entries(struct idxd_device *idxd)
{
	union evl_status_reg evl_status;
	unsigned int h, t;
	struct idxd_evl *evl = idxd->evl;
	struct __evl_entry *entry_head;
	unsigned int ent_size = evl_ent_size(idxd);
	u32 size;

	evl_status.bits = 0;
	evl_status.int_pending = 1;

	mutex_lock(&evl->lock);
	/* Clear interrupt pending bit */
	iowrite32(evl_status.bits_upper32,
		  idxd->reg_base + IDXD_EVLSTATUS_OFFSET + sizeof(u32));
	evl_status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET);
	t = evl_status.tail;
	h = evl_status.head;
	size = idxd->evl->size;

	while (h != t) {
		entry_head = (struct __evl_entry *)(evl->log + (h * ent_size));
		process_evl_entry(idxd, entry_head, h);
		h = (h + 1) % size;
	}

	evl_status.head = h;
	iowrite32(evl_status.bits_lower32, idxd->reg_base + IDXD_EVLSTATUS_OFFSET);
	mutex_unlock(&evl->lock);
}

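/*
 * Threaded handler for the miscellaneous device interrupt. It reads and acks
 * INTCAUSE, then dispatches to software error reporting, interrupt handle
 * revocation, device command completion, perfmon overflow and event log
 * processing. A device halt is handled last, either by queueing a reinit or
 * by quiescing and clearing the device state.
 */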
irqreturn_t idxd_misc_thread(int vec, void *data)
{
	struct idxd_irq_entry *irq_entry = data;
	struct idxd_device *idxd = ie_to_idxd(irq_entry);
	struct device *dev = &idxd->pdev->dev;
	union gensts_reg gensts;
	u32 val = 0;
	int i;
	bool err = false;
	u32 cause;

	cause = ioread32(idxd->reg_base + IDXD_INTCAUSE_OFFSET);
	if (!cause)
		return IRQ_NONE;

	iowrite32(cause, idxd->reg_base + IDXD_INTCAUSE_OFFSET);

	if (cause & IDXD_INTC_HALT_STATE)
		goto halt;

	if (cause & IDXD_INTC_ERR) {
		spin_lock(&idxd->dev_lock);
		for (i = 0; i < 4; i++)
			idxd->sw_err.bits[i] = ioread64(idxd->reg_base +
					IDXD_SWERR_OFFSET + i * sizeof(u64));

		iowrite64(idxd->sw_err.bits[0] & IDXD_SWERR_ACK,
			  idxd->reg_base + IDXD_SWERR_OFFSET);

		if (idxd->sw_err.valid && idxd->sw_err.wq_idx_valid) {
			int id = idxd->sw_err.wq_idx;
			struct idxd_wq *wq = idxd->wqs[id];

			if (wq->type == IDXD_WQT_USER)
				wake_up_interruptible(&wq->err_queue);
		} else {
			int i;

			for (i = 0; i < idxd->max_wqs; i++) {
				struct idxd_wq *wq = idxd->wqs[i];

				if (wq->type == IDXD_WQT_USER)
					wake_up_interruptible(&wq->err_queue);
			}
		}

		spin_unlock(&idxd->dev_lock);
		val |= IDXD_INTC_ERR;

		for (i = 0; i < 4; i++)
			dev_warn(dev, "err[%d]: %#16.16llx\n",
				 i, idxd->sw_err.bits[i]);
		err = true;
	}

	if (cause & IDXD_INTC_INT_HANDLE_REVOKED) {
		struct idxd_int_handle_revoke *revoke;

		val |= IDXD_INTC_INT_HANDLE_REVOKED;

		revoke = kzalloc(sizeof(*revoke), GFP_ATOMIC);
		if (revoke) {
			revoke->idxd = idxd;
			INIT_WORK(&revoke->work, idxd_int_handle_revoke);
			queue_work(idxd->wq, &revoke->work);
		} else {
			dev_err(dev, "Failed to allocate work for int handle revoke\n");
			idxd_wqs_quiesce(idxd);
		}
	}

	if (cause & IDXD_INTC_CMD) {
		val |= IDXD_INTC_CMD;
		complete(idxd->cmd_done);
	}

	if (cause & IDXD_INTC_OCCUPY) {
		/* Driver does not utilize occupancy interrupt */
		val |= IDXD_INTC_OCCUPY;
	}

	if (cause & IDXD_INTC_PERFMON_OVFL) {
		val |= IDXD_INTC_PERFMON_OVFL;
		perfmon_counter_overflow(idxd);
	}

	if (cause & IDXD_INTC_EVL) {
		val |= IDXD_INTC_EVL;
		process_evl_entries(idxd);
	}

	val ^= cause;
	if (val)
		dev_warn_once(dev, "Unexpected interrupt cause bits set: %#x\n",
			      val);

	if (!err)
		goto out;

halt:
	gensts.bits = ioread32(idxd->reg_base + IDXD_GENSTATS_OFFSET);
	if (gensts.state == IDXD_DEVICE_STATE_HALT) {
		idxd->state = IDXD_DEV_HALTED;
		if (gensts.reset_type == IDXD_DEVICE_RESET_SOFTWARE) {
			/*
			 * If we need a software reset, we will queue the work
			 * on the device workqueue in order to allow interrupts
			 * for the device command completions.
			 */
			INIT_WORK(&idxd->work, idxd_device_reinit);
			queue_work(idxd->wq, &idxd->work);
		} else {
			idxd->state = IDXD_DEV_HALTED;
			idxd_wqs_quiesce(idxd);
			idxd_wqs_unmap_portal(idxd);
			idxd_device_clear_state(idxd);
			dev_err(&idxd->pdev->dev,
				"idxd halted, need %s.\n",
				gensts.reset_type == IDXD_DEVICE_RESET_FLR ?
				"FLR" : "system reset");
		}
	}

out:
	return IRQ_HANDLED;
}

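/*
 * Work item that clears a descriptor's completion status and resubmits it.
 * If the resubmission fails for any reason other than -EAGAIN, the descriptor
 * is completed with abort status and freed here.
 */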
static void idxd_int_handle_resubmit_work(struct work_struct *work)
{
	struct idxd_resubmit *irw = container_of(work, struct idxd_resubmit, work);
	struct idxd_desc *desc = irw->desc;
	struct idxd_wq *wq = desc->wq;
	int rc;

	desc->completion->status = 0;
	rc = idxd_submit_desc(wq, desc);
	if (rc < 0) {
		dev_dbg(&wq->idxd->pdev->dev, "Failed to resubmit desc %d to wq %d.\n",
			desc->id, wq->id);
		/*
		 * If the error is not -EAGAIN, it means the submission failed because
		 * the wq has been killed rather than because of an ENQCMDS failure.
		 * Here the driver needs to notify the submitter of the failure by
		 * reporting abort status.
		 *
		 * -EAGAIN comes from ENQCMDS failure. idxd_submit_desc() will handle the
		 * abort.
		 */
		if (rc != -EAGAIN) {
			desc->completion->status = IDXD_COMP_DESC_ABORT;
			idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, false);
		}
		idxd_free_desc(wq, desc);
	}
	kfree(irw);
}

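/*
 * Queue a descriptor for resubmission on the device workqueue. Returns false
 * if the resubmit work item cannot be allocated.
 */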
bool idxd_queue_int_handle_resubmit(struct idxd_desc *desc)
{
	struct idxd_wq *wq = desc->wq;
	struct idxd_device *idxd = wq->idxd;
	struct idxd_resubmit *irw;

	irw = kzalloc(sizeof(*irw), GFP_KERNEL);
	if (!irw)
		return false;

	irw->desc = desc;
	INIT_WORK(&irw->work, idxd_int_handle_resubmit_work);
	queue_work(idxd->wq, &irw->work);
	return true;
}

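/*
 * Drain the lockless pending list: descriptors that already completed are
 * reported to the dmaengine layer, descriptors still in flight are moved to
 * the work_list for a later pass.
 */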
static void irq_process_pending_llist(struct idxd_irq_entry *irq_entry)
{
	struct idxd_desc *desc, *t;
	struct llist_node *head;

	head = llist_del_all(&irq_entry->pending_llist);
	if (!head)
		return;

	llist_for_each_entry_safe(desc, t, head, llnode) {
		u8 status = desc->completion->status & DSA_COMP_STATUS_MASK;

		if (status) {
			/*
			 * Check against the original status as ABORT is software defined
			 * and 0xff, which DSA_COMP_STATUS_MASK can mask out.
			 */
			if (unlikely(desc->completion->status == IDXD_COMP_DESC_ABORT)) {
				idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, true);
				continue;
			}

			idxd_dma_complete_txd(desc, IDXD_COMPLETE_NORMAL, true);
		} else {
			spin_lock(&irq_entry->list_lock);
			list_add_tail(&desc->list,
				      &irq_entry->work_list);
			spin_unlock(&irq_entry->list_lock);
		}
	}
}

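/*
 * Move every completed descriptor off the work_list while holding the list
 * lock, then report the completions to the dmaengine layer outside the lock.
 */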
static void irq_process_work_list(struct idxd_irq_entry *irq_entry)
{
	LIST_HEAD(flist);
	struct idxd_desc *desc, *n;

	/*
	 * This lock protects the list from corruption by accesses to the list
	 * outside of the irq handler thread.
	 */
	spin_lock(&irq_entry->list_lock);
	if (list_empty(&irq_entry->work_list)) {
		spin_unlock(&irq_entry->list_lock);
		return;
	}

	list_for_each_entry_safe(desc, n, &irq_entry->work_list, list) {
		if (desc->completion->status)
			list_move_tail(&desc->list, &flist);
	}

	spin_unlock(&irq_entry->list_lock);

	list_for_each_entry(desc, &flist, list) {
		/*
		 * Check against the original status as ABORT is software defined
		 * and 0xff, which DSA_COMP_STATUS_MASK can mask out.
		 */
		if (unlikely(desc->completion->status == IDXD_COMP_DESC_ABORT)) {
			idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, true);
			continue;
		}

		idxd_dma_complete_txd(desc, IDXD_COMPLETE_NORMAL, true);
	}
}

irqreturn_t idxd_wq_thread(int irq, void *data)
{
	struct idxd_irq_entry *irq_entry = data;

	/*
	 * There are two lists we are processing. The pending_llist is where
	 * the submitter adds all the submitted descriptors after sending them to
	 * the workqueue. It's a lockless singly linked list. The work_list
	 * is the common Linux doubly linked list. We are in a scenario of
	 * multiple producers and a single consumer. The producers are all
	 * the kernel submitters of descriptors, and the consumer is the
	 * kernel irq handler thread for the msix vector when using threaded
	 * irq. To work with the restrictions of llist to remain lockless,
	 * we are doing the following steps:
	 * 1. Iterate through the work_list and process any completed
	 *    descriptor. Delete the completed entries during iteration.
	 * 2. llist_del_all() from the pending list.
	 * 3. Iterate through the llist that was deleted from the pending list
	 *    and process the completed entries.
	 * 4. If the entry is still waiting on hardware, list_add_tail() to
	 *    the work_list.
	 */
	irq_process_work_list(irq_entry);
	irq_process_pending_llist(irq_entry);

	return IRQ_HANDLED;
}