xref: /openbmc/linux/drivers/dma/idxd/cdev.c (revision b181f7029bd71238ac2754ce7052dffd69432085)
142d279f9SDave Jiang // SPDX-License-Identifier: GPL-2.0
242d279f9SDave Jiang /* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
342d279f9SDave Jiang #include <linux/init.h>
442d279f9SDave Jiang #include <linux/kernel.h>
542d279f9SDave Jiang #include <linux/module.h>
642d279f9SDave Jiang #include <linux/pci.h>
742d279f9SDave Jiang #include <linux/device.h>
842d279f9SDave Jiang #include <linux/sched/task.h>
942d279f9SDave Jiang #include <linux/io-64-nonatomic-lo-hi.h>
1042d279f9SDave Jiang #include <linux/cdev.h>
1142d279f9SDave Jiang #include <linux/fs.h>
1242d279f9SDave Jiang #include <linux/poll.h>
138e50d392SDave Jiang #include <linux/iommu.h>
14b022f597SFenghua Yu #include <linux/highmem.h>
1542d279f9SDave Jiang #include <uapi/linux/idxd.h>
16b022f597SFenghua Yu #include <linux/xarray.h>
1742d279f9SDave Jiang #include "registers.h"
1842d279f9SDave Jiang #include "idxd.h"
1942d279f9SDave Jiang 
/* Per accelerator-type char-device bookkeeping, one entry per idxd_type. */
struct idxd_cdev_context {
	const char *name;	/* device-node name prefix (e.g. "dsa", "iax") */
	dev_t devt;		/* base dev_t from alloc_chrdev_region() */
	struct ida minor_ida;	/* minor-number allocator for this type */
};
2542d279f9SDave Jiang 
2642d279f9SDave Jiang /*
27e6fd6d7eSDave Jiang  * Since user file names are global in DSA devices, define their ida's as
28e6fd6d7eSDave Jiang  * global to avoid conflict file names.
29e6fd6d7eSDave Jiang  */
30e6fd6d7eSDave Jiang static DEFINE_IDA(file_ida);
31e6fd6d7eSDave Jiang static DEFINE_MUTEX(ida_lock);
32e6fd6d7eSDave Jiang 
33e6fd6d7eSDave Jiang /*
3442d279f9SDave Jiang  * ictx is an array based off of accelerator types. enum idxd_type
3542d279f9SDave Jiang  * is used as index
3642d279f9SDave Jiang  */
static struct idxd_cdev_context ictx[IDXD_TYPE_MAX] = {
	{ .name = "dsa" },	/* index: enum idxd_type */
	{ .name = "iax" }
};
4142d279f9SDave Jiang 
/*
 * State for one open file descriptor on a wq char device.  Allocated in
 * idxd_cdev_open() and freed from idxd_file_dev_release() when the last
 * reference to the per-open "fileN" device is dropped.
 */
struct idxd_user_context {
	struct idxd_wq *wq;		/* wq this fd is bound to */
	struct task_struct *task;
	unsigned int pasid;		/* PASID from the SVA bind, if enabled */
	struct mm_struct *mm;		/* opener's mm at open time */
	unsigned int flags;
	struct iommu_sva *sva;		/* SVA handle; NULL when not bound */
	struct idxd_dev idxd_dev;	/* backs the sysfs "fileN" device */
	u64 counters[COUNTER_MAX];	/* per-file counters, see *_show() below */
	int id;				/* "fileN" name id, from global file_ida */
	pid_t pid;			/* opener's pid, exposed via sysfs */
};
54e6fd6d7eSDave Jiang 
55e6fd6d7eSDave Jiang static void idxd_cdev_evl_drain_pasid(struct idxd_wq *wq, u32 pasid);
56e6fd6d7eSDave Jiang static void idxd_xa_pasid_remove(struct idxd_user_context *ctx);
57e6fd6d7eSDave Jiang 
dev_to_uctx(struct device * dev)58e6fd6d7eSDave Jiang static inline struct idxd_user_context *dev_to_uctx(struct device *dev)
59e6fd6d7eSDave Jiang {
60e6fd6d7eSDave Jiang 	struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev);
61e6fd6d7eSDave Jiang 
62e6fd6d7eSDave Jiang 	return container_of(idxd_dev, struct idxd_user_context, idxd_dev);
63e6fd6d7eSDave Jiang }
64e6fd6d7eSDave Jiang 
cr_faults_show(struct device * dev,struct device_attribute * attr,char * buf)65244009b0SDave Jiang static ssize_t cr_faults_show(struct device *dev, struct device_attribute *attr, char *buf)
66244009b0SDave Jiang {
67244009b0SDave Jiang 	struct idxd_user_context *ctx = dev_to_uctx(dev);
68244009b0SDave Jiang 
69244009b0SDave Jiang 	return sysfs_emit(buf, "%llu\n", ctx->counters[COUNTER_FAULTS]);
70244009b0SDave Jiang }
71244009b0SDave Jiang static DEVICE_ATTR_RO(cr_faults);
72244009b0SDave Jiang 
cr_fault_failures_show(struct device * dev,struct device_attribute * attr,char * buf)73244009b0SDave Jiang static ssize_t cr_fault_failures_show(struct device *dev,
74244009b0SDave Jiang 				      struct device_attribute *attr, char *buf)
75244009b0SDave Jiang {
76244009b0SDave Jiang 	struct idxd_user_context *ctx = dev_to_uctx(dev);
77244009b0SDave Jiang 
78244009b0SDave Jiang 	return sysfs_emit(buf, "%llu\n", ctx->counters[COUNTER_FAULT_FAILS]);
79244009b0SDave Jiang }
80244009b0SDave Jiang static DEVICE_ATTR_RO(cr_fault_failures);
81244009b0SDave Jiang 
pid_show(struct device * dev,struct device_attribute * attr,char * buf)82a62b8f87SDave Jiang static ssize_t pid_show(struct device *dev, struct device_attribute *attr, char *buf)
83a62b8f87SDave Jiang {
84a62b8f87SDave Jiang 	struct idxd_user_context *ctx = dev_to_uctx(dev);
85a62b8f87SDave Jiang 
86a62b8f87SDave Jiang 	return sysfs_emit(buf, "%u\n", ctx->pid);
87a62b8f87SDave Jiang }
88a62b8f87SDave Jiang static DEVICE_ATTR_RO(pid);
89a62b8f87SDave Jiang 
/* Attributes exposed under each per-open "fileN" sysfs device. */
static struct attribute *cdev_file_attributes[] = {
	&dev_attr_cr_faults.attr,
	&dev_attr_cr_fault_failures.attr,
	&dev_attr_pid.attr,
	NULL
};
96244009b0SDave Jiang 
cdev_file_attr_visible(struct kobject * kobj,struct attribute * a,int n)97244009b0SDave Jiang static umode_t cdev_file_attr_visible(struct kobject *kobj, struct attribute *a, int n)
98244009b0SDave Jiang {
99244009b0SDave Jiang 	struct device *dev = container_of(kobj, typeof(*dev), kobj);
100244009b0SDave Jiang 	struct idxd_user_context *ctx = dev_to_uctx(dev);
101244009b0SDave Jiang 	struct idxd_wq *wq = ctx->wq;
102244009b0SDave Jiang 
103244009b0SDave Jiang 	if (!wq_pasid_enabled(wq))
104244009b0SDave Jiang 		return 0;
105244009b0SDave Jiang 
106244009b0SDave Jiang 	return a->mode;
107244009b0SDave Jiang }
108244009b0SDave Jiang 
static const struct attribute_group cdev_file_attribute_group = {
	.attrs = cdev_file_attributes,
	/* hides the group entirely when the wq has no PASID support */
	.is_visible = cdev_file_attr_visible,
};

static const struct attribute_group *cdev_file_attribute_groups[] = {
	&cdev_file_attribute_group,
	NULL
};
118244009b0SDave Jiang 
/*
 * Release callback for the per-open "fileN" device.  Runs when the last
 * reference to the device created in idxd_cdev_open() is dropped (via
 * device_unregister() in idxd_cdev_release()).  Drains work outstanding
 * for this context's PASID, tears down the SVA binding, frees the
 * context and drops the wq reference taken at open.
 */
static void idxd_file_dev_release(struct device *dev)
{
	struct idxd_user_context *ctx = dev_to_uctx(dev);
	struct idxd_wq *wq = ctx->wq;
	struct idxd_device *idxd = wq->idxd;
	int rc;

	/* Return the global "fileN" name id. */
	mutex_lock(&ida_lock);
	ida_free(&file_ida, ctx->id);
	mutex_unlock(&ida_lock);

	/* Wait for in-flight operations to complete. */
	if (wq_shared(wq)) {
		idxd_device_drain_pasid(idxd, ctx->pasid);
	} else {
		if (device_user_pasid_enabled(idxd)) {
			/* The wq disable in the disable pasid function will drain the wq */
			rc = idxd_wq_disable_pasid(wq);
			if (rc < 0)
				dev_err(dev, "wq disable pasid failed.\n");
		} else {
			idxd_wq_drain(wq);
		}
	}

	if (ctx->sva) {
		/* Flush event-log work for this PASID before unbinding it. */
		idxd_cdev_evl_drain_pasid(wq, ctx->pasid);
		iommu_sva_unbind_device(ctx->sva);
		idxd_xa_pasid_remove(ctx);
	}
	kfree(ctx);
	mutex_lock(&wq->wq_lock);
	idxd_wq_put(wq);
	mutex_unlock(&wq->wq_lock);
}
154e6fd6d7eSDave Jiang 
/* Device type for the per-open "fileN" devices parented to the wq cdev. */
static struct device_type idxd_cdev_file_type = {
	.name = "idxd_file",
	.release = idxd_file_dev_release,
	.groups = cdev_file_attribute_groups,
};
16042d279f9SDave Jiang 
idxd_cdev_dev_release(struct device * dev)16142d279f9SDave Jiang static void idxd_cdev_dev_release(struct device *dev)
16242d279f9SDave Jiang {
163700af3a0SDave Jiang 	struct idxd_cdev *idxd_cdev = dev_to_cdev(dev);
16404922b74SDave Jiang 	struct idxd_cdev_context *cdev_ctx;
16504922b74SDave Jiang 	struct idxd_wq *wq = idxd_cdev->wq;
16604922b74SDave Jiang 
167435b512dSDave Jiang 	cdev_ctx = &ictx[wq->idxd->data->type];
16804922b74SDave Jiang 	ida_simple_remove(&cdev_ctx->minor_ida, idxd_cdev->minor);
16904922b74SDave Jiang 	kfree(idxd_cdev);
17042d279f9SDave Jiang }
17142d279f9SDave Jiang 
/* Device type for the wq char device itself. */
static struct device_type idxd_cdev_device_type = {
	.name = "idxd_cdev",
	.release = idxd_cdev_dev_release,
};
17642d279f9SDave Jiang 
inode_idxd_cdev(struct inode * inode)17742d279f9SDave Jiang static inline struct idxd_cdev *inode_idxd_cdev(struct inode *inode)
17842d279f9SDave Jiang {
17942d279f9SDave Jiang 	struct cdev *cdev = inode->i_cdev;
18042d279f9SDave Jiang 
18142d279f9SDave Jiang 	return container_of(cdev, struct idxd_cdev, cdev);
18242d279f9SDave Jiang }
18342d279f9SDave Jiang 
inode_wq(struct inode * inode)18442d279f9SDave Jiang static inline struct idxd_wq *inode_wq(struct inode *inode)
18542d279f9SDave Jiang {
18604922b74SDave Jiang 	struct idxd_cdev *idxd_cdev = inode_idxd_cdev(inode);
18704922b74SDave Jiang 
18804922b74SDave Jiang 	return idxd_cdev->wq;
18942d279f9SDave Jiang }
19042d279f9SDave Jiang 
idxd_xa_pasid_remove(struct idxd_user_context * ctx)191b022f597SFenghua Yu static void idxd_xa_pasid_remove(struct idxd_user_context *ctx)
192b022f597SFenghua Yu {
193b022f597SFenghua Yu 	struct idxd_wq *wq = ctx->wq;
194b022f597SFenghua Yu 	void *ptr;
195b022f597SFenghua Yu 
196b022f597SFenghua Yu 	mutex_lock(&wq->uc_lock);
197b022f597SFenghua Yu 	ptr = xa_cmpxchg(&wq->upasid_xa, ctx->pasid, ctx, NULL, GFP_KERNEL);
198b022f597SFenghua Yu 	if (ptr != (void *)ctx)
199b022f597SFenghua Yu 		dev_warn(&wq->idxd->pdev->dev, "xarray cmpxchg failed for pasid %u\n",
200b022f597SFenghua Yu 			 ctx->pasid);
201b022f597SFenghua Yu 	mutex_unlock(&wq->uc_lock);
202b022f597SFenghua Yu }
203b022f597SFenghua Yu 
/*
 * Bump the per-file counter @index for the open context registered under
 * @pasid on @wq.  Silently ignores unknown PASIDs and out-of-range
 * indices.
 */
void idxd_user_counter_increment(struct idxd_wq *wq, u32 pasid, int index)
{
	struct idxd_user_context *ctx;

	if (index >= COUNTER_MAX)
		return;

	mutex_lock(&wq->uc_lock);
	ctx = xa_load(&wq->upasid_xa, pasid);
	if (ctx)
		ctx->counters[index]++;
	mutex_unlock(&wq->uc_lock);
}
220fecae134SDave Jiang 
idxd_cdev_open(struct inode * inode,struct file * filp)22142d279f9SDave Jiang static int idxd_cdev_open(struct inode *inode, struct file *filp)
22242d279f9SDave Jiang {
22342d279f9SDave Jiang 	struct idxd_user_context *ctx;
22442d279f9SDave Jiang 	struct idxd_device *idxd;
22542d279f9SDave Jiang 	struct idxd_wq *wq;
226e6fd6d7eSDave Jiang 	struct device *dev, *fdev;
22766983bc1SNikhil Rao 	int rc = 0;
2288e50d392SDave Jiang 	struct iommu_sva *sva;
2298e50d392SDave Jiang 	unsigned int pasid;
230e6fd6d7eSDave Jiang 	struct idxd_cdev *idxd_cdev;
23142d279f9SDave Jiang 
23242d279f9SDave Jiang 	wq = inode_wq(inode);
23342d279f9SDave Jiang 	idxd = wq->idxd;
23442d279f9SDave Jiang 	dev = &idxd->pdev->dev;
23542d279f9SDave Jiang 
236988aad2fSDave Jiang 	dev_dbg(dev, "%s called: %d\n", __func__, idxd_wq_refcount(wq));
23742d279f9SDave Jiang 
23842d279f9SDave Jiang 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
23942d279f9SDave Jiang 	if (!ctx)
24042d279f9SDave Jiang 		return -ENOMEM;
24142d279f9SDave Jiang 
24266983bc1SNikhil Rao 	mutex_lock(&wq->wq_lock);
24366983bc1SNikhil Rao 
24466983bc1SNikhil Rao 	if (idxd_wq_refcount(wq) > 0 && wq_dedicated(wq)) {
24566983bc1SNikhil Rao 		rc = -EBUSY;
24666983bc1SNikhil Rao 		goto failed;
24766983bc1SNikhil Rao 	}
24866983bc1SNikhil Rao 
24942d279f9SDave Jiang 	ctx->wq = wq;
25042d279f9SDave Jiang 	filp->private_data = ctx;
251a62b8f87SDave Jiang 	ctx->pid = current->pid;
2528e50d392SDave Jiang 
25342a1b738SDave Jiang 	if (device_user_pasid_enabled(idxd)) {
254942fd543SLu Baolu 		sva = iommu_sva_bind_device(dev, current->mm);
2558e50d392SDave Jiang 		if (IS_ERR(sva)) {
2568e50d392SDave Jiang 			rc = PTR_ERR(sva);
2578e50d392SDave Jiang 			dev_err(dev, "pasid allocation failed: %d\n", rc);
2588e50d392SDave Jiang 			goto failed;
2598e50d392SDave Jiang 		}
2608e50d392SDave Jiang 
2618e50d392SDave Jiang 		pasid = iommu_sva_get_pasid(sva);
2628e50d392SDave Jiang 		if (pasid == IOMMU_PASID_INVALID) {
26399b18e88SJiapeng Chong 			rc = -EINVAL;
264b022f597SFenghua Yu 			goto failed_get_pasid;
2658e50d392SDave Jiang 		}
2668e50d392SDave Jiang 
2678e50d392SDave Jiang 		ctx->sva = sva;
2688e50d392SDave Jiang 		ctx->pasid = pasid;
269b022f597SFenghua Yu 		ctx->mm = current->mm;
270b022f597SFenghua Yu 
271b022f597SFenghua Yu 		mutex_lock(&wq->uc_lock);
272b022f597SFenghua Yu 		rc = xa_insert(&wq->upasid_xa, pasid, ctx, GFP_KERNEL);
273b022f597SFenghua Yu 		mutex_unlock(&wq->uc_lock);
274b022f597SFenghua Yu 		if (rc < 0)
275b022f597SFenghua Yu 			dev_warn(dev, "PASID entry already exist in xarray.\n");
2768e50d392SDave Jiang 
2778e50d392SDave Jiang 		if (wq_dedicated(wq)) {
2788e50d392SDave Jiang 			rc = idxd_wq_set_pasid(wq, pasid);
2798e50d392SDave Jiang 			if (rc < 0) {
2808e50d392SDave Jiang 				dev_err(dev, "wq set pasid failed: %d\n", rc);
281b022f597SFenghua Yu 				goto failed_set_pasid;
2828e50d392SDave Jiang 			}
2838e50d392SDave Jiang 		}
2848e50d392SDave Jiang 	}
2858e50d392SDave Jiang 
286e6fd6d7eSDave Jiang 	idxd_cdev = wq->idxd_cdev;
287e6fd6d7eSDave Jiang 	mutex_lock(&ida_lock);
288e6fd6d7eSDave Jiang 	ctx->id = ida_alloc(&file_ida, GFP_KERNEL);
289e6fd6d7eSDave Jiang 	mutex_unlock(&ida_lock);
290e6fd6d7eSDave Jiang 	if (ctx->id < 0) {
291e6fd6d7eSDave Jiang 		dev_warn(dev, "ida alloc failure\n");
292e6fd6d7eSDave Jiang 		goto failed_ida;
293e6fd6d7eSDave Jiang 	}
294e6fd6d7eSDave Jiang 	ctx->idxd_dev.type  = IDXD_DEV_CDEV_FILE;
295e6fd6d7eSDave Jiang 	fdev = user_ctx_dev(ctx);
296e6fd6d7eSDave Jiang 	device_initialize(fdev);
297e6fd6d7eSDave Jiang 	fdev->parent = cdev_dev(idxd_cdev);
298e6fd6d7eSDave Jiang 	fdev->bus = &dsa_bus_type;
299e6fd6d7eSDave Jiang 	fdev->type = &idxd_cdev_file_type;
300e6fd6d7eSDave Jiang 
301e6fd6d7eSDave Jiang 	rc = dev_set_name(fdev, "file%d", ctx->id);
302e6fd6d7eSDave Jiang 	if (rc < 0) {
303e6fd6d7eSDave Jiang 		dev_warn(dev, "set name failure\n");
304e6fd6d7eSDave Jiang 		goto failed_dev_name;
305e6fd6d7eSDave Jiang 	}
306e6fd6d7eSDave Jiang 
307e6fd6d7eSDave Jiang 	rc = device_add(fdev);
308e6fd6d7eSDave Jiang 	if (rc < 0) {
309e6fd6d7eSDave Jiang 		dev_warn(dev, "file device add failure\n");
310e6fd6d7eSDave Jiang 		goto failed_dev_add;
311e6fd6d7eSDave Jiang 	}
312e6fd6d7eSDave Jiang 
31342d279f9SDave Jiang 	idxd_wq_get(wq);
31466983bc1SNikhil Rao 	mutex_unlock(&wq->wq_lock);
31542d279f9SDave Jiang 	return 0;
31666983bc1SNikhil Rao 
317e6fd6d7eSDave Jiang failed_dev_add:
318e6fd6d7eSDave Jiang failed_dev_name:
319e6fd6d7eSDave Jiang 	put_device(fdev);
320e6fd6d7eSDave Jiang failed_ida:
321b022f597SFenghua Yu failed_set_pasid:
322b022f597SFenghua Yu 	if (device_user_pasid_enabled(idxd))
323b022f597SFenghua Yu 		idxd_xa_pasid_remove(ctx);
324b022f597SFenghua Yu failed_get_pasid:
325b022f597SFenghua Yu 	if (device_user_pasid_enabled(idxd))
326b022f597SFenghua Yu 		iommu_sva_unbind_device(sva);
32766983bc1SNikhil Rao failed:
32866983bc1SNikhil Rao 	mutex_unlock(&wq->wq_lock);
32966983bc1SNikhil Rao 	kfree(ctx);
33066983bc1SNikhil Rao 	return rc;
33142d279f9SDave Jiang }
33242d279f9SDave Jiang 
/*
 * Mark event-log entries belonging to @pasid on @wq in evl->bmap
 * (consumed by the event-log handling code elsewhere), then flush the
 * wq's workqueue so queued event work completes before the PASID goes
 * away.
 */
static void idxd_cdev_evl_drain_pasid(struct idxd_wq *wq, u32 pasid)
{
	struct idxd_device *idxd = wq->idxd;
	struct idxd_evl *evl = idxd->evl;
	union evl_status_reg status;
	u16 h, t, size;
	int ent_size = evl_ent_size(idxd);
	struct __evl_entry *entry_head;

	if (!evl)
		return;

	mutex_lock(&evl->lock);
	/* Snapshot the circular log's head/tail from the device register. */
	status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET);
	t = status.tail;
	h = status.head;
	size = evl->size;

	/* Walk [head, tail) and flag entries matching this pasid/wq pair. */
	while (h != t) {
		entry_head = (struct __evl_entry *)(evl->log + (h * ent_size));
		if (entry_head->pasid == pasid && entry_head->wq_idx == wq->id)
			set_bit(h, evl->bmap);
		h = (h + 1) % size;
	}
	drain_workqueue(wq->wq);
	mutex_unlock(&evl->lock);
}
360c40bd7d9SDave Jiang 
idxd_cdev_release(struct inode * node,struct file * filep)36142d279f9SDave Jiang static int idxd_cdev_release(struct inode *node, struct file *filep)
36242d279f9SDave Jiang {
36342d279f9SDave Jiang 	struct idxd_user_context *ctx = filep->private_data;
36442d279f9SDave Jiang 	struct idxd_wq *wq = ctx->wq;
36542d279f9SDave Jiang 	struct idxd_device *idxd = wq->idxd;
36642d279f9SDave Jiang 	struct device *dev = &idxd->pdev->dev;
36742d279f9SDave Jiang 
36842d279f9SDave Jiang 	dev_dbg(dev, "%s called\n", __func__);
36942d279f9SDave Jiang 	filep->private_data = NULL;
37042d279f9SDave Jiang 
371e6fd6d7eSDave Jiang 	device_unregister(user_ctx_dev(ctx));
3720d5c10b4SDave Jiang 
37342d279f9SDave Jiang 	return 0;
37442d279f9SDave Jiang }
37542d279f9SDave Jiang 
check_vma(struct idxd_wq * wq,struct vm_area_struct * vma,const char * func)37642d279f9SDave Jiang static int check_vma(struct idxd_wq *wq, struct vm_area_struct *vma,
37742d279f9SDave Jiang 		     const char *func)
37842d279f9SDave Jiang {
37942d279f9SDave Jiang 	struct device *dev = &wq->idxd->pdev->dev;
38042d279f9SDave Jiang 
38142d279f9SDave Jiang 	if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) {
38242d279f9SDave Jiang 		dev_info_ratelimited(dev,
38342d279f9SDave Jiang 				     "%s: %s: mapping too large: %lu\n",
38442d279f9SDave Jiang 				     current->comm, func,
38542d279f9SDave Jiang 				     vma->vm_end - vma->vm_start);
38642d279f9SDave Jiang 		return -EINVAL;
38742d279f9SDave Jiang 	}
38842d279f9SDave Jiang 
38942d279f9SDave Jiang 	return 0;
39042d279f9SDave Jiang }
39142d279f9SDave Jiang 
/*
 * mmap() handler: map one uncached wq portal page (limited portal) into
 * the caller's address space.  The mapping is capped at PAGE_SIZE by
 * check_vma() and marked VM_DONTCOPY so it is not inherited over fork.
 */
static int idxd_cdev_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct idxd_user_context *ctx = filp->private_data;
	struct idxd_wq *wq = ctx->wq;
	struct idxd_device *idxd = wq->idxd;
	struct pci_dev *pdev = idxd->pdev;
	phys_addr_t base = pci_resource_start(pdev, IDXD_WQ_BAR);
	unsigned long pfn;
	int rc;

	dev_dbg(&pdev->dev, "%s called\n", __func__);

	/*
	 * Due to an erratum in some of the devices supported by the driver,
	 * direct user submission to the device can be unsafe.
	 * (See the INTEL-SA-01084 security advisory)
	 *
	 * For the devices that exhibit this behavior, require that the user
	 * has CAP_SYS_RAWIO capabilities.
	 */
	if (!idxd->user_submission_safe && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	rc = check_vma(wq, vma, __func__);
	if (rc < 0)
		return rc;

	vm_flags_set(vma, VM_DONTCOPY);
	/* Physical page of this wq's limited portal within the WQ BAR. */
	pfn = (base + idxd_get_wq_portal_full_offset(wq->id,
				IDXD_PORTAL_LIMITED)) >> PAGE_SHIFT;
	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	vma->vm_private_data = ctx;

	return io_remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE,
			vma->vm_page_prot);
}
42842d279f9SDave Jiang 
/*
 * Copy one descriptor from userspace, validate it and submit it to the
 * wq portal.  Returns 0 on success, -EFAULT if the copy from user fails,
 * -EINVAL if the descriptor fails the safety checks below, or the error
 * from idxd_enqcmds() for a shared wq.
 */
static int idxd_submit_user_descriptor(struct idxd_user_context *ctx,
				       struct dsa_hw_desc __user *udesc)
{
	struct idxd_wq *wq = ctx->wq;
	struct idxd_dev *idxd_dev = &wq->idxd->idxd_dev;
	/* DSA requires 32-byte completion alignment, IAX 64-byte. */
	const uint64_t comp_addr_align = is_dsa_dev(idxd_dev) ? 0x20 : 0x40;
	void __iomem *portal = idxd_wq_portal_addr(wq);
	struct dsa_hw_desc descriptor __aligned(64);
	int rc;

	rc = copy_from_user(&descriptor, udesc, sizeof(descriptor));
	if (rc)
		return -EFAULT;

	/*
	 * DSA devices are capable of indirect ("batch") command submission.
	 * On devices where direct user submissions are not safe, we cannot
	 * allow this since there is no good way for us to verify these
	 * indirect commands.
	 */
	if (is_dsa_dev(idxd_dev) && descriptor.opcode == DSA_OPCODE_BATCH &&
		!wq->idxd->user_submission_safe)
		return -EINVAL;
	/*
	 * As per the programming specification, the completion address must be
	 * aligned to 32 or 64 bytes. If this is violated the hardware
	 * engine can get very confused (security issue).
	 */
	if (!IS_ALIGNED(descriptor.completion_addr, comp_addr_align))
		return -EINVAL;

	if (wq_dedicated(wq))
		iosubmit_cmds512(portal, &descriptor, 1);
	else {
		/* Shared wq: the kernel owns priv/pasid, not userspace. */
		descriptor.priv = 0;
		descriptor.pasid = ctx->pasid;
		rc = idxd_enqcmds(wq, portal, &descriptor);
		if (rc < 0)
			return rc;
	}

	return 0;
}
472*9fda5aedSNikhil Rao 
idxd_cdev_write(struct file * filp,const char __user * buf,size_t len,loff_t * unused)473*9fda5aedSNikhil Rao static ssize_t idxd_cdev_write(struct file *filp, const char __user *buf, size_t len,
474*9fda5aedSNikhil Rao 			       loff_t *unused)
475*9fda5aedSNikhil Rao {
476*9fda5aedSNikhil Rao 	struct dsa_hw_desc __user *udesc = (struct dsa_hw_desc __user *)buf;
477*9fda5aedSNikhil Rao 	struct idxd_user_context *ctx = filp->private_data;
478*9fda5aedSNikhil Rao 	ssize_t written = 0;
479*9fda5aedSNikhil Rao 	int i;
480*9fda5aedSNikhil Rao 
481*9fda5aedSNikhil Rao 	for (i = 0; i < len/sizeof(struct dsa_hw_desc); i++) {
482*9fda5aedSNikhil Rao 		int rc = idxd_submit_user_descriptor(ctx, udesc + i);
483*9fda5aedSNikhil Rao 
484*9fda5aedSNikhil Rao 		if (rc)
485*9fda5aedSNikhil Rao 			return written ? written : rc;
486*9fda5aedSNikhil Rao 
487*9fda5aedSNikhil Rao 		written += sizeof(struct dsa_hw_desc);
488*9fda5aedSNikhil Rao 	}
489*9fda5aedSNikhil Rao 
490*9fda5aedSNikhil Rao 	return written;
491*9fda5aedSNikhil Rao }
492*9fda5aedSNikhil Rao 
idxd_cdev_poll(struct file * filp,struct poll_table_struct * wait)49342d279f9SDave Jiang static __poll_t idxd_cdev_poll(struct file *filp,
49442d279f9SDave Jiang 			       struct poll_table_struct *wait)
49542d279f9SDave Jiang {
49642d279f9SDave Jiang 	struct idxd_user_context *ctx = filp->private_data;
49742d279f9SDave Jiang 	struct idxd_wq *wq = ctx->wq;
49842d279f9SDave Jiang 	struct idxd_device *idxd = wq->idxd;
49942d279f9SDave Jiang 	__poll_t out = 0;
50042d279f9SDave Jiang 
50104922b74SDave Jiang 	poll_wait(filp, &wq->err_queue, wait);
502cf84a4b9SDave Jiang 	spin_lock(&idxd->dev_lock);
50342d279f9SDave Jiang 	if (idxd->sw_err.valid)
50442d279f9SDave Jiang 		out = EPOLLIN | EPOLLRDNORM;
505cf84a4b9SDave Jiang 	spin_unlock(&idxd->dev_lock);
50642d279f9SDave Jiang 
50742d279f9SDave Jiang 	return out;
50842d279f9SDave Jiang }
50942d279f9SDave Jiang 
/* File operations for the wq char device. */
static const struct file_operations idxd_cdev_fops = {
	.owner = THIS_MODULE,
	.open = idxd_cdev_open,
	.release = idxd_cdev_release,
	.mmap = idxd_cdev_mmap,		/* map a wq portal page */
	.write = idxd_cdev_write,	/* kernel-mediated descriptor submission */
	.poll = idxd_cdev_poll,		/* readable when a device error is pending */
};
51842d279f9SDave Jiang 
idxd_cdev_get_major(struct idxd_device * idxd)51942d279f9SDave Jiang int idxd_cdev_get_major(struct idxd_device *idxd)
52042d279f9SDave Jiang {
521435b512dSDave Jiang 	return MAJOR(ictx[idxd->data->type].devt);
52242d279f9SDave Jiang }
52342d279f9SDave Jiang 
idxd_wq_add_cdev(struct idxd_wq * wq)52404922b74SDave Jiang int idxd_wq_add_cdev(struct idxd_wq *wq)
52542d279f9SDave Jiang {
52642d279f9SDave Jiang 	struct idxd_device *idxd = wq->idxd;
52704922b74SDave Jiang 	struct idxd_cdev *idxd_cdev;
52804922b74SDave Jiang 	struct cdev *cdev;
52942d279f9SDave Jiang 	struct device *dev;
53004922b74SDave Jiang 	struct idxd_cdev_context *cdev_ctx;
53104922b74SDave Jiang 	int rc, minor;
53242d279f9SDave Jiang 
53304922b74SDave Jiang 	idxd_cdev = kzalloc(sizeof(*idxd_cdev), GFP_KERNEL);
53404922b74SDave Jiang 	if (!idxd_cdev)
53542d279f9SDave Jiang 		return -ENOMEM;
53642d279f9SDave Jiang 
537700af3a0SDave Jiang 	idxd_cdev->idxd_dev.type = IDXD_DEV_CDEV;
53804922b74SDave Jiang 	idxd_cdev->wq = wq;
53904922b74SDave Jiang 	cdev = &idxd_cdev->cdev;
540700af3a0SDave Jiang 	dev = cdev_dev(idxd_cdev);
541435b512dSDave Jiang 	cdev_ctx = &ictx[wq->idxd->data->type];
54242d279f9SDave Jiang 	minor = ida_simple_get(&cdev_ctx->minor_ida, 0, MINORMASK, GFP_KERNEL);
54342d279f9SDave Jiang 	if (minor < 0) {
54404922b74SDave Jiang 		kfree(idxd_cdev);
54504922b74SDave Jiang 		return minor;
54642d279f9SDave Jiang 	}
54742d279f9SDave Jiang 	idxd_cdev->minor = minor;
54842d279f9SDave Jiang 
54904922b74SDave Jiang 	device_initialize(dev);
550700af3a0SDave Jiang 	dev->parent = wq_confdev(wq);
5514b73e4ebSDave Jiang 	dev->bus = &dsa_bus_type;
55204922b74SDave Jiang 	dev->type = &idxd_cdev_device_type;
55304922b74SDave Jiang 	dev->devt = MKDEV(MAJOR(cdev_ctx->devt), minor);
55442d279f9SDave Jiang 
555435b512dSDave Jiang 	rc = dev_set_name(dev, "%s/wq%u.%u", idxd->data->name_prefix, idxd->id, wq->id);
55642d279f9SDave Jiang 	if (rc < 0)
55704922b74SDave Jiang 		goto err;
55842d279f9SDave Jiang 
55904922b74SDave Jiang 	wq->idxd_cdev = idxd_cdev;
56042d279f9SDave Jiang 	cdev_init(cdev, &idxd_cdev_fops);
56104922b74SDave Jiang 	rc = cdev_device_add(cdev, dev);
56242d279f9SDave Jiang 	if (rc) {
56342d279f9SDave Jiang 		dev_dbg(&wq->idxd->pdev->dev, "cdev_add failed: %d\n", rc);
56404922b74SDave Jiang 		goto err;
56542d279f9SDave Jiang 	}
56642d279f9SDave Jiang 
56742d279f9SDave Jiang 	return 0;
56804922b74SDave Jiang 
56904922b74SDave Jiang  err:
57004922b74SDave Jiang 	put_device(dev);
57104922b74SDave Jiang 	wq->idxd_cdev = NULL;
57204922b74SDave Jiang 	return rc;
57342d279f9SDave Jiang }
57442d279f9SDave Jiang 
idxd_wq_del_cdev(struct idxd_wq * wq)57542d279f9SDave Jiang void idxd_wq_del_cdev(struct idxd_wq *wq)
57642d279f9SDave Jiang {
57704922b74SDave Jiang 	struct idxd_cdev *idxd_cdev;
57804922b74SDave Jiang 
57904922b74SDave Jiang 	idxd_cdev = wq->idxd_cdev;
58004922b74SDave Jiang 	wq->idxd_cdev = NULL;
581700af3a0SDave Jiang 	cdev_device_del(&idxd_cdev->cdev, cdev_dev(idxd_cdev));
582700af3a0SDave Jiang 	put_device(cdev_dev(idxd_cdev));
58342d279f9SDave Jiang }
58442d279f9SDave Jiang 
/*
 * Probe for the "user" wq type: create the wq's workqueue, enable the
 * wq and register its char device.  Returns -ENXIO when the device is
 * not enabled, -EOPNOTSUPP without SVA (see comment below), or the
 * enable/cdev error; on failure everything is unwound in reverse order.
 */
static int idxd_user_drv_probe(struct idxd_dev *idxd_dev)
{
	struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
	struct idxd_device *idxd = wq->idxd;
	int rc;

	if (idxd->state != IDXD_DEV_ENABLED)
		return -ENXIO;

	/*
	 * User type WQ is enabled only when SVA is enabled for two reasons:
	 *   - If no IOMMU or IOMMU Passthrough without SVA, userspace
	 *     can directly access physical address through the WQ.
	 *   - The IDXD cdev driver does not provide any ways to pin
	 *     user pages and translate the address from user VA to IOVA or
	 *     PA without IOMMU SVA. Therefore the application has no way
	 *     to instruct the device to perform DMA function. This makes
	 *     the cdev not usable for normal application usage.
	 */
	if (!device_user_pasid_enabled(idxd)) {
		idxd->cmd_status = IDXD_SCMD_WQ_USER_NO_IOMMU;
		dev_dbg(&idxd->pdev->dev,
			"User type WQ cannot be enabled without SVA.\n");

		return -EOPNOTSUPP;
	}

	mutex_lock(&wq->wq_lock);

	/* Workqueue used for deferred work (e.g. event-log draining). */
	wq->wq = create_workqueue(dev_name(wq_confdev(wq)));
	if (!wq->wq) {
		rc = -ENOMEM;
		goto wq_err;
	}

	wq->type = IDXD_WQT_USER;
	rc = drv_enable_wq(wq);
	if (rc < 0)
		goto err;

	rc = idxd_wq_add_cdev(wq);
	if (rc < 0) {
		idxd->cmd_status = IDXD_SCMD_CDEV_ERR;
		goto err_cdev;
	}

	idxd->cmd_status = 0;
	mutex_unlock(&wq->wq_lock);
	return 0;

err_cdev:
	drv_disable_wq(wq);
err:
	destroy_workqueue(wq->wq);
	wq->type = IDXD_WQT_NONE;
wq_err:
	mutex_unlock(&wq->wq_lock);
	return rc;
}
644448c3de8SDave Jiang 
/*
 * Teardown mirror of idxd_user_drv_probe(): remove the char device,
 * disable the wq and destroy its workqueue, in that order.
 */
static void idxd_user_drv_remove(struct idxd_dev *idxd_dev)
{
	struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);

	mutex_lock(&wq->wq_lock);
	idxd_wq_del_cdev(wq);
	drv_disable_wq(wq);
	wq->type = IDXD_WQT_NONE;
	destroy_workqueue(wq->wq);
	wq->wq = NULL;
	mutex_unlock(&wq->wq_lock);
}
657448c3de8SDave Jiang 
/*
 * Device types this driver binds to. IDXD_DEV_NONE is the last entry —
 * presumably a list terminator for the driver core's type matching;
 * verify against idxd_device_driver registration.
 */
static enum idxd_dev_type dev_types[] = {
	IDXD_DEV_WQ,
	IDXD_DEV_NONE,
};
662448c3de8SDave Jiang 
/*
 * Driver descriptor for user-type work queues ("user" device driver),
 * exposing a wq to applications through the idxd char device interface.
 */
struct idxd_device_driver idxd_user_drv = {
	.probe = idxd_user_drv_probe,
	.remove = idxd_user_drv_remove,
	.name = "user",
	.type = dev_types,
};
EXPORT_SYMBOL_GPL(idxd_user_drv);
670448c3de8SDave Jiang 
idxd_cdev_register(void)67142d279f9SDave Jiang int idxd_cdev_register(void)
67242d279f9SDave Jiang {
67342d279f9SDave Jiang 	int rc, i;
67442d279f9SDave Jiang 
67542d279f9SDave Jiang 	for (i = 0; i < IDXD_TYPE_MAX; i++) {
67642d279f9SDave Jiang 		ida_init(&ictx[i].minor_ida);
67742d279f9SDave Jiang 		rc = alloc_chrdev_region(&ictx[i].devt, 0, MINORMASK,
67842d279f9SDave Jiang 					 ictx[i].name);
67942d279f9SDave Jiang 		if (rc)
680aab08c1aSChristophe JAILLET 			goto err_free_chrdev_region;
68142d279f9SDave Jiang 	}
68242d279f9SDave Jiang 
68342d279f9SDave Jiang 	return 0;
684aab08c1aSChristophe JAILLET 
685aab08c1aSChristophe JAILLET err_free_chrdev_region:
686aab08c1aSChristophe JAILLET 	for (i--; i >= 0; i--)
687aab08c1aSChristophe JAILLET 		unregister_chrdev_region(ictx[i].devt, MINORMASK);
688aab08c1aSChristophe JAILLET 
689aab08c1aSChristophe JAILLET 	return rc;
69042d279f9SDave Jiang }
69142d279f9SDave Jiang 
idxd_cdev_remove(void)69242d279f9SDave Jiang void idxd_cdev_remove(void)
69342d279f9SDave Jiang {
69442d279f9SDave Jiang 	int i;
69542d279f9SDave Jiang 
69642d279f9SDave Jiang 	for (i = 0; i < IDXD_TYPE_MAX; i++) {
69742d279f9SDave Jiang 		unregister_chrdev_region(ictx[i].devt, MINORMASK);
69842d279f9SDave Jiang 		ida_destroy(&ictx[i].minor_ida);
69942d279f9SDave Jiang 	}
70042d279f9SDave Jiang }
701b022f597SFenghua Yu 
/**
 * idxd_copy_cr - copy completion record to user address space found by wq and
 *		  PASID
 * @wq:		work queue
 * @pasid:	PASID
 * @addr:	user fault address to write
 * @cr:		completion record
 * @len:	number of bytes to copy
 *
 * This is called by a work that handles completion record fault.
 *
 * The record body is copied before its status byte (offset 0) so that a
 * user polling for a non-zero status never observes a partial record.
 *
 * Return: number of bytes copied.
 */
int idxd_copy_cr(struct idxd_wq *wq, ioasid_t pasid, unsigned long addr,
		 void *cr, int len)
{
	struct device *dev = &wq->idxd->pdev->dev;
	/* left = bytes NOT copied; starts at len so a bailout reports 0. */
	int left = len, status_size = 1;
	struct idxd_user_context *ctx;
	struct mm_struct *mm;

	/* NOTE(review): uc_lock appears to guard upasid_xa lookups — confirm. */
	mutex_lock(&wq->uc_lock);

	ctx = xa_load(&wq->upasid_xa, pasid);
	if (!ctx) {
		dev_warn(dev, "No user context\n");
		goto out;
	}

	mm = ctx->mm;
	/*
	 * The completion record fault handling work is running in kernel
	 * thread context. It temporarily switches to the mm to copy cr
	 * to addr in the mm.
	 */
	kthread_use_mm(mm);
	/* Copy everything except the leading status byte first. */
	left = copy_to_user((void __user *)addr + status_size, cr + status_size,
			    len - status_size);
	/*
	 * Copy status only after the rest of completion record is copied
	 * successfully so that the user gets the complete completion record
	 * when a non-zero status is polled.
	 */
	if (!left) {
		u8 status;

		/*
		 * Ensure that the completion record's status field is written
		 * after the rest of the completion record has been written.
		 * This ensures that the user receives the correct completion
		 * record information once polling for a non-zero status.
		 */
		wmb();
		status = *(u8 *)cr;
		if (put_user(status, (u8 __user *)addr))
			left += status_size;
	} else {
		/* Body copy failed; count the never-attempted status byte too. */
		left += status_size;
	}
	kthread_unuse_mm(mm);

out:
	mutex_unlock(&wq->uc_lock);

	return len - left;
}
768