xref: /openbmc/linux/drivers/sbus/char/oradax.c (revision 1ac731c529cd4d6adbce134754b51ff7d822b145)
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
 */
5  
/*
 * Oracle Data Analytics Accelerator (DAX)
 *
 * DAX is a coprocessor which resides on the SPARC M7 (DAX1) and M8
 * (DAX2) processor chips, and has direct access to the CPU's L3
 * caches as well as physical memory. It can perform several
 * operations on data streams with various input and output formats.
 * The driver provides a transport mechanism only and has limited
 * knowledge of the various opcodes and data formats. A user space
 * library provides high level services and translates these into low
 * level commands which are then passed into the driver and
 * subsequently the hypervisor and the coprocessor.  The library is
 * the recommended way for applications to use the coprocessor, and
 * the driver interface is not intended for general use.
 *
 * See Documentation/arch/sparc/oradax/oracle-dax.rst for more details.
 */
23  
#include <linux/cdev.h>
#include <linux/delay.h>
#include <linux/err.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/uaccess.h>

#include <asm/hypervisor.h>
#include <asm/mdesc.h>
#include <asm/oradax.h>
34  
35  MODULE_LICENSE("GPL");
36  MODULE_DESCRIPTION("Driver for Oracle Data Analytics Accelerator");
37  
/* Bits tested against the dax_debug module parameter by the macros below. */
#define	DAX_DBG_FLG_BASIC	0x01
#define	DAX_DBG_FLG_STAT	0x02
#define	DAX_DBG_FLG_INFO	0x04
#define	DAX_DBG_FLG_ALL		0xff

/* Unconditional logging; both prefix the calling function's name. */
#define	dax_err(fmt, ...)      pr_err("%s: " fmt "\n", __func__, ##__VA_ARGS__)
#define	dax_info(fmt, ...)     pr_info("%s: " fmt "\n", __func__, ##__VA_ARGS__)

/* Logged only when DAX_DBG_FLG_BASIC is set in dax_debug. */
#define	dax_dbg(fmt, ...)	do {					\
					if (dax_debug & DAX_DBG_FLG_BASIC)\
						dax_info(fmt, ##__VA_ARGS__); \
				} while (0)
/* Logged only when DAX_DBG_FLG_STAT is set in dax_debug. */
#define	dax_stat_dbg(fmt, ...)	do {					\
					if (dax_debug & DAX_DBG_FLG_STAT) \
						dax_info(fmt, ##__VA_ARGS__); \
				} while (0)
/* Logged only when DAX_DBG_FLG_INFO is set in dax_debug. */
#define	dax_info_dbg(fmt, ...)	do { \
					if (dax_debug & DAX_DBG_FLG_INFO) \
						dax_info(fmt, ##__VA_ARGS__); \
				} while (0)
58  
/* Hypervisor API versions requested by dax_attach() for each DAX flavour. */
#define	DAX1_MINOR		1
#define	DAX1_MAJOR		1
#define	DAX2_MINOR		0
#define	DAX2_MAJOR		2

/* "compatible" property prefixes matched by dax_attach(). */
#define	DAX1_STR    "ORCL,sun4v-dax"
#define	DAX2_STR    "ORCL,sun4v-dax2"

/* Number of completion-area entries in the DAX_MMAP_LEN-byte buffer. */
#define	DAX_CA_ELEMS		(DAX_MMAP_LEN / sizeof(struct dax_cca))

/*
 * Polling parameters shared by dax_ccb_wait() and dax_ccb_kill(): delay
 * per attempt (passed to udelay()) and the maximum number of attempts.
 */
#define	DAX_CCB_USEC		100
#define	DAX_CCB_RETRIES		10000
71  
/*
 * Stream types.  Used as the second index of dax_ctx.pages[][] to record
 * the pinned page backing each address of a CCB.
 */
enum {
	OUT,
	PRI,
	SEC,
	TBL,
	NUM_STREAM_TYPES
};
80  
/* completion status (values of dax_cca.status) */
#define	CCA_STAT_NOT_COMPLETED	0
#define	CCA_STAT_COMPLETED	1
#define	CCA_STAT_FAILED		2
#define	CCA_STAT_KILLED		3
#define	CCA_STAT_NOT_RUN	4
#define	CCA_STAT_PIPE_OUT	5
#define	CCA_STAT_PIPE_SRC	6
#define	CCA_STAT_PIPE_DST	7
90  
/* completion err (values of dax_cca.err) */
#define	CCA_ERR_SUCCESS		0x0	/* no error */
#define	CCA_ERR_OVERFLOW	0x1	/* buffer overflow */
#define	CCA_ERR_DECODE		0x2	/* CCB decode error */
#define	CCA_ERR_PAGE_OVERFLOW	0x3	/* page overflow */
#define	CCA_ERR_KILLED		0x7	/* command was killed */
#define	CCA_ERR_TIMEOUT		0x8	/* Timeout */
#define	CCA_ERR_ADI		0x9	/* ADI error */
#define	CCA_ERR_DATA_FMT	0xA	/* data format error */
#define	CCA_ERR_OTHER_NO_RETRY	0xE	/* Other error, do not retry */
#define	CCA_ERR_OTHER_RETRY	0xF	/* Other error, retry */
#define	CCA_ERR_PARTIAL_SYMBOL	0x80	/* QP partial symbol warning */
103  
/* CCB address types (values of the *_addr_type fields of dax_header) */
#define	DAX_ADDR_TYPE_NONE	0
#define	DAX_ADDR_TYPE_VA_ALT	1	/* secondary context */
#define	DAX_ADDR_TYPE_RA	2	/* real address */
#define	DAX_ADDR_TYPE_VA	3	/* virtual address */
109  
/* dax_header opcode values accepted by dax_preprocess_usr_ccbs() */
#define	DAX_OP_SYNC_NOP		0x0
#define	DAX_OP_EXTRACT		0x1
#define	DAX_OP_SCAN_VALUE	0x2
#define	DAX_OP_SCAN_RANGE	0x3
#define	DAX_OP_TRANSLATE	0x4
#define	DAX_OP_SELECT		0x5
#define	DAX_OP_INVERT		0x10	/* OR with translate, scan opcodes */
118  
119  struct dax_header {
120  	u32 ccb_version:4;	/* 31:28 CCB Version */
121  				/* 27:24 Sync Flags */
122  	u32 pipe:1;		/* Pipeline */
123  	u32 longccb:1;		/* Longccb. Set for scan with lu2, lu3, lu4. */
124  	u32 cond:1;		/* Conditional */
125  	u32 serial:1;		/* Serial */
126  	u32 opcode:8;		/* 23:16 Opcode */
127  				/* 15:0 Address Type. */
128  	u32 reserved:3;		/* 15:13 reserved */
129  	u32 table_addr_type:2;	/* 12:11 Huffman Table Address Type */
130  	u32 out_addr_type:3;	/* 10:8 Destination Address Type */
131  	u32 sec_addr_type:3;	/* 7:5 Secondary Source Address Type */
132  	u32 pri_addr_type:3;	/* 4:2 Primary Source Address Type */
133  	u32 cca_addr_type:2;	/* 1:0 Completion Address Type */
134  };
135  
136  struct dax_control {
137  	u32 pri_fmt:4;		/* 31:28 Primary Input Format */
138  	u32 pri_elem_size:5;	/* 27:23 Primary Input Element Size(less1) */
139  	u32 pri_offset:3;	/* 22:20 Primary Input Starting Offset */
140  	u32 sec_encoding:1;	/* 19    Secondary Input Encoding */
141  				/*	 (must be 0 for Select) */
142  	u32 sec_offset:3;	/* 18:16 Secondary Input Starting Offset */
143  	u32 sec_elem_size:2;	/* 15:14 Secondary Input Element Size */
144  				/*	 (must be 0 for Select) */
145  	u32 out_fmt:2;		/* 13:12 Output Format */
146  	u32 out_elem_size:2;	/* 11:10 Output Element Size */
147  	u32 misc:10;		/* 9:0 Opcode specific info */
148  };
149  
150  struct dax_data_access {
151  	u64 flow_ctrl:2;	/* 63:62 Flow Control Type */
152  	u64 pipe_target:2;	/* 61:60 Pipeline Target */
153  	u64 out_buf_size:20;	/* 59:40 Output Buffer Size */
154  				/*	 (cachelines less 1) */
155  	u64 unused1:8;		/* 39:32 Reserved, Set to 0 */
156  	u64 out_alloc:5;	/* 31:27 Output Allocation */
157  	u64 unused2:1;		/* 26	 Reserved */
158  	u64 pri_len_fmt:2;	/* 25:24 Input Length Format */
159  	u64 pri_len:24;		/* 23:0  Input Element/Byte/Bit Count */
160  				/*	 (less 1) */
161  };
162  
163  struct dax_ccb {
164  	struct dax_header hdr;	/* CCB Header */
165  	struct dax_control ctrl;/* Control Word */
166  	void *ca;		/* Completion Address */
167  	void *pri;		/* Primary Input Address */
168  	struct dax_data_access dac; /* Data Access Control */
169  	void *sec;		/* Secondary Input Address */
170  	u64 dword5;		/* depends on opcode */
171  	void *out;		/* Output Address */
172  	void *tbl;		/* Table Address or bitmap */
173  };
174  
175  struct dax_cca {
176  	u8	status;		/* user may mwait on this address */
177  	u8	err;		/* user visible error notification */
178  	u8	rsvd[2];	/* reserved */
179  	u32	n_remaining;	/* for QP partial symbol warning */
180  	u32	output_sz;	/* output in bytes */
181  	u32	rsvd2;		/* reserved */
182  	u64	run_cycles;	/* run time in OCND2 cycles */
183  	u64	run_stats;	/* nothing reported in version 1.0 */
184  	u32	n_processed;	/* number input elements */
185  	u32	rsvd3[5];	/* reserved */
186  	u64	retval;		/* command return value */
187  	u64	rsvd4[8];	/* reserved */
188  };
189  
190  /* per thread CCB context */
191  struct dax_ctx {
192  	struct dax_ccb		*ccb_buf;
193  	u64			ccb_buf_ra;	/* cached RA of ccb_buf  */
194  	struct dax_cca		*ca_buf;
195  	u64			ca_buf_ra;	/* cached RA of ca_buf   */
196  	struct page		*pages[DAX_CA_ELEMS][NUM_STREAM_TYPES];
197  						/* array of locked pages */
198  	struct task_struct	*owner;		/* thread that owns ctx  */
199  	struct task_struct	*client;	/* requesting thread     */
200  	union ccb_result	result;
201  	u32			ccb_count;
202  	u32			fail_count;
203  };
204  
205  /* driver public entry points */
206  static int dax_open(struct inode *inode, struct file *file);
207  static ssize_t dax_read(struct file *filp, char __user *buf,
208  			size_t count, loff_t *ppos);
209  static ssize_t dax_write(struct file *filp, const char __user *buf,
210  			 size_t count, loff_t *ppos);
211  static int dax_devmap(struct file *f, struct vm_area_struct *vma);
212  static int dax_close(struct inode *i, struct file *f);
213  
214  static const struct file_operations dax_fops = {
215  	.owner	=	THIS_MODULE,
216  	.open	=	dax_open,
217  	.read	=	dax_read,
218  	.write	=	dax_write,
219  	.mmap	=	dax_devmap,
220  	.release =	dax_close,
221  };
222  
223  static int dax_ccb_exec(struct dax_ctx *ctx, const char __user *buf,
224  			size_t count, loff_t *ppos);
225  static int dax_ccb_info(u64 ca, struct ccb_info_result *info);
226  static int dax_ccb_kill(u64 ca, u16 *kill_res);
227  
228  static struct cdev c_dev;
229  static dev_t first;
230  static const struct class cl = {
231  	.name = DAX_NAME,
232  };
233  
234  static int max_ccb_version;
235  static int dax_debug;
236  module_param(dax_debug, int, 0644);
237  MODULE_PARM_DESC(dax_debug, "Debug flags");
238  
dax_attach(void)239  static int __init dax_attach(void)
240  {
241  	unsigned long dummy, hv_rv, major, minor, minor_requested, max_ccbs;
242  	struct mdesc_handle *hp = mdesc_grab();
243  	char *prop, *dax_name;
244  	bool found = false;
245  	int len, ret = 0;
246  	u64 pn;
247  
248  	if (hp == NULL) {
249  		dax_err("Unable to grab mdesc");
250  		return -ENODEV;
251  	}
252  
253  	mdesc_for_each_node_by_name(hp, pn, "virtual-device") {
254  		prop = (char *)mdesc_get_property(hp, pn, "name", &len);
255  		if (prop == NULL)
256  			continue;
257  		if (strncmp(prop, "dax", strlen("dax")))
258  			continue;
259  		dax_dbg("Found node 0x%llx = %s", pn, prop);
260  
261  		prop = (char *)mdesc_get_property(hp, pn, "compatible", &len);
262  		if (prop == NULL)
263  			continue;
264  		dax_dbg("Found node 0x%llx = %s", pn, prop);
265  		found = true;
266  		break;
267  	}
268  
269  	if (!found) {
270  		dax_err("No DAX device found");
271  		ret = -ENODEV;
272  		goto done;
273  	}
274  
275  	if (strncmp(prop, DAX2_STR, strlen(DAX2_STR)) == 0) {
276  		dax_name = DAX_NAME "2";
277  		major = DAX2_MAJOR;
278  		minor_requested = DAX2_MINOR;
279  		max_ccb_version = 1;
280  		dax_dbg("MD indicates DAX2 coprocessor");
281  	} else if (strncmp(prop, DAX1_STR, strlen(DAX1_STR)) == 0) {
282  		dax_name = DAX_NAME "1";
283  		major = DAX1_MAJOR;
284  		minor_requested = DAX1_MINOR;
285  		max_ccb_version = 0;
286  		dax_dbg("MD indicates DAX1 coprocessor");
287  	} else {
288  		dax_err("Unknown dax type: %s", prop);
289  		ret = -ENODEV;
290  		goto done;
291  	}
292  
293  	minor = minor_requested;
294  	dax_dbg("Registering DAX HV api with major %ld minor %ld", major,
295  		minor);
296  	if (sun4v_hvapi_register(HV_GRP_DAX, major, &minor)) {
297  		dax_err("hvapi_register failed");
298  		ret = -ENODEV;
299  		goto done;
300  	} else {
301  		dax_dbg("Max minor supported by HV = %ld (major %ld)", minor,
302  			major);
303  		minor = min(minor, minor_requested);
304  		dax_dbg("registered DAX major %ld minor %ld", major, minor);
305  	}
306  
307  	/* submit a zero length ccb array to query coprocessor queue size */
308  	hv_rv = sun4v_ccb_submit(0, 0, HV_CCB_QUERY_CMD, 0, &max_ccbs, &dummy);
309  	if (hv_rv != 0) {
310  		dax_err("get_hwqueue_size failed with status=%ld and max_ccbs=%ld",
311  			hv_rv, max_ccbs);
312  		ret = -ENODEV;
313  		goto done;
314  	}
315  
316  	if (max_ccbs != DAX_MAX_CCBS) {
317  		dax_err("HV reports unsupported max_ccbs=%ld", max_ccbs);
318  		ret = -ENODEV;
319  		goto done;
320  	}
321  
322  	if (alloc_chrdev_region(&first, 0, 1, DAX_NAME) < 0) {
323  		dax_err("alloc_chrdev_region failed");
324  		ret = -ENXIO;
325  		goto done;
326  	}
327  
328  	ret = class_register(&cl);
329  	if (ret)
330  		goto class_error;
331  
332  	if (device_create(&cl, NULL, first, NULL, dax_name) == NULL) {
333  		dax_err("device_create failed");
334  		ret = -ENXIO;
335  		goto device_error;
336  	}
337  
338  	cdev_init(&c_dev, &dax_fops);
339  	if (cdev_add(&c_dev, first, 1) == -1) {
340  		dax_err("cdev_add failed");
341  		ret = -ENXIO;
342  		goto cdev_error;
343  	}
344  
345  	pr_info("Attached DAX module\n");
346  	goto done;
347  
348  cdev_error:
349  	device_destroy(&cl, first);
350  device_error:
351  	class_unregister(&cl);
352  class_error:
353  	unregister_chrdev_region(first, 1);
354  done:
355  	mdesc_release(hp);
356  	return ret;
357  }
358  module_init(dax_attach);
359  
dax_detach(void)360  static void __exit dax_detach(void)
361  {
362  	pr_info("Cleaning up DAX module\n");
363  	cdev_del(&c_dev);
364  	device_destroy(&cl, first);
365  	class_unregister(&cl);
366  	unregister_chrdev_region(first, 1);
367  }
368  module_exit(dax_detach);
369  
370  /* map completion area */
dax_devmap(struct file * f,struct vm_area_struct * vma)371  static int dax_devmap(struct file *f, struct vm_area_struct *vma)
372  {
373  	struct dax_ctx *ctx = (struct dax_ctx *)f->private_data;
374  	size_t len = vma->vm_end - vma->vm_start;
375  
376  	dax_dbg("len=0x%lx, flags=0x%lx", len, vma->vm_flags);
377  
378  	if (ctx->owner != current) {
379  		dax_dbg("devmap called from wrong thread");
380  		return -EINVAL;
381  	}
382  
383  	if (len != DAX_MMAP_LEN) {
384  		dax_dbg("len(%lu) != DAX_MMAP_LEN(%d)", len, DAX_MMAP_LEN);
385  		return -EINVAL;
386  	}
387  
388  	/* completion area is mapped read-only for user */
389  	if (vma->vm_flags & VM_WRITE)
390  		return -EPERM;
391  	vm_flags_clear(vma, VM_MAYWRITE);
392  
393  	if (remap_pfn_range(vma, vma->vm_start, ctx->ca_buf_ra >> PAGE_SHIFT,
394  			    len, vma->vm_page_prot))
395  		return -EAGAIN;
396  
397  	dax_dbg("mmapped completion area at uva 0x%lx", vma->vm_start);
398  	return 0;
399  }
400  
401  /* Unlock user pages. Called during dequeue or device close */
dax_unlock_pages(struct dax_ctx * ctx,int ccb_index,int nelem)402  static void dax_unlock_pages(struct dax_ctx *ctx, int ccb_index, int nelem)
403  {
404  	int i, j;
405  
406  	for (i = ccb_index; i < ccb_index + nelem; i++) {
407  		for (j = 0; j < NUM_STREAM_TYPES; j++) {
408  			struct page *p = ctx->pages[i][j];
409  
410  			if (p) {
411  				dax_dbg("freeing page %p", p);
412  				unpin_user_pages_dirty_lock(&p, 1, j == OUT);
413  				ctx->pages[i][j] = NULL;
414  			}
415  		}
416  	}
417  }
418  
dax_lock_page(void * va,struct page ** p)419  static int dax_lock_page(void *va, struct page **p)
420  {
421  	int ret;
422  
423  	dax_dbg("uva %p", va);
424  
425  	ret = pin_user_pages_fast((unsigned long)va, 1, FOLL_WRITE, p);
426  	if (ret == 1) {
427  		dax_dbg("locked page %p, for VA %p", *p, va);
428  		return 0;
429  	}
430  
431  	dax_dbg("pin_user_pages failed, va=%p, ret=%d", va, ret);
432  	return -1;
433  }
434  
/*
 * Pin the user pages referenced by the first nelem CCBs in
 * ctx->ccb_buf.  The page for stream type T of CCB i is recorded in
 * ctx->pages[idx + i][T], where idx is the first completion-area slot
 * used by this submission.
 *
 * Returns DAX_SUBMIT_OK on success.  On failure, every page recorded
 * for slots idx .. idx + nelem - 1 is unpinned, *err_va receives the
 * address that could not be pinned, and DAX_SUBMIT_ERR_NOACCESS is
 * returned.
 */
static int dax_lock_pages(struct dax_ctx *ctx, int idx,
			  int nelem, u64 *err_va)
{
	int i;

	for (i = 0; i < nelem; i++) {
		struct dax_ccb *ccbp = &ctx->ccb_buf[i];

		/*
		 * For each address in the CCB whose type is virtual,
		 * lock the page and change the type to virtual alternate
		 * context. On error, return the offending address in
		 * err_va.
		 */
		if (ccbp->hdr.out_addr_type == DAX_ADDR_TYPE_VA) {
			dax_dbg("output");
			if (dax_lock_page(ccbp->out,
					  &ctx->pages[i + idx][OUT]) != 0) {
				*err_va = (u64)ccbp->out;
				goto error;
			}
			ccbp->hdr.out_addr_type = DAX_ADDR_TYPE_VA_ALT;
		}

		if (ccbp->hdr.pri_addr_type == DAX_ADDR_TYPE_VA) {
			dax_dbg("input");
			if (dax_lock_page(ccbp->pri,
					  &ctx->pages[i + idx][PRI]) != 0) {
				*err_va = (u64)ccbp->pri;
				goto error;
			}
			ccbp->hdr.pri_addr_type = DAX_ADDR_TYPE_VA_ALT;
		}

		if (ccbp->hdr.sec_addr_type == DAX_ADDR_TYPE_VA) {
			dax_dbg("sec input");
			if (dax_lock_page(ccbp->sec,
					  &ctx->pages[i + idx][SEC]) != 0) {
				*err_va = (u64)ccbp->sec;
				goto error;
			}
			ccbp->hdr.sec_addr_type = DAX_ADDR_TYPE_VA_ALT;
		}

		if (ccbp->hdr.table_addr_type == DAX_ADDR_TYPE_VA) {
			dax_dbg("tbl");
			if (dax_lock_page(ccbp->tbl,
					  &ctx->pages[i + idx][TBL]) != 0) {
				*err_va = (u64)ccbp->tbl;
				goto error;
			}
			ccbp->hdr.table_addr_type = DAX_ADDR_TYPE_VA_ALT;
		}

		/* skip over 2nd 64 bytes of long CCB */
		if (ccbp->hdr.longccb)
			i++;
	}
	return DAX_SUBMIT_OK;

error:
	dax_unlock_pages(ctx, idx, nelem);
	return DAX_SUBMIT_ERR_NOACCESS;
}
499  
dax_ccb_wait(struct dax_ctx * ctx,int idx)500  static void dax_ccb_wait(struct dax_ctx *ctx, int idx)
501  {
502  	int ret, nretries;
503  	u16 kill_res;
504  
505  	dax_dbg("idx=%d", idx);
506  
507  	for (nretries = 0; nretries < DAX_CCB_RETRIES; nretries++) {
508  		if (ctx->ca_buf[idx].status == CCA_STAT_NOT_COMPLETED)
509  			udelay(DAX_CCB_USEC);
510  		else
511  			return;
512  	}
513  	dax_dbg("ctx (%p): CCB[%d] timed out, wait usec=%d, retries=%d. Killing ccb",
514  		(void *)ctx, idx, DAX_CCB_USEC, DAX_CCB_RETRIES);
515  
516  	ret = dax_ccb_kill(ctx->ca_buf_ra + idx * sizeof(struct dax_cca),
517  			   &kill_res);
518  	dax_dbg("Kill CCB[%d] %s", idx, ret ? "failed" : "succeeded");
519  }
520  
dax_close(struct inode * ino,struct file * f)521  static int dax_close(struct inode *ino, struct file *f)
522  {
523  	struct dax_ctx *ctx = (struct dax_ctx *)f->private_data;
524  	int i;
525  
526  	f->private_data = NULL;
527  
528  	for (i = 0; i < DAX_CA_ELEMS; i++) {
529  		if (ctx->ca_buf[i].status == CCA_STAT_NOT_COMPLETED) {
530  			dax_dbg("CCB[%d] not completed", i);
531  			dax_ccb_wait(ctx, i);
532  		}
533  		dax_unlock_pages(ctx, i, 1);
534  	}
535  
536  	kfree(ctx->ccb_buf);
537  	kfree(ctx->ca_buf);
538  	dax_stat_dbg("CCBs: %d good, %d bad", ctx->ccb_count, ctx->fail_count);
539  	kfree(ctx);
540  
541  	return 0;
542  }
543  
dax_read(struct file * f,char __user * buf,size_t count,loff_t * ppos)544  static ssize_t dax_read(struct file *f, char __user *buf,
545  			size_t count, loff_t *ppos)
546  {
547  	struct dax_ctx *ctx = f->private_data;
548  
549  	if (ctx->client != current)
550  		return -EUSERS;
551  
552  	ctx->client = NULL;
553  
554  	if (count != sizeof(union ccb_result))
555  		return -EINVAL;
556  	if (copy_to_user(buf, &ctx->result, sizeof(union ccb_result)))
557  		return -EFAULT;
558  	return count;
559  }
560  
dax_write(struct file * f,const char __user * buf,size_t count,loff_t * ppos)561  static ssize_t dax_write(struct file *f, const char __user *buf,
562  			 size_t count, loff_t *ppos)
563  {
564  	struct dax_ctx *ctx = f->private_data;
565  	struct dax_command hdr;
566  	unsigned long ca;
567  	int i, idx, ret;
568  
569  	if (ctx->client != NULL)
570  		return -EINVAL;
571  
572  	if (count == 0 || count > DAX_MAX_CCBS * sizeof(struct dax_ccb))
573  		return -EINVAL;
574  
575  	if (count % sizeof(struct dax_ccb) == 0)
576  		return dax_ccb_exec(ctx, buf, count, ppos); /* CCB EXEC */
577  
578  	if (count != sizeof(struct dax_command))
579  		return -EINVAL;
580  
581  	/* immediate command */
582  	if (ctx->owner != current)
583  		return -EUSERS;
584  
585  	if (copy_from_user(&hdr, buf, sizeof(hdr)))
586  		return -EFAULT;
587  
588  	ca = ctx->ca_buf_ra + hdr.ca_offset;
589  
590  	switch (hdr.command) {
591  	case CCB_KILL:
592  		if (hdr.ca_offset >= DAX_MMAP_LEN) {
593  			dax_dbg("invalid ca_offset (%d) >= ca_buflen (%d)",
594  				hdr.ca_offset, DAX_MMAP_LEN);
595  			return -EINVAL;
596  		}
597  
598  		ret = dax_ccb_kill(ca, &ctx->result.kill.action);
599  		if (ret != 0) {
600  			dax_dbg("dax_ccb_kill failed (ret=%d)", ret);
601  			return ret;
602  		}
603  
604  		dax_info_dbg("killed (ca_offset %d)", hdr.ca_offset);
605  		idx = hdr.ca_offset / sizeof(struct dax_cca);
606  		ctx->ca_buf[idx].status = CCA_STAT_KILLED;
607  		ctx->ca_buf[idx].err = CCA_ERR_KILLED;
608  		ctx->client = current;
609  		return count;
610  
611  	case CCB_INFO:
612  		if (hdr.ca_offset >= DAX_MMAP_LEN) {
613  			dax_dbg("invalid ca_offset (%d) >= ca_buflen (%d)",
614  				hdr.ca_offset, DAX_MMAP_LEN);
615  			return -EINVAL;
616  		}
617  
618  		ret = dax_ccb_info(ca, &ctx->result.info);
619  		if (ret != 0) {
620  			dax_dbg("dax_ccb_info failed (ret=%d)", ret);
621  			return ret;
622  		}
623  
624  		dax_info_dbg("info succeeded on ca_offset %d", hdr.ca_offset);
625  		ctx->client = current;
626  		return count;
627  
628  	case CCB_DEQUEUE:
629  		for (i = 0; i < DAX_CA_ELEMS; i++) {
630  			if (ctx->ca_buf[i].status !=
631  			    CCA_STAT_NOT_COMPLETED)
632  				dax_unlock_pages(ctx, i, 1);
633  		}
634  		return count;
635  
636  	default:
637  		return -EINVAL;
638  	}
639  }
640  
dax_open(struct inode * inode,struct file * f)641  static int dax_open(struct inode *inode, struct file *f)
642  {
643  	struct dax_ctx *ctx = NULL;
644  	int i;
645  
646  	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
647  	if (ctx == NULL)
648  		goto done;
649  
650  	ctx->ccb_buf = kcalloc(DAX_MAX_CCBS, sizeof(struct dax_ccb),
651  			       GFP_KERNEL);
652  	if (ctx->ccb_buf == NULL)
653  		goto done;
654  
655  	ctx->ccb_buf_ra = virt_to_phys(ctx->ccb_buf);
656  	dax_dbg("ctx->ccb_buf=0x%p, ccb_buf_ra=0x%llx",
657  		(void *)ctx->ccb_buf, ctx->ccb_buf_ra);
658  
659  	/* allocate CCB completion area buffer */
660  	ctx->ca_buf = kzalloc(DAX_MMAP_LEN, GFP_KERNEL);
661  	if (ctx->ca_buf == NULL)
662  		goto alloc_error;
663  	for (i = 0; i < DAX_CA_ELEMS; i++)
664  		ctx->ca_buf[i].status = CCA_STAT_COMPLETED;
665  
666  	ctx->ca_buf_ra = virt_to_phys(ctx->ca_buf);
667  	dax_dbg("ctx=0x%p, ctx->ca_buf=0x%p, ca_buf_ra=0x%llx",
668  		(void *)ctx, (void *)ctx->ca_buf, ctx->ca_buf_ra);
669  
670  	ctx->owner = current;
671  	f->private_data = ctx;
672  	return 0;
673  
674  alloc_error:
675  	kfree(ctx->ccb_buf);
676  done:
677  	kfree(ctx);
678  	return -ENOMEM;
679  }
680  
dax_hv_errno(unsigned long hv_ret,int * ret)681  static char *dax_hv_errno(unsigned long hv_ret, int *ret)
682  {
683  	switch (hv_ret) {
684  	case HV_EBADALIGN:
685  		*ret = -EFAULT;
686  		return "HV_EBADALIGN";
687  	case HV_ENORADDR:
688  		*ret = -EFAULT;
689  		return "HV_ENORADDR";
690  	case HV_EINVAL:
691  		*ret = -EINVAL;
692  		return "HV_EINVAL";
693  	case HV_EWOULDBLOCK:
694  		*ret = -EAGAIN;
695  		return "HV_EWOULDBLOCK";
696  	case HV_ENOACCESS:
697  		*ret = -EPERM;
698  		return "HV_ENOACCESS";
699  	default:
700  		break;
701  	}
702  
703  	*ret = -EIO;
704  	return "UNKNOWN";
705  }
706  
/*
 * Ask the hypervisor to kill the CCB whose completion area is at real
 * address ca.  The call is retried, with a DAX_CCB_USEC delay, for as
 * long as the hypervisor reports HV_EWOULDBLOCK, up to DAX_CCB_RETRIES
 * attempts.
 *
 * On success *kill_res holds the result reported by the hypervisor and
 * 0 is returned.  Otherwise returns the errno produced by
 * dax_hv_errno(), or -EAGAIN if every attempt would have blocked.
 */
static int dax_ccb_kill(u64 ca, u16 *kill_res)
{
	unsigned long hv_ret;
	int count, ret = 0;
	char *err_str;

	for (count = 0; count < DAX_CCB_RETRIES; count++) {
		dax_dbg("attempting kill on ca_ra 0x%llx", ca);
		hv_ret = sun4v_ccb_kill(ca, kill_res);

		if (hv_ret == HV_EOK) {
			dax_info_dbg("HV_EOK (ca_ra 0x%llx): %d", ca,
				     *kill_res);
			/*
			 * Return success explicitly.  ret may still hold
			 * -EAGAIN from an earlier attempt, which must not
			 * cause the kill to be issued again.
			 */
			return 0;
		}

		err_str = dax_hv_errno(hv_ret, &ret);
		dax_dbg("%s (ca_ra 0x%llx)", err_str, ca);

		if (ret != -EAGAIN)
			return ret;
		dax_info_dbg("ccb_kill count = %d", count);
		udelay(DAX_CCB_USEC);
	}

	return -EAGAIN;
}
733  
/*
 * Query the hypervisor for the state of the CCB whose completion area
 * is at real address ca, storing the answer in *info.
 *
 * Returns 0 on success or the errno produced by dax_hv_errno().
 */
static int dax_ccb_info(u64 ca, struct ccb_info_result *info)
{
	unsigned long hv_ret;
	char *err_str;
	int ret = 0;

	dax_dbg("attempting info on ca_ra 0x%llx", ca);
	hv_ret = sun4v_ccb_info(ca, info);

	if (hv_ret == HV_EOK) {
		dax_info_dbg("HV_EOK (ca_ra 0x%llx): %d", ca, info->state);
		if (info->state == DAX_CCB_ENQUEUED) {
			dax_info_dbg("dax_unit %d, queue_num %d, queue_pos %d",
				     info->inst_num, info->q_num, info->q_pos);
		}
	} else {
		err_str = dax_hv_errno(hv_ret, &ret);
		dax_dbg("%s (ca_ra 0x%llx)", err_str, ca);
	}

	return ret;
}
756  
dax_prt_ccbs(struct dax_ccb * ccb,int nelem)757  static void dax_prt_ccbs(struct dax_ccb *ccb, int nelem)
758  {
759  	int i, j;
760  	u64 *ccbp;
761  
762  	dax_dbg("ccb buffer:");
763  	for (i = 0; i < nelem; i++) {
764  		ccbp = (u64 *)&ccb[i];
765  		dax_dbg(" %sccb[%d]", ccb[i].hdr.longccb ? "long " : "",  i);
766  		for (j = 0; j < 8; j++)
767  			dax_dbg("\tccb[%d].dwords[%d]=0x%llx",
768  				i, j, *(ccbp + j));
769  	}
770  }
771  
772  /*
773   * Validates user CCB content.  Also sets completion address and address types
774   * for all addresses contained in CCB.
775   */
dax_preprocess_usr_ccbs(struct dax_ctx * ctx,int idx,int nelem)776  static int dax_preprocess_usr_ccbs(struct dax_ctx *ctx, int idx, int nelem)
777  {
778  	int i;
779  
780  	/*
781  	 * The user is not allowed to specify real address types in
782  	 * the CCB header.  This must be enforced by the kernel before
783  	 * submitting the CCBs to HV.  The only allowed values for all
784  	 * address fields are VA or IMM
785  	 */
786  	for (i = 0; i < nelem; i++) {
787  		struct dax_ccb *ccbp = &ctx->ccb_buf[i];
788  		unsigned long ca_offset;
789  
790  		if (ccbp->hdr.ccb_version > max_ccb_version)
791  			return DAX_SUBMIT_ERR_CCB_INVAL;
792  
793  		switch (ccbp->hdr.opcode) {
794  		case DAX_OP_SYNC_NOP:
795  		case DAX_OP_EXTRACT:
796  		case DAX_OP_SCAN_VALUE:
797  		case DAX_OP_SCAN_RANGE:
798  		case DAX_OP_TRANSLATE:
799  		case DAX_OP_SCAN_VALUE | DAX_OP_INVERT:
800  		case DAX_OP_SCAN_RANGE | DAX_OP_INVERT:
801  		case DAX_OP_TRANSLATE | DAX_OP_INVERT:
802  		case DAX_OP_SELECT:
803  			break;
804  		default:
805  			return DAX_SUBMIT_ERR_CCB_INVAL;
806  		}
807  
808  		if (ccbp->hdr.out_addr_type != DAX_ADDR_TYPE_VA &&
809  		    ccbp->hdr.out_addr_type != DAX_ADDR_TYPE_NONE) {
810  			dax_dbg("invalid out_addr_type in user CCB[%d]", i);
811  			return DAX_SUBMIT_ERR_CCB_INVAL;
812  		}
813  
814  		if (ccbp->hdr.pri_addr_type != DAX_ADDR_TYPE_VA &&
815  		    ccbp->hdr.pri_addr_type != DAX_ADDR_TYPE_NONE) {
816  			dax_dbg("invalid pri_addr_type in user CCB[%d]", i);
817  			return DAX_SUBMIT_ERR_CCB_INVAL;
818  		}
819  
820  		if (ccbp->hdr.sec_addr_type != DAX_ADDR_TYPE_VA &&
821  		    ccbp->hdr.sec_addr_type != DAX_ADDR_TYPE_NONE) {
822  			dax_dbg("invalid sec_addr_type in user CCB[%d]", i);
823  			return DAX_SUBMIT_ERR_CCB_INVAL;
824  		}
825  
826  		if (ccbp->hdr.table_addr_type != DAX_ADDR_TYPE_VA &&
827  		    ccbp->hdr.table_addr_type != DAX_ADDR_TYPE_NONE) {
828  			dax_dbg("invalid table_addr_type in user CCB[%d]", i);
829  			return DAX_SUBMIT_ERR_CCB_INVAL;
830  		}
831  
832  		/* set completion (real) address and address type */
833  		ccbp->hdr.cca_addr_type = DAX_ADDR_TYPE_RA;
834  		ca_offset = (idx + i) * sizeof(struct dax_cca);
835  		ccbp->ca = (void *)ctx->ca_buf_ra + ca_offset;
836  		memset(&ctx->ca_buf[idx + i], 0, sizeof(struct dax_cca));
837  
838  		dax_dbg("ccb[%d]=%p, ca_offset=0x%lx, compl RA=0x%llx",
839  			i, ccbp, ca_offset, ctx->ca_buf_ra + ca_offset);
840  
841  		/* skip over 2nd 64 bytes of long CCB */
842  		if (ccbp->hdr.longccb)
843  			i++;
844  	}
845  
846  	return DAX_SUBMIT_OK;
847  }
848  
/*
 * Submit the user CCB array at buf (count bytes) to the hypervisor.
 * *ppos selects the first completion-area slot to use.
 *
 * The calling thread is recorded in ctx->client so that it can fetch
 * ctx->result.exec with a following read; the marker is cleared again
 * when every CCB was accepted.
 *
 * Returns the number of bytes of CCBs accepted by the hypervisor.
 * Returns 0, with the reason in ctx->result.exec.status, when the
 * submission is rejected before reaching the hypervisor.
 */
static int dax_ccb_exec(struct dax_ctx *ctx, const char __user *buf,
			size_t count, loff_t *ppos)
{
	/* Start from "nothing accepted" in case the hcall leaves it unset. */
	unsigned long accepted_len = 0, hv_rv;
	int i, idx, nccbs, naccepted;

	ctx->client = current;
	idx = *ppos;
	nccbs = count / sizeof(struct dax_ccb);

	if (ctx->owner != current) {
		dax_dbg("wrong thread");
		ctx->result.exec.status = DAX_SUBMIT_ERR_THR_INIT;
		return 0;
	}
	dax_dbg("args: ccb_buf_len=%ld, idx=%d", count, idx);

	/* for given index and length, verify ca_buf range exists */
	if (idx < 0 || idx > (DAX_CA_ELEMS - nccbs)) {
		ctx->result.exec.status = DAX_SUBMIT_ERR_NO_CA_AVAIL;
		return 0;
	}

	/*
	 * Copy CCBs into kernel buffer to prevent modification by the
	 * user in between validation and submission.
	 */
	if (copy_from_user(ctx->ccb_buf, buf, count)) {
		dax_dbg("copyin of user CCB buffer failed");
		ctx->result.exec.status = DAX_SUBMIT_ERR_CCB_ARR_MMU_MISS;
		return 0;
	}

	/* check that ca_buf[idx] .. ca_buf[idx + nccbs - 1] are available */
	for (i = idx; i < idx + nccbs; i++) {
		if (ctx->ca_buf[i].status == CCA_STAT_NOT_COMPLETED) {
			dax_dbg("CA range not available, dequeue needed");
			ctx->result.exec.status = DAX_SUBMIT_ERR_NO_CA_AVAIL;
			return 0;
		}
	}
	/* Drop pages still pinned for earlier CCBs that used these slots. */
	dax_unlock_pages(ctx, idx, nccbs);

	ctx->result.exec.status = dax_preprocess_usr_ccbs(ctx, idx, nccbs);
	if (ctx->result.exec.status != DAX_SUBMIT_OK)
		return 0;

	ctx->result.exec.status = dax_lock_pages(ctx, idx, nccbs,
						 &ctx->result.exec.status_data);
	if (ctx->result.exec.status != DAX_SUBMIT_OK)
		return 0;

	if (dax_debug & DAX_DBG_FLG_BASIC)
		dax_prt_ccbs(ctx->ccb_buf, nccbs);

	hv_rv = sun4v_ccb_submit(ctx->ccb_buf_ra, count,
				 HV_CCB_QUERY_CMD | HV_CCB_VA_SECONDARY, 0,
				 &accepted_len, &ctx->result.exec.status_data);

	switch (hv_rv) {
	case HV_EOK:
		/*
		 * Hcall succeeded with no errors but the accepted
		 * length may be less than the requested length.  The
		 * only way the driver can resubmit the remainder is
		 * to wait for completion of the submitted CCBs since
		 * there is no way to guarantee the ordering semantics
		 * required by the client applications.  Therefore we
		 * let the user library deal with resubmissions.
		 */
		ctx->result.exec.status = DAX_SUBMIT_OK;
		break;
	case HV_EWOULDBLOCK:
		/*
		 * This is a transient HV API error. The user library
		 * can retry.
		 */
		dax_dbg("hcall returned HV_EWOULDBLOCK");
		ctx->result.exec.status = DAX_SUBMIT_ERR_WOULDBLOCK;
		break;
	case HV_ENOMAP:
		/*
		 * HV was unable to translate a VA. The VA it could
		 * not translate is returned in the status_data param.
		 */
		dax_dbg("hcall returned HV_ENOMAP");
		ctx->result.exec.status = DAX_SUBMIT_ERR_NOMAP;
		break;
	case HV_EINVAL:
		/*
		 * This is the result of an invalid user CCB as HV is
		 * validating some of the user CCB fields.  Pass this
		 * error back to the user. There is no supporting info
		 * to isolate the invalid field.
		 */
		dax_dbg("hcall returned HV_EINVAL");
		ctx->result.exec.status = DAX_SUBMIT_ERR_CCB_INVAL;
		break;
	case HV_ENOACCESS:
		/*
		 * HV found a VA that did not have the appropriate
		 * permissions (such as the w bit). The VA in question
		 * is returned in status_data param.
		 */
		dax_dbg("hcall returned HV_ENOACCESS");
		ctx->result.exec.status = DAX_SUBMIT_ERR_NOACCESS;
		break;
	case HV_EUNAVAILABLE:
		/*
		 * The requested CCB operation could not be performed
		 * at this time. Return the specific unavailable code
		 * in the status_data field.
		 */
		dax_dbg("hcall returned HV_EUNAVAILABLE");
		ctx->result.exec.status = DAX_SUBMIT_ERR_UNAVAIL;
		break;
	default:
		ctx->result.exec.status = DAX_SUBMIT_ERR_INTERNAL;
		dax_dbg("unknown hcall return value (%ld)", hv_rv);
		break;
	}

	/* unlock pages associated with the unaccepted CCBs */
	naccepted = accepted_len / sizeof(struct dax_ccb);
	dax_unlock_pages(ctx, idx + naccepted, nccbs - naccepted);

	/*
	 * Mark the slots of unaccepted CCBs as completed again so they
	 * are available for reuse; preprocessing zeroed them, which
	 * left them looking like pending (not completed) CCBs.
	 */
	for (i = idx + naccepted; i < idx + nccbs; i++)
		ctx->ca_buf[i].status = CCA_STAT_COMPLETED;

	ctx->ccb_count += naccepted;
	ctx->fail_count += nccbs - naccepted;

	dax_dbg("hcall rv=%ld, accepted_len=%ld, status_data=0x%llx, ret status=%d",
		hv_rv, accepted_len, ctx->result.exec.status_data,
		ctx->result.exec.status);

	if (count == accepted_len)
		ctx->client = NULL; /* no read needed to complete protocol */
	return accepted_len;
}
990