1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Driver for IBM PowerNV compression accelerator
4  *
5  * Copyright (C) 2015 Dan Streetman, IBM Corp
6  */
7 
8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
9 
10 #include "nx-842.h"
11 
12 #include <linux/timer.h>
13 
14 #include <asm/prom.h>
15 #include <asm/icswx.h>
16 #include <asm/vas.h>
17 #include <asm/reg.h>
18 #include <asm/opal-api.h>
19 #include <asm/opal.h>
20 
21 MODULE_LICENSE("GPL");
22 MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
23 MODULE_DESCRIPTION("H/W Compression driver for IBM PowerNV processors");
24 MODULE_ALIAS_CRYPTO("842");
25 MODULE_ALIAS_CRYPTO("842-nx");
26 
27 #define WORKMEM_ALIGN	(CRB_ALIGN)
28 #define CSB_WAIT_MAX	(5000) /* ms */
29 #define VAS_RETRIES	(10)
30 
/*
 * Per-request working memory.  The exec functions align the caller-supplied
 * buffer up to WORKMEM_ALIGN (PTR_ALIGN) before treating it as this struct;
 * the trailing padding member reserves the room that alignment may consume.
 */
struct nx842_workmem {
	/* Below fields must be properly aligned */
	struct coprocessor_request_block crb; /* CRB_ALIGN align */
	struct data_descriptor_entry ddl_in[DDL_LEN_MAX]; /* DDE_ALIGN align */
	struct data_descriptor_entry ddl_out[DDL_LEN_MAX]; /* DDE_ALIGN align */
	/* Above fields must be properly aligned */

	/* submission time, set by the exec functions, read by wait_for_csb() */
	ktime_t start;

	char padding[WORKMEM_ALIGN]; /* unused, to allow alignment */
} __packed __aligned(WORKMEM_ALIGN);
42 
/*
 * One coprocessor entry discovered in the device tree.  Entries are linked
 * on the nx_coprocs list and freed by nx_delete_coprocs().
 */
struct nx_coproc {
	unsigned int chip_id;	/* chip the entry was found on */
	unsigned int ct;	/* Can be 842 or GZIP high/normal*/
	unsigned int ci;	/* Coprocessor instance, used with icswx */
	struct {
		/* receive window opened in vas_cfg_coproc_info(), if any */
		struct vas_window *rxwin;
		/* VAS id passed to vas_rx_win_open()/vas_tx_win_open() */
		int id;
	} vas;
	struct list_head list;	/* link in nx_coprocs */
};
53 
/*
 * Per-CPU VAS send window.  Each CPU's window targets the NX engine on the
 * chip that CPU belongs to (see nx_open_percpu_txwins()), so a request is
 * sent to the engine local to the CPU on which the caller is executing.
 * Only used by the VAS (copy/paste) path.
 */
static DEFINE_PER_CPU(struct vas_window *, cpu_txwin);

/* no cpu hotplug on powernv, so this list never changes after init */
static LIST_HEAD(nx_coprocs);
/* coprocessor type of the first icswx coprocessor probed; 0 if none */
static unsigned int nx842_ct;	/* used in icswx function */
63 
/*
 * Coprocessor type values.  These match the values used by skiboot and
 * the coprocessor type encoding given in the NX workbook.
 */
68 #define NX_CT_GZIP	(2)	/* on P9 and later */
69 #define NX_CT_842	(3)
70 
/*
 * Request submission backend, selected once in nx_compress_powernv_init():
 * nx842_exec_icswx() or nx842_exec_vas().
 */
static int (*nx842_powernv_exec)(const unsigned char *in,
				unsigned int inlen, unsigned char *out,
				unsigned int *outlenp, void *workmem, int fc);
74 
75 /*
76  * setup_indirect_dde - Setup an indirect DDE
77  *
78  * The DDE is setup with the DDE count, byte count, and address of
79  * first direct DDE in the list.
80  */
setup_indirect_dde(struct data_descriptor_entry * dde,struct data_descriptor_entry * ddl,unsigned int dde_count,unsigned int byte_count)81 static void setup_indirect_dde(struct data_descriptor_entry *dde,
82 			       struct data_descriptor_entry *ddl,
83 			       unsigned int dde_count, unsigned int byte_count)
84 {
85 	dde->flags = 0;
86 	dde->count = dde_count;
87 	dde->index = 0;
88 	dde->length = cpu_to_be32(byte_count);
89 	dde->address = cpu_to_be64(nx842_get_pa(ddl));
90 }
91 
92 /*
93  * setup_direct_dde - Setup single DDE from buffer
94  *
95  * The DDE is setup with the buffer and length.  The buffer must be properly
96  * aligned.  The used length is returned.
97  * Returns:
98  *   N    Successfully set up DDE with N bytes
99  */
setup_direct_dde(struct data_descriptor_entry * dde,unsigned long pa,unsigned int len)100 static unsigned int setup_direct_dde(struct data_descriptor_entry *dde,
101 				     unsigned long pa, unsigned int len)
102 {
103 	unsigned int l = min_t(unsigned int, len, LEN_ON_PAGE(pa));
104 
105 	dde->flags = 0;
106 	dde->count = 0;
107 	dde->index = 0;
108 	dde->length = cpu_to_be32(l);
109 	dde->address = cpu_to_be64(pa);
110 
111 	return l;
112 }
113 
114 /*
115  * setup_ddl - Setup DDL from buffer
116  *
117  * Returns:
118  *   0		Successfully set up DDL
119  */
setup_ddl(struct data_descriptor_entry * dde,struct data_descriptor_entry * ddl,unsigned char * buf,unsigned int len,bool in)120 static int setup_ddl(struct data_descriptor_entry *dde,
121 		     struct data_descriptor_entry *ddl,
122 		     unsigned char *buf, unsigned int len,
123 		     bool in)
124 {
125 	unsigned long pa = nx842_get_pa(buf);
126 	int i, ret, total_len = len;
127 
128 	if (!IS_ALIGNED(pa, DDE_BUFFER_ALIGN)) {
129 		pr_debug("%s buffer pa 0x%lx not 0x%x-byte aligned\n",
130 			 in ? "input" : "output", pa, DDE_BUFFER_ALIGN);
131 		return -EINVAL;
132 	}
133 
134 	/* only need to check last mult; since buffer must be
135 	 * DDE_BUFFER_ALIGN aligned, and that is a multiple of
136 	 * DDE_BUFFER_SIZE_MULT, and pre-last page DDE buffers
137 	 * are guaranteed a multiple of DDE_BUFFER_SIZE_MULT.
138 	 */
139 	if (len % DDE_BUFFER_LAST_MULT) {
140 		pr_debug("%s buffer len 0x%x not a multiple of 0x%x\n",
141 			 in ? "input" : "output", len, DDE_BUFFER_LAST_MULT);
142 		if (in)
143 			return -EINVAL;
144 		len = round_down(len, DDE_BUFFER_LAST_MULT);
145 	}
146 
147 	/* use a single direct DDE */
148 	if (len <= LEN_ON_PAGE(pa)) {
149 		ret = setup_direct_dde(dde, pa, len);
150 		WARN_ON(ret < len);
151 		return 0;
152 	}
153 
154 	/* use the DDL */
155 	for (i = 0; i < DDL_LEN_MAX && len > 0; i++) {
156 		ret = setup_direct_dde(&ddl[i], pa, len);
157 		buf += ret;
158 		len -= ret;
159 		pa = nx842_get_pa(buf);
160 	}
161 
162 	if (len > 0) {
163 		pr_debug("0x%x total %s bytes 0x%x too many for DDL.\n",
164 			 total_len, in ? "input" : "output", len);
165 		if (in)
166 			return -EMSGSIZE;
167 		total_len -= len;
168 	}
169 	setup_indirect_dde(dde, ddl, i, total_len);
170 
171 	return 0;
172 }
173 
/*
 * Log an error message followed by a dump of the CSB's flags, cs, cc and ce
 * bytes and its (byte-swapped) count field.  Extra printf arguments for
 * @msg go in the variadic part.
 */
#define CSB_ERR(csb, msg, ...)					\
	pr_err("ERROR: " msg " : %02x %02x %02x %02x %08x\n",	\
	       ##__VA_ARGS__, (csb)->flags,			\
	       (csb)->cs, (csb)->cc, (csb)->ce,			\
	       be32_to_cpu((csb)->count))

/* As CSB_ERR(), but also reports the address recorded in the CSB. */
#define CSB_ERR_ADDR(csb, msg, ...)				\
	CSB_ERR(csb, msg " at %lx", ##__VA_ARGS__,		\
		(unsigned long)be64_to_cpu((csb)->address))
183 
/*
 * wait_for_csb - wait for a submitted request to complete and decode its CSB
 *
 * Busy-polls @csb until its valid bit (CSB_V) is set or CSB_WAIT_MAX ms have
 * passed since @wmem->start, then checks the CSB flags, completion sequence,
 * completion code and completion extension in turn.
 *
 * Returns 0 when the request completed and a target byte count is present
 * in the CSB, otherwise a negative errno: -ETIMEDOUT when the CSB never
 * became valid, -ENOSPC when the output buffer was too small, -EINTR when
 * the function was aborted, and -EINVAL or -EPROTO for the remaining
 * completion codes as listed in the switch below.
 */
static int wait_for_csb(struct nx842_workmem *wmem,
			struct coprocessor_status_block *csb)
{
	ktime_t start = wmem->start, now = ktime_get();
	ktime_t timeout = ktime_add_ms(start, CSB_WAIT_MAX);

	/* spin until the valid bit appears or the deadline passes */
	while (!(READ_ONCE(csb->flags) & CSB_V)) {
		cpu_relax();
		now = ktime_get();
		if (ktime_after(now, timeout))
			break;
	}

	/* hw has updated csb and output buffer */
	barrier();

	/* check CSB flags */
	if (!(csb->flags & CSB_V)) {
		CSB_ERR(csb, "CSB still not valid after %ld us, giving up",
			(long)ktime_us_delta(now, start));
		return -ETIMEDOUT;
	}
	if (csb->flags & CSB_F) {
		CSB_ERR(csb, "Invalid CSB format");
		return -EPROTO;
	}
	if (csb->flags & CSB_CH) {
		CSB_ERR(csb, "Invalid CSB chaining state");
		return -EPROTO;
	}

	/* verify CSB completion sequence is 0 */
	if (csb->cs) {
		CSB_ERR(csb, "Invalid CSB completion sequence");
		return -EPROTO;
	}

	/* check CSB Completion Code */
	switch (csb->cc) {
	/* no error */
	case CSB_CC_SUCCESS:
		break;
	case CSB_CC_TPBC_GT_SPBC:
		/* not an error, but the compressed data is
		 * larger than the uncompressed data :(
		 */
		break;

	/* input data errors */
	case CSB_CC_OPERAND_OVERLAP:
		/* input and output buffers overlap */
		CSB_ERR(csb, "Operand Overlap error");
		return -EINVAL;
	case CSB_CC_INVALID_OPERAND:
		CSB_ERR(csb, "Invalid operand");
		return -EINVAL;
	case CSB_CC_NOSPC:
		/* output buffer too small */
		return -ENOSPC;
	case CSB_CC_ABORT:
		CSB_ERR(csb, "Function aborted");
		return -EINTR;
	case CSB_CC_CRC_MISMATCH:
		CSB_ERR(csb, "CRC mismatch");
		return -EINVAL;
	case CSB_CC_TEMPL_INVALID:
		CSB_ERR(csb, "Compressed data template invalid");
		return -EINVAL;
	case CSB_CC_TEMPL_OVERFLOW:
		CSB_ERR(csb, "Compressed data template shows data past end");
		return -EINVAL;
	case CSB_CC_EXCEED_BYTE_COUNT:	/* P9 or later */
		/*
		 * DDE byte count exceeds the limit specified in Maximum
		 * byte count register.
		 */
		CSB_ERR(csb, "DDE byte count exceeds the limit");
		return -EINVAL;

	/* these should not happen */
	case CSB_CC_INVALID_ALIGN:
		/* setup_ddl should have detected this */
		CSB_ERR_ADDR(csb, "Invalid alignment");
		return -EINVAL;
	case CSB_CC_DATA_LENGTH:
		/* setup_ddl should have detected this */
		CSB_ERR(csb, "Invalid data length");
		return -EINVAL;
	case CSB_CC_WR_TRANSLATION:
	case CSB_CC_TRANSLATION:
	case CSB_CC_TRANSLATION_DUP1:
	case CSB_CC_TRANSLATION_DUP2:
	case CSB_CC_TRANSLATION_DUP3:
	case CSB_CC_TRANSLATION_DUP4:
	case CSB_CC_TRANSLATION_DUP5:
	case CSB_CC_TRANSLATION_DUP6:
		/* should not happen, we use physical addrs */
		CSB_ERR_ADDR(csb, "Translation error");
		return -EPROTO;
	case CSB_CC_WR_PROTECTION:
	case CSB_CC_PROTECTION:
	case CSB_CC_PROTECTION_DUP1:
	case CSB_CC_PROTECTION_DUP2:
	case CSB_CC_PROTECTION_DUP3:
	case CSB_CC_PROTECTION_DUP4:
	case CSB_CC_PROTECTION_DUP5:
	case CSB_CC_PROTECTION_DUP6:
		/* should not happen, we use physical addrs */
		CSB_ERR_ADDR(csb, "Protection error");
		return -EPROTO;
	case CSB_CC_PRIVILEGE:
		/* shouldn't happen, we're in HYP mode */
		CSB_ERR(csb, "Insufficient Privilege error");
		return -EPROTO;
	case CSB_CC_EXCESSIVE_DDE:
		/* shouldn't happen, setup_ddl doesn't use many dde's */
		CSB_ERR(csb, "Too many DDEs in DDL");
		return -EINVAL;
	case CSB_CC_TRANSPORT:
	case CSB_CC_INVALID_CRB:	/* P9 or later */
		/* shouldn't happen, we setup CRB correctly */
		CSB_ERR(csb, "Invalid CRB");
		return -EINVAL;
	case CSB_CC_INVALID_DDE:	/* P9 or later */
		/*
		 * shouldn't happen, setup_direct/indirect_dde creates
		 * DDE right
		 */
		CSB_ERR(csb, "Invalid DDE");
		return -EINVAL;
	case CSB_CC_SEGMENTED_DDL:
		/* shouldn't happen, setup_ddl creates DDL right */
		CSB_ERR(csb, "Segmented DDL error");
		return -EINVAL;
	case CSB_CC_DDE_OVERFLOW:
		/* shouldn't happen, setup_ddl creates DDL right */
		CSB_ERR(csb, "DDE overflow error");
		return -EINVAL;
	case CSB_CC_SESSION:
		/* should not happen with ICSWX */
		CSB_ERR(csb, "Session violation error");
		return -EPROTO;
	case CSB_CC_CHAIN:
		/* should not happen, we don't use chained CRBs */
		CSB_ERR(csb, "Chained CRB error");
		return -EPROTO;
	case CSB_CC_SEQUENCE:
		/* should not happen, we don't use chained CRBs */
		CSB_ERR(csb, "CRB sequence number error");
		return -EPROTO;
	case CSB_CC_UNKNOWN_CODE:
		CSB_ERR(csb, "Unknown subfunction code");
		return -EPROTO;

	/* hardware errors */
	case CSB_CC_RD_EXTERNAL:
	case CSB_CC_RD_EXTERNAL_DUP1:
	case CSB_CC_RD_EXTERNAL_DUP2:
	case CSB_CC_RD_EXTERNAL_DUP3:
		CSB_ERR_ADDR(csb, "Read error outside coprocessor");
		return -EPROTO;
	case CSB_CC_WR_EXTERNAL:
		CSB_ERR_ADDR(csb, "Write error outside coprocessor");
		return -EPROTO;
	case CSB_CC_INTERNAL:
		CSB_ERR(csb, "Internal error in coprocessor");
		return -EPROTO;
	case CSB_CC_PROVISION:
		CSB_ERR(csb, "Storage provision error");
		return -EPROTO;
	case CSB_CC_HW:
		CSB_ERR(csb, "Correctable hardware error");
		return -EPROTO;
	case CSB_CC_HW_EXPIRED_TIMER:	/* P9 or later */
		CSB_ERR(csb, "Job did not finish within allowed time");
		return -EPROTO;

	default:
		CSB_ERR(csb, "Invalid CC %d", csb->cc);
		return -EPROTO;
	}

	/* check Completion Extension state */
	if (csb->ce & CSB_CE_TERMINATION) {
		CSB_ERR(csb, "CSB request was terminated");
		return -EPROTO;
	}
	if (csb->ce & CSB_CE_INCOMPLETE) {
		CSB_ERR(csb, "CSB request not complete");
		return -EPROTO;
	}
	/* callers read csb->count as the output length, so it must be present */
	if (!(csb->ce & CSB_CE_TPBC)) {
		CSB_ERR(csb, "TPBC not provided, unknown target length");
		return -EPROTO;
	}

	/* successful completion */
	pr_debug_ratelimited("Processed %u bytes in %lu us\n",
			     be32_to_cpu(csb->count),
			     (unsigned long)ktime_us_delta(now, start));

	return 0;
}
387 
nx842_config_crb(const unsigned char * in,unsigned int inlen,unsigned char * out,unsigned int outlen,struct nx842_workmem * wmem)388 static int nx842_config_crb(const unsigned char *in, unsigned int inlen,
389 			unsigned char *out, unsigned int outlen,
390 			struct nx842_workmem *wmem)
391 {
392 	struct coprocessor_request_block *crb;
393 	struct coprocessor_status_block *csb;
394 	u64 csb_addr;
395 	int ret;
396 
397 	crb = &wmem->crb;
398 	csb = &crb->csb;
399 
400 	/* Clear any previous values */
401 	memset(crb, 0, sizeof(*crb));
402 
403 	/* set up DDLs */
404 	ret = setup_ddl(&crb->source, wmem->ddl_in,
405 			(unsigned char *)in, inlen, true);
406 	if (ret)
407 		return ret;
408 
409 	ret = setup_ddl(&crb->target, wmem->ddl_out,
410 			out, outlen, false);
411 	if (ret)
412 		return ret;
413 
414 	/* set up CRB's CSB addr */
415 	csb_addr = nx842_get_pa(csb) & CRB_CSB_ADDRESS;
416 	csb_addr |= CRB_CSB_AT; /* Addrs are phys */
417 	crb->csb_addr = cpu_to_be64(csb_addr);
418 
419 	return 0;
420 }
421 
422 /**
423  * nx842_exec_icswx - compress/decompress data using the 842 algorithm
424  *
425  * (De)compression provided by the NX842 coprocessor on IBM PowerNV systems.
426  * This compresses or decompresses the provided input buffer into the provided
427  * output buffer.
428  *
429  * Upon return from this function @outlen contains the length of the
430  * output data.  If there is an error then @outlen will be 0 and an
431  * error will be specified by the return code from this function.
432  *
433  * The @workmem buffer should only be used by one function call at a time.
434  *
435  * @in: input buffer pointer
436  * @inlen: input buffer size
437  * @out: output buffer pointer
438  * @outlenp: output buffer size pointer
439  * @workmem: working memory buffer pointer, size determined by
440  *           nx842_powernv_driver.workmem_size
441  * @fc: function code, see CCW Function Codes in nx-842.h
442  *
443  * Returns:
444  *   0		Success, output of length @outlenp stored in the buffer at @out
445  *   -ENODEV	Hardware unavailable
446  *   -ENOSPC	Output buffer is to small
447  *   -EMSGSIZE	Input buffer too large
448  *   -EINVAL	buffer constraints do not fix nx842_constraints
449  *   -EPROTO	hardware error during operation
450  *   -ETIMEDOUT	hardware did not complete operation in reasonable time
451  *   -EINTR	operation was aborted
452  */
nx842_exec_icswx(const unsigned char * in,unsigned int inlen,unsigned char * out,unsigned int * outlenp,void * workmem,int fc)453 static int nx842_exec_icswx(const unsigned char *in, unsigned int inlen,
454 				  unsigned char *out, unsigned int *outlenp,
455 				  void *workmem, int fc)
456 {
457 	struct coprocessor_request_block *crb;
458 	struct coprocessor_status_block *csb;
459 	struct nx842_workmem *wmem;
460 	int ret;
461 	u32 ccw;
462 	unsigned int outlen = *outlenp;
463 
464 	wmem = PTR_ALIGN(workmem, WORKMEM_ALIGN);
465 
466 	*outlenp = 0;
467 
468 	/* shoudn't happen, we don't load without a coproc */
469 	if (!nx842_ct) {
470 		pr_err_ratelimited("coprocessor CT is 0");
471 		return -ENODEV;
472 	}
473 
474 	ret = nx842_config_crb(in, inlen, out, outlen, wmem);
475 	if (ret)
476 		return ret;
477 
478 	crb = &wmem->crb;
479 	csb = &crb->csb;
480 
481 	/* set up CCW */
482 	ccw = 0;
483 	ccw = SET_FIELD(CCW_CT, ccw, nx842_ct);
484 	ccw = SET_FIELD(CCW_CI_842, ccw, 0); /* use 0 for hw auto-selection */
485 	ccw = SET_FIELD(CCW_FC_842, ccw, fc);
486 
487 	wmem->start = ktime_get();
488 
489 	/* do ICSWX */
490 	ret = icswx(cpu_to_be32(ccw), crb);
491 
492 	pr_debug_ratelimited("icswx CR %x ccw %x crb->ccw %x\n", ret,
493 			     (unsigned int)ccw,
494 			     (unsigned int)be32_to_cpu(crb->ccw));
495 
496 	/*
497 	 * NX842 coprocessor sets 3rd bit in CR register with XER[S0].
498 	 * XER[S0] is the integer summary overflow bit which is nothing
499 	 * to do NX. Since this bit can be set with other return values,
500 	 * mask this bit.
501 	 */
502 	ret &= ~ICSWX_XERS0;
503 
504 	switch (ret) {
505 	case ICSWX_INITIATED:
506 		ret = wait_for_csb(wmem, csb);
507 		break;
508 	case ICSWX_BUSY:
509 		pr_debug_ratelimited("842 Coprocessor busy\n");
510 		ret = -EBUSY;
511 		break;
512 	case ICSWX_REJECTED:
513 		pr_err_ratelimited("ICSWX rejected\n");
514 		ret = -EPROTO;
515 		break;
516 	}
517 
518 	if (!ret)
519 		*outlenp = be32_to_cpu(csb->count);
520 
521 	return ret;
522 }
523 
524 /**
525  * nx842_exec_vas - compress/decompress data using the 842 algorithm
526  *
527  * (De)compression provided by the NX842 coprocessor on IBM PowerNV systems.
528  * This compresses or decompresses the provided input buffer into the provided
529  * output buffer.
530  *
531  * Upon return from this function @outlen contains the length of the
532  * output data.  If there is an error then @outlen will be 0 and an
533  * error will be specified by the return code from this function.
534  *
535  * The @workmem buffer should only be used by one function call at a time.
536  *
537  * @in: input buffer pointer
538  * @inlen: input buffer size
539  * @out: output buffer pointer
540  * @outlenp: output buffer size pointer
541  * @workmem: working memory buffer pointer, size determined by
542  *           nx842_powernv_driver.workmem_size
543  * @fc: function code, see CCW Function Codes in nx-842.h
544  *
545  * Returns:
546  *   0		Success, output of length @outlenp stored in the buffer
547  *		at @out
548  *   -ENODEV	Hardware unavailable
549  *   -ENOSPC	Output buffer is to small
550  *   -EMSGSIZE	Input buffer too large
551  *   -EINVAL	buffer constraints do not fix nx842_constraints
552  *   -EPROTO	hardware error during operation
553  *   -ETIMEDOUT	hardware did not complete operation in reasonable time
554  *   -EINTR	operation was aborted
555  */
nx842_exec_vas(const unsigned char * in,unsigned int inlen,unsigned char * out,unsigned int * outlenp,void * workmem,int fc)556 static int nx842_exec_vas(const unsigned char *in, unsigned int inlen,
557 				  unsigned char *out, unsigned int *outlenp,
558 				  void *workmem, int fc)
559 {
560 	struct coprocessor_request_block *crb;
561 	struct coprocessor_status_block *csb;
562 	struct nx842_workmem *wmem;
563 	struct vas_window *txwin;
564 	int ret, i = 0;
565 	u32 ccw;
566 	unsigned int outlen = *outlenp;
567 
568 	wmem = PTR_ALIGN(workmem, WORKMEM_ALIGN);
569 
570 	*outlenp = 0;
571 
572 	crb = &wmem->crb;
573 	csb = &crb->csb;
574 
575 	ret = nx842_config_crb(in, inlen, out, outlen, wmem);
576 	if (ret)
577 		return ret;
578 
579 	ccw = 0;
580 	ccw = SET_FIELD(CCW_FC_842, ccw, fc);
581 	crb->ccw = cpu_to_be32(ccw);
582 
583 	do {
584 		wmem->start = ktime_get();
585 		preempt_disable();
586 		txwin = this_cpu_read(cpu_txwin);
587 
588 		/*
589 		 * VAS copy CRB into L2 cache. Refer <asm/vas.h>.
590 		 * @crb and @offset.
591 		 */
592 		vas_copy_crb(crb, 0);
593 
594 		/*
595 		 * VAS paste previously copied CRB to NX.
596 		 * @txwin, @offset and @last (must be true).
597 		 */
598 		ret = vas_paste_crb(txwin, 0, 1);
599 		preempt_enable();
600 		/*
601 		 * Retry copy/paste function for VAS failures.
602 		 */
603 	} while (ret && (i++ < VAS_RETRIES));
604 
605 	if (ret) {
606 		pr_err_ratelimited("VAS copy/paste failed\n");
607 		return ret;
608 	}
609 
610 	ret = wait_for_csb(wmem, csb);
611 	if (!ret)
612 		*outlenp = be32_to_cpu(csb->count);
613 
614 	return ret;
615 }
616 
/**
 * nx842_powernv_compress - Compress data using the 842 algorithm
 *
 * Compression provided by the NX842 coprocessor on IBM PowerNV systems.
 * The input buffer is compressed and the result is stored in the
 * provided output buffer.  The request is issued through the backend
 * selected at init time, with function code CCW_FC_842_COMP_CRC.
 *
 * Upon return from this function @outlenp contains the length of the
 * compressed data.  If there is an error then @outlenp will be 0 and an
 * error will be specified by the return code from this function.
 *
 * @in: input buffer pointer
 * @inlen: input buffer size
 * @out: output buffer pointer
 * @outlenp: output buffer size pointer
 * @wmem: working memory buffer pointer, size determined by
 *        nx842_powernv_driver.workmem_size
 *
 * Returns: see @nx842_powernv_exec()
 */
static int nx842_powernv_compress(const unsigned char *in, unsigned int inlen,
				  unsigned char *out, unsigned int *outlenp,
				  void *wmem)
{
	return nx842_powernv_exec(in, inlen, out, outlenp,
				      wmem, CCW_FC_842_COMP_CRC);
}
644 
/**
 * nx842_powernv_decompress - Decompress data using the 842 algorithm
 *
 * Decompression provided by the NX842 coprocessor on IBM PowerNV systems.
 * The input buffer is decompressed and the result is stored in the
 * provided output buffer.  The request is issued through the backend
 * selected at init time, with function code CCW_FC_842_DECOMP_CRC.
 *
 * Upon return from this function @outlenp contains the length of the
 * decompressed data.  If there is an error then @outlenp will be 0 and an
 * error will be specified by the return code from this function.
 *
 * @in: input buffer pointer
 * @inlen: input buffer size
 * @out: output buffer pointer
 * @outlenp: output buffer size pointer
 * @wmem: working memory buffer pointer, size determined by
 *        nx842_powernv_driver.workmem_size
 *
 * Returns: see @nx842_powernv_exec()
 */
static int nx842_powernv_decompress(const unsigned char *in, unsigned int inlen,
				    unsigned char *out, unsigned int *outlenp,
				    void *wmem)
{
	return nx842_powernv_exec(in, inlen, out, outlenp,
				      wmem, CCW_FC_842_DECOMP_CRC);
}
672 
nx_add_coprocs_list(struct nx_coproc * coproc,int chipid)673 static inline void nx_add_coprocs_list(struct nx_coproc *coproc,
674 					int chipid)
675 {
676 	coproc->chip_id = chipid;
677 	INIT_LIST_HEAD(&coproc->list);
678 	list_add(&coproc->list, &nx_coprocs);
679 }
680 
nx_alloc_txwin(struct nx_coproc * coproc)681 static struct vas_window *nx_alloc_txwin(struct nx_coproc *coproc)
682 {
683 	struct vas_window *txwin = NULL;
684 	struct vas_tx_win_attr txattr;
685 
686 	/*
687 	 * Kernel requests will be high priority. So open send
688 	 * windows only for high priority RxFIFO entries.
689 	 */
690 	vas_init_tx_win_attr(&txattr, coproc->ct);
691 	txattr.lpid = 0;	/* lpid is 0 for kernel requests */
692 
693 	/*
694 	 * Open a VAS send window which is used to send request to NX.
695 	 */
696 	txwin = vas_tx_win_open(coproc->vas.id, coproc->ct, &txattr);
697 	if (IS_ERR(txwin))
698 		pr_err("ibm,nx-842: Can not open TX window: %ld\n",
699 				PTR_ERR(txwin));
700 
701 	return txwin;
702 }
703 
704 /*
705  * Identify chip ID for each CPU, open send wndow for the corresponding NX
706  * engine and save txwin in percpu cpu_txwin.
707  * cpu_txwin is used in copy/paste operation for each compression /
708  * decompression request.
709  */
nx_open_percpu_txwins(void)710 static int nx_open_percpu_txwins(void)
711 {
712 	struct nx_coproc *coproc, *n;
713 	unsigned int i, chip_id;
714 
715 	for_each_possible_cpu(i) {
716 		struct vas_window *txwin = NULL;
717 
718 		chip_id = cpu_to_chip_id(i);
719 
720 		list_for_each_entry_safe(coproc, n, &nx_coprocs, list) {
721 			/*
722 			 * Kernel requests use only high priority FIFOs. So
723 			 * open send windows for these FIFOs.
724 			 * GZIP is not supported in kernel right now.
725 			 */
726 
727 			if (coproc->ct != VAS_COP_TYPE_842_HIPRI)
728 				continue;
729 
730 			if (coproc->chip_id == chip_id) {
731 				txwin = nx_alloc_txwin(coproc);
732 				if (IS_ERR(txwin))
733 					return PTR_ERR(txwin);
734 
735 				per_cpu(cpu_txwin, i) = txwin;
736 				break;
737 			}
738 		}
739 
740 		if (!per_cpu(cpu_txwin, i)) {
741 			/* shouldn't happen, Each chip will have NX engine */
742 			pr_err("NX engine is not available for CPU %d\n", i);
743 			return -EINVAL;
744 		}
745 	}
746 
747 	return 0;
748 }
749 
nx_set_ct(struct nx_coproc * coproc,const char * priority,int high,int normal)750 static int __init nx_set_ct(struct nx_coproc *coproc, const char *priority,
751 				int high, int normal)
752 {
753 	if (!strcmp(priority, "High"))
754 		coproc->ct = high;
755 	else if (!strcmp(priority, "Normal"))
756 		coproc->ct = normal;
757 	else {
758 		pr_err("Invalid RxFIFO priority value\n");
759 		return -EINVAL;
760 	}
761 
762 	return 0;
763 }
764 
vas_cfg_coproc_info(struct device_node * dn,int chip_id,int vasid,int type,int * ct)765 static int __init vas_cfg_coproc_info(struct device_node *dn, int chip_id,
766 					int vasid, int type, int *ct)
767 {
768 	struct vas_window *rxwin = NULL;
769 	struct vas_rx_win_attr rxattr;
770 	u32 lpid, pid, tid, fifo_size;
771 	struct nx_coproc *coproc;
772 	u64 rx_fifo;
773 	const char *priority;
774 	int ret;
775 
776 	ret = of_property_read_u64(dn, "rx-fifo-address", &rx_fifo);
777 	if (ret) {
778 		pr_err("Missing rx-fifo-address property\n");
779 		return ret;
780 	}
781 
782 	ret = of_property_read_u32(dn, "rx-fifo-size", &fifo_size);
783 	if (ret) {
784 		pr_err("Missing rx-fifo-size property\n");
785 		return ret;
786 	}
787 
788 	ret = of_property_read_u32(dn, "lpid", &lpid);
789 	if (ret) {
790 		pr_err("Missing lpid property\n");
791 		return ret;
792 	}
793 
794 	ret = of_property_read_u32(dn, "pid", &pid);
795 	if (ret) {
796 		pr_err("Missing pid property\n");
797 		return ret;
798 	}
799 
800 	ret = of_property_read_u32(dn, "tid", &tid);
801 	if (ret) {
802 		pr_err("Missing tid property\n");
803 		return ret;
804 	}
805 
806 	ret = of_property_read_string(dn, "priority", &priority);
807 	if (ret) {
808 		pr_err("Missing priority property\n");
809 		return ret;
810 	}
811 
812 	coproc = kzalloc(sizeof(*coproc), GFP_KERNEL);
813 	if (!coproc)
814 		return -ENOMEM;
815 
816 	if (type == NX_CT_842)
817 		ret = nx_set_ct(coproc, priority, VAS_COP_TYPE_842_HIPRI,
818 			VAS_COP_TYPE_842);
819 	else if (type == NX_CT_GZIP)
820 		ret = nx_set_ct(coproc, priority, VAS_COP_TYPE_GZIP_HIPRI,
821 				VAS_COP_TYPE_GZIP);
822 
823 	if (ret)
824 		goto err_out;
825 
826 	vas_init_rx_win_attr(&rxattr, coproc->ct);
827 	rxattr.rx_fifo = rx_fifo;
828 	rxattr.rx_fifo_size = fifo_size;
829 	rxattr.lnotify_lpid = lpid;
830 	rxattr.lnotify_pid = pid;
831 	rxattr.lnotify_tid = tid;
832 	/*
833 	 * Maximum RX window credits can not be more than #CRBs in
834 	 * RxFIFO. Otherwise, can get checkstop if RxFIFO overruns.
835 	 */
836 	rxattr.wcreds_max = fifo_size / CRB_SIZE;
837 
838 	/*
839 	 * Open a VAS receice window which is used to configure RxFIFO
840 	 * for NX.
841 	 */
842 	rxwin = vas_rx_win_open(vasid, coproc->ct, &rxattr);
843 	if (IS_ERR(rxwin)) {
844 		ret = PTR_ERR(rxwin);
845 		pr_err("setting RxFIFO with VAS failed: %d\n",
846 			ret);
847 		goto err_out;
848 	}
849 
850 	coproc->vas.rxwin = rxwin;
851 	coproc->vas.id = vasid;
852 	nx_add_coprocs_list(coproc, chip_id);
853 
854 	/*
855 	 * (lpid, pid, tid) combination has to be unique for each
856 	 * coprocessor instance in the system. So to make it
857 	 * unique, skiboot uses coprocessor type such as 842 or
858 	 * GZIP for pid and provides this value to kernel in pid
859 	 * device-tree property.
860 	 */
861 	*ct = pid;
862 
863 	return 0;
864 
865 err_out:
866 	kfree(coproc);
867 	return ret;
868 }
869 
nx_coproc_init(int chip_id,int ct_842,int ct_gzip)870 static int __init nx_coproc_init(int chip_id, int ct_842, int ct_gzip)
871 {
872 	int ret = 0;
873 
874 	if (opal_check_token(OPAL_NX_COPROC_INIT)) {
875 		ret = opal_nx_coproc_init(chip_id, ct_842);
876 
877 		if (!ret)
878 			ret = opal_nx_coproc_init(chip_id, ct_gzip);
879 
880 		if (ret) {
881 			ret = opal_error_code(ret);
882 			pr_err("Failed to initialize NX for chip(%d): %d\n",
883 				chip_id, ret);
884 		}
885 	} else
886 		pr_warn("Firmware doesn't support NX initialization\n");
887 
888 	return ret;
889 }
890 
find_nx_device_tree(struct device_node * dn,int chip_id,int vasid,int type,char * devname,int * ct)891 static int __init find_nx_device_tree(struct device_node *dn, int chip_id,
892 					int vasid, int type, char *devname,
893 					int *ct)
894 {
895 	int ret = 0;
896 
897 	if (of_device_is_compatible(dn, devname)) {
898 		ret  = vas_cfg_coproc_info(dn, chip_id, vasid, type, ct);
899 		if (ret)
900 			of_node_put(dn);
901 	}
902 
903 	return ret;
904 }
905 
nx_powernv_probe_vas(struct device_node * pn)906 static int __init nx_powernv_probe_vas(struct device_node *pn)
907 {
908 	int chip_id, vasid, ret = 0;
909 	int ct_842 = 0, ct_gzip = 0;
910 	struct device_node *dn;
911 
912 	chip_id = of_get_ibm_chip_id(pn);
913 	if (chip_id < 0) {
914 		pr_err("ibm,chip-id missing\n");
915 		return -EINVAL;
916 	}
917 
918 	vasid = chip_to_vas_id(chip_id);
919 	if (vasid < 0) {
920 		pr_err("Unable to map chip_id %d to vasid\n", chip_id);
921 		return -EINVAL;
922 	}
923 
924 	for_each_child_of_node(pn, dn) {
925 		ret = find_nx_device_tree(dn, chip_id, vasid, NX_CT_842,
926 					"ibm,p9-nx-842", &ct_842);
927 
928 		if (!ret)
929 			ret = find_nx_device_tree(dn, chip_id, vasid,
930 				NX_CT_GZIP, "ibm,p9-nx-gzip", &ct_gzip);
931 
932 		if (ret) {
933 			of_node_put(dn);
934 			return ret;
935 		}
936 	}
937 
938 	if (!ct_842 || !ct_gzip) {
939 		pr_err("NX FIFO nodes are missing\n");
940 		return -EINVAL;
941 	}
942 
943 	/*
944 	 * Initialize NX instance for both high and normal priority FIFOs.
945 	 */
946 	ret = nx_coproc_init(chip_id, ct_842, ct_gzip);
947 
948 	return ret;
949 }
950 
nx842_powernv_probe(struct device_node * dn)951 static int __init nx842_powernv_probe(struct device_node *dn)
952 {
953 	struct nx_coproc *coproc;
954 	unsigned int ct, ci;
955 	int chip_id;
956 
957 	chip_id = of_get_ibm_chip_id(dn);
958 	if (chip_id < 0) {
959 		pr_err("ibm,chip-id missing\n");
960 		return -EINVAL;
961 	}
962 
963 	if (of_property_read_u32(dn, "ibm,842-coprocessor-type", &ct)) {
964 		pr_err("ibm,842-coprocessor-type missing\n");
965 		return -EINVAL;
966 	}
967 
968 	if (of_property_read_u32(dn, "ibm,842-coprocessor-instance", &ci)) {
969 		pr_err("ibm,842-coprocessor-instance missing\n");
970 		return -EINVAL;
971 	}
972 
973 	coproc = kzalloc(sizeof(*coproc), GFP_KERNEL);
974 	if (!coproc)
975 		return -ENOMEM;
976 
977 	coproc->ct = ct;
978 	coproc->ci = ci;
979 	nx_add_coprocs_list(coproc, chip_id);
980 
981 	pr_info("coprocessor found on chip %d, CT %d CI %d\n", chip_id, ct, ci);
982 
983 	if (!nx842_ct)
984 		nx842_ct = ct;
985 	else if (nx842_ct != ct)
986 		pr_err("NX842 chip %d, CT %d != first found CT %d\n",
987 		       chip_id, ct, nx842_ct);
988 
989 	return 0;
990 }
991 
nx_delete_coprocs(void)992 static void nx_delete_coprocs(void)
993 {
994 	struct nx_coproc *coproc, *n;
995 	struct vas_window *txwin;
996 	int i;
997 
998 	/*
999 	 * close percpu txwins that are opened for the corresponding coproc.
1000 	 */
1001 	for_each_possible_cpu(i) {
1002 		txwin = per_cpu(cpu_txwin, i);
1003 		if (txwin)
1004 			vas_win_close(txwin);
1005 
1006 		per_cpu(cpu_txwin, i) = NULL;
1007 	}
1008 
1009 	list_for_each_entry_safe(coproc, n, &nx_coprocs, list) {
1010 		if (coproc->vas.rxwin)
1011 			vas_win_close(coproc->vas.rxwin);
1012 
1013 		list_del(&coproc->list);
1014 		kfree(coproc);
1015 	}
1016 }
1017 
/*
 * Buffer constraints advertised through nx842_powernv_driver; they mirror
 * the alignment and length checks performed in setup_ddl().
 */
static struct nx842_constraints nx842_powernv_constraints = {
	.alignment =	DDE_BUFFER_ALIGN,
	.multiple =	DDE_BUFFER_LAST_MULT,
	.minimum =	DDE_BUFFER_LAST_MULT,
	/*
	 * NOTE(review): presumably one DDL entry is held back because a
	 * buffer that is not page aligned spans one more page than its
	 * length alone suggests - confirm.
	 */
	.maximum =	(DDL_LEN_MAX - 1) * PAGE_SIZE,
};
1024 
/* Driver description handed to nx842_crypto_init() for each transform. */
static struct nx842_driver nx842_powernv_driver = {
	.name =		KBUILD_MODNAME,
	.owner =	THIS_MODULE,
	/* includes the padding that lets the exec functions align it */
	.workmem_size =	sizeof(struct nx842_workmem),
	.constraints =	&nx842_powernv_constraints,
	.compress =	nx842_powernv_compress,
	.decompress =	nx842_powernv_decompress,
};
1033 
/*
 * cra_init hook: initialise @tfm through the common nx842 code, binding it
 * to the PowerNV driver.  Returns the result of nx842_crypto_init().
 */
static int nx842_powernv_crypto_init(struct crypto_tfm *tfm)
{
	return nx842_crypto_init(tfm, &nx842_powernv_driver);
}
1038 
/*
 * Crypto API registration: a compression algorithm named "842" with driver
 * name "842-nx" (matching the MODULE_ALIAS_CRYPTO entries of this file).
 * Compression and decompression go through the common nx842_crypto_*
 * helpers.
 */
static struct crypto_alg nx842_powernv_alg = {
	.cra_name		= "842",
	.cra_driver_name	= "842-nx",
	.cra_priority		= 300,
	.cra_flags		= CRYPTO_ALG_TYPE_COMPRESS,
	.cra_ctxsize		= sizeof(struct nx842_crypto_ctx),
	.cra_module		= THIS_MODULE,
	.cra_init		= nx842_powernv_crypto_init,
	.cra_exit		= nx842_crypto_exit,
	.cra_u			= { .compress = {
	.coa_compress		= nx842_crypto_compress,
	.coa_decompress		= nx842_crypto_decompress } }
};
1052 
nx_compress_powernv_init(void)1053 static __init int nx_compress_powernv_init(void)
1054 {
1055 	struct device_node *dn;
1056 	int ret;
1057 
1058 	/* verify workmem size/align restrictions */
1059 	BUILD_BUG_ON(WORKMEM_ALIGN % CRB_ALIGN);
1060 	BUILD_BUG_ON(CRB_ALIGN % DDE_ALIGN);
1061 	BUILD_BUG_ON(CRB_SIZE % DDE_ALIGN);
1062 	/* verify buffer size/align restrictions */
1063 	BUILD_BUG_ON(PAGE_SIZE % DDE_BUFFER_ALIGN);
1064 	BUILD_BUG_ON(DDE_BUFFER_ALIGN % DDE_BUFFER_SIZE_MULT);
1065 	BUILD_BUG_ON(DDE_BUFFER_SIZE_MULT % DDE_BUFFER_LAST_MULT);
1066 
1067 	for_each_compatible_node(dn, NULL, "ibm,power9-nx") {
1068 		ret = nx_powernv_probe_vas(dn);
1069 		if (ret) {
1070 			nx_delete_coprocs();
1071 			of_node_put(dn);
1072 			return ret;
1073 		}
1074 	}
1075 
1076 	if (list_empty(&nx_coprocs)) {
1077 		for_each_compatible_node(dn, NULL, "ibm,power-nx")
1078 			nx842_powernv_probe(dn);
1079 
1080 		if (!nx842_ct)
1081 			return -ENODEV;
1082 
1083 		nx842_powernv_exec = nx842_exec_icswx;
1084 	} else {
1085 		/*
1086 		 * Register VAS user space API for NX GZIP so
1087 		 * that user space can use GZIP engine.
1088 		 * Using high FIFO priority for kernel requests and
1089 		 * normal FIFO priority is assigned for userspace.
1090 		 * 842 compression is supported only in kernel.
1091 		 */
1092 		ret = vas_register_api_powernv(THIS_MODULE, VAS_COP_TYPE_GZIP,
1093 					       "nx-gzip");
1094 
1095 		/*
1096 		 * GZIP is not supported in kernel right now.
1097 		 * So open tx windows only for 842.
1098 		 */
1099 		if (!ret)
1100 			ret = nx_open_percpu_txwins();
1101 
1102 		if (ret) {
1103 			nx_delete_coprocs();
1104 			return ret;
1105 		}
1106 
1107 		nx842_powernv_exec = nx842_exec_vas;
1108 	}
1109 
1110 	ret = crypto_register_alg(&nx842_powernv_alg);
1111 	if (ret) {
1112 		nx_delete_coprocs();
1113 		return ret;
1114 	}
1115 
1116 	return 0;
1117 }
1118 module_init(nx_compress_powernv_init);
1119 
/*
 * Module exit: unregister the VAS user space API (VAS path only),
 * unregister the crypto algorithm, then close all windows and free the
 * coprocessor list.
 */
static void __exit nx_compress_powernv_exit(void)
{
	/*
	 * GZIP engine is supported only in power9 or later and nx842_ct
	 * is used on power8 (icswx).
	 * VAS API for NX GZIP is registered during init for user space
	 * use. So delete this API use for GZIP engine.
	 */
	/* nx842_ct is only set by the icswx probe, so 0 means the VAS path */
	if (!nx842_ct)
		vas_unregister_api_powernv();

	crypto_unregister_alg(&nx842_powernv_alg);

	nx_delete_coprocs();
}
1135 module_exit(nx_compress_powernv_exit);
1136