// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Driver for IBM PowerNV compression accelerator
 *
 * Copyright (C) 2015 Dan Streetman, IBM Corp
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include "nx-842.h"

#include <linux/timer.h>

#include <asm/prom.h>
#include <asm/icswx.h>
#include <asm/vas.h>
#include <asm/reg.h>
#include <asm/opal-api.h>
#include <asm/opal.h>

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
MODULE_DESCRIPTION("H/W Compression driver for IBM PowerNV processors");
MODULE_ALIAS_CRYPTO("842");
MODULE_ALIAS_CRYPTO("842-nx");

#define WORKMEM_ALIGN	(CRB_ALIGN)
#define CSB_WAIT_MAX	(5000) /* ms */
#define VAS_RETRIES	(10)

struct nx842_workmem {
	/* Below fields must be properly aligned */
	struct coprocessor_request_block crb; /* CRB_ALIGN align */
	struct data_descriptor_entry ddl_in[DDL_LEN_MAX]; /* DDE_ALIGN align */
	struct data_descriptor_entry ddl_out[DDL_LEN_MAX]; /* DDE_ALIGN align */
	/* Above fields must be properly aligned */

	ktime_t start;

	char padding[WORKMEM_ALIGN]; /* unused, to allow alignment */
} __packed __aligned(WORKMEM_ALIGN);

struct nx_coproc {
	unsigned int chip_id;
	unsigned int ct;	/* Can be 842 or GZIP high/normal */
	unsigned int ci;	/* Coprocessor instance, used with icswx */
	struct {
		struct vas_window *rxwin;
		int id;
	} vas;
	struct list_head list;
};

/*
 * Per-CPU send window used to submit requests to the NX engine on the
 * chip where the CPU resides. Used only by the VAS execution path.
 */
static DEFINE_PER_CPU(struct vas_window *, cpu_txwin);

/* no cpu hotplug on powernv, so this list never changes after init */
static LIST_HEAD(nx_coprocs);
static unsigned int nx842_ct;	/* used in icswx function */

/*
 * Coprocessor-type values matching those used by skiboot and defined
 * in the NX workbook.
 */
#define NX_CT_GZIP	(2)	/* on P9 and later */
#define NX_CT_842	(3)

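/*
 * Execution backend, selected at init time: nx842_exec_icswx() on P7/P8
 * (icswx) or nx842_exec_vas() on P9 and later (VAS copy/paste).
 */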
static int (*nx842_powernv_exec)(const unsigned char *in,
				unsigned int inlen, unsigned char *out,
				unsigned int *outlenp, void *workmem, int fc);

/**
 * setup_indirect_dde - Set up an indirect DDE
 * @dde: DDE to set up
 * @ddl: pointer to the first direct DDE in the list
 * @dde_count: number of direct DDEs in the list
 * @byte_count: total number of bytes covered by the list
 *
 * The DDE is set up with the DDE count, byte count, and address of the
 * first direct DDE in the list.
 */
static void setup_indirect_dde(struct data_descriptor_entry *dde,
			       struct data_descriptor_entry *ddl,
			       unsigned int dde_count, unsigned int byte_count)
{
	dde->flags = 0;
	dde->count = dde_count;
	dde->index = 0;
	dde->length = cpu_to_be32(byte_count);
	dde->address = cpu_to_be64(nx842_get_pa(ddl));
}

/**
 * setup_direct_dde - Set up a single DDE from a buffer
 * @dde: DDE to set up
 * @pa: physical address of the buffer
 * @len: requested buffer length
 *
 * The DDE is set up with the buffer and length.  The buffer must be properly
 * aligned.  The length actually used, capped at the end of the page, is
 * returned.
 *
 * Returns:
 *   N    Successfully set up DDE with N bytes
 */
static unsigned int setup_direct_dde(struct data_descriptor_entry *dde,
				     unsigned long pa, unsigned int len)
{
	unsigned int l = min_t(unsigned int, len, LEN_ON_PAGE(pa));

	dde->flags = 0;
	dde->count = 0;
	dde->index = 0;
	dde->length = cpu_to_be32(l);
	dde->address = cpu_to_be64(pa);

	return l;
}

/**
 * setup_ddl - Set up a DDL from a buffer
 * @dde: DDE to set up as a single direct DDE or as an indirect DDE
 *       pointing at @ddl
 * @ddl: array of direct DDEs to fill if the buffer spans multiple pages
 * @buf: buffer to describe
 * @len: buffer length
 * @in: true for the input buffer, false for the output buffer
 *
 * Returns:
 *   0		Successfully set up DDL
 *   -EINVAL	Buffer is not properly aligned, or the input buffer length
 *		is not a multiple of DDE_BUFFER_LAST_MULT
 *   -EMSGSIZE	Input buffer is too large for the DDL
 */
static int setup_ddl(struct data_descriptor_entry *dde,
		     struct data_descriptor_entry *ddl,
		     unsigned char *buf, unsigned int len,
		     bool in)
{
	unsigned long pa = nx842_get_pa(buf);
	int i, ret, total_len = len;

	if (!IS_ALIGNED(pa, DDE_BUFFER_ALIGN)) {
		pr_debug("%s buffer pa 0x%lx not 0x%x-byte aligned\n",
			 in ? "input" : "output", pa, DDE_BUFFER_ALIGN);
		return -EINVAL;
	}

	/* Only the last buffer's length needs checking against
	 * DDE_BUFFER_LAST_MULT: the buffer is DDE_BUFFER_ALIGN aligned,
	 * which is a multiple of DDE_BUFFER_SIZE_MULT, so every DDE
	 * buffer before the last one is guaranteed to be a multiple of
	 * DDE_BUFFER_SIZE_MULT.
	 */
	if (len % DDE_BUFFER_LAST_MULT) {
		pr_debug("%s buffer len 0x%x not a multiple of 0x%x\n",
			 in ? "input" : "output", len, DDE_BUFFER_LAST_MULT);
		if (in)
			return -EINVAL;
		len = round_down(len, DDE_BUFFER_LAST_MULT);
	}

	/* use a single direct DDE */
	if (len <= LEN_ON_PAGE(pa)) {
		ret = setup_direct_dde(dde, pa, len);
		WARN_ON(ret < len);
		return 0;
	}

	/* use the DDL */
	for (i = 0; i < DDL_LEN_MAX && len > 0; i++) {
		ret = setup_direct_dde(&ddl[i], pa, len);
		buf += ret;
		len -= ret;
		pa = nx842_get_pa(buf);
	}

	if (len > 0) {
		pr_debug("0x%x total %s bytes 0x%x too many for DDL.\n",
			 total_len, in ? "input" : "output", len);
		if (in)
			return -EMSGSIZE;
		total_len -= len;
	}
	setup_indirect_dde(dde, ddl, i, total_len);

	return 0;
}

#define CSB_ERR(csb, msg, ...)					\
	pr_err("ERROR: " msg " : %02x %02x %02x %02x %08x\n",	\
	       ##__VA_ARGS__, (csb)->flags,			\
	       (csb)->cs, (csb)->cc, (csb)->ce,			\
	       be32_to_cpu((csb)->count))

#define CSB_ERR_ADDR(csb, msg, ...)				\
	CSB_ERR(csb, msg " at %lx", ##__VA_ARGS__,		\
		(unsigned long)be64_to_cpu((csb)->address))

/**
 * wait_for_csb - Poll the CSB until the coprocessor completes the request
 * @wmem: working memory holding the request's start timestamp
 * @csb: coprocessor status block to poll
 *
 * Busy-waits for the CSB valid bit for up to CSB_WAIT_MAX ms, then
 * translates the CSB completion code into an errno.
 */
static int wait_for_csb(struct nx842_workmem *wmem,
			struct coprocessor_status_block *csb)
{
	ktime_t start = wmem->start, now = ktime_get();
	ktime_t timeout = ktime_add_ms(start, CSB_WAIT_MAX);

	while (!(READ_ONCE(csb->flags) & CSB_V)) {
		cpu_relax();
		now = ktime_get();
		if (ktime_after(now, timeout))
			break;
	}

	/* hw has updated csb and output buffer */
	barrier();

	/* check CSB flags */
	if (!(csb->flags & CSB_V)) {
		CSB_ERR(csb, "CSB still not valid after %ld us, giving up",
			(long)ktime_us_delta(now, start));
		return -ETIMEDOUT;
	}
	if (csb->flags & CSB_F) {
		CSB_ERR(csb, "Invalid CSB format");
		return -EPROTO;
	}
	if (csb->flags & CSB_CH) {
		CSB_ERR(csb, "Invalid CSB chaining state");
		return -EPROTO;
	}

	/* verify CSB completion sequence is 0 */
	if (csb->cs) {
		CSB_ERR(csb, "Invalid CSB completion sequence");
		return -EPROTO;
	}

	/* check CSB Completion Code */
	switch (csb->cc) {
	/* no error */
	case CSB_CC_SUCCESS:
		break;
	case CSB_CC_TPBC_GT_SPBC:
		/* not an error, but the compressed data is
		 * larger than the uncompressed data :(
		 */
		break;

	/* input data errors */
	case CSB_CC_OPERAND_OVERLAP:
		/* input and output buffers overlap */
		CSB_ERR(csb, "Operand Overlap error");
		return -EINVAL;
	case CSB_CC_INVALID_OPERAND:
		CSB_ERR(csb, "Invalid operand");
		return -EINVAL;
	case CSB_CC_NOSPC:
		/* output buffer too small */
		return -ENOSPC;
	case CSB_CC_ABORT:
		CSB_ERR(csb, "Function aborted");
		return -EINTR;
	case CSB_CC_CRC_MISMATCH:
		CSB_ERR(csb, "CRC mismatch");
		return -EINVAL;
	case CSB_CC_TEMPL_INVALID:
		CSB_ERR(csb, "Compressed data template invalid");
		return -EINVAL;
	case CSB_CC_TEMPL_OVERFLOW:
		CSB_ERR(csb, "Compressed data template shows data past end");
		return -EINVAL;
	case CSB_CC_EXCEED_BYTE_COUNT:	/* P9 or later */
		/*
		 * DDE byte count exceeds the limit specified in Maximum
		 * byte count register.
		 */
		CSB_ERR(csb, "DDE byte count exceeds the limit");
		return -EINVAL;

	/* these should not happen */
	case CSB_CC_INVALID_ALIGN:
		/* setup_ddl should have detected this */
		CSB_ERR_ADDR(csb, "Invalid alignment");
		return -EINVAL;
	case CSB_CC_DATA_LENGTH:
		/* setup_ddl should have detected this */
		CSB_ERR(csb, "Invalid data length");
		return -EINVAL;
	case CSB_CC_WR_TRANSLATION:
	case CSB_CC_TRANSLATION:
	case CSB_CC_TRANSLATION_DUP1:
	case CSB_CC_TRANSLATION_DUP2:
	case CSB_CC_TRANSLATION_DUP3:
	case CSB_CC_TRANSLATION_DUP4:
	case CSB_CC_TRANSLATION_DUP5:
	case CSB_CC_TRANSLATION_DUP6:
		/* should not happen, we use physical addrs */
		CSB_ERR_ADDR(csb, "Translation error");
		return -EPROTO;
	case CSB_CC_WR_PROTECTION:
	case CSB_CC_PROTECTION:
	case CSB_CC_PROTECTION_DUP1:
	case CSB_CC_PROTECTION_DUP2:
	case CSB_CC_PROTECTION_DUP3:
	case CSB_CC_PROTECTION_DUP4:
	case CSB_CC_PROTECTION_DUP5:
	case CSB_CC_PROTECTION_DUP6:
		/* should not happen, we use physical addrs */
		CSB_ERR_ADDR(csb, "Protection error");
		return -EPROTO;
	case CSB_CC_PRIVILEGE:
		/* shouldn't happen, we're in HYP mode */
		CSB_ERR(csb, "Insufficient Privilege error");
		return -EPROTO;
	case CSB_CC_EXCESSIVE_DDE:
		/* shouldn't happen, setup_ddl doesn't use many dde's */
		CSB_ERR(csb, "Too many DDEs in DDL");
		return -EINVAL;
	case CSB_CC_TRANSPORT:
	case CSB_CC_INVALID_CRB:	/* P9 or later */
		/* shouldn't happen, we setup CRB correctly */
		CSB_ERR(csb, "Invalid CRB");
		return -EINVAL;
	case CSB_CC_INVALID_DDE:	/* P9 or later */
		/*
		 * shouldn't happen, setup_direct/indirect_dde creates
		 * DDE right
		 */
		CSB_ERR(csb, "Invalid DDE");
		return -EINVAL;
	case CSB_CC_SEGMENTED_DDL:
		/* shouldn't happen, setup_ddl creates DDL right */
		CSB_ERR(csb, "Segmented DDL error");
		return -EINVAL;
	case CSB_CC_DDE_OVERFLOW:
		/* shouldn't happen, setup_ddl creates DDL right */
		CSB_ERR(csb, "DDE overflow error");
		return -EINVAL;
	case CSB_CC_SESSION:
		/* should not happen with ICSWX */
		CSB_ERR(csb, "Session violation error");
		return -EPROTO;
	case CSB_CC_CHAIN:
		/* should not happen, we don't use chained CRBs */
		CSB_ERR(csb, "Chained CRB error");
		return -EPROTO;
	case CSB_CC_SEQUENCE:
		/* should not happen, we don't use chained CRBs */
		CSB_ERR(csb, "CRB sequence number error");
		return -EPROTO;
	case CSB_CC_UNKNOWN_CODE:
		CSB_ERR(csb, "Unknown subfunction code");
		return -EPROTO;

	/* hardware errors */
	case CSB_CC_RD_EXTERNAL:
	case CSB_CC_RD_EXTERNAL_DUP1:
	case CSB_CC_RD_EXTERNAL_DUP2:
	case CSB_CC_RD_EXTERNAL_DUP3:
		CSB_ERR_ADDR(csb, "Read error outside coprocessor");
		return -EPROTO;
	case CSB_CC_WR_EXTERNAL:
		CSB_ERR_ADDR(csb, "Write error outside coprocessor");
		return -EPROTO;
	case CSB_CC_INTERNAL:
		CSB_ERR(csb, "Internal error in coprocessor");
		return -EPROTO;
	case CSB_CC_PROVISION:
		CSB_ERR(csb, "Storage provision error");
		return -EPROTO;
	case CSB_CC_HW:
		CSB_ERR(csb, "Correctable hardware error");
		return -EPROTO;
	case CSB_CC_HW_EXPIRED_TIMER:	/* P9 or later */
		CSB_ERR(csb, "Job did not finish within allowed time");
		return -EPROTO;

	default:
		CSB_ERR(csb, "Invalid CC %d", csb->cc);
		return -EPROTO;
	}

	/* check Completion Extension state */
	if (csb->ce & CSB_CE_TERMINATION) {
		CSB_ERR(csb, "CSB request was terminated");
		return -EPROTO;
	}
	if (csb->ce & CSB_CE_INCOMPLETE) {
		CSB_ERR(csb, "CSB request not complete");
		return -EPROTO;
	}
	if (!(csb->ce & CSB_CE_TPBC)) {
		CSB_ERR(csb, "TPBC not provided, unknown target length");
		return -EPROTO;
	}

	/* successful completion */
	pr_debug_ratelimited("Processed %u bytes in %lu us\n",
			     be32_to_cpu(csb->count),
			     (unsigned long)ktime_us_delta(now, start));

	return 0;
}

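/*
 * Build the CRB for a request: clear it, describe the input and output
 * buffers with DDLs, and point the CRB at the CSB (as a physical address).
 */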
static int nx842_config_crb(const unsigned char *in, unsigned int inlen,
			unsigned char *out, unsigned int outlen,
			struct nx842_workmem *wmem)
{
	struct coprocessor_request_block *crb;
	struct coprocessor_status_block *csb;
	u64 csb_addr;
	int ret;

	crb = &wmem->crb;
	csb = &crb->csb;

	/* Clear any previous values */
	memset(crb, 0, sizeof(*crb));

	/* set up DDLs */
	ret = setup_ddl(&crb->source, wmem->ddl_in,
			(unsigned char *)in, inlen, true);
	if (ret)
		return ret;

	ret = setup_ddl(&crb->target, wmem->ddl_out,
			out, outlen, false);
	if (ret)
		return ret;

	/* set up CRB's CSB addr */
	csb_addr = nx842_get_pa(csb) & CRB_CSB_ADDRESS;
	csb_addr |= CRB_CSB_AT; /* Addrs are phys */
	crb->csb_addr = cpu_to_be64(csb_addr);

	return 0;
}

/**
 * nx842_exec_icswx - compress/decompress data using the 842 algorithm
 *
 * (De)compression provided by the NX842 coprocessor on IBM PowerNV systems.
 * This compresses or decompresses the provided input buffer into the provided
 * output buffer.
 *
 * Upon return from this function *@outlenp contains the length of the
 * output data.  If there is an error then *@outlenp will be 0 and an
 * error will be specified by the return code from this function.
 *
 * The @workmem buffer should only be used by one function call at a time.
 *
 * @in: input buffer pointer
 * @inlen: input buffer size
 * @out: output buffer pointer
 * @outlenp: output buffer size pointer
 * @workmem: working memory buffer pointer, size determined by
 *           nx842_powernv_driver.workmem_size
 * @fc: function code, see CCW Function Codes in nx-842.h
 *
 * Returns:
 *   0		Success, output of length *@outlenp stored in the buffer at @out
 *   -ENODEV	Hardware unavailable
 *   -ENOSPC	Output buffer is too small
 *   -EMSGSIZE	Input buffer too large
 *   -EINVAL	buffer constraints do not fit nx842_constraints
 *   -EPROTO	hardware error during operation
 *   -ETIMEDOUT	hardware did not complete operation in reasonable time
 *   -EINTR	operation was aborted
 */
static int nx842_exec_icswx(const unsigned char *in, unsigned int inlen,
				  unsigned char *out, unsigned int *outlenp,
				  void *workmem, int fc)
{
	struct coprocessor_request_block *crb;
	struct coprocessor_status_block *csb;
	struct nx842_workmem *wmem;
	int ret;
	u32 ccw;
	unsigned int outlen = *outlenp;

	wmem = PTR_ALIGN(workmem, WORKMEM_ALIGN);

	*outlenp = 0;

	/* shouldn't happen, we don't load without a coproc */
	if (!nx842_ct) {
		pr_err_ratelimited("coprocessor CT is 0");
		return -ENODEV;
	}

	ret = nx842_config_crb(in, inlen, out, outlen, wmem);
	if (ret)
		return ret;

	crb = &wmem->crb;
	csb = &crb->csb;

	/* set up CCW */
	ccw = 0;
	ccw = SET_FIELD(CCW_CT, ccw, nx842_ct);
	ccw = SET_FIELD(CCW_CI_842, ccw, 0); /* use 0 for hw auto-selection */
	ccw = SET_FIELD(CCW_FC_842, ccw, fc);

	wmem->start = ktime_get();

	/* do ICSWX */
	ret = icswx(cpu_to_be32(ccw), crb);

	pr_debug_ratelimited("icswx CR %x ccw %x crb->ccw %x\n", ret,
			     (unsigned int)ccw,
			     (unsigned int)be32_to_cpu(crb->ccw));

	/*
	 * The NX842 coprocessor sets the 3rd bit in the CR register from
	 * XER[S0].  XER[S0] is the integer summary overflow bit, which has
	 * nothing to do with NX.  Since this bit can be set alongside other
	 * return values, mask it off.
	 */
	ret &= ~ICSWX_XERS0;

	switch (ret) {
	case ICSWX_INITIATED:
		ret = wait_for_csb(wmem, csb);
		break;
	case ICSWX_BUSY:
		pr_debug_ratelimited("842 Coprocessor busy\n");
		ret = -EBUSY;
		break;
	case ICSWX_REJECTED:
		pr_err_ratelimited("ICSWX rejected\n");
		ret = -EPROTO;
		break;
	}

	if (!ret)
		*outlenp = be32_to_cpu(csb->count);

	return ret;
}

/**
 * nx842_exec_vas - compress/decompress data using the 842 algorithm
 *
 * (De)compression provided by the NX842 coprocessor on IBM PowerNV systems.
 * This compresses or decompresses the provided input buffer into the provided
 * output buffer.
 *
 * Upon return from this function *@outlenp contains the length of the
 * output data.  If there is an error then *@outlenp will be 0 and an
 * error will be specified by the return code from this function.
 *
 * The @workmem buffer should only be used by one function call at a time.
 *
 * @in: input buffer pointer
 * @inlen: input buffer size
 * @out: output buffer pointer
 * @outlenp: output buffer size pointer
 * @workmem: working memory buffer pointer, size determined by
 *           nx842_powernv_driver.workmem_size
 * @fc: function code, see CCW Function Codes in nx-842.h
 *
 * Returns:
 *   0		Success, output of length *@outlenp stored in the buffer
 *		at @out
 *   -ENODEV	Hardware unavailable
 *   -ENOSPC	Output buffer is too small
 *   -EMSGSIZE	Input buffer too large
 *   -EINVAL	buffer constraints do not fit nx842_constraints
 *   -EPROTO	hardware error during operation
 *   -ETIMEDOUT	hardware did not complete operation in reasonable time
 *   -EINTR	operation was aborted
 */
static int nx842_exec_vas(const unsigned char *in, unsigned int inlen,
				  unsigned char *out, unsigned int *outlenp,
				  void *workmem, int fc)
{
	struct coprocessor_request_block *crb;
	struct coprocessor_status_block *csb;
	struct nx842_workmem *wmem;
	struct vas_window *txwin;
	int ret, i = 0;
	u32 ccw;
	unsigned int outlen = *outlenp;

	wmem = PTR_ALIGN(workmem, WORKMEM_ALIGN);

	*outlenp = 0;

	crb = &wmem->crb;
	csb = &crb->csb;

	ret = nx842_config_crb(in, inlen, out, outlen, wmem);
	if (ret)
		return ret;

	ccw = 0;
	ccw = SET_FIELD(CCW_FC_842, ccw, fc);
	crb->ccw = cpu_to_be32(ccw);

	do {
		wmem->start = ktime_get();
		preempt_disable();
		txwin = this_cpu_read(cpu_txwin);

		/*
		 * VAS copies the CRB into the L2 cache; see <asm/vas.h>.
		 * Arguments are @crb and @offset.
		 */
		vas_copy_crb(crb, 0);

		/*
		 * VAS pastes the previously copied CRB to NX.
		 * Arguments are @txwin, @offset and @last (must be true).
		 */
		ret = vas_paste_crb(txwin, 0, 1);
		preempt_enable();
		/*
		 * Retry copy/paste function for VAS failures.
		 */
	} while (ret && (i++ < VAS_RETRIES));

	if (ret) {
		pr_err_ratelimited("VAS copy/paste failed\n");
		return ret;
	}

	ret = wait_for_csb(wmem, csb);
	if (!ret)
		*outlenp = be32_to_cpu(csb->count);

	return ret;
}

/**
 * nx842_powernv_compress - Compress data using the 842 algorithm
 *
 * Compression provided by the NX842 coprocessor on IBM PowerNV systems.
 * The input buffer is compressed and the result is stored in the
 * provided output buffer.
 *
 * Upon return from this function *@outlenp contains the length of the
 * compressed data.  If there is an error then *@outlenp will be 0 and an
 * error will be specified by the return code from this function.
 *
 * @in: input buffer pointer
 * @inlen: input buffer size
 * @out: output buffer pointer
 * @outlenp: output buffer size pointer
 * @wmem: working memory buffer pointer, size determined by
 *        nx842_powernv_driver.workmem_size
 *
 * Returns: see @nx842_powernv_exec()
 */
static int nx842_powernv_compress(const unsigned char *in, unsigned int inlen,
				  unsigned char *out, unsigned int *outlenp,
				  void *wmem)
{
	return nx842_powernv_exec(in, inlen, out, outlenp,
				      wmem, CCW_FC_842_COMP_CRC);
}

/**
 * nx842_powernv_decompress - Decompress data using the 842 algorithm
 *
 * Decompression provided by the NX842 coprocessor on IBM PowerNV systems.
 * The input buffer is decompressed and the result is stored in the
 * provided output buffer.
 *
 * Upon return from this function *@outlenp contains the length of the
 * decompressed data.  If there is an error then *@outlenp will be 0 and an
 * error will be specified by the return code from this function.
 *
 * @in: input buffer pointer
 * @inlen: input buffer size
 * @out: output buffer pointer
 * @outlenp: output buffer size pointer
 * @wmem: working memory buffer pointer, size determined by
 *        nx842_powernv_driver.workmem_size
 *
 * Returns: see @nx842_powernv_exec()
 */
static int nx842_powernv_decompress(const unsigned char *in, unsigned int inlen,
				    unsigned char *out, unsigned int *outlenp,
				    void *wmem)
{
	return nx842_powernv_exec(in, inlen, out, outlenp,
				      wmem, CCW_FC_842_DECOMP_CRC);
}

static inline void nx_add_coprocs_list(struct nx_coproc *coproc,
					int chipid)
{
	coproc->chip_id = chipid;
	INIT_LIST_HEAD(&coproc->list);
	list_add(&coproc->list, &nx_coprocs);
}

static struct vas_window *nx_alloc_txwin(struct nx_coproc *coproc)
{
	struct vas_window *txwin = NULL;
	struct vas_tx_win_attr txattr;

	/*
	 * Kernel requests will be high priority. So open send
	 * windows only for high priority RxFIFO entries.
	 */
	vas_init_tx_win_attr(&txattr, coproc->ct);
	txattr.lpid = 0;	/* lpid is 0 for kernel requests */

	/*
	 * Open a VAS send window, which is used to send requests to NX.
	 */
	txwin = vas_tx_win_open(coproc->vas.id, coproc->ct, &txattr);
	if (IS_ERR(txwin))
		pr_err("ibm,nx-842: Can not open TX window: %ld\n",
				PTR_ERR(txwin));

	return txwin;
}

/*
 * Identify the chip ID for each CPU, open a send window for the
 * corresponding NX engine and save the txwin in the per-CPU cpu_txwin.
 * cpu_txwin is used in the copy/paste operation for each compression /
 * decompression request.
 */
static int nx_open_percpu_txwins(void)
{
	struct nx_coproc *coproc, *n;
	unsigned int i, chip_id;

	for_each_possible_cpu(i) {
		struct vas_window *txwin = NULL;

		chip_id = cpu_to_chip_id(i);

		list_for_each_entry_safe(coproc, n, &nx_coprocs, list) {
			/*
			 * Kernel requests use only high priority FIFOs. So
			 * open send windows for these FIFOs.
			 * GZIP is not supported in kernel right now.
			 */

			if (coproc->ct != VAS_COP_TYPE_842_HIPRI)
				continue;

			if (coproc->chip_id == chip_id) {
				txwin = nx_alloc_txwin(coproc);
				if (IS_ERR(txwin))
					return PTR_ERR(txwin);

				per_cpu(cpu_txwin, i) = txwin;
				break;
			}
		}

		if (!per_cpu(cpu_txwin, i)) {
			/* shouldn't happen, each chip should have an NX engine */
			pr_err("NX engine is not available for CPU %d\n", i);
			return -EINVAL;
		}
	}

	return 0;
}

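/*
 * Map the RxFIFO "priority" device-tree string to the corresponding
 * high or normal coprocessor type.
 */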
static int __init nx_set_ct(struct nx_coproc *coproc, const char *priority,
				int high, int normal)
{
	if (!strcmp(priority, "High"))
		coproc->ct = high;
	else if (!strcmp(priority, "Normal"))
		coproc->ct = normal;
	else {
		pr_err("Invalid RxFIFO priority value\n");
		return -EINVAL;
	}

	return 0;
}

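/*
 * Read the RxFIFO properties from the device-tree node, open a VAS
 * receive window for the NX engine, and add the coprocessor to the
 * nx_coprocs list.  On success, *ct is set from the 'pid' property,
 * which skiboot uses to identify the coprocessor type.
 */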
static int __init vas_cfg_coproc_info(struct device_node *dn, int chip_id,
					int vasid, int type, int *ct)
{
	struct vas_window *rxwin = NULL;
	struct vas_rx_win_attr rxattr;
	u32 lpid, pid, tid, fifo_size;
	struct nx_coproc *coproc;
	u64 rx_fifo;
	const char *priority;
	int ret;

	ret = of_property_read_u64(dn, "rx-fifo-address", &rx_fifo);
	if (ret) {
		pr_err("Missing rx-fifo-address property\n");
		return ret;
	}

	ret = of_property_read_u32(dn, "rx-fifo-size", &fifo_size);
	if (ret) {
		pr_err("Missing rx-fifo-size property\n");
		return ret;
	}

	ret = of_property_read_u32(dn, "lpid", &lpid);
	if (ret) {
		pr_err("Missing lpid property\n");
		return ret;
	}

	ret = of_property_read_u32(dn, "pid", &pid);
	if (ret) {
		pr_err("Missing pid property\n");
		return ret;
	}

	ret = of_property_read_u32(dn, "tid", &tid);
	if (ret) {
		pr_err("Missing tid property\n");
		return ret;
	}

	ret = of_property_read_string(dn, "priority", &priority);
	if (ret) {
		pr_err("Missing priority property\n");
		return ret;
	}

	coproc = kzalloc(sizeof(*coproc), GFP_KERNEL);
	if (!coproc)
		return -ENOMEM;

	if (type == NX_CT_842)
		ret = nx_set_ct(coproc, priority, VAS_COP_TYPE_842_HIPRI,
			VAS_COP_TYPE_842);
	else if (type == NX_CT_GZIP)
		ret = nx_set_ct(coproc, priority, VAS_COP_TYPE_GZIP_HIPRI,
				VAS_COP_TYPE_GZIP);

	if (ret)
		goto err_out;

	vas_init_rx_win_attr(&rxattr, coproc->ct);
	rxattr.rx_fifo = (void *)rx_fifo;
	rxattr.rx_fifo_size = fifo_size;
	rxattr.lnotify_lpid = lpid;
	rxattr.lnotify_pid = pid;
	rxattr.lnotify_tid = tid;
	/*
	 * The maximum RX window credits cannot exceed the number of CRBs
	 * in the RxFIFO; otherwise an RxFIFO overrun can cause a checkstop.
	 */
	rxattr.wcreds_max = fifo_size / CRB_SIZE;

	/*
	 * Open a VAS receive window which is used to configure the RxFIFO
	 * for NX.
	 */
	rxwin = vas_rx_win_open(vasid, coproc->ct, &rxattr);
	if (IS_ERR(rxwin)) {
		ret = PTR_ERR(rxwin);
		pr_err("setting RxFIFO with VAS failed: %d\n",
			ret);
		goto err_out;
	}

	coproc->vas.rxwin = rxwin;
	coproc->vas.id = vasid;
	nx_add_coprocs_list(coproc, chip_id);

	/*
	 * The (lpid, pid, tid) combination has to be unique for each
	 * coprocessor instance in the system. To make it unique,
	 * skiboot uses the coprocessor type (such as 842 or GZIP) as
	 * the pid and provides this value to the kernel in the 'pid'
	 * device-tree property.
	 */
	*ct = pid;

	return 0;

err_out:
	kfree(coproc);
	return ret;
}

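/*
 * Ask OPAL firmware to initialize the NX coprocessor on the given chip
 * for both the 842 and GZIP coprocessor types.
 */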
static int __init nx_coproc_init(int chip_id, int ct_842, int ct_gzip)
{
	int ret = 0;

	if (opal_check_token(OPAL_NX_COPROC_INIT)) {
		ret = opal_nx_coproc_init(chip_id, ct_842);

		if (!ret)
			ret = opal_nx_coproc_init(chip_id, ct_gzip);

		if (ret) {
			ret = opal_error_code(ret);
			pr_err("Failed to initialize NX for chip(%d): %d\n",
				chip_id, ret);
		}
	} else
		pr_warn("Firmware doesn't support NX initialization\n");

	return ret;
}

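/*
 * If the device-tree node matches @devname, configure the coprocessor
 * described by that node via vas_cfg_coproc_info().
 */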
static int __init find_nx_device_tree(struct device_node *dn, int chip_id,
					int vasid, int type, char *devname,
					int *ct)
{
	int ret = 0;

	if (of_device_is_compatible(dn, devname)) {
		ret = vas_cfg_coproc_info(dn, chip_id, vasid, type, ct);
		if (ret)
			of_node_put(dn);
	}

	return ret;
}

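/*
 * Probe a P9 NX node: map the chip to a VAS instance, configure the 842
 * and GZIP RxFIFO child nodes, and ask firmware to initialize the NX.
 */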
static int __init nx_powernv_probe_vas(struct device_node *pn)
{
	int chip_id, vasid, ret = 0;
	int ct_842 = 0, ct_gzip = 0;
	struct device_node *dn;

	chip_id = of_get_ibm_chip_id(pn);
	if (chip_id < 0) {
		pr_err("ibm,chip-id missing\n");
		return -EINVAL;
	}

	vasid = chip_to_vas_id(chip_id);
	if (vasid < 0) {
		pr_err("Unable to map chip_id %d to vasid\n", chip_id);
		return -EINVAL;
	}

	for_each_child_of_node(pn, dn) {
		ret = find_nx_device_tree(dn, chip_id, vasid, NX_CT_842,
					"ibm,p9-nx-842", &ct_842);

		if (!ret)
			ret = find_nx_device_tree(dn, chip_id, vasid,
				NX_CT_GZIP, "ibm,p9-nx-gzip", &ct_gzip);

		if (ret) {
			of_node_put(dn);
			return ret;
		}
	}

	if (!ct_842 || !ct_gzip) {
		pr_err("NX FIFO nodes are missing\n");
		return -EINVAL;
	}

	/*
	 * Initialize NX instance for both high and normal priority FIFOs.
	 */
	ret = nx_coproc_init(chip_id, ct_842, ct_gzip);

	return ret;
}

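/*
 * Legacy (P7/P8, icswx) probe: read the coprocessor type and instance
 * from the device tree and add the coprocessor to the nx_coprocs list.
 */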
static int __init nx842_powernv_probe(struct device_node *dn)
{
	struct nx_coproc *coproc;
	unsigned int ct, ci;
	int chip_id;

	chip_id = of_get_ibm_chip_id(dn);
	if (chip_id < 0) {
		pr_err("ibm,chip-id missing\n");
		return -EINVAL;
	}

	if (of_property_read_u32(dn, "ibm,842-coprocessor-type", &ct)) {
		pr_err("ibm,842-coprocessor-type missing\n");
		return -EINVAL;
	}

	if (of_property_read_u32(dn, "ibm,842-coprocessor-instance", &ci)) {
		pr_err("ibm,842-coprocessor-instance missing\n");
		return -EINVAL;
	}

	coproc = kzalloc(sizeof(*coproc), GFP_KERNEL);
	if (!coproc)
		return -ENOMEM;

	coproc->ct = ct;
	coproc->ci = ci;
	nx_add_coprocs_list(coproc, chip_id);

	pr_info("coprocessor found on chip %d, CT %d CI %d\n", chip_id, ct, ci);

	if (!nx842_ct)
		nx842_ct = ct;
	else if (nx842_ct != ct)
		pr_err("NX842 chip %d, CT %d != first found CT %d\n",
		       chip_id, ct, nx842_ct);

	return 0;
}

static void nx_delete_coprocs(void)
{
	struct nx_coproc *coproc, *n;
	struct vas_window *txwin;
	int i;

	/*
	 * Close the per-CPU txwins that were opened for the corresponding
	 * coprocs.
	 */
	for_each_possible_cpu(i) {
		txwin = per_cpu(cpu_txwin, i);
		if (txwin)
			vas_win_close(txwin);

		per_cpu(cpu_txwin, i) = NULL;
	}

	list_for_each_entry_safe(coproc, n, &nx_coprocs, list) {
		if (coproc->vas.rxwin)
			vas_win_close(coproc->vas.rxwin);

		list_del(&coproc->list);
		kfree(coproc);
	}
}

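/*
 * Buffer constraints exposed to the 842 crypto layer: alignment, length
 * multiple and minimum come from the DDE buffer rules, and the maximum
 * is limited by what a full DDL can describe.
 */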
static struct nx842_constraints nx842_powernv_constraints = {
	.alignment =	DDE_BUFFER_ALIGN,
	.multiple =	DDE_BUFFER_LAST_MULT,
	.minimum =	DDE_BUFFER_LAST_MULT,
	.maximum =	(DDL_LEN_MAX - 1) * PAGE_SIZE,
};

static struct nx842_driver nx842_powernv_driver = {
	.name =		KBUILD_MODNAME,
	.owner =	THIS_MODULE,
	.workmem_size =	sizeof(struct nx842_workmem),
	.constraints =	&nx842_powernv_constraints,
	.compress =	nx842_powernv_compress,
	.decompress =	nx842_powernv_decompress,
};

static int nx842_powernv_crypto_init(struct crypto_tfm *tfm)
{
	return nx842_crypto_init(tfm, &nx842_powernv_driver);
}

static struct crypto_alg nx842_powernv_alg = {
	.cra_name		= "842",
	.cra_driver_name	= "842-nx",
	.cra_priority		= 300,
	.cra_flags		= CRYPTO_ALG_TYPE_COMPRESS,
	.cra_ctxsize		= sizeof(struct nx842_crypto_ctx),
	.cra_module		= THIS_MODULE,
	.cra_init		= nx842_powernv_crypto_init,
	.cra_exit		= nx842_crypto_exit,
	.cra_u			= { .compress = {
	.coa_compress		= nx842_crypto_compress,
	.coa_decompress		= nx842_crypto_decompress } }
};

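/*
 * Illustrative sketch (not part of this driver): a kernel consumer would
 * typically reach this algorithm through the crypto compression API, e.g.:
 *
 *	struct crypto_comp *tfm = crypto_alloc_comp("842", 0, 0);
 *	unsigned int dlen = dst_size;	// dst_size, src, src_len, dst are
 *					// hypothetical caller-owned values
 *
 *	if (!IS_ERR(tfm)) {
 *		// returns 0 on success; dlen is updated with the output size
 *		crypto_comp_compress(tfm, src, src_len, dst, &dlen);
 *		crypto_free_comp(tfm);
 *	}
 *
 * Buffer sizes and alignment must satisfy nx842_powernv_constraints.
 */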
static __init int nx_compress_powernv_init(void)
{
	struct device_node *dn;
	int ret;

	/* verify workmem size/align restrictions */
	BUILD_BUG_ON(WORKMEM_ALIGN % CRB_ALIGN);
	BUILD_BUG_ON(CRB_ALIGN % DDE_ALIGN);
	BUILD_BUG_ON(CRB_SIZE % DDE_ALIGN);
	/* verify buffer size/align restrictions */
	BUILD_BUG_ON(PAGE_SIZE % DDE_BUFFER_ALIGN);
	BUILD_BUG_ON(DDE_BUFFER_ALIGN % DDE_BUFFER_SIZE_MULT);
	BUILD_BUG_ON(DDE_BUFFER_SIZE_MULT % DDE_BUFFER_LAST_MULT);

	for_each_compatible_node(dn, NULL, "ibm,power9-nx") {
		ret = nx_powernv_probe_vas(dn);
		if (ret) {
			nx_delete_coprocs();
			of_node_put(dn);
			return ret;
		}
	}

	if (list_empty(&nx_coprocs)) {
		for_each_compatible_node(dn, NULL, "ibm,power-nx")
			nx842_powernv_probe(dn);

		if (!nx842_ct)
			return -ENODEV;

		nx842_powernv_exec = nx842_exec_icswx;
	} else {
		/*
		 * Register the VAS user-space API for NX GZIP so that
		 * user space can use the GZIP engine.  The high priority
		 * FIFO is used for kernel requests and the normal priority
		 * FIFO is assigned to user space.
		 * 842 compression is supported only in the kernel.
		 */
		ret = vas_register_api_powernv(THIS_MODULE, VAS_COP_TYPE_GZIP,
					       "nx-gzip");

		/*
		 * GZIP is not supported in kernel right now.
		 * So open tx windows only for 842.
		 */
		if (!ret)
			ret = nx_open_percpu_txwins();

		if (ret) {
			nx_delete_coprocs();
			return ret;
		}

		nx842_powernv_exec = nx842_exec_vas;
	}

	ret = crypto_register_alg(&nx842_powernv_alg);
	if (ret) {
		nx_delete_coprocs();
		return ret;
	}

	return 0;
}
module_init(nx_compress_powernv_init);

static void __exit nx_compress_powernv_exit(void)
{
	/*
	 * The GZIP engine is available only on Power9 and later; nx842_ct
	 * is set only on Power8 (icswx).
	 * The VAS API for NX GZIP was registered during init for user-space
	 * use, so unregister it here when running with VAS.
	 */
	if (!nx842_ct)
		vas_unregister_api_powernv();

	crypto_unregister_alg(&nx842_powernv_alg);

	nx_delete_coprocs();
}
module_exit(nx_compress_powernv_exit);