xref: /openbmc/linux/drivers/crypto/ccp/ccp-dev-v5.c (revision 82003e04)
1 /*
2  * AMD Cryptographic Coprocessor (CCP) driver
3  *
4  * Copyright (C) 2016 Advanced Micro Devices, Inc.
5  *
6  * Author: Gary R Hook <gary.hook@amd.com>
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 as
10  * published by the Free Software Foundation.
11  */
12 
13 #include <linux/module.h>
14 #include <linux/kernel.h>
15 #include <linux/pci.h>
16 #include <linux/kthread.h>
17 #include <linux/dma-mapping.h>
18 #include <linux/interrupt.h>
19 #include <linux/compiler.h>
20 #include <linux/ccp.h>
21 
22 #include "ccp-dev.h"
23 
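/* Allocate 'count' contiguous LSB entries for a command queue.  The queue's
 * private LSB is tried first; if it is full, fall back to the shared LSB
 * pool, sleeping until entries are freed.  Returns a global LSB entry
 * index, or 0 if the wait was interrupted.
 */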
24 static u32 ccp_lsb_alloc(struct ccp_cmd_queue *cmd_q, unsigned int count)
25 {
26 	struct ccp_device *ccp;
27 	int start;
28 
29 	/* First look at the map for the queue */
30 	if (cmd_q->lsb >= 0) {
31 		start = (u32)bitmap_find_next_zero_area(cmd_q->lsbmap,
32 							LSB_SIZE,
33 							0, count, 0);
34 		if (start < LSB_SIZE) {
35 			bitmap_set(cmd_q->lsbmap, start, count);
36 			return start + cmd_q->lsb * LSB_SIZE;
37 		}
38 	}
39 
40 	/* No joy; try to get an entry from the shared blocks */
41 	ccp = cmd_q->ccp;
42 	for (;;) {
43 		mutex_lock(&ccp->sb_mutex);
44 
45 		start = (u32)bitmap_find_next_zero_area(ccp->lsbmap,
46 							MAX_LSB_CNT * LSB_SIZE,
47 							0,
48 							count, 0);
49 		if (start <= MAX_LSB_CNT * LSB_SIZE) {
50 			bitmap_set(ccp->lsbmap, start, count);
51 
52 			mutex_unlock(&ccp->sb_mutex);
53 			return start; /* an entry index, same units as the private-LSB path */
54 		}
55 
56 		ccp->sb_avail = 0;
57 
58 		mutex_unlock(&ccp->sb_mutex);
59 
60 		/* Wait for LSB entries to become available */
61 		if (wait_event_interruptible(ccp->sb_queue, ccp->sb_avail))
62 			return 0;
63 	}
64 }
65 
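/* Return 'count' LSB entries starting at 'start'.  Entries from the queue's
 * private LSB go back to its own map; shared entries go back to the device
 * map, and any allocators sleeping in ccp_lsb_alloc() are woken.
 */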
66 static void ccp_lsb_free(struct ccp_cmd_queue *cmd_q, unsigned int start,
67 			 unsigned int count)
68 {
69 	int lsbno = start / LSB_SIZE;
70 
71 	if (!start)
72 		return;
73 
74 	if (cmd_q->lsb == lsbno) {
75 		/* An entry from the private LSB */
76 		bitmap_clear(cmd_q->lsbmap, start % LSB_SIZE, count);
77 	} else {
78 		/* From the shared LSBs */
79 		struct ccp_device *ccp = cmd_q->ccp;
80 
81 		mutex_lock(&ccp->sb_mutex);
82 		bitmap_clear(ccp->lsbmap, start, count);
83 		ccp->sb_avail = 1;
84 		mutex_unlock(&ccp->sb_mutex);
85 		wake_up_interruptible_all(&ccp->sb_queue);
86 	}
87 }
88 
89 /* CCP version 5: Union to define the function field (cmd_reg1/dword0) */
90 union ccp_function {
91 	struct {
92 		u16 size:7;
93 		u16 encrypt:1;
94 		u16 mode:5;
95 		u16 type:2;
96 	} aes;
97 	struct {
98 		u16 size:7;
99 		u16 encrypt:1;
100 		u16 rsvd:5;
101 		u16 type:2;
102 	} aes_xts;
103 	struct {
104 		u16 rsvd1:10;
105 		u16 type:4;
106 		u16 rsvd2:1;
107 	} sha;
108 	struct {
109 		u16 mode:3;
110 		u16 size:12;
111 	} rsa;
112 	struct {
113 		u16 byteswap:2;
114 		u16 bitwise:3;
115 		u16 reflect:2;
116 		u16 rsvd:8;
117 	} pt;
118 	struct {
119 		u16 rsvd:13;
120 	} zlib;
121 	struct {
122 		u16 size:10;
123 		u16 type:2;
124 		u16 mode:3;
125 	} ecc;
126 	u16 raw;
127 };
128 
129 #define	CCP_AES_SIZE(p)		((p)->aes.size)
130 #define	CCP_AES_ENCRYPT(p)	((p)->aes.encrypt)
131 #define	CCP_AES_MODE(p)		((p)->aes.mode)
132 #define	CCP_AES_TYPE(p)		((p)->aes.type)
133 #define	CCP_XTS_SIZE(p)		((p)->aes_xts.size)
134 #define	CCP_XTS_ENCRYPT(p)	((p)->aes_xts.encrypt)
135 #define	CCP_SHA_TYPE(p)		((p)->sha.type)
136 #define	CCP_RSA_SIZE(p)		((p)->rsa.size)
137 #define	CCP_PT_BYTESWAP(p)	((p)->pt.byteswap)
138 #define	CCP_PT_BITWISE(p)	((p)->pt.bitwise)
139 #define	CCP_ECC_MODE(p)		((p)->ecc.mode)
140 #define	CCP_ECC_AFFINE(p)	((p)->ecc.one)
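/* For example, ccp5_perform_aes() below packs its function field like this:
 *
 *	function.raw = 0;
 *	CCP_AES_ENCRYPT(&function) = op->u.aes.action;
 *	CCP_AES_MODE(&function) = op->u.aes.mode;
 *	CCP_AES_TYPE(&function) = op->u.aes.type;
 *	CCP5_CMD_FUNCTION(&desc) = function.raw;
 */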
141 
142 /* Word 0 */
143 #define CCP5_CMD_DW0(p)		((p)->dw0)
144 #define CCP5_CMD_SOC(p)		(CCP5_CMD_DW0(p).soc)
145 #define CCP5_CMD_IOC(p)		(CCP5_CMD_DW0(p).ioc)
146 #define CCP5_CMD_INIT(p)	(CCP5_CMD_DW0(p).init)
147 #define CCP5_CMD_EOM(p)		(CCP5_CMD_DW0(p).eom)
148 #define CCP5_CMD_FUNCTION(p)	(CCP5_CMD_DW0(p).function)
149 #define CCP5_CMD_ENGINE(p)	(CCP5_CMD_DW0(p).engine)
150 #define CCP5_CMD_PROT(p)	(CCP5_CMD_DW0(p).prot)
151 
152 /* Word 1 */
153 #define CCP5_CMD_DW1(p)		((p)->length)
154 #define CCP5_CMD_LEN(p)		(CCP5_CMD_DW1(p))
155 
156 /* Word 2 */
157 #define CCP5_CMD_DW2(p)		((p)->src_lo)
158 #define CCP5_CMD_SRC_LO(p)	(CCP5_CMD_DW2(p))
159 
160 /* Word 3 */
161 #define CCP5_CMD_DW3(p)		((p)->dw3)
162 #define CCP5_CMD_SRC_MEM(p)	((p)->dw3.src_mem)
163 #define CCP5_CMD_SRC_HI(p)	((p)->dw3.src_hi)
164 #define CCP5_CMD_LSB_ID(p)	((p)->dw3.lsb_cxt_id)
165 #define CCP5_CMD_FIX_SRC(p)	((p)->dw3.fixed)
166 
167 /* Words 4/5 */
168 #define CCP5_CMD_DW4(p)		((p)->dw4)
169 #define CCP5_CMD_DST_LO(p)	(CCP5_CMD_DW4(p).dst_lo)
170 #define CCP5_CMD_DW5(p)		((p)->dw5.fields.dst_hi)
171 #define CCP5_CMD_DST_HI(p)	(CCP5_CMD_DW5(p))
172 #define CCP5_CMD_DST_MEM(p)	((p)->dw5.fields.dst_mem)
173 #define CCP5_CMD_FIX_DST(p)	((p)->dw5.fields.fixed)
174 #define CCP5_CMD_SHA_LO(p)	((p)->dw4.sha_len_lo)
175 #define CCP5_CMD_SHA_HI(p)	((p)->dw5.sha_len_hi)
176 
177 /* Word 6/7 */
178 #define CCP5_CMD_DW6(p)		((p)->key_lo)
179 #define CCP5_CMD_KEY_LO(p)	(CCP5_CMD_DW6(p))
180 #define CCP5_CMD_DW7(p)		((p)->dw7)
181 #define CCP5_CMD_KEY_HI(p)	((p)->dw7.key_hi)
182 #define CCP5_CMD_KEY_MEM(p)	((p)->dw7.key_mem)
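/* A struct ccp5_desc is eight 32-bit words (Q_DESC_SIZE bytes); the macros
 * above name its fields, and ccp5_do_cmd() copies the eight words verbatim
 * into the queue ring.
 */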
183 
184 static inline u32 low_address(unsigned long addr)
185 {
186 	return (u64)addr & 0x0ffffffff;
187 }
188 
189 static inline u32 high_address(unsigned long addr)
190 {
191 	return ((u64)addr >> 32) & 0x00000ffff;
192 }
193 
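/* Free space is the number of slots between the software tail (qidx) and the
 * hardware head, minus one: a single slot is always left empty so a full
 * ring can be distinguished from an empty one.
 */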
194 static unsigned int ccp5_get_free_slots(struct ccp_cmd_queue *cmd_q)
195 {
196 	unsigned int head_idx, n;
197 	u32 head_lo, queue_start;
198 
199 	queue_start = low_address(cmd_q->qdma_tail);
200 	head_lo = ioread32(cmd_q->reg_head_lo);
201 	head_idx = (head_lo - queue_start) / sizeof(struct ccp5_desc);
202 
203 	n = head_idx + COMMANDS_PER_QUEUE - cmd_q->qidx - 1;
204 
205 	return n % COMMANDS_PER_QUEUE; /* Always one unused spot */
206 }
207 
208 static int ccp5_do_cmd(struct ccp5_desc *desc,
209 		       struct ccp_cmd_queue *cmd_q)
210 {
211 	__le32 *mP;
212 	u32 *dP;
213 	u32 tail;
214 	int i;
215 	int ret = 0;
216 
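	/* Stop-on-completion is converted into interrupt-on-completion;
	 * the wait further down then serializes the command.
	 */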
217 	if (CCP5_CMD_SOC(desc)) {
218 		CCP5_CMD_IOC(desc) = 1;
219 		CCP5_CMD_SOC(desc) = 0;
220 	}
221 	mutex_lock(&cmd_q->q_mutex);
222 
223 	mP = (__le32 *) &cmd_q->qbase[cmd_q->qidx];
224 	dP = (u32 *) desc;
225 	for (i = 0; i < 8; i++)
226 		mP[i] = cpu_to_le32(dP[i]); /* handle endianness */
227 
228 	cmd_q->qidx = (cmd_q->qidx + 1) % COMMANDS_PER_QUEUE;
229 
230 	/* The data used by this command must be flushed to memory */
231 	wmb();
232 
233 	/* Write the new tail address back to the queue register */
234 	tail = low_address(cmd_q->qdma_tail + cmd_q->qidx * Q_DESC_SIZE);
235 	iowrite32(tail, cmd_q->reg_tail_lo);
236 
237 	/* Turn the queue back on using our cached control register */
238 	iowrite32(cmd_q->qcontrol | CMD5_Q_RUN, cmd_q->reg_control);
239 	mutex_unlock(&cmd_q->q_mutex);
240 
241 	if (CCP5_CMD_IOC(desc)) {
242 		/* Wait for the job to complete */
243 		ret = wait_event_interruptible(cmd_q->int_queue,
244 					       cmd_q->int_rcvd);
245 		if (ret || cmd_q->cmd_error) {
246 			if (cmd_q->cmd_error)
247 				ccp_log_error(cmd_q->ccp,
248 					      cmd_q->cmd_error);
249 			/* A version 5 device doesn't use Job IDs... */
250 			if (!ret)
251 				ret = -EIO;
252 		}
253 		cmd_q->int_rcvd = 0;
254 	}
255 
256 	return ret;
257 }
258 
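/* Each ccp5_perform_*() helper below builds a descriptor on the stack, fills
 * in the engine, function and source/destination/key fields for a single
 * operation, and submits it with ccp5_do_cmd().
 */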
259 static int ccp5_perform_aes(struct ccp_op *op)
260 {
261 	struct ccp5_desc desc;
262 	union ccp_function function;
263 	u32 key_addr = op->sb_key * LSB_ITEM_SIZE;
264 
265 	/* Zero out all the fields of the command desc */
266 	memset(&desc, 0, Q_DESC_SIZE);
267 
268 	CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_AES;
269 
270 	CCP5_CMD_SOC(&desc) = op->soc;
271 	CCP5_CMD_IOC(&desc) = 1;
272 	CCP5_CMD_INIT(&desc) = op->init;
273 	CCP5_CMD_EOM(&desc) = op->eom;
274 	CCP5_CMD_PROT(&desc) = 0;
275 
276 	function.raw = 0;
277 	CCP_AES_ENCRYPT(&function) = op->u.aes.action;
278 	CCP_AES_MODE(&function) = op->u.aes.mode;
279 	CCP_AES_TYPE(&function) = op->u.aes.type;
280 	if (op->u.aes.mode == CCP_AES_MODE_CFB)
281 		CCP_AES_SIZE(&function) = 0x7f;
282 
283 	CCP5_CMD_FUNCTION(&desc) = function.raw;
284 
285 	CCP5_CMD_LEN(&desc) = op->src.u.dma.length;
286 
287 	CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
288 	CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
289 	CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
290 
291 	CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
292 	CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
293 	CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
294 
295 	CCP5_CMD_KEY_LO(&desc) = lower_32_bits(key_addr);
296 	CCP5_CMD_KEY_HI(&desc) = 0;
297 	CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SB;
298 	CCP5_CMD_LSB_ID(&desc) = op->sb_ctx;
299 
300 	return ccp5_do_cmd(&desc, op->cmd_q);
301 }
302 
303 static int ccp5_perform_xts_aes(struct ccp_op *op)
304 {
305 	struct ccp5_desc desc;
306 	union ccp_function function;
307 	u32 key_addr = op->sb_key * LSB_ITEM_SIZE;
308 
309 	/* Zero out all the fields of the command desc */
310 	memset(&desc, 0, Q_DESC_SIZE);
311 
312 	CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_XTS_AES_128;
313 
314 	CCP5_CMD_SOC(&desc) = op->soc;
315 	CCP5_CMD_IOC(&desc) = 1;
316 	CCP5_CMD_INIT(&desc) = op->init;
317 	CCP5_CMD_EOM(&desc) = op->eom;
318 	CCP5_CMD_PROT(&desc) = 0;
319 
320 	function.raw = 0;
321 	CCP_XTS_ENCRYPT(&function) = op->u.xts.action;
322 	CCP_XTS_SIZE(&function) = op->u.xts.unit_size;
323 	CCP5_CMD_FUNCTION(&desc) = function.raw;
324 
325 	CCP5_CMD_LEN(&desc) = op->src.u.dma.length;
326 
327 	CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
328 	CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
329 	CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
330 
331 	CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
332 	CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
333 	CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
334 
335 	CCP5_CMD_KEY_LO(&desc) = lower_32_bits(key_addr);
336 	CCP5_CMD_KEY_HI(&desc) = 0;
337 	CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SB;
338 	CCP5_CMD_LSB_ID(&desc) = op->sb_ctx;
339 
340 	return ccp5_do_cmd(&desc, op->cmd_q);
341 }
342 
343 static int ccp5_perform_sha(struct ccp_op *op)
344 {
345 	struct ccp5_desc desc;
346 	union ccp_function function;
347 
348 	/* Zero out all the fields of the command desc */
349 	memset(&desc, 0, Q_DESC_SIZE);
350 
351 	CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_SHA;
352 
353 	CCP5_CMD_SOC(&desc) = op->soc;
354 	CCP5_CMD_IOC(&desc) = 1;
355 	CCP5_CMD_INIT(&desc) = 1;
356 	CCP5_CMD_EOM(&desc) = op->eom;
357 	CCP5_CMD_PROT(&desc) = 0;
358 
359 	function.raw = 0;
360 	CCP_SHA_TYPE(&function) = op->u.sha.type;
361 	CCP5_CMD_FUNCTION(&desc) = function.raw;
362 
363 	CCP5_CMD_LEN(&desc) = op->src.u.dma.length;
364 
365 	CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
366 	CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
367 	CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
368 
369 	CCP5_CMD_LSB_ID(&desc) = op->sb_ctx;
370 
371 	if (op->eom) {
372 		CCP5_CMD_SHA_LO(&desc) = lower_32_bits(op->u.sha.msg_bits);
373 		CCP5_CMD_SHA_HI(&desc) = upper_32_bits(op->u.sha.msg_bits);
374 	} else {
375 		CCP5_CMD_SHA_LO(&desc) = 0;
376 		CCP5_CMD_SHA_HI(&desc) = 0;
377 	}
378 
379 	return ccp5_do_cmd(&desc, op->cmd_q);
380 }
381 
382 static int ccp5_perform_rsa(struct ccp_op *op)
383 {
384 	struct ccp5_desc desc;
385 	union ccp_function function;
386 
387 	/* Zero out all the fields of the command desc */
388 	memset(&desc, 0, Q_DESC_SIZE);
389 
390 	CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_RSA;
391 
392 	CCP5_CMD_SOC(&desc) = op->soc;
393 	CCP5_CMD_IOC(&desc) = 1;
394 	CCP5_CMD_INIT(&desc) = 0;
395 	CCP5_CMD_EOM(&desc) = 1;
396 	CCP5_CMD_PROT(&desc) = 0;
397 
398 	function.raw = 0;
399 	CCP_RSA_SIZE(&function) = op->u.rsa.mod_size;
400 	CCP5_CMD_FUNCTION(&desc) = function.raw;
401 
402 	CCP5_CMD_LEN(&desc) = op->u.rsa.input_len;
403 
404 	/* Source is from external memory */
405 	CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
406 	CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
407 	CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
408 
409 	/* Destination is in external memory */
410 	CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
411 	CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
412 	CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
413 
414 	/* Key (Exponent) is in external memory */
415 	CCP5_CMD_KEY_LO(&desc) = ccp_addr_lo(&op->exp.u.dma);
416 	CCP5_CMD_KEY_HI(&desc) = ccp_addr_hi(&op->exp.u.dma);
417 	CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
418 
419 	return ccp5_do_cmd(&desc, op->cmd_q);
420 }
421 
422 static int ccp5_perform_passthru(struct ccp_op *op)
423 {
424 	struct ccp5_desc desc;
425 	union ccp_function function;
426 	struct ccp_dma_info *saddr = &op->src.u.dma;
427 	struct ccp_dma_info *daddr = &op->dst.u.dma;
428 
429 	memset(&desc, 0, Q_DESC_SIZE);
430 
431 	CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_PASSTHRU;
432 
433 	CCP5_CMD_SOC(&desc) = 0;
434 	CCP5_CMD_IOC(&desc) = 1;
435 	CCP5_CMD_INIT(&desc) = 0;
436 	CCP5_CMD_EOM(&desc) = op->eom;
437 	CCP5_CMD_PROT(&desc) = 0;
438 
439 	function.raw = 0;
440 	CCP_PT_BYTESWAP(&function) = op->u.passthru.byte_swap;
441 	CCP_PT_BITWISE(&function) = op->u.passthru.bit_mod;
442 	CCP5_CMD_FUNCTION(&desc) = function.raw;
443 
444 	/* Take the length from whichever side of the transfer is in system memory */
445 	if (op->src.type == CCP_MEMTYPE_SYSTEM)
446 		CCP5_CMD_LEN(&desc) = saddr->length;
447 	else
448 		CCP5_CMD_LEN(&desc) = daddr->length;
449 
450 	if (op->src.type == CCP_MEMTYPE_SYSTEM) {
451 		CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
452 		CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
453 		CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
454 
455 		if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
456 			CCP5_CMD_LSB_ID(&desc) = op->sb_key;
457 	} else {
458 		u32 key_addr = op->src.u.sb * CCP_SB_BYTES;
459 
460 		CCP5_CMD_SRC_LO(&desc) = lower_32_bits(key_addr);
461 		CCP5_CMD_SRC_HI(&desc) = 0;
462 		CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SB;
463 	}
464 
465 	if (op->dst.type == CCP_MEMTYPE_SYSTEM) {
466 		CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
467 		CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
468 		CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
469 	} else {
470 		u32 key_addr = op->dst.u.sb * CCP_SB_BYTES;
471 
472 		CCP5_CMD_DST_LO(&desc) = lower_32_bits(key_addr);
473 		CCP5_CMD_DST_HI(&desc) = 0;
474 		CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SB;
475 	}
476 
477 	return ccp5_do_cmd(&desc, op->cmd_q);
478 }
479 
480 static int ccp5_perform_ecc(struct ccp_op *op)
481 {
482 	struct ccp5_desc desc;
483 	union ccp_function function;
484 
485 	/* Zero out all the fields of the command desc */
486 	memset(&desc, 0, Q_DESC_SIZE);
487 
488 	CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_ECC;
489 
490 	CCP5_CMD_SOC(&desc) = 0;
491 	CCP5_CMD_IOC(&desc) = 1;
492 	CCP5_CMD_INIT(&desc) = 0;
493 	CCP5_CMD_EOM(&desc) = 1;
494 	CCP5_CMD_PROT(&desc) = 0;
495 
496 	function.raw = 0;
497 	function.ecc.mode = op->u.ecc.function;
498 	CCP5_CMD_FUNCTION(&desc) = function.raw;
499 
500 	CCP5_CMD_LEN(&desc) = op->src.u.dma.length;
501 
502 	CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
503 	CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
504 	CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
505 
506 	CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
507 	CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
508 	CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
509 
510 	return ccp5_do_cmd(&desc, op->cmd_q);
511 }
512 
513 static int ccp_find_lsb_regions(struct ccp_cmd_queue *cmd_q, u64 status)
514 {
515 	int q_mask = 1 << cmd_q->id;
516 	int queues = 0;
517 	int j;
518 
519 	/* Build a bit mask to know which LSBs this queue has access to.
520 	 * Don't bother with segment 0 as it has special privileges.
521 	 */
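	/* 'status' packs one LSB_REGION_WIDTH-bit access field per LSB
	 * region; test this queue's bit, then shift the next field down.
	 */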
522 	for (j = 1; j < MAX_LSB_CNT; j++) {
523 		if (status & q_mask)
524 			bitmap_set(cmd_q->lsbmask, j, 1);
525 		status >>= LSB_REGION_WIDTH;
526 	}
527 	queues = bitmap_weight(cmd_q->lsbmask, MAX_LSB_CNT);
528 	dev_info(cmd_q->ccp->dev, "Queue %d can access %d LSB regions\n",
529 		 cmd_q->id, queues);
530 
531 	return queues ? 0 : -EINVAL;
532 }
533 
534 
535 static int ccp_find_and_assign_lsb_to_q(struct ccp_device *ccp,
536 					int lsb_cnt, int n_lsbs,
537 					unsigned long *lsb_pub)
538 {
539 	DECLARE_BITMAP(qlsb, MAX_LSB_CNT);
540 	int bitno;
541 	int qlsb_wgt;
542 	int i;
543 
544 	/* For each queue:
545 	 * If the count of potential LSBs available to a queue matches the
546 	 * ordinal given to us in lsb_cnt:
547 	 * Copy the mask of possible LSBs for this queue into "qlsb";
548 	 * For each bit in qlsb, see if the corresponding bit in the
549 	 * aggregation mask is set; if so, we have a match.
550 	 *     If we have a match, clear the bit in the aggregation to
551 	 *     mark it as no longer available.
552 	 *     If there is no match, clear the bit in qlsb and keep looking.
553 	 */
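	/* For example, a queue whose lsbmask has weight 1 is handled on the
	 * lsb_cnt == 1 pass, so it claims its only usable LSB before queues
	 * with more choices are considered.
	 */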
554 	for (i = 0; i < ccp->cmd_q_count; i++) {
555 		struct ccp_cmd_queue *cmd_q = &ccp->cmd_q[i];
556 
557 		qlsb_wgt = bitmap_weight(cmd_q->lsbmask, MAX_LSB_CNT);
558 
559 		if (qlsb_wgt == lsb_cnt) {
560 			bitmap_copy(qlsb, cmd_q->lsbmask, MAX_LSB_CNT);
561 
562 			bitno = find_first_bit(qlsb, MAX_LSB_CNT);
563 			while (bitno < MAX_LSB_CNT) {
564 				if (test_bit(bitno, lsb_pub)) {
565 					/* We found an available LSB
566 					 * that this queue can access
567 					 */
568 					cmd_q->lsb = bitno;
569 					bitmap_clear(lsb_pub, bitno, 1);
570 					dev_info(ccp->dev,
571 						 "Queue %d gets LSB %d\n",
572 						 i, bitno);
573 					break;
574 				}
575 				bitmap_clear(qlsb, bitno, 1);
576 				bitno = find_first_bit(qlsb, MAX_LSB_CNT);
577 			}
578 			if (bitno >= MAX_LSB_CNT)
579 				return -EINVAL;
580 			n_lsbs--;
581 		}
582 	}
583 	return n_lsbs;
584 }
585 
586 /* For each queue, from the most- to least-constrained:
587  * find an LSB that can be assigned to the queue. If there are N queues that
588  * can only use M LSBs, where N > M, fail; otherwise, every queue will get a
589  * dedicated LSB. Remaining LSB regions become a shared resource.
590  * If we have fewer LSBs than queues, all LSB regions become shared resources.
591  */
592 static int ccp_assign_lsbs(struct ccp_device *ccp)
593 {
594 	DECLARE_BITMAP(lsb_pub, MAX_LSB_CNT);
595 	DECLARE_BITMAP(qlsb, MAX_LSB_CNT);
596 	int n_lsbs = 0;
597 	int bitno;
598 	int i, lsb_cnt;
599 	int rc = 0;
600 
601 	bitmap_zero(lsb_pub, MAX_LSB_CNT);
602 
603 	/* Create an aggregate bitmap to get a total count of available LSBs */
604 	for (i = 0; i < ccp->cmd_q_count; i++)
605 		bitmap_or(lsb_pub,
606 			  lsb_pub, ccp->cmd_q[i].lsbmask,
607 			  MAX_LSB_CNT);
608 
609 	n_lsbs = bitmap_weight(lsb_pub, MAX_LSB_CNT);
610 
611 	if (n_lsbs >= ccp->cmd_q_count) {
612 		/* We have enough LSBs to give every queue a private LSB.
613 		 * Brute-force search, starting with the queues that are most
614 		 * constrained in LSB choice. When an LSB is privately
615 		 * assigned, it is removed from the public mask.
616 		 * This is an ugly N squared algorithm with some optimization.
617 		 */
618 		for (lsb_cnt = 1;
619 		     n_lsbs && (lsb_cnt <= MAX_LSB_CNT);
620 		     lsb_cnt++) {
621 			rc = ccp_find_and_assign_lsb_to_q(ccp, lsb_cnt, n_lsbs,
622 							  lsb_pub);
623 			if (rc < 0)
624 				return -EINVAL;
625 			n_lsbs = rc;
626 		}
627 	}
628 
629 	rc = 0;
630 	/* What's left of the LSBs, according to the public mask, now become
631 	 * shared. Any zero bits in the lsb_pub mask represent an LSB region
632 	 * that can't be used as a shared resource, so mark the LSB slots for
633 	 * them as "in use".
634 	 */
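	/* Each region that remains in lsb_pub contributes LSB_SIZE shared
	 * entries that ccp_lsb_alloc() can hand out.
	 */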
635 	bitmap_copy(qlsb, lsb_pub, MAX_LSB_CNT);
636 
637 	bitno = find_first_zero_bit(qlsb, MAX_LSB_CNT);
638 	while (bitno < MAX_LSB_CNT) {
639 		bitmap_set(ccp->lsbmap, bitno * LSB_SIZE, LSB_SIZE);
640 		bitmap_set(qlsb, bitno, 1);
641 		bitno = find_first_zero_bit(qlsb, MAX_LSB_CNT);
642 	}
643 
644 	return rc;
645 }
646 
647 static int ccp5_init(struct ccp_device *ccp)
648 {
649 	struct device *dev = ccp->dev;
650 	struct ccp_cmd_queue *cmd_q;
651 	struct dma_pool *dma_pool;
652 	char dma_pool_name[MAX_DMAPOOL_NAME_LEN];
653 	unsigned int qmr, qim, i;
654 	u64 status;
655 	u32 status_lo, status_hi;
656 	int ret;
657 
658 	/* Find available queues */
659 	qim = 0;
660 	qmr = ioread32(ccp->io_regs + Q_MASK_REG);
661 	for (i = 0; i < MAX_HW_QUEUES; i++) {
662 
663 		if (!(qmr & (1 << i)))
664 			continue;
665 
666 		/* Allocate a dma pool for this queue */
667 		snprintf(dma_pool_name, sizeof(dma_pool_name), "%s_q%d",
668 			 ccp->name, i);
669 		dma_pool = dma_pool_create(dma_pool_name, dev,
670 					   CCP_DMAPOOL_MAX_SIZE,
671 					   CCP_DMAPOOL_ALIGN, 0);
672 		if (!dma_pool) {
673 			dev_err(dev, "unable to allocate dma pool\n");
674 			ret = -ENOMEM;
			goto e_pool;
675 		}
676 
677 		cmd_q = &ccp->cmd_q[ccp->cmd_q_count];
678 		ccp->cmd_q_count++;
679 
680 		cmd_q->ccp = ccp;
681 		cmd_q->id = i;
682 		cmd_q->dma_pool = dma_pool;
683 		mutex_init(&cmd_q->q_mutex);
684 
685 		/* Page alignment satisfies our needs for N <= 128 */
686 		BUILD_BUG_ON(COMMANDS_PER_QUEUE > 128);
687 		cmd_q->qsize = Q_SIZE(Q_DESC_SIZE);
688 		cmd_q->qbase = dma_zalloc_coherent(dev, cmd_q->qsize,
689 						   &cmd_q->qbase_dma,
690 						   GFP_KERNEL);
691 		if (!cmd_q->qbase) {
692 			dev_err(dev, "unable to allocate command queue\n");
693 			ret = -ENOMEM;
694 			goto e_pool;
695 		}
696 
697 		cmd_q->qidx = 0;
698 		/* Preset some register values and masks that are queue
699 		 * number dependent
700 		 */
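		/* Queue register banks are CMD5_Q_STATUS_INCR bytes apart,
		 * starting one bank above the device-wide registers.
		 */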
701 		cmd_q->reg_control = ccp->io_regs +
702 				     CMD5_Q_STATUS_INCR * (i + 1);
703 		cmd_q->reg_tail_lo = cmd_q->reg_control + CMD5_Q_TAIL_LO_BASE;
704 		cmd_q->reg_head_lo = cmd_q->reg_control + CMD5_Q_HEAD_LO_BASE;
705 		cmd_q->reg_int_enable = cmd_q->reg_control +
706 					CMD5_Q_INT_ENABLE_BASE;
707 		cmd_q->reg_interrupt_status = cmd_q->reg_control +
708 					      CMD5_Q_INTERRUPT_STATUS_BASE;
709 		cmd_q->reg_status = cmd_q->reg_control + CMD5_Q_STATUS_BASE;
710 		cmd_q->reg_int_status = cmd_q->reg_control +
711 					CMD5_Q_INT_STATUS_BASE;
712 		cmd_q->reg_dma_status = cmd_q->reg_control +
713 					CMD5_Q_DMA_STATUS_BASE;
714 		cmd_q->reg_dma_read_status = cmd_q->reg_control +
715 					     CMD5_Q_DMA_READ_STATUS_BASE;
716 		cmd_q->reg_dma_write_status = cmd_q->reg_control +
717 					      CMD5_Q_DMA_WRITE_STATUS_BASE;
718 
719 		init_waitqueue_head(&cmd_q->int_queue);
720 
721 		dev_dbg(dev, "queue #%u available\n", i);
722 	}
723 	if (ccp->cmd_q_count == 0) {
724 		dev_notice(dev, "no command queues available\n");
725 		ret = -EIO;
726 		goto e_pool;
727 	}
728 	dev_notice(dev, "%u command queues available\n", ccp->cmd_q_count);
729 
730 	/* Turn off the queues and disable interrupts until ready */
731 	for (i = 0; i < ccp->cmd_q_count; i++) {
732 		cmd_q = &ccp->cmd_q[i];
733 
734 		cmd_q->qcontrol = 0; /* Start with nothing */
735 		iowrite32(cmd_q->qcontrol, cmd_q->reg_control);
736 
737 		/* Disable the interrupts */
738 		iowrite32(0x00, cmd_q->reg_int_enable);
739 		ioread32(cmd_q->reg_int_status);
740 		ioread32(cmd_q->reg_status);
741 
742 		/* Clear the interrupts */
743 		iowrite32(ALL_INTERRUPTS, cmd_q->reg_interrupt_status);
744 	}
745 
746 	dev_dbg(dev, "Requesting an IRQ...\n");
747 	/* Request an irq */
748 	ret = ccp->get_irq(ccp);
749 	if (ret) {
750 		dev_err(dev, "unable to allocate an IRQ\n");
751 		goto e_pool;
752 	}
753 
754 	/* Initialize the queue used to suspend */
755 	init_waitqueue_head(&ccp->suspend_queue);
756 
757 	dev_dbg(dev, "Loading LSB map...\n");
758 	/* Copy the private LSB mask to the public registers */
759 	status_lo = ioread32(ccp->io_regs + LSB_PRIVATE_MASK_LO_OFFSET);
760 	status_hi = ioread32(ccp->io_regs + LSB_PRIVATE_MASK_HI_OFFSET);
761 	iowrite32(status_lo, ccp->io_regs + LSB_PUBLIC_MASK_LO_OFFSET);
762 	iowrite32(status_hi, ccp->io_regs + LSB_PUBLIC_MASK_HI_OFFSET);
763 	status = ((u64)status_hi<<30) | (u64)status_lo;
764 
765 	dev_dbg(dev, "Configuring virtual queues...\n");
766 	/* Configure size of each virtual queue accessible to host */
767 	for (i = 0; i < ccp->cmd_q_count; i++) {
768 		u32 dma_addr_lo;
769 		u32 dma_addr_hi;
770 
771 		cmd_q = &ccp->cmd_q[i];
772 
773 		cmd_q->qcontrol &= ~(CMD5_Q_SIZE << CMD5_Q_SHIFT);
774 		cmd_q->qcontrol |= QUEUE_SIZE_VAL << CMD5_Q_SHIFT;
775 
776 		cmd_q->qdma_tail = cmd_q->qbase_dma;
777 		dma_addr_lo = low_address(cmd_q->qdma_tail);
778 		iowrite32((u32)dma_addr_lo, cmd_q->reg_tail_lo);
779 		iowrite32((u32)dma_addr_lo, cmd_q->reg_head_lo);
780 
781 		dma_addr_hi = high_address(cmd_q->qdma_tail);
782 		cmd_q->qcontrol |= (dma_addr_hi << 16);
783 		iowrite32(cmd_q->qcontrol, cmd_q->reg_control);
784 
785 		/* Find the LSB regions accessible to the queue */
786 		ccp_find_lsb_regions(cmd_q, status);
787 		cmd_q->lsb = -1; /* Unassigned value */
788 	}
789 
790 	dev_dbg(dev, "Assigning LSBs...\n");
791 	ret = ccp_assign_lsbs(ccp);
792 	if (ret) {
793 		dev_err(dev, "Unable to assign LSBs (%d)\n", ret);
794 		goto e_irq;
795 	}
796 
797 	/* Optimization: pre-allocate LSB slots for each queue */
798 	for (i = 0; i < ccp->cmd_q_count; i++) {
799 		ccp->cmd_q[i].sb_key = ccp_lsb_alloc(&ccp->cmd_q[i], 2);
800 		ccp->cmd_q[i].sb_ctx = ccp_lsb_alloc(&ccp->cmd_q[i], 2);
801 	}
802 
803 	dev_dbg(dev, "Starting threads...\n");
804 	/* Create a kthread for each queue */
805 	for (i = 0; i < ccp->cmd_q_count; i++) {
806 		struct task_struct *kthread;
807 
808 		cmd_q = &ccp->cmd_q[i];
809 
810 		kthread = kthread_create(ccp_cmd_queue_thread, cmd_q,
811 					 "%s-q%u", ccp->name, cmd_q->id);
812 		if (IS_ERR(kthread)) {
813 			dev_err(dev, "error creating queue thread (%ld)\n",
814 				PTR_ERR(kthread));
815 			ret = PTR_ERR(kthread);
816 			goto e_kthread;
817 		}
818 
819 		cmd_q->kthread = kthread;
820 		wake_up_process(kthread);
821 	}
822 
823 	dev_dbg(dev, "Enabling interrupts...\n");
824 	/* Enable interrupts */
825 	for (i = 0; i < ccp->cmd_q_count; i++) {
826 		cmd_q = &ccp->cmd_q[i];
827 		iowrite32(ALL_INTERRUPTS, cmd_q->reg_int_enable);
828 	}
829 
830 	dev_dbg(dev, "Registering device...\n");
831 	/* Put this on the unit list to make it available */
832 	ccp_add_device(ccp);
833 
834 	ret = ccp_register_rng(ccp);
835 	if (ret)
836 		goto e_kthread;
837 
838 	/* Register the DMA engine support */
839 	ret = ccp_dmaengine_register(ccp);
840 	if (ret)
841 		goto e_hwrng;
842 
843 	return 0;
844 
845 e_hwrng:
846 	ccp_unregister_rng(ccp);
847 
848 e_kthread:
849 	for (i = 0; i < ccp->cmd_q_count; i++)
850 		if (ccp->cmd_q[i].kthread)
851 			kthread_stop(ccp->cmd_q[i].kthread);
852 
853 e_irq:
854 	ccp->free_irq(ccp);
855 
856 e_pool:
857 	for (i = 0; i < ccp->cmd_q_count; i++)
858 		dma_pool_destroy(ccp->cmd_q[i].dma_pool);
859 
860 	return ret;
861 }
862 
863 static void ccp5_destroy(struct ccp_device *ccp)
864 {
865 	struct device *dev = ccp->dev;
866 	struct ccp_cmd_queue *cmd_q;
867 	struct ccp_cmd *cmd;
868 	unsigned int i;
869 
870 	/* Unregister the DMA engine */
871 	ccp_dmaengine_unregister(ccp);
872 
873 	/* Unregister the RNG */
874 	ccp_unregister_rng(ccp);
875 
876 	/* Remove this device from the list of available units first */
877 	ccp_del_device(ccp);
878 
879 	/* Disable and clear interrupts */
880 	for (i = 0; i < ccp->cmd_q_count; i++) {
881 		cmd_q = &ccp->cmd_q[i];
882 
883 		/* Turn off the run bit */
884 		iowrite32(cmd_q->qcontrol & ~CMD5_Q_RUN, cmd_q->reg_control);
885 
886 		/* Clear any pending interrupt status */
887 		iowrite32(ALL_INTERRUPTS, cmd_q->reg_interrupt_status);
888 
889 		/* Disable the interrupts */
890 		iowrite32(0x00, cmd_q->reg_int_enable);
891 		ioread32(cmd_q->reg_int_status);
892 		ioread32(cmd_q->reg_status);
893 	}
894 
895 	/* Stop the queue kthreads */
896 	for (i = 0; i < ccp->cmd_q_count; i++)
897 		if (ccp->cmd_q[i].kthread)
898 			kthread_stop(ccp->cmd_q[i].kthread);
899 
900 	ccp->free_irq(ccp);
901 
902 	for (i = 0; i < ccp->cmd_q_count; i++) {
903 		cmd_q = &ccp->cmd_q[i];
904 		dma_free_coherent(dev, cmd_q->qsize, cmd_q->qbase,
905 				  cmd_q->qbase_dma);
906 	}
907 
908 	/* Flush the cmd and backlog queue */
909 	while (!list_empty(&ccp->cmd)) {
910 		/* Invoke the callback directly with an error code */
911 		cmd = list_first_entry(&ccp->cmd, struct ccp_cmd, entry);
912 		list_del(&cmd->entry);
913 		cmd->callback(cmd->data, -ENODEV);
914 	}
915 	while (!list_empty(&ccp->backlog)) {
916 		/* Invoke the callback directly with an error code */
917 		cmd = list_first_entry(&ccp->backlog, struct ccp_cmd, entry);
918 		list_del(&cmd->entry);
919 		cmd->callback(cmd->data, -ENODEV);
920 	}
921 }
922 
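/* All queues share one interrupt line: latch each queue's status registers
 * for the thread waiting in ccp5_do_cmd(), record the first error seen, then
 * acknowledge the interrupt and wake the waiter.
 */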
923 static irqreturn_t ccp5_irq_handler(int irq, void *data)
924 {
925 	struct device *dev = data;
926 	struct ccp_device *ccp = dev_get_drvdata(dev);
927 	u32 status;
928 	unsigned int i;
929 
930 	for (i = 0; i < ccp->cmd_q_count; i++) {
931 		struct ccp_cmd_queue *cmd_q = &ccp->cmd_q[i];
932 
933 		status = ioread32(cmd_q->reg_interrupt_status);
934 
935 		if (status) {
936 			cmd_q->int_status = status;
937 			cmd_q->q_status = ioread32(cmd_q->reg_status);
938 			cmd_q->q_int_status = ioread32(cmd_q->reg_int_status);
939 
940 			/* On error, only save the first error value */
941 			if ((status & INT_ERROR) && !cmd_q->cmd_error)
942 				cmd_q->cmd_error = CMD_Q_ERROR(cmd_q->q_status);
943 
944 			cmd_q->int_rcvd = 1;
945 
946 			/* Acknowledge the interrupt and wake the kthread */
947 			iowrite32(ALL_INTERRUPTS, cmd_q->reg_interrupt_status);
948 			wake_up_interruptible(&cmd_q->int_queue);
949 		}
950 	}
951 
952 	return IRQ_HANDLED;
953 }
954 
955 static void ccp5_config(struct ccp_device *ccp)
956 {
957 	/* Public side */
958 	iowrite32(0x00001249, ccp->io_regs + CMD5_REQID_CONFIG_OFFSET);
959 }
960 
961 static void ccp5other_config(struct ccp_device *ccp)
962 {
963 	int i;
964 	u32 rnd;
965 
966 	/* We own all of the queues on the NTB CCP */
967 
968 	iowrite32(0x00012D57, ccp->io_regs + CMD5_TRNG_CTL_OFFSET);
969 	iowrite32(0x00000003, ccp->io_regs + CMD5_CONFIG_0_OFFSET);
970 	for (i = 0; i < 12; i++) {
971 		rnd = ioread32(ccp->io_regs + TRNG_OUT_REG);
972 		iowrite32(rnd, ccp->io_regs + CMD5_AES_MASK_OFFSET);
973 	}
974 
975 	iowrite32(0x0000001F, ccp->io_regs + CMD5_QUEUE_MASK_OFFSET);
976 	iowrite32(0x00005B6D, ccp->io_regs + CMD5_QUEUE_PRIO_OFFSET);
977 	iowrite32(0x00000000, ccp->io_regs + CMD5_CMD_TIMEOUT_OFFSET);
978 
979 	iowrite32(0x3FFFFFFF, ccp->io_regs + LSB_PRIVATE_MASK_LO_OFFSET);
980 	iowrite32(0x000003FF, ccp->io_regs + LSB_PRIVATE_MASK_HI_OFFSET);
981 
982 	iowrite32(0x00108823, ccp->io_regs + CMD5_CLK_GATE_CTL_OFFSET);
983 
984 	ccp5_config(ccp);
985 }
986 
987 /* Version 5 adds some function, but is essentially the same as v3 */
988 static const struct ccp_actions ccp5_actions = {
989 	.aes = ccp5_perform_aes,
990 	.xts_aes = ccp5_perform_xts_aes,
991 	.sha = ccp5_perform_sha,
992 	.rsa = ccp5_perform_rsa,
993 	.passthru = ccp5_perform_passthru,
994 	.ecc = ccp5_perform_ecc,
995 	.sballoc = ccp_lsb_alloc,
996 	.sbfree = ccp_lsb_free,
997 	.init = ccp5_init,
998 	.destroy = ccp5_destroy,
999 	.get_free_slots = ccp5_get_free_slots,
1000 	.irqhandler = ccp5_irq_handler,
1001 };
1002 
1003 const struct ccp_vdata ccpv5a = {
1004 	.version = CCP_VERSION(5, 0),
1005 	.setup = ccp5_config,
1006 	.perform = &ccp5_actions,
1007 	.bar = 2,
1008 	.offset = 0x0,
1009 };
1010 
1011 const struct ccp_vdata ccpv5b = {
1012 	.version = CCP_VERSION(5, 0),
1013 	.setup = ccp5other_config,
1014 	.perform = &ccp5_actions,
1015 	.bar = 2,
1016 	.offset = 0x0,
1017 };
1018