xref: /openbmc/linux/drivers/crypto/ccp/ccp-ops.c (revision 4f3db074)
1 /*
2  * AMD Cryptographic Coprocessor (CCP) driver
3  *
4  * Copyright (C) 2013 Advanced Micro Devices, Inc.
5  *
6  * Author: Tom Lendacky <thomas.lendacky@amd.com>
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 as
10  * published by the Free Software Foundation.
11  */
12 
13 #include <linux/module.h>
14 #include <linux/kernel.h>
15 #include <linux/pci.h>
16 #include <linux/pci_ids.h>
17 #include <linux/kthread.h>
18 #include <linux/sched.h>
19 #include <linux/interrupt.h>
20 #include <linux/spinlock.h>
21 #include <linux/mutex.h>
22 #include <linux/delay.h>
23 #include <linux/ccp.h>
24 #include <linux/scatterlist.h>
25 #include <crypto/scatterwalk.h>
26 #include <crypto/sha.h>
27 
28 #include "ccp-dev.h"
29 
30 enum ccp_memtype {
31 	CCP_MEMTYPE_SYSTEM = 0,
32 	CCP_MEMTYPE_KSB,
33 	CCP_MEMTYPE_LOCAL,
34 	CCP_MEMTYPE__LAST,
35 };
36 
37 struct ccp_dma_info {
38 	dma_addr_t address;
39 	unsigned int offset;
40 	unsigned int length;
41 	enum dma_data_direction dir;
42 };
43 
44 struct ccp_dm_workarea {
45 	struct device *dev;
46 	struct dma_pool *dma_pool;
47 	unsigned int length;
48 
49 	u8 *address;
50 	struct ccp_dma_info dma;
51 };
52 
53 struct ccp_sg_workarea {
54 	struct scatterlist *sg;
55 	unsigned int nents;
56 	unsigned int length;
57 
58 	struct scatterlist *dma_sg;
59 	struct device *dma_dev;
60 	unsigned int dma_count;
61 	enum dma_data_direction dma_dir;
62 
63 	unsigned int sg_used;
64 
65 	u64 bytes_left;
66 };
67 
68 struct ccp_data {
69 	struct ccp_sg_workarea sg_wa;
70 	struct ccp_dm_workarea dm_wa;
71 };
72 
73 struct ccp_mem {
74 	enum ccp_memtype type;
75 	union {
76 		struct ccp_dma_info dma;
77 		u32 ksb;
78 	} u;
79 };
80 
81 struct ccp_aes_op {
82 	enum ccp_aes_type type;
83 	enum ccp_aes_mode mode;
84 	enum ccp_aes_action action;
85 };
86 
87 struct ccp_xts_aes_op {
88 	enum ccp_aes_action action;
89 	enum ccp_xts_aes_unit_size unit_size;
90 };
91 
92 struct ccp_sha_op {
93 	enum ccp_sha_type type;
94 	u64 msg_bits;
95 };
96 
97 struct ccp_rsa_op {
98 	u32 mod_size;
99 	u32 input_len;
100 };
101 
102 struct ccp_passthru_op {
103 	enum ccp_passthru_bitwise bit_mod;
104 	enum ccp_passthru_byteswap byte_swap;
105 };
106 
107 struct ccp_ecc_op {
108 	enum ccp_ecc_function function;
109 };
110 
111 struct ccp_op {
112 	struct ccp_cmd_queue *cmd_q;
113 
114 	u32 jobid;
115 	u32 ioc;
116 	u32 soc;
117 	u32 ksb_key;
118 	u32 ksb_ctx;
119 	u32 init;
120 	u32 eom;
121 
122 	struct ccp_mem src;
123 	struct ccp_mem dst;
124 
125 	union {
126 		struct ccp_aes_op aes;
127 		struct ccp_xts_aes_op xts;
128 		struct ccp_sha_op sha;
129 		struct ccp_rsa_op rsa;
130 		struct ccp_passthru_op passthru;
131 		struct ccp_ecc_op ecc;
132 	} u;
133 };
134 
135 /* SHA initial context values */
136 static const __be32 ccp_sha1_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
137 	cpu_to_be32(SHA1_H0), cpu_to_be32(SHA1_H1),
138 	cpu_to_be32(SHA1_H2), cpu_to_be32(SHA1_H3),
139 	cpu_to_be32(SHA1_H4), 0, 0, 0,
140 };
141 
142 static const __be32 ccp_sha224_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
143 	cpu_to_be32(SHA224_H0), cpu_to_be32(SHA224_H1),
144 	cpu_to_be32(SHA224_H2), cpu_to_be32(SHA224_H3),
145 	cpu_to_be32(SHA224_H4), cpu_to_be32(SHA224_H5),
146 	cpu_to_be32(SHA224_H6), cpu_to_be32(SHA224_H7),
147 };
148 
149 static const __be32 ccp_sha256_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
150 	cpu_to_be32(SHA256_H0), cpu_to_be32(SHA256_H1),
151 	cpu_to_be32(SHA256_H2), cpu_to_be32(SHA256_H3),
152 	cpu_to_be32(SHA256_H4), cpu_to_be32(SHA256_H5),
153 	cpu_to_be32(SHA256_H6), cpu_to_be32(SHA256_H7),
154 };
155 
156 /* The CCP cannot perform zero-length SHA operations, so the caller
157  * is required to buffer data for the final operation.  However, a
158  * SHA operation for a message with a total length of zero is valid,
159  * so known values are required to supply the result.
160  */
161 static const u8 ccp_sha1_zero[CCP_SHA_CTXSIZE] = {
162 	0xda, 0x39, 0xa3, 0xee, 0x5e, 0x6b, 0x4b, 0x0d,
163 	0x32, 0x55, 0xbf, 0xef, 0x95, 0x60, 0x18, 0x90,
164 	0xaf, 0xd8, 0x07, 0x09, 0x00, 0x00, 0x00, 0x00,
165 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
166 };
167 
168 static const u8 ccp_sha224_zero[CCP_SHA_CTXSIZE] = {
169 	0xd1, 0x4a, 0x02, 0x8c, 0x2a, 0x3a, 0x2b, 0xc9,
170 	0x47, 0x61, 0x02, 0xbb, 0x28, 0x82, 0x34, 0xc4,
171 	0x15, 0xa2, 0xb0, 0x1f, 0x82, 0x8e, 0xa6, 0x2a,
172 	0xc5, 0xb3, 0xe4, 0x2f, 0x00, 0x00, 0x00, 0x00,
173 };
174 
175 static const u8 ccp_sha256_zero[CCP_SHA_CTXSIZE] = {
176 	0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14,
177 	0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24,
178 	0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c,
179 	0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55,
180 };
181 
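/* Helpers to split a DMA address into the 32 low bits and the 16 high
 * bits placed in the command registers (only 48 address bits are
 * passed to the CCP).
 */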
182 static u32 ccp_addr_lo(struct ccp_dma_info *info)
183 {
184 	return lower_32_bits(info->address + info->offset);
185 }
186 
187 static u32 ccp_addr_hi(struct ccp_dma_info *info)
188 {
189 	return upper_32_bits(info->address + info->offset) & 0x0000ffff;
190 }
191 
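/* Write the job described by the cr[] values to the command registers
 * (REQ1 through REQn, then REQ0 to start the engine) and, if an
 * interrupt on completion was requested, wait for the job to finish
 * and clean up failed or stop-on-complete jobs from the queue.
 */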
192 static int ccp_do_cmd(struct ccp_op *op, u32 *cr, unsigned int cr_count)
193 {
194 	struct ccp_cmd_queue *cmd_q = op->cmd_q;
195 	struct ccp_device *ccp = cmd_q->ccp;
196 	void __iomem *cr_addr;
197 	u32 cr0, cmd;
198 	unsigned int i;
199 	int ret = 0;
200 
201 	/* We could read a status register to see how many free slots
202 	 * are actually available, but reading that register resets it
203 	 * and you could lose some error information.
204 	 */
205 	cmd_q->free_slots--;
206 
207 	cr0 = (cmd_q->id << REQ0_CMD_Q_SHIFT)
208 	      | (op->jobid << REQ0_JOBID_SHIFT)
209 	      | REQ0_WAIT_FOR_WRITE;
210 
211 	if (op->soc)
212 		cr0 |= REQ0_STOP_ON_COMPLETE
213 		       | REQ0_INT_ON_COMPLETE;
214 
215 	if (op->ioc || !cmd_q->free_slots)
216 		cr0 |= REQ0_INT_ON_COMPLETE;
217 
218 	/* Start at CMD_REQ1 */
219 	cr_addr = ccp->io_regs + CMD_REQ0 + CMD_REQ_INCR;
220 
221 	mutex_lock(&ccp->req_mutex);
222 
223 	/* Write CMD_REQ1 through CMD_REQx first */
224 	for (i = 0; i < cr_count; i++, cr_addr += CMD_REQ_INCR)
225 		iowrite32(*(cr + i), cr_addr);
226 
227 	/* Tell the CCP to start */
228 	wmb();
229 	iowrite32(cr0, ccp->io_regs + CMD_REQ0);
230 
231 	mutex_unlock(&ccp->req_mutex);
232 
233 	if (cr0 & REQ0_INT_ON_COMPLETE) {
234 		/* Wait for the job to complete */
235 		ret = wait_event_interruptible(cmd_q->int_queue,
236 					       cmd_q->int_rcvd);
237 		if (ret || cmd_q->cmd_error) {
238 			/* On error delete all related jobs from the queue */
239 			cmd = (cmd_q->id << DEL_Q_ID_SHIFT)
240 			      | op->jobid;
241 
242 			iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);
243 
244 			if (!ret)
245 				ret = -EIO;
246 		} else if (op->soc) {
247 			/* Delete just head job from the queue on SoC */
248 			cmd = DEL_Q_ACTIVE
249 			      | (cmd_q->id << DEL_Q_ID_SHIFT)
250 			      | op->jobid;
251 
252 			iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);
253 		}
254 
255 		cmd_q->free_slots = CMD_Q_DEPTH(cmd_q->q_status);
256 
257 		cmd_q->int_rcvd = 0;
258 	}
259 
260 	return ret;
261 }
262 
263 static int ccp_perform_aes(struct ccp_op *op)
264 {
265 	u32 cr[6];
266 
267 	/* Fill out the register contents for REQ1 through REQ6 */
268 	cr[0] = (CCP_ENGINE_AES << REQ1_ENGINE_SHIFT)
269 		| (op->u.aes.type << REQ1_AES_TYPE_SHIFT)
270 		| (op->u.aes.mode << REQ1_AES_MODE_SHIFT)
271 		| (op->u.aes.action << REQ1_AES_ACTION_SHIFT)
272 		| (op->ksb_key << REQ1_KEY_KSB_SHIFT);
273 	cr[1] = op->src.u.dma.length - 1;
274 	cr[2] = ccp_addr_lo(&op->src.u.dma);
275 	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
276 		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
277 		| ccp_addr_hi(&op->src.u.dma);
278 	cr[4] = ccp_addr_lo(&op->dst.u.dma);
279 	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
280 		| ccp_addr_hi(&op->dst.u.dma);
281 
282 	if (op->u.aes.mode == CCP_AES_MODE_CFB)
283 		cr[0] |= ((0x7f) << REQ1_AES_CFB_SIZE_SHIFT);
284 
285 	if (op->eom)
286 		cr[0] |= REQ1_EOM;
287 
288 	if (op->init)
289 		cr[0] |= REQ1_INIT;
290 
291 	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
292 }
293 
294 static int ccp_perform_xts_aes(struct ccp_op *op)
295 {
296 	u32 cr[6];
297 
298 	/* Fill out the register contents for REQ1 through REQ6 */
299 	cr[0] = (CCP_ENGINE_XTS_AES_128 << REQ1_ENGINE_SHIFT)
300 		| (op->u.xts.action << REQ1_AES_ACTION_SHIFT)
301 		| (op->u.xts.unit_size << REQ1_XTS_AES_SIZE_SHIFT)
302 		| (op->ksb_key << REQ1_KEY_KSB_SHIFT);
303 	cr[1] = op->src.u.dma.length - 1;
304 	cr[2] = ccp_addr_lo(&op->src.u.dma);
305 	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
306 		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
307 		| ccp_addr_hi(&op->src.u.dma);
308 	cr[4] = ccp_addr_lo(&op->dst.u.dma);
309 	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
310 		| ccp_addr_hi(&op->dst.u.dma);
311 
312 	if (op->eom)
313 		cr[0] |= REQ1_EOM;
314 
315 	if (op->init)
316 		cr[0] |= REQ1_INIT;
317 
318 	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
319 }
320 
321 static int ccp_perform_sha(struct ccp_op *op)
322 {
323 	u32 cr[6];
324 
325 	/* Fill out the register contents for REQ1 through REQ6 */
326 	cr[0] = (CCP_ENGINE_SHA << REQ1_ENGINE_SHIFT)
327 		| (op->u.sha.type << REQ1_SHA_TYPE_SHIFT)
328 		| REQ1_INIT;
329 	cr[1] = op->src.u.dma.length - 1;
330 	cr[2] = ccp_addr_lo(&op->src.u.dma);
331 	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
332 		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
333 		| ccp_addr_hi(&op->src.u.dma);
334 
335 	if (op->eom) {
336 		cr[0] |= REQ1_EOM;
337 		cr[4] = lower_32_bits(op->u.sha.msg_bits);
338 		cr[5] = upper_32_bits(op->u.sha.msg_bits);
339 	} else {
340 		cr[4] = 0;
341 		cr[5] = 0;
342 	}
343 
344 	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
345 }
346 
347 static int ccp_perform_rsa(struct ccp_op *op)
348 {
349 	u32 cr[6];
350 
351 	/* Fill out the register contents for REQ1 through REQ6 */
352 	cr[0] = (CCP_ENGINE_RSA << REQ1_ENGINE_SHIFT)
353 		| (op->u.rsa.mod_size << REQ1_RSA_MOD_SIZE_SHIFT)
354 		| (op->ksb_key << REQ1_KEY_KSB_SHIFT)
355 		| REQ1_EOM;
356 	cr[1] = op->u.rsa.input_len - 1;
357 	cr[2] = ccp_addr_lo(&op->src.u.dma);
358 	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
359 		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
360 		| ccp_addr_hi(&op->src.u.dma);
361 	cr[4] = ccp_addr_lo(&op->dst.u.dma);
362 	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
363 		| ccp_addr_hi(&op->dst.u.dma);
364 
365 	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
366 }
367 
368 static int ccp_perform_passthru(struct ccp_op *op)
369 {
370 	u32 cr[6];
371 
372 	/* Fill out the register contents for REQ1 through REQ6 */
373 	cr[0] = (CCP_ENGINE_PASSTHRU << REQ1_ENGINE_SHIFT)
374 		| (op->u.passthru.bit_mod << REQ1_PT_BW_SHIFT)
375 		| (op->u.passthru.byte_swap << REQ1_PT_BS_SHIFT);
376 
377 	if (op->src.type == CCP_MEMTYPE_SYSTEM)
378 		cr[1] = op->src.u.dma.length - 1;
379 	else
380 		cr[1] = op->dst.u.dma.length - 1;
381 
382 	if (op->src.type == CCP_MEMTYPE_SYSTEM) {
383 		cr[2] = ccp_addr_lo(&op->src.u.dma);
384 		cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
385 			| ccp_addr_hi(&op->src.u.dma);
386 
387 		if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
388 			cr[3] |= (op->ksb_key << REQ4_KSB_SHIFT);
389 	} else {
390 		cr[2] = op->src.u.ksb * CCP_KSB_BYTES;
391 		cr[3] = (CCP_MEMTYPE_KSB << REQ4_MEMTYPE_SHIFT);
392 	}
393 
394 	if (op->dst.type == CCP_MEMTYPE_SYSTEM) {
395 		cr[4] = ccp_addr_lo(&op->dst.u.dma);
396 		cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
397 			| ccp_addr_hi(&op->dst.u.dma);
398 	} else {
399 		cr[4] = op->dst.u.ksb * CCP_KSB_BYTES;
400 		cr[5] = (CCP_MEMTYPE_KSB << REQ6_MEMTYPE_SHIFT);
401 	}
402 
403 	if (op->eom)
404 		cr[0] |= REQ1_EOM;
405 
406 	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
407 }
408 
409 static int ccp_perform_ecc(struct ccp_op *op)
410 {
411 	u32 cr[6];
412 
413 	/* Fill out the register contents for REQ1 through REQ6 */
414 	cr[0] = REQ1_ECC_AFFINE_CONVERT
415 		| (CCP_ENGINE_ECC << REQ1_ENGINE_SHIFT)
416 		| (op->u.ecc.function << REQ1_ECC_FUNCTION_SHIFT)
417 		| REQ1_EOM;
418 	cr[1] = op->src.u.dma.length - 1;
419 	cr[2] = ccp_addr_lo(&op->src.u.dma);
420 	cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
421 		| ccp_addr_hi(&op->src.u.dma);
422 	cr[4] = ccp_addr_lo(&op->dst.u.dma);
423 	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
424 		| ccp_addr_hi(&op->dst.u.dma);
425 
426 	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
427 }
428 
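/* Reserve a contiguous range of KSB entries, sleeping until entries
 * become available.  Returns the first entry offset by KSB_START, or
 * 0 if the wait was interrupted.
 */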
429 static u32 ccp_alloc_ksb(struct ccp_device *ccp, unsigned int count)
430 {
431 	int start;
432 
433 	for (;;) {
434 		mutex_lock(&ccp->ksb_mutex);
435 
436 		start = (u32)bitmap_find_next_zero_area(ccp->ksb,
437 							ccp->ksb_count,
438 							ccp->ksb_start,
439 							count, 0);
440 		if (start <= ccp->ksb_count) {
441 			bitmap_set(ccp->ksb, start, count);
442 
443 			mutex_unlock(&ccp->ksb_mutex);
444 			break;
445 		}
446 
447 		ccp->ksb_avail = 0;
448 
449 		mutex_unlock(&ccp->ksb_mutex);
450 
451 		/* Wait for KSB entries to become available */
452 		if (wait_event_interruptible(ccp->ksb_queue, ccp->ksb_avail))
453 			return 0;
454 	}
455 
456 	return KSB_START + start;
457 }
458 
459 static void ccp_free_ksb(struct ccp_device *ccp, unsigned int start,
460 			 unsigned int count)
461 {
462 	if (!start)
463 		return;
464 
465 	mutex_lock(&ccp->ksb_mutex);
466 
467 	bitmap_clear(ccp->ksb, start - KSB_START, count);
468 
469 	ccp->ksb_avail = 1;
470 
471 	mutex_unlock(&ccp->ksb_mutex);
472 
473 	wake_up_interruptible_all(&ccp->ksb_queue);
474 }
475 
476 static u32 ccp_gen_jobid(struct ccp_device *ccp)
477 {
478 	return atomic_inc_return(&ccp->current_id) & CCP_JOBID_MASK;
479 }
480 
481 static void ccp_sg_free(struct ccp_sg_workarea *wa)
482 {
483 	if (wa->dma_count)
484 		dma_unmap_sg(wa->dma_dev, wa->dma_sg, wa->nents, wa->dma_dir);
485 
486 	wa->dma_count = 0;
487 }
488 
489 static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev,
490 				struct scatterlist *sg, u64 len,
491 				enum dma_data_direction dma_dir)
492 {
493 	memset(wa, 0, sizeof(*wa));
494 
495 	wa->sg = sg;
496 	if (!sg)
497 		return 0;
498 
499 	wa->nents = sg_nents(sg);
500 	wa->length = sg->length;
501 	wa->bytes_left = len;
502 	wa->sg_used = 0;
503 
504 	if (len == 0)
505 		return 0;
506 
507 	if (dma_dir == DMA_NONE)
508 		return 0;
509 
510 	wa->dma_sg = sg;
511 	wa->dma_dev = dev;
512 	wa->dma_dir = dma_dir;
513 	wa->dma_count = dma_map_sg(dev, sg, wa->nents, dma_dir);
514 	if (!wa->dma_count)
515 		return -ENOMEM;
516 
517 	return 0;
518 }
519 
520 static void ccp_update_sg_workarea(struct ccp_sg_workarea *wa, unsigned int len)
521 {
522 	unsigned int nbytes = min_t(u64, len, wa->bytes_left);
523 
524 	if (!wa->sg)
525 		return;
526 
527 	wa->sg_used += nbytes;
528 	wa->bytes_left -= nbytes;
529 	if (wa->sg_used == wa->sg->length) {
530 		wa->sg = sg_next(wa->sg);
531 		wa->sg_used = 0;
532 	}
533 }
534 
535 static void ccp_dm_free(struct ccp_dm_workarea *wa)
536 {
537 	if (wa->length <= CCP_DMAPOOL_MAX_SIZE) {
538 		if (wa->address)
539 			dma_pool_free(wa->dma_pool, wa->address,
540 				      wa->dma.address);
541 	} else {
542 		if (wa->dma.address)
543 			dma_unmap_single(wa->dev, wa->dma.address, wa->length,
544 					 wa->dma.dir);
545 		kfree(wa->address);
546 	}
547 
548 	wa->address = NULL;
549 	wa->dma.address = 0;
550 }
551 
552 static int ccp_init_dm_workarea(struct ccp_dm_workarea *wa,
553 				struct ccp_cmd_queue *cmd_q,
554 				unsigned int len,
555 				enum dma_data_direction dir)
556 {
557 	memset(wa, 0, sizeof(*wa));
558 
559 	if (!len)
560 		return 0;
561 
562 	wa->dev = cmd_q->ccp->dev;
563 	wa->length = len;
564 
565 	if (len <= CCP_DMAPOOL_MAX_SIZE) {
566 		wa->dma_pool = cmd_q->dma_pool;
567 
568 		wa->address = dma_pool_alloc(wa->dma_pool, GFP_KERNEL,
569 					     &wa->dma.address);
570 		if (!wa->address)
571 			return -ENOMEM;
572 
573 		wa->dma.length = CCP_DMAPOOL_MAX_SIZE;
574 
575 		memset(wa->address, 0, CCP_DMAPOOL_MAX_SIZE);
576 	} else {
577 		wa->address = kzalloc(len, GFP_KERNEL);
578 		if (!wa->address)
579 			return -ENOMEM;
580 
581 		wa->dma.address = dma_map_single(wa->dev, wa->address, len,
582 						 dir);
583 		if (!wa->dma.address)
584 			return -ENOMEM;
585 
586 		wa->dma.length = len;
587 	}
588 	wa->dma.dir = dir;
589 
590 	return 0;
591 }
592 
593 static void ccp_set_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
594 			    struct scatterlist *sg, unsigned int sg_offset,
595 			    unsigned int len)
596 {
597 	WARN_ON(!wa->address);
598 
599 	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
600 				 0);
601 }
602 
603 static void ccp_get_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
604 			    struct scatterlist *sg, unsigned int sg_offset,
605 			    unsigned int len)
606 {
607 	WARN_ON(!wa->address);
608 
609 	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
610 				 1);
611 }
612 
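/* Copy a big endian value from a scatterlist into the work area in
 * little endian form: chunks of up to se_len bytes are taken from the
 * end of the source and byte-reversed, optionally sign-extending a
 * short final chunk.
 */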
613 static void ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa,
614 				    struct scatterlist *sg,
615 				    unsigned int len, unsigned int se_len,
616 				    bool sign_extend)
617 {
618 	unsigned int nbytes, sg_offset, dm_offset, ksb_len, i;
619 	u8 buffer[CCP_REVERSE_BUF_SIZE];
620 
621 	BUG_ON(se_len > sizeof(buffer));
622 
623 	sg_offset = len;
624 	dm_offset = 0;
625 	nbytes = len;
626 	while (nbytes) {
627 		ksb_len = min_t(unsigned int, nbytes, se_len);
628 		sg_offset -= ksb_len;
629 
630 		scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 0);
631 		for (i = 0; i < ksb_len; i++)
632 			wa->address[dm_offset + i] = buffer[ksb_len - i - 1];
633 
634 		dm_offset += ksb_len;
635 		nbytes -= ksb_len;
636 
637 		if ((ksb_len != se_len) && sign_extend) {
638 			/* Must sign-extend to nearest sign-extend length */
639 			if (wa->address[dm_offset - 1] & 0x80)
640 				memset(wa->address + dm_offset, 0xff,
641 				       se_len - ksb_len);
642 		}
643 	}
644 }
645 
646 static void ccp_reverse_get_dm_area(struct ccp_dm_workarea *wa,
647 				    struct scatterlist *sg,
648 				    unsigned int len)
649 {
650 	unsigned int nbytes, sg_offset, dm_offset, ksb_len, i;
651 	u8 buffer[CCP_REVERSE_BUF_SIZE];
652 
653 	sg_offset = 0;
654 	dm_offset = len;
655 	nbytes = len;
656 	while (nbytes) {
657 		ksb_len = min_t(unsigned int, nbytes, sizeof(buffer));
658 		dm_offset -= ksb_len;
659 
660 		for (i = 0; i < ksb_len; i++)
661 			buffer[ksb_len - i - 1] = wa->address[dm_offset + i];
662 		scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 1);
663 
664 		sg_offset += ksb_len;
665 		nbytes -= ksb_len;
666 	}
667 }
668 
669 static void ccp_free_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q)
670 {
671 	ccp_dm_free(&data->dm_wa);
672 	ccp_sg_free(&data->sg_wa);
673 }
674 
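/* Map the caller's scatterlist for DMA and allocate a bounce buffer
 * (dm work area) of dm_len bytes for data that must be staged.
 */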
675 static int ccp_init_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q,
676 			 struct scatterlist *sg, u64 sg_len,
677 			 unsigned int dm_len,
678 			 enum dma_data_direction dir)
679 {
680 	int ret;
681 
682 	memset(data, 0, sizeof(*data));
683 
684 	ret = ccp_init_sg_workarea(&data->sg_wa, cmd_q->ccp->dev, sg, sg_len,
685 				   dir);
686 	if (ret)
687 		goto e_err;
688 
689 	ret = ccp_init_dm_workarea(&data->dm_wa, cmd_q, dm_len, dir);
690 	if (ret)
691 		goto e_err;
692 
693 	return 0;
694 
695 e_err:
696 	ccp_free_data(data, cmd_q);
697 
698 	return ret;
699 }
700 
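/* Copy data between the scatterlist work area and the bounce buffer:
 * from == 0 fills the buffer from the scatterlist, from == 1 empties
 * it back out.  Returns the number of bytes accounted for.
 */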
701 static unsigned int ccp_queue_buf(struct ccp_data *data, unsigned int from)
702 {
703 	struct ccp_sg_workarea *sg_wa = &data->sg_wa;
704 	struct ccp_dm_workarea *dm_wa = &data->dm_wa;
705 	unsigned int buf_count, nbytes;
706 
707 	/* Clear the buffer if setting it */
708 	if (!from)
709 		memset(dm_wa->address, 0, dm_wa->length);
710 
711 	if (!sg_wa->sg)
712 		return 0;
713 
714 	/* Perform the copy operation
715 	 *   nbytes will always be <= UINT_MAX because dm_wa->length is
716 	 *   an unsigned int
717 	 */
718 	nbytes = min_t(u64, sg_wa->bytes_left, dm_wa->length);
719 	scatterwalk_map_and_copy(dm_wa->address, sg_wa->sg, sg_wa->sg_used,
720 				 nbytes, from);
721 
722 	/* Update the structures and generate the count */
723 	buf_count = 0;
724 	while (sg_wa->bytes_left && (buf_count < dm_wa->length)) {
725 		nbytes = min(sg_wa->sg->length - sg_wa->sg_used,
726 			     dm_wa->length - buf_count);
727 		nbytes = min_t(u64, sg_wa->bytes_left, nbytes);
728 
729 		buf_count += nbytes;
730 		ccp_update_sg_workarea(sg_wa, nbytes);
731 	}
732 
733 	return buf_count;
734 }
735 
736 static unsigned int ccp_fill_queue_buf(struct ccp_data *data)
737 {
738 	return ccp_queue_buf(data, 0);
739 }
740 
741 static unsigned int ccp_empty_queue_buf(struct ccp_data *data)
742 {
743 	return ccp_queue_buf(data, 1);
744 }
745 
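/* Set up the source and (optional) destination DMA information for
 * the next operation.  When a scatterlist entry holds less than
 * block_size bytes the bounce buffer is used instead and the
 * operation is marked stop-on-complete so the buffer can be reused.
 */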
746 static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst,
747 			     struct ccp_op *op, unsigned int block_size,
748 			     bool blocksize_op)
749 {
750 	unsigned int sg_src_len, sg_dst_len, op_len;
751 
752 	/* The CCP can only DMA from/to one address each per operation. This
753 	 * requires that we find the smallest DMA area between the source
754 	 * and destination. The resulting len values will always be <= UINT_MAX
755 	 * because the dma length is an unsigned int.
756 	 */
757 	sg_src_len = sg_dma_len(src->sg_wa.sg) - src->sg_wa.sg_used;
758 	sg_src_len = min_t(u64, src->sg_wa.bytes_left, sg_src_len);
759 
760 	if (dst) {
761 		sg_dst_len = sg_dma_len(dst->sg_wa.sg) - dst->sg_wa.sg_used;
762 		sg_dst_len = min_t(u64, src->sg_wa.bytes_left, sg_dst_len);
763 		op_len = min(sg_src_len, sg_dst_len);
764 	} else {
765 		op_len = sg_src_len;
766 	}
767 
768 	/* The data operation length will be the smaller of the sg room
769 	 * remaining for the source or the destination, but never less
770 	 * than block_size
771 	 */
772 	op_len = max(op_len, block_size);
773 
774 	/* Unless we have to buffer data, there's no reason to wait */
775 	op->soc = 0;
776 
777 	if (sg_src_len < block_size) {
778 		/* Not enough data in the sg element, so it
779 		 * needs to be buffered into a blocksize chunk
780 		 */
781 		int cp_len = ccp_fill_queue_buf(src);
782 
783 		op->soc = 1;
784 		op->src.u.dma.address = src->dm_wa.dma.address;
785 		op->src.u.dma.offset = 0;
786 		op->src.u.dma.length = (blocksize_op) ? block_size : cp_len;
787 	} else {
788 		/* Enough data in the sg element, but we need to
789 		 * adjust for any previously copied data
790 		 */
791 		op->src.u.dma.address = sg_dma_address(src->sg_wa.sg);
792 		op->src.u.dma.offset = src->sg_wa.sg_used;
793 		op->src.u.dma.length = op_len & ~(block_size - 1);
794 
795 		ccp_update_sg_workarea(&src->sg_wa, op->src.u.dma.length);
796 	}
797 
798 	if (dst) {
799 		if (sg_dst_len < block_size) {
800 			/* Not enough room in the sg element or we're on the
801 			 * last piece of data (when using padding), so the
802 			 * output needs to be buffered into a blocksize chunk
803 			 */
804 			op->soc = 1;
805 			op->dst.u.dma.address = dst->dm_wa.dma.address;
806 			op->dst.u.dma.offset = 0;
807 			op->dst.u.dma.length = op->src.u.dma.length;
808 		} else {
809 			/* Enough room in the sg element, but we need to
810 			 * adjust for any previously used area
811 			 */
812 			op->dst.u.dma.address = sg_dma_address(dst->sg_wa.sg);
813 			op->dst.u.dma.offset = dst->sg_wa.sg_used;
814 			op->dst.u.dma.length = op->src.u.dma.length;
815 		}
816 	}
817 }
818 
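/* After an operation completes, clear the init flag for follow-on
 * operations and either empty the bounce buffer into the destination
 * scatterlist or advance the destination work area.
 */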
819 static void ccp_process_data(struct ccp_data *src, struct ccp_data *dst,
820 			     struct ccp_op *op)
821 {
822 	op->init = 0;
823 
824 	if (dst) {
825 		if (op->dst.u.dma.address == dst->dm_wa.dma.address)
826 			ccp_empty_queue_buf(dst);
827 		else
828 			ccp_update_sg_workarea(&dst->sg_wa,
829 					       op->dst.u.dma.length);
830 	}
831 }
832 
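/* Use a passthru operation to copy a work area to or from a KSB entry */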
833 static int ccp_copy_to_from_ksb(struct ccp_cmd_queue *cmd_q,
834 				struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
835 				u32 byte_swap, bool from)
836 {
837 	struct ccp_op op;
838 
839 	memset(&op, 0, sizeof(op));
840 
841 	op.cmd_q = cmd_q;
842 	op.jobid = jobid;
843 	op.eom = 1;
844 
845 	if (from) {
846 		op.soc = 1;
847 		op.src.type = CCP_MEMTYPE_KSB;
848 		op.src.u.ksb = ksb;
849 		op.dst.type = CCP_MEMTYPE_SYSTEM;
850 		op.dst.u.dma.address = wa->dma.address;
851 		op.dst.u.dma.length = wa->length;
852 	} else {
853 		op.src.type = CCP_MEMTYPE_SYSTEM;
854 		op.src.u.dma.address = wa->dma.address;
855 		op.src.u.dma.length = wa->length;
856 		op.dst.type = CCP_MEMTYPE_KSB;
857 		op.dst.u.ksb = ksb;
858 	}
859 
860 	op.u.passthru.byte_swap = byte_swap;
861 
862 	return ccp_perform_passthru(&op);
863 }
864 
865 static int ccp_copy_to_ksb(struct ccp_cmd_queue *cmd_q,
866 			   struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
867 			   u32 byte_swap)
868 {
869 	return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, false);
870 }
871 
872 static int ccp_copy_from_ksb(struct ccp_cmd_queue *cmd_q,
873 			     struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
874 			     u32 byte_swap)
875 {
876 	return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, true);
877 }
878 
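/* Perform an AES CMAC operation: the key and context are loaded into
 * KSB entries and the message is processed a block at a time; on the
 * final block the caller-supplied K1/K2 subkey is pushed into the
 * context entry before the last AES pass.
 */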
879 static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q,
880 				struct ccp_cmd *cmd)
881 {
882 	struct ccp_aes_engine *aes = &cmd->u.aes;
883 	struct ccp_dm_workarea key, ctx;
884 	struct ccp_data src;
885 	struct ccp_op op;
886 	unsigned int dm_offset;
887 	int ret;
888 
889 	if (!((aes->key_len == AES_KEYSIZE_128) ||
890 	      (aes->key_len == AES_KEYSIZE_192) ||
891 	      (aes->key_len == AES_KEYSIZE_256)))
892 		return -EINVAL;
893 
894 	if (aes->src_len & (AES_BLOCK_SIZE - 1))
895 		return -EINVAL;
896 
897 	if (aes->iv_len != AES_BLOCK_SIZE)
898 		return -EINVAL;
899 
900 	if (!aes->key || !aes->iv || !aes->src)
901 		return -EINVAL;
902 
903 	if (aes->cmac_final) {
904 		if (aes->cmac_key_len != AES_BLOCK_SIZE)
905 			return -EINVAL;
906 
907 		if (!aes->cmac_key)
908 			return -EINVAL;
909 	}
910 
911 	BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1);
912 	BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1);
913 
914 	ret = -EIO;
915 	memset(&op, 0, sizeof(op));
916 	op.cmd_q = cmd_q;
917 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
918 	op.ksb_key = cmd_q->ksb_key;
919 	op.ksb_ctx = cmd_q->ksb_ctx;
920 	op.init = 1;
921 	op.u.aes.type = aes->type;
922 	op.u.aes.mode = aes->mode;
923 	op.u.aes.action = aes->action;
924 
925 	/* All supported key sizes fit in a single (32-byte) KSB entry
926 	 * and must be in little endian format. Use the 256-bit byte
927 	 * swap passthru option to convert from big endian to little
928 	 * endian.
929 	 */
930 	ret = ccp_init_dm_workarea(&key, cmd_q,
931 				   CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
932 				   DMA_TO_DEVICE);
933 	if (ret)
934 		return ret;
935 
936 	dm_offset = CCP_KSB_BYTES - aes->key_len;
937 	ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
938 	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
939 			      CCP_PASSTHRU_BYTESWAP_256BIT);
940 	if (ret) {
941 		cmd->engine_error = cmd_q->cmd_error;
942 		goto e_key;
943 	}
944 
945 	/* The AES context fits in a single (32-byte) KSB entry and
946 	 * must be in little endian format. Use the 256-bit byte swap
947 	 * passthru option to convert from big endian to little endian.
948 	 */
949 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
950 				   CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
951 				   DMA_BIDIRECTIONAL);
952 	if (ret)
953 		goto e_key;
954 
955 	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
956 	ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
957 	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
958 			      CCP_PASSTHRU_BYTESWAP_256BIT);
959 	if (ret) {
960 		cmd->engine_error = cmd_q->cmd_error;
961 		goto e_ctx;
962 	}
963 
964 	/* Send data to the CCP AES engine */
965 	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
966 			    AES_BLOCK_SIZE, DMA_TO_DEVICE);
967 	if (ret)
968 		goto e_ctx;
969 
970 	while (src.sg_wa.bytes_left) {
971 		ccp_prepare_data(&src, NULL, &op, AES_BLOCK_SIZE, true);
972 		if (aes->cmac_final && !src.sg_wa.bytes_left) {
973 			op.eom = 1;
974 
975 			/* Push the K1/K2 key to the CCP now */
976 			ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid,
977 						op.ksb_ctx,
978 						CCP_PASSTHRU_BYTESWAP_256BIT);
979 			if (ret) {
980 				cmd->engine_error = cmd_q->cmd_error;
981 				goto e_src;
982 			}
983 
984 			ccp_set_dm_area(&ctx, 0, aes->cmac_key, 0,
985 					aes->cmac_key_len);
986 			ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
987 					      CCP_PASSTHRU_BYTESWAP_256BIT);
988 			if (ret) {
989 				cmd->engine_error = cmd_q->cmd_error;
990 				goto e_src;
991 			}
992 		}
993 
994 		ret = ccp_perform_aes(&op);
995 		if (ret) {
996 			cmd->engine_error = cmd_q->cmd_error;
997 			goto e_src;
998 		}
999 
1000 		ccp_process_data(&src, NULL, &op);
1001 	}
1002 
1003 	/* Retrieve the AES context - convert from LE to BE using
1004 	 * 32-byte (256-bit) byteswapping
1005 	 */
1006 	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1007 				CCP_PASSTHRU_BYTESWAP_256BIT);
1008 	if (ret) {
1009 		cmd->engine_error = cmd_q->cmd_error;
1010 		goto e_src;
1011 	}
1012 
1013 	/* ...but we only need AES_BLOCK_SIZE bytes */
1014 	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
1015 	ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
1016 
1017 e_src:
1018 	ccp_free_data(&src, cmd_q);
1019 
1020 e_ctx:
1021 	ccp_dm_free(&ctx);
1022 
1023 e_key:
1024 	ccp_dm_free(&key);
1025 
1026 	return ret;
1027 }
1028 
1029 static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1030 {
1031 	struct ccp_aes_engine *aes = &cmd->u.aes;
1032 	struct ccp_dm_workarea key, ctx;
1033 	struct ccp_data src, dst;
1034 	struct ccp_op op;
1035 	unsigned int dm_offset;
1036 	bool in_place = false;
1037 	int ret;
1038 
1039 	if (aes->mode == CCP_AES_MODE_CMAC)
1040 		return ccp_run_aes_cmac_cmd(cmd_q, cmd);
1041 
1042 	if (!((aes->key_len == AES_KEYSIZE_128) ||
1043 	      (aes->key_len == AES_KEYSIZE_192) ||
1044 	      (aes->key_len == AES_KEYSIZE_256)))
1045 		return -EINVAL;
1046 
1047 	if (((aes->mode == CCP_AES_MODE_ECB) ||
1048 	     (aes->mode == CCP_AES_MODE_CBC) ||
1049 	     (aes->mode == CCP_AES_MODE_CFB)) &&
1050 	    (aes->src_len & (AES_BLOCK_SIZE - 1)))
1051 		return -EINVAL;
1052 
1053 	if (!aes->key || !aes->src || !aes->dst)
1054 		return -EINVAL;
1055 
1056 	if (aes->mode != CCP_AES_MODE_ECB) {
1057 		if (aes->iv_len != AES_BLOCK_SIZE)
1058 			return -EINVAL;
1059 
1060 		if (!aes->iv)
1061 			return -EINVAL;
1062 	}
1063 
1064 	BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1);
1065 	BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1);
1066 
1067 	ret = -EIO;
1068 	memset(&op, 0, sizeof(op));
1069 	op.cmd_q = cmd_q;
1070 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1071 	op.ksb_key = cmd_q->ksb_key;
1072 	op.ksb_ctx = cmd_q->ksb_ctx;
1073 	op.init = (aes->mode == CCP_AES_MODE_ECB) ? 0 : 1;
1074 	op.u.aes.type = aes->type;
1075 	op.u.aes.mode = aes->mode;
1076 	op.u.aes.action = aes->action;
1077 
1078 	/* All supported key sizes fit in a single (32-byte) KSB entry
1079 	 * and must be in little endian format. Use the 256-bit byte
1080 	 * swap passthru option to convert from big endian to little
1081 	 * endian.
1082 	 */
1083 	ret = ccp_init_dm_workarea(&key, cmd_q,
1084 				   CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
1085 				   DMA_TO_DEVICE);
1086 	if (ret)
1087 		return ret;
1088 
1089 	dm_offset = CCP_KSB_BYTES - aes->key_len;
1090 	ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
1091 	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
1092 			      CCP_PASSTHRU_BYTESWAP_256BIT);
1093 	if (ret) {
1094 		cmd->engine_error = cmd_q->cmd_error;
1095 		goto e_key;
1096 	}
1097 
1098 	/* The AES context fits in a single (32-byte) KSB entry and
1099 	 * must be in little endian format. Use the 256-bit byte swap
1100 	 * passthru option to convert from big endian to little endian.
1101 	 */
1102 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
1103 				   CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
1104 				   DMA_BIDIRECTIONAL);
1105 	if (ret)
1106 		goto e_key;
1107 
1108 	if (aes->mode != CCP_AES_MODE_ECB) {
1109 		/* Load the AES context - convert to LE */
1110 		dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
1111 		ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
1112 		ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1113 				      CCP_PASSTHRU_BYTESWAP_256BIT);
1114 		if (ret) {
1115 			cmd->engine_error = cmd_q->cmd_error;
1116 			goto e_ctx;
1117 		}
1118 	}
1119 
1120 	/* Prepare the input and output data workareas. For in-place
1121 	 * operations we need to set the dma direction to BIDIRECTIONAL
1122 	 * and copy the src workarea to the dst workarea.
1123 	 */
1124 	if (sg_virt(aes->src) == sg_virt(aes->dst))
1125 		in_place = true;
1126 
1127 	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
1128 			    AES_BLOCK_SIZE,
1129 			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1130 	if (ret)
1131 		goto e_ctx;
1132 
1133 	if (in_place) {
1134 		dst = src;
1135 	} else {
1136 		ret = ccp_init_data(&dst, cmd_q, aes->dst, aes->src_len,
1137 				    AES_BLOCK_SIZE, DMA_FROM_DEVICE);
1138 		if (ret)
1139 			goto e_src;
1140 	}
1141 
1142 	/* Send data to the CCP AES engine */
1143 	while (src.sg_wa.bytes_left) {
1144 		ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true);
1145 		if (!src.sg_wa.bytes_left) {
1146 			op.eom = 1;
1147 
1148 			/* Since we don't retrieve the AES context in ECB
1149 			 * mode we have to wait for the operation to complete
1150 			 * on the last piece of data
1151 			 */
1152 			if (aes->mode == CCP_AES_MODE_ECB)
1153 				op.soc = 1;
1154 		}
1155 
1156 		ret = ccp_perform_aes(&op);
1157 		if (ret) {
1158 			cmd->engine_error = cmd_q->cmd_error;
1159 			goto e_dst;
1160 		}
1161 
1162 		ccp_process_data(&src, &dst, &op);
1163 	}
1164 
1165 	if (aes->mode != CCP_AES_MODE_ECB) {
1166 		/* Retrieve the AES context - convert from LE to BE using
1167 		 * 32-byte (256-bit) byteswapping
1168 		 */
1169 		ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1170 					CCP_PASSTHRU_BYTESWAP_256BIT);
1171 		if (ret) {
1172 			cmd->engine_error = cmd_q->cmd_error;
1173 			goto e_dst;
1174 		}
1175 
1176 		/* ...but we only need AES_BLOCK_SIZE bytes */
1177 		dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
1178 		ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
1179 	}
1180 
1181 e_dst:
1182 	if (!in_place)
1183 		ccp_free_data(&dst, cmd_q);
1184 
1185 e_src:
1186 	ccp_free_data(&src, cmd_q);
1187 
1188 e_ctx:
1189 	ccp_dm_free(&ctx);
1190 
1191 e_key:
1192 	ccp_dm_free(&key);
1193 
1194 	return ret;
1195 }
1196 
1197 static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
1198 			       struct ccp_cmd *cmd)
1199 {
1200 	struct ccp_xts_aes_engine *xts = &cmd->u.xts;
1201 	struct ccp_dm_workarea key, ctx;
1202 	struct ccp_data src, dst;
1203 	struct ccp_op op;
1204 	unsigned int unit_size, dm_offset;
1205 	bool in_place = false;
1206 	int ret;
1207 
1208 	switch (xts->unit_size) {
1209 	case CCP_XTS_AES_UNIT_SIZE_16:
1210 		unit_size = 16;
1211 		break;
1212 	case CCP_XTS_AES_UNIT_SIZE_512:
1213 		unit_size = 512;
1214 		break;
1215 	case CCP_XTS_AES_UNIT_SIZE_1024:
1216 		unit_size = 1024;
1217 		break;
1218 	case CCP_XTS_AES_UNIT_SIZE_2048:
1219 		unit_size = 2048;
1220 		break;
1221 	case CCP_XTS_AES_UNIT_SIZE_4096:
1222 		unit_size = 4096;
1223 		break;
1224 
1225 	default:
1226 		return -EINVAL;
1227 	}
1228 
1229 	if (xts->key_len != AES_KEYSIZE_128)
1230 		return -EINVAL;
1231 
1232 	if (!xts->final && (xts->src_len & (AES_BLOCK_SIZE - 1)))
1233 		return -EINVAL;
1234 
1235 	if (xts->iv_len != AES_BLOCK_SIZE)
1236 		return -EINVAL;
1237 
1238 	if (!xts->key || !xts->iv || !xts->src || !xts->dst)
1239 		return -EINVAL;
1240 
1241 	BUILD_BUG_ON(CCP_XTS_AES_KEY_KSB_COUNT != 1);
1242 	BUILD_BUG_ON(CCP_XTS_AES_CTX_KSB_COUNT != 1);
1243 
1244 	ret = -EIO;
1245 	memset(&op, 0, sizeof(op));
1246 	op.cmd_q = cmd_q;
1247 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1248 	op.ksb_key = cmd_q->ksb_key;
1249 	op.ksb_ctx = cmd_q->ksb_ctx;
1250 	op.init = 1;
1251 	op.u.xts.action = xts->action;
1252 	op.u.xts.unit_size = xts->unit_size;
1253 
1254 	/* All supported key sizes fit in a single (32-byte) KSB entry
1255 	 * and must be in little endian format. Use the 256-bit byte
1256 	 * swap passthru option to convert from big endian to little
1257 	 * endian.
1258 	 */
1259 	ret = ccp_init_dm_workarea(&key, cmd_q,
1260 				   CCP_XTS_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
1261 				   DMA_TO_DEVICE);
1262 	if (ret)
1263 		return ret;
1264 
1265 	dm_offset = CCP_KSB_BYTES - AES_KEYSIZE_128;
1266 	ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len);
1267 	ccp_set_dm_area(&key, 0, xts->key, dm_offset, xts->key_len);
1268 	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
1269 			      CCP_PASSTHRU_BYTESWAP_256BIT);
1270 	if (ret) {
1271 		cmd->engine_error = cmd_q->cmd_error;
1272 		goto e_key;
1273 	}
1274 
1275 	/* The AES context fits in a single (32-byte) KSB entry and
1276 	 * for XTS is already in little endian format so no byte swapping
1277 	 * is needed.
1278 	 */
1279 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
1280 				   CCP_XTS_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
1281 				   DMA_BIDIRECTIONAL);
1282 	if (ret)
1283 		goto e_key;
1284 
1285 	ccp_set_dm_area(&ctx, 0, xts->iv, 0, xts->iv_len);
1286 	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1287 			      CCP_PASSTHRU_BYTESWAP_NOOP);
1288 	if (ret) {
1289 		cmd->engine_error = cmd_q->cmd_error;
1290 		goto e_ctx;
1291 	}
1292 
1293 	/* Prepare the input and output data workareas. For in-place
1294 	 * operations we need to set the dma direction to BIDIRECTIONAL
1295 	 * and copy the src workarea to the dst workarea.
1296 	 */
1297 	if (sg_virt(xts->src) == sg_virt(xts->dst))
1298 		in_place = true;
1299 
1300 	ret = ccp_init_data(&src, cmd_q, xts->src, xts->src_len,
1301 			    unit_size,
1302 			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1303 	if (ret)
1304 		goto e_ctx;
1305 
1306 	if (in_place) {
1307 		dst = src;
1308 	} else {
1309 		ret = ccp_init_data(&dst, cmd_q, xts->dst, xts->src_len,
1310 				    unit_size, DMA_FROM_DEVICE);
1311 		if (ret)
1312 			goto e_src;
1313 	}
1314 
1315 	/* Send data to the CCP AES engine */
1316 	while (src.sg_wa.bytes_left) {
1317 		ccp_prepare_data(&src, &dst, &op, unit_size, true);
1318 		if (!src.sg_wa.bytes_left)
1319 			op.eom = 1;
1320 
1321 		ret = ccp_perform_xts_aes(&op);
1322 		if (ret) {
1323 			cmd->engine_error = cmd_q->cmd_error;
1324 			goto e_dst;
1325 		}
1326 
1327 		ccp_process_data(&src, &dst, &op);
1328 	}
1329 
1330 	/* Retrieve the AES context - convert from LE to BE using
1331 	 * 32-byte (256-bit) byteswapping
1332 	 */
1333 	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1334 				CCP_PASSTHRU_BYTESWAP_256BIT);
1335 	if (ret) {
1336 		cmd->engine_error = cmd_q->cmd_error;
1337 		goto e_dst;
1338 	}
1339 
1340 	/* ...but we only need AES_BLOCK_SIZE bytes */
1341 	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
1342 	ccp_get_dm_area(&ctx, dm_offset, xts->iv, 0, xts->iv_len);
1343 
1344 e_dst:
1345 	if (!in_place)
1346 		ccp_free_data(&dst, cmd_q);
1347 
1348 e_src:
1349 	ccp_free_data(&src, cmd_q);
1350 
1351 e_ctx:
1352 	ccp_dm_free(&ctx);
1353 
1354 e_key:
1355 	ccp_dm_free(&key);
1356 
1357 	return ret;
1358 }
1359 
1360 static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1361 {
1362 	struct ccp_sha_engine *sha = &cmd->u.sha;
1363 	struct ccp_dm_workarea ctx;
1364 	struct ccp_data src;
1365 	struct ccp_op op;
1366 	int ret;
1367 
1368 	if (sha->ctx_len != CCP_SHA_CTXSIZE)
1369 		return -EINVAL;
1370 
1371 	if (!sha->ctx)
1372 		return -EINVAL;
1373 
1374 	if (!sha->final && (sha->src_len & (CCP_SHA_BLOCKSIZE - 1)))
1375 		return -EINVAL;
1376 
1377 	if (!sha->src_len) {
1378 		const u8 *sha_zero;
1379 
1380 		/* Not final, just return */
1381 		if (!sha->final)
1382 			return 0;
1383 
1384 		/* CCP can't do a zero length sha operation so the caller
1385 		 * must buffer the data.
1386 		 */
1387 		if (sha->msg_bits)
1388 			return -EINVAL;
1389 
1390 		/* For a SHA operation on a message with a total length of
1391 		 * zero, return the known result.
1392 		 */
1393 		switch (sha->type) {
1394 		case CCP_SHA_TYPE_1:
1395 			sha_zero = ccp_sha1_zero;
1396 			break;
1397 		case CCP_SHA_TYPE_224:
1398 			sha_zero = ccp_sha224_zero;
1399 			break;
1400 		case CCP_SHA_TYPE_256:
1401 			sha_zero = ccp_sha256_zero;
1402 			break;
1403 		default:
1404 			return -EINVAL;
1405 		}
1406 
1407 		scatterwalk_map_and_copy((void *)sha_zero, sha->ctx, 0,
1408 					 sha->ctx_len, 1);
1409 
1410 		return 0;
1411 	}
1412 
1413 	if (!sha->src)
1414 		return -EINVAL;
1415 
1416 	BUILD_BUG_ON(CCP_SHA_KSB_COUNT != 1);
1417 
1418 	memset(&op, 0, sizeof(op));
1419 	op.cmd_q = cmd_q;
1420 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1421 	op.ksb_ctx = cmd_q->ksb_ctx;
1422 	op.u.sha.type = sha->type;
1423 	op.u.sha.msg_bits = sha->msg_bits;
1424 
1425 	/* The SHA context fits in a single (32-byte) KSB entry and
1426 	 * must be in little endian format. Use the 256-bit byte swap
1427 	 * passthru option to convert from big endian to little endian.
1428 	 */
1429 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
1430 				   CCP_SHA_KSB_COUNT * CCP_KSB_BYTES,
1431 				   DMA_BIDIRECTIONAL);
1432 	if (ret)
1433 		return ret;
1434 
1435 	if (sha->first) {
1436 		const __be32 *init;
1437 
1438 		switch (sha->type) {
1439 		case CCP_SHA_TYPE_1:
1440 			init = ccp_sha1_init;
1441 			break;
1442 		case CCP_SHA_TYPE_224:
1443 			init = ccp_sha224_init;
1444 			break;
1445 		case CCP_SHA_TYPE_256:
1446 			init = ccp_sha256_init;
1447 			break;
1448 		default:
1449 			ret = -EINVAL;
1450 			goto e_ctx;
1451 		}
1452 		memcpy(ctx.address, init, CCP_SHA_CTXSIZE);
1453 	} else {
1454 		ccp_set_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
1455 	}
1456 
1457 	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1458 			      CCP_PASSTHRU_BYTESWAP_256BIT);
1459 	if (ret) {
1460 		cmd->engine_error = cmd_q->cmd_error;
1461 		goto e_ctx;
1462 	}
1463 
1464 	/* Send data to the CCP SHA engine */
1465 	ret = ccp_init_data(&src, cmd_q, sha->src, sha->src_len,
1466 			    CCP_SHA_BLOCKSIZE, DMA_TO_DEVICE);
1467 	if (ret)
1468 		goto e_ctx;
1469 
1470 	while (src.sg_wa.bytes_left) {
1471 		ccp_prepare_data(&src, NULL, &op, CCP_SHA_BLOCKSIZE, false);
1472 		if (sha->final && !src.sg_wa.bytes_left)
1473 			op.eom = 1;
1474 
1475 		ret = ccp_perform_sha(&op);
1476 		if (ret) {
1477 			cmd->engine_error = cmd_q->cmd_error;
1478 			goto e_data;
1479 		}
1480 
1481 		ccp_process_data(&src, NULL, &op);
1482 	}
1483 
1484 	/* Retrieve the SHA context - convert from LE to BE using
1485 	 * 32-byte (256-bit) byteswapping
1486 	 */
1487 	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1488 				CCP_PASSTHRU_BYTESWAP_256BIT);
1489 	if (ret) {
1490 		cmd->engine_error = cmd_q->cmd_error;
1491 		goto e_data;
1492 	}
1493 
1494 	ccp_get_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
1495 
1496 	if (sha->final && sha->opad) {
1497 		/* HMAC operation, recursively perform final SHA */
1498 		struct ccp_cmd hmac_cmd;
1499 		struct scatterlist sg;
1500 		u64 block_size, digest_size;
1501 		u8 *hmac_buf;
1502 
1503 		switch (sha->type) {
1504 		case CCP_SHA_TYPE_1:
1505 			block_size = SHA1_BLOCK_SIZE;
1506 			digest_size = SHA1_DIGEST_SIZE;
1507 			break;
1508 		case CCP_SHA_TYPE_224:
1509 			block_size = SHA224_BLOCK_SIZE;
1510 			digest_size = SHA224_DIGEST_SIZE;
1511 			break;
1512 		case CCP_SHA_TYPE_256:
1513 			block_size = SHA256_BLOCK_SIZE;
1514 			digest_size = SHA256_DIGEST_SIZE;
1515 			break;
1516 		default:
1517 			ret = -EINVAL;
1518 			goto e_data;
1519 		}
1520 
1521 		if (sha->opad_len != block_size) {
1522 			ret = -EINVAL;
1523 			goto e_data;
1524 		}
1525 
1526 		hmac_buf = kmalloc(block_size + digest_size, GFP_KERNEL);
1527 		if (!hmac_buf) {
1528 			ret = -ENOMEM;
1529 			goto e_data;
1530 		}
1531 		sg_init_one(&sg, hmac_buf, block_size + digest_size);
1532 
1533 		scatterwalk_map_and_copy(hmac_buf, sha->opad, 0, block_size, 0);
1534 		memcpy(hmac_buf + block_size, ctx.address, digest_size);
1535 
1536 		memset(&hmac_cmd, 0, sizeof(hmac_cmd));
1537 		hmac_cmd.engine = CCP_ENGINE_SHA;
1538 		hmac_cmd.u.sha.type = sha->type;
1539 		hmac_cmd.u.sha.ctx = sha->ctx;
1540 		hmac_cmd.u.sha.ctx_len = sha->ctx_len;
1541 		hmac_cmd.u.sha.src = &sg;
1542 		hmac_cmd.u.sha.src_len = block_size + digest_size;
1543 		hmac_cmd.u.sha.opad = NULL;
1544 		hmac_cmd.u.sha.opad_len = 0;
1545 		hmac_cmd.u.sha.first = 1;
1546 		hmac_cmd.u.sha.final = 1;
1547 		hmac_cmd.u.sha.msg_bits = (block_size + digest_size) << 3;
1548 
1549 		ret = ccp_run_sha_cmd(cmd_q, &hmac_cmd);
1550 		if (ret)
1551 			cmd->engine_error = hmac_cmd.engine_error;
1552 
1553 		kfree(hmac_buf);
1554 	}
1555 
1556 e_data:
1557 	ccp_free_data(&src, cmd_q);
1558 
1559 e_ctx:
1560 	ccp_dm_free(&ctx);
1561 
1562 	return ret;
1563 }
1564 
1565 static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1566 {
1567 	struct ccp_rsa_engine *rsa = &cmd->u.rsa;
1568 	struct ccp_dm_workarea exp, src;
1569 	struct ccp_data dst;
1570 	struct ccp_op op;
1571 	unsigned int ksb_count, i_len, o_len;
1572 	int ret;
1573 
1574 	if (rsa->key_size > CCP_RSA_MAX_WIDTH)
1575 		return -EINVAL;
1576 
1577 	if (!rsa->exp || !rsa->mod || !rsa->src || !rsa->dst)
1578 		return -EINVAL;
1579 
1580 	/* The RSA modulus must precede the message being acted upon, so
1581 	 * it must be copied to a DMA area where the message and the
1582 	 * modulus can be concatenated.  Therefore the input buffer
1583 	 * length required is twice the output buffer length (which
1584 	 * must be a multiple of 256-bits).
1585 	 */
1586 	o_len = ((rsa->key_size + 255) / 256) * 32;
1587 	i_len = o_len * 2;
1588 
1589 	ksb_count = o_len / CCP_KSB_BYTES;
1590 
1591 	memset(&op, 0, sizeof(op));
1592 	op.cmd_q = cmd_q;
1593 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1594 	op.ksb_key = ccp_alloc_ksb(cmd_q->ccp, ksb_count);
1595 	if (!op.ksb_key)
1596 		return -EIO;
1597 
1598 	/* The RSA exponent may span multiple (32-byte) KSB entries and must
1599 	 * be in little endian format. Reverse copy each 32-byte chunk
1600 	 * of the exponent (En chunk to E0 chunk, E(n-1) chunk to E1 chunk)
1601 	 * and each byte within that chunk, and do not request any byte
1602 	 * swapping in the passthru operation.
1603 	 */
1604 	ret = ccp_init_dm_workarea(&exp, cmd_q, o_len, DMA_TO_DEVICE);
1605 	if (ret)
1606 		goto e_ksb;
1607 
1608 	ccp_reverse_set_dm_area(&exp, rsa->exp, rsa->exp_len, CCP_KSB_BYTES,
1609 				false);
1610 	ret = ccp_copy_to_ksb(cmd_q, &exp, op.jobid, op.ksb_key,
1611 			      CCP_PASSTHRU_BYTESWAP_NOOP);
1612 	if (ret) {
1613 		cmd->engine_error = cmd_q->cmd_error;
1614 		goto e_exp;
1615 	}
1616 
1617 	/* Concatenate the modulus and the message. Both the modulus and
1618 	 * the operands must be in little endian format.  Since the input
1619 	 * is in big endian format it must be converted.
1620 	 */
1621 	ret = ccp_init_dm_workarea(&src, cmd_q, i_len, DMA_TO_DEVICE);
1622 	if (ret)
1623 		goto e_exp;
1624 
1625 	ccp_reverse_set_dm_area(&src, rsa->mod, rsa->mod_len, CCP_KSB_BYTES,
1626 				false);
1627 	src.address += o_len;	/* Adjust the address for the copy operation */
1628 	ccp_reverse_set_dm_area(&src, rsa->src, rsa->src_len, CCP_KSB_BYTES,
1629 				false);
1630 	src.address -= o_len;	/* Reset the address to original value */
1631 
1632 	/* Prepare the output area for the operation */
1633 	ret = ccp_init_data(&dst, cmd_q, rsa->dst, rsa->mod_len,
1634 			    o_len, DMA_FROM_DEVICE);
1635 	if (ret)
1636 		goto e_src;
1637 
1638 	op.soc = 1;
1639 	op.src.u.dma.address = src.dma.address;
1640 	op.src.u.dma.offset = 0;
1641 	op.src.u.dma.length = i_len;
1642 	op.dst.u.dma.address = dst.dm_wa.dma.address;
1643 	op.dst.u.dma.offset = 0;
1644 	op.dst.u.dma.length = o_len;
1645 
1646 	op.u.rsa.mod_size = rsa->key_size;
1647 	op.u.rsa.input_len = i_len;
1648 
1649 	ret = ccp_perform_rsa(&op);
1650 	if (ret) {
1651 		cmd->engine_error = cmd_q->cmd_error;
1652 		goto e_dst;
1653 	}
1654 
1655 	ccp_reverse_get_dm_area(&dst.dm_wa, rsa->dst, rsa->mod_len);
1656 
1657 e_dst:
1658 	ccp_free_data(&dst, cmd_q);
1659 
1660 e_src:
1661 	ccp_dm_free(&src);
1662 
1663 e_exp:
1664 	ccp_dm_free(&exp);
1665 
1666 e_ksb:
1667 	ccp_free_ksb(cmd_q->ccp, op.ksb_key, ksb_count);
1668 
1669 	return ret;
1670 }
1671 
1672 static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q,
1673 				struct ccp_cmd *cmd)
1674 {
1675 	struct ccp_passthru_engine *pt = &cmd->u.passthru;
1676 	struct ccp_dm_workarea mask;
1677 	struct ccp_data src, dst;
1678 	struct ccp_op op;
1679 	bool in_place = false;
1680 	unsigned int i;
1681 	int ret;
1682 
1683 	if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1)))
1684 		return -EINVAL;
1685 
1686 	if (!pt->src || !pt->dst)
1687 		return -EINVAL;
1688 
1689 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
1690 		if (pt->mask_len != CCP_PASSTHRU_MASKSIZE)
1691 			return -EINVAL;
1692 		if (!pt->mask)
1693 			return -EINVAL;
1694 	}
1695 
1696 	BUILD_BUG_ON(CCP_PASSTHRU_KSB_COUNT != 1);
1697 
1698 	memset(&op, 0, sizeof(op));
1699 	op.cmd_q = cmd_q;
1700 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1701 
1702 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
1703 		/* Load the mask */
1704 		op.ksb_key = cmd_q->ksb_key;
1705 
1706 		ret = ccp_init_dm_workarea(&mask, cmd_q,
1707 					   CCP_PASSTHRU_KSB_COUNT *
1708 					   CCP_KSB_BYTES,
1709 					   DMA_TO_DEVICE);
1710 		if (ret)
1711 			return ret;
1712 
1713 		ccp_set_dm_area(&mask, 0, pt->mask, 0, pt->mask_len);
1714 		ret = ccp_copy_to_ksb(cmd_q, &mask, op.jobid, op.ksb_key,
1715 				      CCP_PASSTHRU_BYTESWAP_NOOP);
1716 		if (ret) {
1717 			cmd->engine_error = cmd_q->cmd_error;
1718 			goto e_mask;
1719 		}
1720 	}
1721 
1722 	/* Prepare the input and output data workareas. For in-place
1723 	 * operations we need to set the dma direction to BIDIRECTIONAL
1724 	 * and copy the src workarea to the dst workarea.
1725 	 */
1726 	if (sg_virt(pt->src) == sg_virt(pt->dst))
1727 		in_place = true;
1728 
1729 	ret = ccp_init_data(&src, cmd_q, pt->src, pt->src_len,
1730 			    CCP_PASSTHRU_MASKSIZE,
1731 			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1732 	if (ret)
1733 		goto e_mask;
1734 
1735 	if (in_place) {
1736 		dst = src;
1737 	} else {
1738 		ret = ccp_init_data(&dst, cmd_q, pt->dst, pt->src_len,
1739 				    CCP_PASSTHRU_MASKSIZE, DMA_FROM_DEVICE);
1740 		if (ret)
1741 			goto e_src;
1742 	}
1743 
1744 	/* Send data to the CCP Passthru engine
1745 	 *   Because the CCP engine works on a single source and destination
1746 	 *   dma address at a time, each entry in the source scatterlist
1747 	 *   (after the dma_map_sg call) must be less than or equal to the
1748 	 *   (remaining) length in the destination scatterlist entry and the
1749 	 *   length must be a multiple of CCP_PASSTHRU_BLOCKSIZE
1750 	 */
1751 	dst.sg_wa.sg_used = 0;
1752 	for (i = 1; i <= src.sg_wa.dma_count; i++) {
1753 		if (!dst.sg_wa.sg ||
1754 		    (dst.sg_wa.sg->length < src.sg_wa.sg->length)) {
1755 			ret = -EINVAL;
1756 			goto e_dst;
1757 		}
1758 
1759 		if (i == src.sg_wa.dma_count) {
1760 			op.eom = 1;
1761 			op.soc = 1;
1762 		}
1763 
1764 		op.src.type = CCP_MEMTYPE_SYSTEM;
1765 		op.src.u.dma.address = sg_dma_address(src.sg_wa.sg);
1766 		op.src.u.dma.offset = 0;
1767 		op.src.u.dma.length = sg_dma_len(src.sg_wa.sg);
1768 
1769 		op.dst.type = CCP_MEMTYPE_SYSTEM;
1770 		op.dst.u.dma.address = sg_dma_address(dst.sg_wa.sg);
1771 		op.dst.u.dma.offset = dst.sg_wa.sg_used;
1772 		op.dst.u.dma.length = op.src.u.dma.length;
1773 
1774 		ret = ccp_perform_passthru(&op);
1775 		if (ret) {
1776 			cmd->engine_error = cmd_q->cmd_error;
1777 			goto e_dst;
1778 		}
1779 
1780 		dst.sg_wa.sg_used += src.sg_wa.sg->length;
1781 		if (dst.sg_wa.sg_used == dst.sg_wa.sg->length) {
1782 			dst.sg_wa.sg = sg_next(dst.sg_wa.sg);
1783 			dst.sg_wa.sg_used = 0;
1784 		}
1785 		src.sg_wa.sg = sg_next(src.sg_wa.sg);
1786 	}
1787 
1788 e_dst:
1789 	if (!in_place)
1790 		ccp_free_data(&dst, cmd_q);
1791 
1792 e_src:
1793 	ccp_free_data(&src, cmd_q);
1794 
1795 e_mask:
1796 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
1797 		ccp_dm_free(&mask);
1798 
1799 	return ret;
1800 }
1801 
1802 static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1803 {
1804 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
1805 	struct ccp_dm_workarea src, dst;
1806 	struct ccp_op op;
1807 	int ret;
1808 	u8 *save;
1809 
1810 	if (!ecc->u.mm.operand_1 ||
1811 	    (ecc->u.mm.operand_1_len > CCP_ECC_MODULUS_BYTES))
1812 		return -EINVAL;
1813 
1814 	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT)
1815 		if (!ecc->u.mm.operand_2 ||
1816 		    (ecc->u.mm.operand_2_len > CCP_ECC_MODULUS_BYTES))
1817 			return -EINVAL;
1818 
1819 	if (!ecc->u.mm.result ||
1820 	    (ecc->u.mm.result_len < CCP_ECC_MODULUS_BYTES))
1821 		return -EINVAL;
1822 
1823 	memset(&op, 0, sizeof(op));
1824 	op.cmd_q = cmd_q;
1825 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1826 
1827 	/* Concatenate the modulus and the operands. Both the modulus and
1828 	 * the operands must be in little endian format.  Since the input
1829 	 * is in big endian format it must be converted and placed in a
1830 	 * fixed length buffer.
1831 	 */
1832 	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
1833 				   DMA_TO_DEVICE);
1834 	if (ret)
1835 		return ret;
1836 
1837 	/* Save the workarea address since it is updated in order to perform
1838 	 * the concatenation
1839 	 */
1840 	save = src.address;
1841 
1842 	/* Copy the ECC modulus */
1843 	ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
1844 				CCP_ECC_OPERAND_SIZE, false);
1845 	src.address += CCP_ECC_OPERAND_SIZE;
1846 
1847 	/* Copy the first operand */
1848 	ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_1,
1849 				ecc->u.mm.operand_1_len,
1850 				CCP_ECC_OPERAND_SIZE, false);
1851 	src.address += CCP_ECC_OPERAND_SIZE;
1852 
1853 	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT) {
1854 		/* Copy the second operand */
1855 		ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_2,
1856 					ecc->u.mm.operand_2_len,
1857 					CCP_ECC_OPERAND_SIZE, false);
1858 		src.address += CCP_ECC_OPERAND_SIZE;
1859 	}
1860 
1861 	/* Restore the workarea address */
1862 	src.address = save;
1863 
1864 	/* Prepare the output area for the operation */
1865 	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
1866 				   DMA_FROM_DEVICE);
1867 	if (ret)
1868 		goto e_src;
1869 
1870 	op.soc = 1;
1871 	op.src.u.dma.address = src.dma.address;
1872 	op.src.u.dma.offset = 0;
1873 	op.src.u.dma.length = src.length;
1874 	op.dst.u.dma.address = dst.dma.address;
1875 	op.dst.u.dma.offset = 0;
1876 	op.dst.u.dma.length = dst.length;
1877 
1878 	op.u.ecc.function = cmd->u.ecc.function;
1879 
1880 	ret = ccp_perform_ecc(&op);
1881 	if (ret) {
1882 		cmd->engine_error = cmd_q->cmd_error;
1883 		goto e_dst;
1884 	}
1885 
1886 	ecc->ecc_result = le16_to_cpup(
1887 		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
1888 	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
1889 		ret = -EIO;
1890 		goto e_dst;
1891 	}
1892 
1893 	/* Save the ECC result */
1894 	ccp_reverse_get_dm_area(&dst, ecc->u.mm.result, CCP_ECC_MODULUS_BYTES);
1895 
1896 e_dst:
1897 	ccp_dm_free(&dst);
1898 
1899 e_src:
1900 	ccp_dm_free(&src);
1901 
1902 	return ret;
1903 }
1904 
1905 static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1906 {
1907 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
1908 	struct ccp_dm_workarea src, dst;
1909 	struct ccp_op op;
1910 	int ret;
1911 	u8 *save;
1912 
1913 	if (!ecc->u.pm.point_1.x ||
1914 	    (ecc->u.pm.point_1.x_len > CCP_ECC_MODULUS_BYTES) ||
1915 	    !ecc->u.pm.point_1.y ||
1916 	    (ecc->u.pm.point_1.y_len > CCP_ECC_MODULUS_BYTES))
1917 		return -EINVAL;
1918 
1919 	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
1920 		if (!ecc->u.pm.point_2.x ||
1921 		    (ecc->u.pm.point_2.x_len > CCP_ECC_MODULUS_BYTES) ||
1922 		    !ecc->u.pm.point_2.y ||
1923 		    (ecc->u.pm.point_2.y_len > CCP_ECC_MODULUS_BYTES))
1924 			return -EINVAL;
1925 	} else {
1926 		if (!ecc->u.pm.domain_a ||
1927 		    (ecc->u.pm.domain_a_len > CCP_ECC_MODULUS_BYTES))
1928 			return -EINVAL;
1929 
1930 		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT)
1931 			if (!ecc->u.pm.scalar ||
1932 			    (ecc->u.pm.scalar_len > CCP_ECC_MODULUS_BYTES))
1933 				return -EINVAL;
1934 	}
1935 
1936 	if (!ecc->u.pm.result.x ||
1937 	    (ecc->u.pm.result.x_len < CCP_ECC_MODULUS_BYTES) ||
1938 	    !ecc->u.pm.result.y ||
1939 	    (ecc->u.pm.result.y_len < CCP_ECC_MODULUS_BYTES))
1940 		return -EINVAL;
1941 
1942 	memset(&op, 0, sizeof(op));
1943 	op.cmd_q = cmd_q;
1944 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1945 
1946 	/* Concatenate the modulus and the operands. Both the modulus and
1947 	 * the operands must be in little endian format.  Since the input
1948 	 * is in big endian format it must be converted and placed in a
1949 	 * fixed length buffer.
1950 	 */
1951 	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
1952 				   DMA_TO_DEVICE);
1953 	if (ret)
1954 		return ret;
1955 
1956 	/* Save the workarea address since it is updated in order to perform
1957 	 * the concatenation
1958 	 */
1959 	save = src.address;
1960 
1961 	/* Copy the ECC modulus */
1962 	ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
1963 				CCP_ECC_OPERAND_SIZE, false);
1964 	src.address += CCP_ECC_OPERAND_SIZE;
1965 
1966 	/* Copy the first point X and Y coordinate */
1967 	ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.x,
1968 				ecc->u.pm.point_1.x_len,
1969 				CCP_ECC_OPERAND_SIZE, false);
1970 	src.address += CCP_ECC_OPERAND_SIZE;
1971 	ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.y,
1972 				ecc->u.pm.point_1.y_len,
1973 				CCP_ECC_OPERAND_SIZE, false);
1974 	src.address += CCP_ECC_OPERAND_SIZE;
1975 
1976 	/* Set the first point Z coordinate to 1 */
1977 	*src.address = 0x01;
1978 	src.address += CCP_ECC_OPERAND_SIZE;
1979 
1980 	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
1981 		/* Copy the second point X and Y coordinate */
1982 		ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.x,
1983 					ecc->u.pm.point_2.x_len,
1984 					CCP_ECC_OPERAND_SIZE, false);
1985 		src.address += CCP_ECC_OPERAND_SIZE;
1986 		ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.y,
1987 					ecc->u.pm.point_2.y_len,
1988 					CCP_ECC_OPERAND_SIZE, false);
1989 		src.address += CCP_ECC_OPERAND_SIZE;
1990 
1991 		/* Set the second point Z coordinate to 1 */
1992 		*src.address = 0x01;
1993 		src.address += CCP_ECC_OPERAND_SIZE;
1994 	} else {
1995 		/* Copy the Domain "a" parameter */
1996 		ccp_reverse_set_dm_area(&src, ecc->u.pm.domain_a,
1997 					ecc->u.pm.domain_a_len,
1998 					CCP_ECC_OPERAND_SIZE, false);
1999 		src.address += CCP_ECC_OPERAND_SIZE;
2000 
2001 		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT) {
2002 			/* Copy the scalar value */
2003 			ccp_reverse_set_dm_area(&src, ecc->u.pm.scalar,
2004 						ecc->u.pm.scalar_len,
2005 						CCP_ECC_OPERAND_SIZE, false);
2006 			src.address += CCP_ECC_OPERAND_SIZE;
2007 		}
2008 	}
2009 
2010 	/* Restore the workarea address */
2011 	src.address = save;
2012 
2013 	/* Prepare the output area for the operation */
2014 	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
2015 				   DMA_FROM_DEVICE);
2016 	if (ret)
2017 		goto e_src;
2018 
2019 	op.soc = 1;
2020 	op.src.u.dma.address = src.dma.address;
2021 	op.src.u.dma.offset = 0;
2022 	op.src.u.dma.length = src.length;
2023 	op.dst.u.dma.address = dst.dma.address;
2024 	op.dst.u.dma.offset = 0;
2025 	op.dst.u.dma.length = dst.length;
2026 
2027 	op.u.ecc.function = cmd->u.ecc.function;
2028 
2029 	ret = ccp_perform_ecc(&op);
2030 	if (ret) {
2031 		cmd->engine_error = cmd_q->cmd_error;
2032 		goto e_dst;
2033 	}
2034 
2035 	ecc->ecc_result = le16_to_cpup(
2036 		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
2037 	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
2038 		ret = -EIO;
2039 		goto e_dst;
2040 	}
2041 
2042 	/* Save the workarea address since it is updated as we walk through
2043 	 * to copy the point math result
2044 	 */
2045 	save = dst.address;
2046 
2047 	/* Save the ECC result X and Y coordinates */
2048 	ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.x,
2049 				CCP_ECC_MODULUS_BYTES);
2050 	dst.address += CCP_ECC_OUTPUT_SIZE;
2051 	ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.y,
2052 				CCP_ECC_MODULUS_BYTES);
2053 	dst.address += CCP_ECC_OUTPUT_SIZE;
2054 
2055 	/* Restore the workarea address */
2056 	dst.address = save;
2057 
2058 e_dst:
2059 	ccp_dm_free(&dst);
2060 
2061 e_src:
2062 	ccp_dm_free(&src);
2063 
2064 	return ret;
2065 }
2066 
2067 static int ccp_run_ecc_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
2068 {
2069 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
2070 
2071 	ecc->ecc_result = 0;
2072 
2073 	if (!ecc->mod ||
2074 	    (ecc->mod_len > CCP_ECC_MODULUS_BYTES))
2075 		return -EINVAL;
2076 
2077 	switch (ecc->function) {
2078 	case CCP_ECC_FUNCTION_MMUL_384BIT:
2079 	case CCP_ECC_FUNCTION_MADD_384BIT:
2080 	case CCP_ECC_FUNCTION_MINV_384BIT:
2081 		return ccp_run_ecc_mm_cmd(cmd_q, cmd);
2082 
2083 	case CCP_ECC_FUNCTION_PADD_384BIT:
2084 	case CCP_ECC_FUNCTION_PMUL_384BIT:
2085 	case CCP_ECC_FUNCTION_PDBL_384BIT:
2086 		return ccp_run_ecc_pm_cmd(cmd_q, cmd);
2087 
2088 	default:
2089 		return -EINVAL;
2090 	}
2091 }
2092 
2093 int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
2094 {
2095 	int ret;
2096 
2097 	cmd->engine_error = 0;
2098 	cmd_q->cmd_error = 0;
2099 	cmd_q->int_rcvd = 0;
2100 	cmd_q->free_slots = CMD_Q_DEPTH(ioread32(cmd_q->reg_status));
2101 
2102 	switch (cmd->engine) {
2103 	case CCP_ENGINE_AES:
2104 		ret = ccp_run_aes_cmd(cmd_q, cmd);
2105 		break;
2106 	case CCP_ENGINE_XTS_AES_128:
2107 		ret = ccp_run_xts_aes_cmd(cmd_q, cmd);
2108 		break;
2109 	case CCP_ENGINE_SHA:
2110 		ret = ccp_run_sha_cmd(cmd_q, cmd);
2111 		break;
2112 	case CCP_ENGINE_RSA:
2113 		ret = ccp_run_rsa_cmd(cmd_q, cmd);
2114 		break;
2115 	case CCP_ENGINE_PASSTHRU:
2116 		ret = ccp_run_passthru_cmd(cmd_q, cmd);
2117 		break;
2118 	case CCP_ENGINE_ECC:
2119 		ret = ccp_run_ecc_cmd(cmd_q, cmd);
2120 		break;
2121 	default:
2122 		ret = -EINVAL;
2123 	}
2124 
2125 	return ret;
2126 }
2127