xref: /openbmc/linux/drivers/crypto/ccp/ccp-ops.c (revision a99237af)
1 /*
2  * AMD Cryptographic Coprocessor (CCP) driver
3  *
4  * Copyright (C) 2013,2017 Advanced Micro Devices, Inc.
5  *
6  * Author: Tom Lendacky <thomas.lendacky@amd.com>
7  * Author: Gary R Hook <gary.hook@amd.com>
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License version 2 as
11  * published by the Free Software Foundation.
12  */
13 
14 #include <linux/module.h>
15 #include <linux/kernel.h>
16 #include <linux/pci.h>
17 #include <linux/interrupt.h>
18 #include <crypto/scatterwalk.h>
19 #include <crypto/des.h>
20 #include <linux/ccp.h>
21 
22 #include "ccp-dev.h"
23 
24 /* SHA initial context values */
25 static const __be32 ccp_sha1_init[SHA1_DIGEST_SIZE / sizeof(__be32)] = {
26 	cpu_to_be32(SHA1_H0), cpu_to_be32(SHA1_H1),
27 	cpu_to_be32(SHA1_H2), cpu_to_be32(SHA1_H3),
28 	cpu_to_be32(SHA1_H4),
29 };
30 
31 static const __be32 ccp_sha224_init[SHA256_DIGEST_SIZE / sizeof(__be32)] = {
32 	cpu_to_be32(SHA224_H0), cpu_to_be32(SHA224_H1),
33 	cpu_to_be32(SHA224_H2), cpu_to_be32(SHA224_H3),
34 	cpu_to_be32(SHA224_H4), cpu_to_be32(SHA224_H5),
35 	cpu_to_be32(SHA224_H6), cpu_to_be32(SHA224_H7),
36 };
37 
38 static const __be32 ccp_sha256_init[SHA256_DIGEST_SIZE / sizeof(__be32)] = {
39 	cpu_to_be32(SHA256_H0), cpu_to_be32(SHA256_H1),
40 	cpu_to_be32(SHA256_H2), cpu_to_be32(SHA256_H3),
41 	cpu_to_be32(SHA256_H4), cpu_to_be32(SHA256_H5),
42 	cpu_to_be32(SHA256_H6), cpu_to_be32(SHA256_H7),
43 };
44 
45 static const __be64 ccp_sha384_init[SHA512_DIGEST_SIZE / sizeof(__be64)] = {
46 	cpu_to_be64(SHA384_H0), cpu_to_be64(SHA384_H1),
47 	cpu_to_be64(SHA384_H2), cpu_to_be64(SHA384_H3),
48 	cpu_to_be64(SHA384_H4), cpu_to_be64(SHA384_H5),
49 	cpu_to_be64(SHA384_H6), cpu_to_be64(SHA384_H7),
50 };
51 
52 static const __be64 ccp_sha512_init[SHA512_DIGEST_SIZE / sizeof(__be64)] = {
53 	cpu_to_be64(SHA512_H0), cpu_to_be64(SHA512_H1),
54 	cpu_to_be64(SHA512_H2), cpu_to_be64(SHA512_H3),
55 	cpu_to_be64(SHA512_H4), cpu_to_be64(SHA512_H5),
56 	cpu_to_be64(SHA512_H6), cpu_to_be64(SHA512_H7),
57 };
58 
/*
 * Select the job id for a new operation: devices reporting version 3.0
 * get a freshly generated id from ccp_gen_jobid(), every other version
 * uses 0.
 *
 * The argument is parenthesized so that any pointer-valued expression
 * (not just a plain identifier or member chain) expands correctly.  It
 * is still evaluated twice on the version 3.0 path, so it must not have
 * side effects.
 */
#define	CCP_NEW_JOBID(ccp)	(((ccp)->vdata->version == CCP_VERSION(3, 0)) ? \
					ccp_gen_jobid(ccp) : 0)
61 
62 static u32 ccp_gen_jobid(struct ccp_device *ccp)
63 {
64 	return atomic_inc_return(&ccp->current_id) & CCP_JOBID_MASK;
65 }
66 
67 static void ccp_sg_free(struct ccp_sg_workarea *wa)
68 {
69 	if (wa->dma_count)
70 		dma_unmap_sg(wa->dma_dev, wa->dma_sg, wa->nents, wa->dma_dir);
71 
72 	wa->dma_count = 0;
73 }
74 
75 static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev,
76 				struct scatterlist *sg, u64 len,
77 				enum dma_data_direction dma_dir)
78 {
79 	memset(wa, 0, sizeof(*wa));
80 
81 	wa->sg = sg;
82 	if (!sg)
83 		return 0;
84 
85 	wa->nents = sg_nents_for_len(sg, len);
86 	if (wa->nents < 0)
87 		return wa->nents;
88 
89 	wa->bytes_left = len;
90 	wa->sg_used = 0;
91 
92 	if (len == 0)
93 		return 0;
94 
95 	if (dma_dir == DMA_NONE)
96 		return 0;
97 
98 	wa->dma_sg = sg;
99 	wa->dma_dev = dev;
100 	wa->dma_dir = dma_dir;
101 	wa->dma_count = dma_map_sg(dev, sg, wa->nents, dma_dir);
102 	if (!wa->dma_count)
103 		return -ENOMEM;
104 
105 	return 0;
106 }
107 
108 static void ccp_update_sg_workarea(struct ccp_sg_workarea *wa, unsigned int len)
109 {
110 	unsigned int nbytes = min_t(u64, len, wa->bytes_left);
111 
112 	if (!wa->sg)
113 		return;
114 
115 	wa->sg_used += nbytes;
116 	wa->bytes_left -= nbytes;
117 	if (wa->sg_used == wa->sg->length) {
118 		wa->sg = sg_next(wa->sg);
119 		wa->sg_used = 0;
120 	}
121 }
122 
123 static void ccp_dm_free(struct ccp_dm_workarea *wa)
124 {
125 	if (wa->length <= CCP_DMAPOOL_MAX_SIZE) {
126 		if (wa->address)
127 			dma_pool_free(wa->dma_pool, wa->address,
128 				      wa->dma.address);
129 	} else {
130 		if (wa->dma.address)
131 			dma_unmap_single(wa->dev, wa->dma.address, wa->length,
132 					 wa->dma.dir);
133 		kfree(wa->address);
134 	}
135 
136 	wa->address = NULL;
137 	wa->dma.address = 0;
138 }
139 
140 static int ccp_init_dm_workarea(struct ccp_dm_workarea *wa,
141 				struct ccp_cmd_queue *cmd_q,
142 				unsigned int len,
143 				enum dma_data_direction dir)
144 {
145 	memset(wa, 0, sizeof(*wa));
146 
147 	if (!len)
148 		return 0;
149 
150 	wa->dev = cmd_q->ccp->dev;
151 	wa->length = len;
152 
153 	if (len <= CCP_DMAPOOL_MAX_SIZE) {
154 		wa->dma_pool = cmd_q->dma_pool;
155 
156 		wa->address = dma_pool_alloc(wa->dma_pool, GFP_KERNEL,
157 					     &wa->dma.address);
158 		if (!wa->address)
159 			return -ENOMEM;
160 
161 		wa->dma.length = CCP_DMAPOOL_MAX_SIZE;
162 
163 		memset(wa->address, 0, CCP_DMAPOOL_MAX_SIZE);
164 	} else {
165 		wa->address = kzalloc(len, GFP_KERNEL);
166 		if (!wa->address)
167 			return -ENOMEM;
168 
169 		wa->dma.address = dma_map_single(wa->dev, wa->address, len,
170 						 dir);
171 		if (dma_mapping_error(wa->dev, wa->dma.address))
172 			return -ENOMEM;
173 
174 		wa->dma.length = len;
175 	}
176 	wa->dma.dir = dir;
177 
178 	return 0;
179 }
180 
181 static int ccp_set_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
182 			   struct scatterlist *sg, unsigned int sg_offset,
183 			   unsigned int len)
184 {
185 	WARN_ON(!wa->address);
186 
187 	if (len > (wa->length - wa_offset))
188 		return -EINVAL;
189 
190 	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
191 				 0);
192 	return 0;
193 }
194 
195 static void ccp_get_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
196 			    struct scatterlist *sg, unsigned int sg_offset,
197 			    unsigned int len)
198 {
199 	WARN_ON(!wa->address);
200 
201 	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
202 				 1);
203 }
204 
205 static int ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa,
206 				   unsigned int wa_offset,
207 				   struct scatterlist *sg,
208 				   unsigned int sg_offset,
209 				   unsigned int len)
210 {
211 	u8 *p, *q;
212 	int	rc;
213 
214 	rc = ccp_set_dm_area(wa, wa_offset, sg, sg_offset, len);
215 	if (rc)
216 		return rc;
217 
218 	p = wa->address + wa_offset;
219 	q = p + len - 1;
220 	while (p < q) {
221 		*p = *p ^ *q;
222 		*q = *p ^ *q;
223 		*p = *p ^ *q;
224 		p++;
225 		q--;
226 	}
227 	return 0;
228 }
229 
230 static void ccp_reverse_get_dm_area(struct ccp_dm_workarea *wa,
231 				    unsigned int wa_offset,
232 				    struct scatterlist *sg,
233 				    unsigned int sg_offset,
234 				    unsigned int len)
235 {
236 	u8 *p, *q;
237 
238 	p = wa->address + wa_offset;
239 	q = p + len - 1;
240 	while (p < q) {
241 		*p = *p ^ *q;
242 		*q = *p ^ *q;
243 		*p = *p ^ *q;
244 		p++;
245 		q--;
246 	}
247 
248 	ccp_get_dm_area(wa, wa_offset, sg, sg_offset, len);
249 }
250 
251 static void ccp_free_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q)
252 {
253 	ccp_dm_free(&data->dm_wa);
254 	ccp_sg_free(&data->sg_wa);
255 }
256 
257 static int ccp_init_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q,
258 			 struct scatterlist *sg, u64 sg_len,
259 			 unsigned int dm_len,
260 			 enum dma_data_direction dir)
261 {
262 	int ret;
263 
264 	memset(data, 0, sizeof(*data));
265 
266 	ret = ccp_init_sg_workarea(&data->sg_wa, cmd_q->ccp->dev, sg, sg_len,
267 				   dir);
268 	if (ret)
269 		goto e_err;
270 
271 	ret = ccp_init_dm_workarea(&data->dm_wa, cmd_q, dm_len, dir);
272 	if (ret)
273 		goto e_err;
274 
275 	return 0;
276 
277 e_err:
278 	ccp_free_data(data, cmd_q);
279 
280 	return ret;
281 }
282 
283 static unsigned int ccp_queue_buf(struct ccp_data *data, unsigned int from)
284 {
285 	struct ccp_sg_workarea *sg_wa = &data->sg_wa;
286 	struct ccp_dm_workarea *dm_wa = &data->dm_wa;
287 	unsigned int buf_count, nbytes;
288 
289 	/* Clear the buffer if setting it */
290 	if (!from)
291 		memset(dm_wa->address, 0, dm_wa->length);
292 
293 	if (!sg_wa->sg)
294 		return 0;
295 
296 	/* Perform the copy operation
297 	 *   nbytes will always be <= UINT_MAX because dm_wa->length is
298 	 *   an unsigned int
299 	 */
300 	nbytes = min_t(u64, sg_wa->bytes_left, dm_wa->length);
301 	scatterwalk_map_and_copy(dm_wa->address, sg_wa->sg, sg_wa->sg_used,
302 				 nbytes, from);
303 
304 	/* Update the structures and generate the count */
305 	buf_count = 0;
306 	while (sg_wa->bytes_left && (buf_count < dm_wa->length)) {
307 		nbytes = min(sg_wa->sg->length - sg_wa->sg_used,
308 			     dm_wa->length - buf_count);
309 		nbytes = min_t(u64, sg_wa->bytes_left, nbytes);
310 
311 		buf_count += nbytes;
312 		ccp_update_sg_workarea(sg_wa, nbytes);
313 	}
314 
315 	return buf_count;
316 }
317 
/* Fill the bounce buffer from the scatterlist (ccp_queue_buf() with
 * from == 0); returns the byte count it reports.
 */
static unsigned int ccp_fill_queue_buf(struct ccp_data *data)
{
	unsigned int filled = ccp_queue_buf(data, 0);

	return filled;
}
322 
/* Drain the bounce buffer into the scatterlist (ccp_queue_buf() with
 * from == 1); returns the byte count it reports.
 */
static unsigned int ccp_empty_queue_buf(struct ccp_data *data)
{
	unsigned int drained = ccp_queue_buf(data, 1);

	return drained;
}
327 
328 static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst,
329 			     struct ccp_op *op, unsigned int block_size,
330 			     bool blocksize_op)
331 {
332 	unsigned int sg_src_len, sg_dst_len, op_len;
333 
334 	/* The CCP can only DMA from/to one address each per operation. This
335 	 * requires that we find the smallest DMA area between the source
336 	 * and destination. The resulting len values will always be <= UINT_MAX
337 	 * because the dma length is an unsigned int.
338 	 */
339 	sg_src_len = sg_dma_len(src->sg_wa.sg) - src->sg_wa.sg_used;
340 	sg_src_len = min_t(u64, src->sg_wa.bytes_left, sg_src_len);
341 
342 	if (dst) {
343 		sg_dst_len = sg_dma_len(dst->sg_wa.sg) - dst->sg_wa.sg_used;
344 		sg_dst_len = min_t(u64, src->sg_wa.bytes_left, sg_dst_len);
345 		op_len = min(sg_src_len, sg_dst_len);
346 	} else {
347 		op_len = sg_src_len;
348 	}
349 
350 	/* The data operation length will be at least block_size in length
351 	 * or the smaller of available sg room remaining for the source or
352 	 * the destination
353 	 */
354 	op_len = max(op_len, block_size);
355 
356 	/* Unless we have to buffer data, there's no reason to wait */
357 	op->soc = 0;
358 
359 	if (sg_src_len < block_size) {
360 		/* Not enough data in the sg element, so it
361 		 * needs to be buffered into a blocksize chunk
362 		 */
363 		int cp_len = ccp_fill_queue_buf(src);
364 
365 		op->soc = 1;
366 		op->src.u.dma.address = src->dm_wa.dma.address;
367 		op->src.u.dma.offset = 0;
368 		op->src.u.dma.length = (blocksize_op) ? block_size : cp_len;
369 	} else {
370 		/* Enough data in the sg element, but we need to
371 		 * adjust for any previously copied data
372 		 */
373 		op->src.u.dma.address = sg_dma_address(src->sg_wa.sg);
374 		op->src.u.dma.offset = src->sg_wa.sg_used;
375 		op->src.u.dma.length = op_len & ~(block_size - 1);
376 
377 		ccp_update_sg_workarea(&src->sg_wa, op->src.u.dma.length);
378 	}
379 
380 	if (dst) {
381 		if (sg_dst_len < block_size) {
382 			/* Not enough room in the sg element or we're on the
383 			 * last piece of data (when using padding), so the
384 			 * output needs to be buffered into a blocksize chunk
385 			 */
386 			op->soc = 1;
387 			op->dst.u.dma.address = dst->dm_wa.dma.address;
388 			op->dst.u.dma.offset = 0;
389 			op->dst.u.dma.length = op->src.u.dma.length;
390 		} else {
391 			/* Enough room in the sg element, but we need to
392 			 * adjust for any previously used area
393 			 */
394 			op->dst.u.dma.address = sg_dma_address(dst->sg_wa.sg);
395 			op->dst.u.dma.offset = dst->sg_wa.sg_used;
396 			op->dst.u.dma.length = op->src.u.dma.length;
397 		}
398 	}
399 }
400 
401 static void ccp_process_data(struct ccp_data *src, struct ccp_data *dst,
402 			     struct ccp_op *op)
403 {
404 	op->init = 0;
405 
406 	if (dst) {
407 		if (op->dst.u.dma.address == dst->dm_wa.dma.address)
408 			ccp_empty_queue_buf(dst);
409 		else
410 			ccp_update_sg_workarea(&dst->sg_wa,
411 					       op->dst.u.dma.length);
412 	}
413 }
414 
415 static int ccp_copy_to_from_sb(struct ccp_cmd_queue *cmd_q,
416 			       struct ccp_dm_workarea *wa, u32 jobid, u32 sb,
417 			       u32 byte_swap, bool from)
418 {
419 	struct ccp_op op;
420 
421 	memset(&op, 0, sizeof(op));
422 
423 	op.cmd_q = cmd_q;
424 	op.jobid = jobid;
425 	op.eom = 1;
426 
427 	if (from) {
428 		op.soc = 1;
429 		op.src.type = CCP_MEMTYPE_SB;
430 		op.src.u.sb = sb;
431 		op.dst.type = CCP_MEMTYPE_SYSTEM;
432 		op.dst.u.dma.address = wa->dma.address;
433 		op.dst.u.dma.length = wa->length;
434 	} else {
435 		op.src.type = CCP_MEMTYPE_SYSTEM;
436 		op.src.u.dma.address = wa->dma.address;
437 		op.src.u.dma.length = wa->length;
438 		op.dst.type = CCP_MEMTYPE_SB;
439 		op.dst.u.sb = sb;
440 	}
441 
442 	op.u.passthru.byte_swap = byte_swap;
443 
444 	return cmd_q->ccp->vdata->perform->passthru(&op);
445 }
446 
447 static int ccp_copy_to_sb(struct ccp_cmd_queue *cmd_q,
448 			  struct ccp_dm_workarea *wa, u32 jobid, u32 sb,
449 			  u32 byte_swap)
450 {
451 	return ccp_copy_to_from_sb(cmd_q, wa, jobid, sb, byte_swap, false);
452 }
453 
454 static int ccp_copy_from_sb(struct ccp_cmd_queue *cmd_q,
455 			    struct ccp_dm_workarea *wa, u32 jobid, u32 sb,
456 			    u32 byte_swap)
457 {
458 	return ccp_copy_to_from_sb(cmd_q, wa, jobid, sb, byte_swap, true);
459 }
460 
/* Run an AES-CMAC request described by cmd->u.aes on @cmd_q.
 *
 * The request is rejected with -EINVAL unless the key length is one of
 * the three AES key sizes, src_len is a multiple of AES_BLOCK_SIZE,
 * iv_len equals AES_BLOCK_SIZE and the key, iv and src scatterlists are
 * present; a final request additionally needs an AES_BLOCK_SIZE-byte
 * cmac_key.
 *
 * The key and the IV (as context) are loaded into the queue's storage
 * block entries, the source is fed to the AES engine chunk by chunk,
 * and afterwards the context is read back and copied into aes->iv.
 *
 * Returns 0 on success or the error from the failing step.  When a
 * device operation fails, cmd_q->cmd_error is saved in
 * cmd->engine_error.
 */
static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q,
				struct ccp_cmd *cmd)
{
	struct ccp_aes_engine *aes = &cmd->u.aes;
	struct ccp_dm_workarea key, ctx;
	struct ccp_data src;
	struct ccp_op op;
	unsigned int dm_offset;
	int ret;

	if (!((aes->key_len == AES_KEYSIZE_128) ||
	      (aes->key_len == AES_KEYSIZE_192) ||
	      (aes->key_len == AES_KEYSIZE_256)))
		return -EINVAL;

	/* Only whole blocks are accepted */
	if (aes->src_len & (AES_BLOCK_SIZE - 1))
		return -EINVAL;

	if (aes->iv_len != AES_BLOCK_SIZE)
		return -EINVAL;

	if (!aes->key || !aes->iv || !aes->src)
		return -EINVAL;

	if (aes->cmac_final) {
		if (aes->cmac_key_len != AES_BLOCK_SIZE)
			return -EINVAL;

		if (!aes->cmac_key)
			return -EINVAL;
	}

	/* The code below assumes key and context each use one SB entry */
	BUILD_BUG_ON(CCP_AES_KEY_SB_COUNT != 1);
	BUILD_BUG_ON(CCP_AES_CTX_SB_COUNT != 1);

	/* NOTE: this value is overwritten by the ccp_init_dm_workarea()
	 * call below before it is ever read.
	 */
	ret = -EIO;
	memset(&op, 0, sizeof(op));
	op.cmd_q = cmd_q;
	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
	op.sb_key = cmd_q->sb_key;
	op.sb_ctx = cmd_q->sb_ctx;
	op.init = 1;
	op.u.aes.type = aes->type;
	op.u.aes.mode = aes->mode;
	op.u.aes.action = aes->action;

	/* All supported key sizes fit in a single (32-byte) SB entry
	 * and must be in little endian format. Use the 256-bit byte
	 * swap passthru option to convert from big endian to little
	 * endian.
	 */
	ret = ccp_init_dm_workarea(&key, cmd_q,
				   CCP_AES_KEY_SB_COUNT * CCP_SB_BYTES,
				   DMA_TO_DEVICE);
	if (ret)
		return ret;

	/* Place the key at the end of the SB-sized buffer */
	dm_offset = CCP_SB_BYTES - aes->key_len;
	ret = ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
	if (ret)
		goto e_key;
	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
			     CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_key;
	}

	/* The AES context fits in a single (32-byte) SB entry and
	 * must be in little endian format. Use the 256-bit byte swap
	 * passthru option to convert from big endian to little endian.
	 */
	ret = ccp_init_dm_workarea(&ctx, cmd_q,
				   CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES,
				   DMA_BIDIRECTIONAL);
	if (ret)
		goto e_key;

	/* The IV occupies the last AES_BLOCK_SIZE bytes of the context */
	dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
	ret = ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
	if (ret)
		goto e_ctx;
	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
			     CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_ctx;
	}

	/* Send data to the CCP AES engine */
	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
			    AES_BLOCK_SIZE, DMA_TO_DEVICE);
	if (ret)
		goto e_ctx;

	while (src.sg_wa.bytes_left) {
		ccp_prepare_data(&src, NULL, &op, AES_BLOCK_SIZE, true);
		/* bytes_left reaching zero here means this is the last
		 * chunk of the request.
		 */
		if (aes->cmac_final && !src.sg_wa.bytes_left) {
			op.eom = 1;

			/* Push the K1/K2 key to the CCP now */
			/* Read the current context back from the SB, write
			 * the CMAC key at the start of that buffer and
			 * store the result in the SB again.
			 */
			ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid,
					       op.sb_ctx,
					       CCP_PASSTHRU_BYTESWAP_256BIT);
			if (ret) {
				cmd->engine_error = cmd_q->cmd_error;
				goto e_src;
			}

			ret = ccp_set_dm_area(&ctx, 0, aes->cmac_key, 0,
					      aes->cmac_key_len);
			if (ret)
				goto e_src;
			ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
					     CCP_PASSTHRU_BYTESWAP_256BIT);
			if (ret) {
				cmd->engine_error = cmd_q->cmd_error;
				goto e_src;
			}
		}

		ret = cmd_q->ccp->vdata->perform->aes(&op);
		if (ret) {
			cmd->engine_error = cmd_q->cmd_error;
			goto e_src;
		}

		/* No destination for CMAC; this just clears op.init */
		ccp_process_data(&src, NULL, &op);
	}

	/* Retrieve the AES context - convert from LE to BE using
	 * 32-byte (256-bit) byteswapping
	 */
	ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
			       CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_src;
	}

	/* ...but we only need AES_BLOCK_SIZE bytes */
	dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
	ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);

	/* Cleanup labels unwind in reverse order of acquisition; the
	 * success path falls through all of them.
	 */
e_src:
	ccp_free_data(&src, cmd_q);

e_ctx:
	ccp_dm_free(&ctx);

e_key:
	ccp_dm_free(&key);

	return ret;
}
616 
617 static int ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q,
618 			       struct ccp_cmd *cmd)
619 {
620 	struct ccp_aes_engine *aes = &cmd->u.aes;
621 	struct ccp_dm_workarea key, ctx, final_wa, tag;
622 	struct ccp_data src, dst;
623 	struct ccp_data aad;
624 	struct ccp_op op;
625 
626 	unsigned long long *final;
627 	unsigned int dm_offset;
628 	unsigned int ilen;
629 	bool in_place = true; /* Default value */
630 	int ret;
631 
632 	struct scatterlist *p_inp, sg_inp[2];
633 	struct scatterlist *p_tag, sg_tag[2];
634 	struct scatterlist *p_outp, sg_outp[2];
635 	struct scatterlist *p_aad;
636 
637 	if (!aes->iv)
638 		return -EINVAL;
639 
640 	if (!((aes->key_len == AES_KEYSIZE_128) ||
641 		(aes->key_len == AES_KEYSIZE_192) ||
642 		(aes->key_len == AES_KEYSIZE_256)))
643 		return -EINVAL;
644 
645 	if (!aes->key) /* Gotta have a key SGL */
646 		return -EINVAL;
647 
648 	/* First, decompose the source buffer into AAD & PT,
649 	 * and the destination buffer into AAD, CT & tag, or
650 	 * the input into CT & tag.
651 	 * It is expected that the input and output SGs will
652 	 * be valid, even if the AAD and input lengths are 0.
653 	 */
654 	p_aad = aes->src;
655 	p_inp = scatterwalk_ffwd(sg_inp, aes->src, aes->aad_len);
656 	p_outp = scatterwalk_ffwd(sg_outp, aes->dst, aes->aad_len);
657 	if (aes->action == CCP_AES_ACTION_ENCRYPT) {
658 		ilen = aes->src_len;
659 		p_tag = scatterwalk_ffwd(sg_tag, p_outp, ilen);
660 	} else {
661 		/* Input length for decryption includes tag */
662 		ilen = aes->src_len - AES_BLOCK_SIZE;
663 		p_tag = scatterwalk_ffwd(sg_tag, p_inp, ilen);
664 	}
665 
666 	memset(&op, 0, sizeof(op));
667 	op.cmd_q = cmd_q;
668 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
669 	op.sb_key = cmd_q->sb_key; /* Pre-allocated */
670 	op.sb_ctx = cmd_q->sb_ctx; /* Pre-allocated */
671 	op.init = 1;
672 	op.u.aes.type = aes->type;
673 
674 	/* Copy the key to the LSB */
675 	ret = ccp_init_dm_workarea(&key, cmd_q,
676 				   CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES,
677 				   DMA_TO_DEVICE);
678 	if (ret)
679 		return ret;
680 
681 	dm_offset = CCP_SB_BYTES - aes->key_len;
682 	ret = ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
683 	if (ret)
684 		goto e_key;
685 	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
686 			     CCP_PASSTHRU_BYTESWAP_256BIT);
687 	if (ret) {
688 		cmd->engine_error = cmd_q->cmd_error;
689 		goto e_key;
690 	}
691 
692 	/* Copy the context (IV) to the LSB.
693 	 * There is an assumption here that the IV is 96 bits in length, plus
694 	 * a nonce of 32 bits. If no IV is present, use a zeroed buffer.
695 	 */
696 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
697 				   CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES,
698 				   DMA_BIDIRECTIONAL);
699 	if (ret)
700 		goto e_key;
701 
702 	dm_offset = CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES - aes->iv_len;
703 	ret = ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
704 	if (ret)
705 		goto e_ctx;
706 
707 	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
708 			     CCP_PASSTHRU_BYTESWAP_256BIT);
709 	if (ret) {
710 		cmd->engine_error = cmd_q->cmd_error;
711 		goto e_ctx;
712 	}
713 
714 	op.init = 1;
715 	if (aes->aad_len > 0) {
716 		/* Step 1: Run a GHASH over the Additional Authenticated Data */
717 		ret = ccp_init_data(&aad, cmd_q, p_aad, aes->aad_len,
718 				    AES_BLOCK_SIZE,
719 				    DMA_TO_DEVICE);
720 		if (ret)
721 			goto e_ctx;
722 
723 		op.u.aes.mode = CCP_AES_MODE_GHASH;
724 		op.u.aes.action = CCP_AES_GHASHAAD;
725 
726 		while (aad.sg_wa.bytes_left) {
727 			ccp_prepare_data(&aad, NULL, &op, AES_BLOCK_SIZE, true);
728 
729 			ret = cmd_q->ccp->vdata->perform->aes(&op);
730 			if (ret) {
731 				cmd->engine_error = cmd_q->cmd_error;
732 				goto e_aad;
733 			}
734 
735 			ccp_process_data(&aad, NULL, &op);
736 			op.init = 0;
737 		}
738 	}
739 
740 	op.u.aes.mode = CCP_AES_MODE_GCTR;
741 	op.u.aes.action = aes->action;
742 
743 	if (ilen > 0) {
744 		/* Step 2: Run a GCTR over the plaintext */
745 		in_place = (sg_virt(p_inp) == sg_virt(p_outp)) ? true : false;
746 
747 		ret = ccp_init_data(&src, cmd_q, p_inp, ilen,
748 				    AES_BLOCK_SIZE,
749 				    in_place ? DMA_BIDIRECTIONAL
750 					     : DMA_TO_DEVICE);
751 		if (ret)
752 			goto e_ctx;
753 
754 		if (in_place) {
755 			dst = src;
756 		} else {
757 			ret = ccp_init_data(&dst, cmd_q, p_outp, ilen,
758 					    AES_BLOCK_SIZE, DMA_FROM_DEVICE);
759 			if (ret)
760 				goto e_src;
761 		}
762 
763 		op.soc = 0;
764 		op.eom = 0;
765 		op.init = 1;
766 		while (src.sg_wa.bytes_left) {
767 			ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true);
768 			if (!src.sg_wa.bytes_left) {
769 				unsigned int nbytes = aes->src_len
770 						      % AES_BLOCK_SIZE;
771 
772 				if (nbytes) {
773 					op.eom = 1;
774 					op.u.aes.size = (nbytes * 8) - 1;
775 				}
776 			}
777 
778 			ret = cmd_q->ccp->vdata->perform->aes(&op);
779 			if (ret) {
780 				cmd->engine_error = cmd_q->cmd_error;
781 				goto e_dst;
782 			}
783 
784 			ccp_process_data(&src, &dst, &op);
785 			op.init = 0;
786 		}
787 	}
788 
789 	/* Step 3: Update the IV portion of the context with the original IV */
790 	ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
791 			       CCP_PASSTHRU_BYTESWAP_256BIT);
792 	if (ret) {
793 		cmd->engine_error = cmd_q->cmd_error;
794 		goto e_dst;
795 	}
796 
797 	ret = ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
798 	if (ret)
799 		goto e_dst;
800 
801 	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
802 			     CCP_PASSTHRU_BYTESWAP_256BIT);
803 	if (ret) {
804 		cmd->engine_error = cmd_q->cmd_error;
805 		goto e_dst;
806 	}
807 
808 	/* Step 4: Concatenate the lengths of the AAD and source, and
809 	 * hash that 16 byte buffer.
810 	 */
811 	ret = ccp_init_dm_workarea(&final_wa, cmd_q, AES_BLOCK_SIZE,
812 				   DMA_BIDIRECTIONAL);
813 	if (ret)
814 		goto e_dst;
815 	final = (unsigned long long *) final_wa.address;
816 	final[0] = cpu_to_be64(aes->aad_len * 8);
817 	final[1] = cpu_to_be64(ilen * 8);
818 
819 	op.u.aes.mode = CCP_AES_MODE_GHASH;
820 	op.u.aes.action = CCP_AES_GHASHFINAL;
821 	op.src.type = CCP_MEMTYPE_SYSTEM;
822 	op.src.u.dma.address = final_wa.dma.address;
823 	op.src.u.dma.length = AES_BLOCK_SIZE;
824 	op.dst.type = CCP_MEMTYPE_SYSTEM;
825 	op.dst.u.dma.address = final_wa.dma.address;
826 	op.dst.u.dma.length = AES_BLOCK_SIZE;
827 	op.eom = 1;
828 	op.u.aes.size = 0;
829 	ret = cmd_q->ccp->vdata->perform->aes(&op);
830 	if (ret)
831 		goto e_dst;
832 
833 	if (aes->action == CCP_AES_ACTION_ENCRYPT) {
834 		/* Put the ciphered tag after the ciphertext. */
835 		ccp_get_dm_area(&final_wa, 0, p_tag, 0, AES_BLOCK_SIZE);
836 	} else {
837 		/* Does this ciphered tag match the input? */
838 		ret = ccp_init_dm_workarea(&tag, cmd_q, AES_BLOCK_SIZE,
839 					   DMA_BIDIRECTIONAL);
840 		if (ret)
841 			goto e_tag;
842 		ret = ccp_set_dm_area(&tag, 0, p_tag, 0, AES_BLOCK_SIZE);
843 		if (ret)
844 			goto e_tag;
845 
846 		ret = memcmp(tag.address, final_wa.address, AES_BLOCK_SIZE);
847 		ccp_dm_free(&tag);
848 	}
849 
850 e_tag:
851 	ccp_dm_free(&final_wa);
852 
853 e_dst:
854 	if (aes->src_len && !in_place)
855 		ccp_free_data(&dst, cmd_q);
856 
857 e_src:
858 	if (aes->src_len)
859 		ccp_free_data(&src, cmd_q);
860 
861 e_aad:
862 	if (aes->aad_len)
863 		ccp_free_data(&aad, cmd_q);
864 
865 e_ctx:
866 	ccp_dm_free(&ctx);
867 
868 e_key:
869 	ccp_dm_free(&key);
870 
871 	return ret;
872 }
873 
/* Run an AES request described by cmd->u.aes on @cmd_q.
 *
 * CMAC and GCM requests are handed to their dedicated functions.  For
 * the remaining modes the request is rejected with -EINVAL unless the
 * key length is one of the three AES key sizes, the key, src and dst
 * scatterlists are present, src_len is a multiple of AES_BLOCK_SIZE for
 * ECB, CBC and CFB, and (for every mode but ECB) an AES_BLOCK_SIZE-byte
 * iv is supplied.
 *
 * The key is loaded into the queue's key SB entry and, except for ECB,
 * the IV into the context SB entry; the data is then pushed through the
 * AES engine chunk by chunk.  Except for ECB the context is read back
 * afterwards and copied into aes->iv.
 *
 * Returns 0 on success or the error from the failing step.  When a
 * device operation fails, cmd_q->cmd_error is saved in
 * cmd->engine_error.
 */
static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
	struct ccp_aes_engine *aes = &cmd->u.aes;
	struct ccp_dm_workarea key, ctx;
	struct ccp_data src, dst;
	struct ccp_op op;
	unsigned int dm_offset;
	bool in_place = false;
	int ret;

	/* Modes with their own implementation */
	if (aes->mode == CCP_AES_MODE_CMAC)
		return ccp_run_aes_cmac_cmd(cmd_q, cmd);

	if (aes->mode == CCP_AES_MODE_GCM)
		return ccp_run_aes_gcm_cmd(cmd_q, cmd);

	if (!((aes->key_len == AES_KEYSIZE_128) ||
	      (aes->key_len == AES_KEYSIZE_192) ||
	      (aes->key_len == AES_KEYSIZE_256)))
		return -EINVAL;

	/* These modes only accept whole blocks */
	if (((aes->mode == CCP_AES_MODE_ECB) ||
	     (aes->mode == CCP_AES_MODE_CBC) ||
	     (aes->mode == CCP_AES_MODE_CFB)) &&
	    (aes->src_len & (AES_BLOCK_SIZE - 1)))
		return -EINVAL;

	if (!aes->key || !aes->src || !aes->dst)
		return -EINVAL;

	/* ECB is the only mode handled here that takes no IV */
	if (aes->mode != CCP_AES_MODE_ECB) {
		if (aes->iv_len != AES_BLOCK_SIZE)
			return -EINVAL;

		if (!aes->iv)
			return -EINVAL;
	}

	/* The code below assumes key and context each use one SB entry */
	BUILD_BUG_ON(CCP_AES_KEY_SB_COUNT != 1);
	BUILD_BUG_ON(CCP_AES_CTX_SB_COUNT != 1);

	/* NOTE: this value is overwritten by the ccp_init_dm_workarea()
	 * call below before it is ever read.
	 */
	ret = -EIO;
	memset(&op, 0, sizeof(op));
	op.cmd_q = cmd_q;
	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
	op.sb_key = cmd_q->sb_key;
	op.sb_ctx = cmd_q->sb_ctx;
	op.init = (aes->mode == CCP_AES_MODE_ECB) ? 0 : 1;
	op.u.aes.type = aes->type;
	op.u.aes.mode = aes->mode;
	op.u.aes.action = aes->action;

	/* All supported key sizes fit in a single (32-byte) SB entry
	 * and must be in little endian format. Use the 256-bit byte
	 * swap passthru option to convert from big endian to little
	 * endian.
	 */
	ret = ccp_init_dm_workarea(&key, cmd_q,
				   CCP_AES_KEY_SB_COUNT * CCP_SB_BYTES,
				   DMA_TO_DEVICE);
	if (ret)
		return ret;

	/* Place the key at the end of the SB-sized buffer */
	dm_offset = CCP_SB_BYTES - aes->key_len;
	ret = ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
	if (ret)
		goto e_key;
	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
			     CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_key;
	}

	/* The AES context fits in a single (32-byte) SB entry and
	 * must be in little endian format. Use the 256-bit byte swap
	 * passthru option to convert from big endian to little endian.
	 */
	ret = ccp_init_dm_workarea(&ctx, cmd_q,
				   CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES,
				   DMA_BIDIRECTIONAL);
	if (ret)
		goto e_key;

	if (aes->mode != CCP_AES_MODE_ECB) {
		/* Load the AES context - convert to LE */
		dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
		ret = ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
		if (ret)
			goto e_ctx;
		ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
				     CCP_PASSTHRU_BYTESWAP_256BIT);
		if (ret) {
			cmd->engine_error = cmd_q->cmd_error;
			goto e_ctx;
		}
	}
	/* CFB and CTR pass a size of one block, in bits, minus one; every
	 * other mode passes 0.
	 */
	switch (aes->mode) {
	case CCP_AES_MODE_CFB: /* CFB128 only */
	case CCP_AES_MODE_CTR:
		op.u.aes.size = AES_BLOCK_SIZE * BITS_PER_BYTE - 1;
		break;
	default:
		op.u.aes.size = 0;
	}

	/* Prepare the input and output data workareas. For in-place
	 * operations we need to set the dma direction to BIDIRECTIONAL
	 * and copy the src workarea to the dst workarea.
	 */
	if (sg_virt(aes->src) == sg_virt(aes->dst))
		in_place = true;

	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
			    AES_BLOCK_SIZE,
			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
	if (ret)
		goto e_ctx;

	if (in_place) {
		/* dst becomes a copy of src and is never freed on its own */
		dst = src;
	} else {
		ret = ccp_init_data(&dst, cmd_q, aes->dst, aes->src_len,
				    AES_BLOCK_SIZE, DMA_FROM_DEVICE);
		if (ret)
			goto e_src;
	}

	/* Send data to the CCP AES engine */
	while (src.sg_wa.bytes_left) {
		ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true);
		/* bytes_left reaching zero here means this is the last
		 * chunk of the request.
		 */
		if (!src.sg_wa.bytes_left) {
			op.eom = 1;

			/* Since we don't retrieve the AES context in ECB
			 * mode we have to wait for the operation to complete
			 * on the last piece of data
			 */
			if (aes->mode == CCP_AES_MODE_ECB)
				op.soc = 1;
		}

		ret = cmd_q->ccp->vdata->perform->aes(&op);
		if (ret) {
			cmd->engine_error = cmd_q->cmd_error;
			goto e_dst;
		}

		ccp_process_data(&src, &dst, &op);
	}

	if (aes->mode != CCP_AES_MODE_ECB) {
		/* Retrieve the AES context - convert from LE to BE using
		 * 32-byte (256-bit) byteswapping
		 */
		ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
				       CCP_PASSTHRU_BYTESWAP_256BIT);
		if (ret) {
			cmd->engine_error = cmd_q->cmd_error;
			goto e_dst;
		}

		/* ...but we only need AES_BLOCK_SIZE bytes */
		dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
		ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
	}

	/* Cleanup labels unwind in reverse order of acquisition; the
	 * success path falls through all of them.
	 */
e_dst:
	if (!in_place)
		ccp_free_data(&dst, cmd_q);

e_src:
	ccp_free_data(&src, cmd_q);

e_ctx:
	ccp_dm_free(&ctx);

e_key:
	ccp_dm_free(&key);

	return ret;
}
1056 
1057 static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
1058 			       struct ccp_cmd *cmd)
1059 {
1060 	struct ccp_xts_aes_engine *xts = &cmd->u.xts;
1061 	struct ccp_dm_workarea key, ctx;
1062 	struct ccp_data src, dst;
1063 	struct ccp_op op;
1064 	unsigned int unit_size, dm_offset;
1065 	bool in_place = false;
1066 	unsigned int sb_count;
1067 	enum ccp_aes_type aestype;
1068 	int ret;
1069 
1070 	switch (xts->unit_size) {
1071 	case CCP_XTS_AES_UNIT_SIZE_16:
1072 		unit_size = 16;
1073 		break;
1074 	case CCP_XTS_AES_UNIT_SIZE_512:
1075 		unit_size = 512;
1076 		break;
1077 	case CCP_XTS_AES_UNIT_SIZE_1024:
1078 		unit_size = 1024;
1079 		break;
1080 	case CCP_XTS_AES_UNIT_SIZE_2048:
1081 		unit_size = 2048;
1082 		break;
1083 	case CCP_XTS_AES_UNIT_SIZE_4096:
1084 		unit_size = 4096;
1085 		break;
1086 
1087 	default:
1088 		return -EINVAL;
1089 	}
1090 
1091 	if (xts->key_len == AES_KEYSIZE_128)
1092 		aestype = CCP_AES_TYPE_128;
1093 	else if (xts->key_len == AES_KEYSIZE_256)
1094 		aestype = CCP_AES_TYPE_256;
1095 	else
1096 		return -EINVAL;
1097 
1098 	if (!xts->final && (xts->src_len & (AES_BLOCK_SIZE - 1)))
1099 		return -EINVAL;
1100 
1101 	if (xts->iv_len != AES_BLOCK_SIZE)
1102 		return -EINVAL;
1103 
1104 	if (!xts->key || !xts->iv || !xts->src || !xts->dst)
1105 		return -EINVAL;
1106 
1107 	BUILD_BUG_ON(CCP_XTS_AES_KEY_SB_COUNT != 1);
1108 	BUILD_BUG_ON(CCP_XTS_AES_CTX_SB_COUNT != 1);
1109 
1110 	ret = -EIO;
1111 	memset(&op, 0, sizeof(op));
1112 	op.cmd_q = cmd_q;
1113 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
1114 	op.sb_key = cmd_q->sb_key;
1115 	op.sb_ctx = cmd_q->sb_ctx;
1116 	op.init = 1;
1117 	op.u.xts.type = aestype;
1118 	op.u.xts.action = xts->action;
1119 	op.u.xts.unit_size = xts->unit_size;
1120 
1121 	/* A version 3 device only supports 128-bit keys, which fits into a
1122 	 * single SB entry. A version 5 device uses a 512-bit vector, so two
1123 	 * SB entries.
1124 	 */
1125 	if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0))
1126 		sb_count = CCP_XTS_AES_KEY_SB_COUNT;
1127 	else
1128 		sb_count = CCP5_XTS_AES_KEY_SB_COUNT;
1129 	ret = ccp_init_dm_workarea(&key, cmd_q,
1130 				   sb_count * CCP_SB_BYTES,
1131 				   DMA_TO_DEVICE);
1132 	if (ret)
1133 		return ret;
1134 
1135 	if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0)) {
1136 		/* All supported key sizes must be in little endian format.
1137 		 * Use the 256-bit byte swap passthru option to convert from
1138 		 * big endian to little endian.
1139 		 */
1140 		dm_offset = CCP_SB_BYTES - AES_KEYSIZE_128;
1141 		ret = ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len);
1142 		if (ret)
1143 			goto e_key;
1144 		ret = ccp_set_dm_area(&key, 0, xts->key, xts->key_len, xts->key_len);
1145 		if (ret)
1146 			goto e_key;
1147 	} else {
1148 		/* Version 5 CCPs use a 512-bit space for the key: each portion
1149 		 * occupies 256 bits, or one entire slot, and is zero-padded.
1150 		 */
1151 		unsigned int pad;
1152 
1153 		dm_offset = CCP_SB_BYTES;
1154 		pad = dm_offset - xts->key_len;
1155 		ret = ccp_set_dm_area(&key, pad, xts->key, 0, xts->key_len);
1156 		if (ret)
1157 			goto e_key;
1158 		ret = ccp_set_dm_area(&key, dm_offset + pad, xts->key,
1159 				      xts->key_len, xts->key_len);
1160 		if (ret)
1161 			goto e_key;
1162 	}
1163 	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
1164 			     CCP_PASSTHRU_BYTESWAP_256BIT);
1165 	if (ret) {
1166 		cmd->engine_error = cmd_q->cmd_error;
1167 		goto e_key;
1168 	}
1169 
1170 	/* The AES context fits in a single (32-byte) SB entry and
1171 	 * for XTS is already in little endian format so no byte swapping
1172 	 * is needed.
1173 	 */
1174 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
1175 				   CCP_XTS_AES_CTX_SB_COUNT * CCP_SB_BYTES,
1176 				   DMA_BIDIRECTIONAL);
1177 	if (ret)
1178 		goto e_key;
1179 
1180 	ret = ccp_set_dm_area(&ctx, 0, xts->iv, 0, xts->iv_len);
1181 	if (ret)
1182 		goto e_ctx;
1183 	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1184 			     CCP_PASSTHRU_BYTESWAP_NOOP);
1185 	if (ret) {
1186 		cmd->engine_error = cmd_q->cmd_error;
1187 		goto e_ctx;
1188 	}
1189 
1190 	/* Prepare the input and output data workareas. For in-place
1191 	 * operations we need to set the dma direction to BIDIRECTIONAL
1192 	 * and copy the src workarea to the dst workarea.
1193 	 */
1194 	if (sg_virt(xts->src) == sg_virt(xts->dst))
1195 		in_place = true;
1196 
1197 	ret = ccp_init_data(&src, cmd_q, xts->src, xts->src_len,
1198 			    unit_size,
1199 			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1200 	if (ret)
1201 		goto e_ctx;
1202 
1203 	if (in_place) {
1204 		dst = src;
1205 	} else {
1206 		ret = ccp_init_data(&dst, cmd_q, xts->dst, xts->src_len,
1207 				    unit_size, DMA_FROM_DEVICE);
1208 		if (ret)
1209 			goto e_src;
1210 	}
1211 
1212 	/* Send data to the CCP AES engine */
1213 	while (src.sg_wa.bytes_left) {
1214 		ccp_prepare_data(&src, &dst, &op, unit_size, true);
1215 		if (!src.sg_wa.bytes_left)
1216 			op.eom = 1;
1217 
1218 		ret = cmd_q->ccp->vdata->perform->xts_aes(&op);
1219 		if (ret) {
1220 			cmd->engine_error = cmd_q->cmd_error;
1221 			goto e_dst;
1222 		}
1223 
1224 		ccp_process_data(&src, &dst, &op);
1225 	}
1226 
1227 	/* Retrieve the AES context - convert from LE to BE using
1228 	 * 32-byte (256-bit) byteswapping
1229 	 */
1230 	ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1231 			       CCP_PASSTHRU_BYTESWAP_256BIT);
1232 	if (ret) {
1233 		cmd->engine_error = cmd_q->cmd_error;
1234 		goto e_dst;
1235 	}
1236 
1237 	/* ...but we only need AES_BLOCK_SIZE bytes */
1238 	dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
1239 	ccp_get_dm_area(&ctx, dm_offset, xts->iv, 0, xts->iv_len);
1240 
1241 e_dst:
1242 	if (!in_place)
1243 		ccp_free_data(&dst, cmd_q);
1244 
1245 e_src:
1246 	ccp_free_data(&src, cmd_q);
1247 
1248 e_ctx:
1249 	ccp_dm_free(&ctx);
1250 
1251 e_key:
1252 	ccp_dm_free(&key);
1253 
1254 	return ret;
1255 }
1256 
1257 static int ccp_run_des3_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1258 {
1259 	struct ccp_des3_engine *des3 = &cmd->u.des3;
1260 
1261 	struct ccp_dm_workarea key, ctx;
1262 	struct ccp_data src, dst;
1263 	struct ccp_op op;
1264 	unsigned int dm_offset;
1265 	unsigned int len_singlekey;
1266 	bool in_place = false;
1267 	int ret;
1268 
1269 	/* Error checks */
1270 	if (!cmd_q->ccp->vdata->perform->des3)
1271 		return -EINVAL;
1272 
1273 	if (des3->key_len != DES3_EDE_KEY_SIZE)
1274 		return -EINVAL;
1275 
1276 	if (((des3->mode == CCP_DES3_MODE_ECB) ||
1277 		(des3->mode == CCP_DES3_MODE_CBC)) &&
1278 		(des3->src_len & (DES3_EDE_BLOCK_SIZE - 1)))
1279 		return -EINVAL;
1280 
1281 	if (!des3->key || !des3->src || !des3->dst)
1282 		return -EINVAL;
1283 
1284 	if (des3->mode != CCP_DES3_MODE_ECB) {
1285 		if (des3->iv_len != DES3_EDE_BLOCK_SIZE)
1286 			return -EINVAL;
1287 
1288 		if (!des3->iv)
1289 			return -EINVAL;
1290 	}
1291 
1292 	ret = -EIO;
1293 	/* Zero out all the fields of the command desc */
1294 	memset(&op, 0, sizeof(op));
1295 
1296 	/* Set up the Function field */
1297 	op.cmd_q = cmd_q;
1298 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
1299 	op.sb_key = cmd_q->sb_key;
1300 
1301 	op.init = (des3->mode == CCP_DES3_MODE_ECB) ? 0 : 1;
1302 	op.u.des3.type = des3->type;
1303 	op.u.des3.mode = des3->mode;
1304 	op.u.des3.action = des3->action;
1305 
1306 	/*
1307 	 * All supported key sizes fit in a single (32-byte) KSB entry and
1308 	 * (like AES) must be in little endian format. Use the 256-bit byte
1309 	 * swap passthru option to convert from big endian to little endian.
1310 	 */
1311 	ret = ccp_init_dm_workarea(&key, cmd_q,
1312 				   CCP_DES3_KEY_SB_COUNT * CCP_SB_BYTES,
1313 				   DMA_TO_DEVICE);
1314 	if (ret)
1315 		return ret;
1316 
1317 	/*
1318 	 * The contents of the key triplet are in the reverse order of what
1319 	 * is required by the engine. Copy the 3 pieces individually to put
1320 	 * them where they belong.
1321 	 */
1322 	dm_offset = CCP_SB_BYTES - des3->key_len; /* Basic offset */
1323 
1324 	len_singlekey = des3->key_len / 3;
1325 	ret = ccp_set_dm_area(&key, dm_offset + 2 * len_singlekey,
1326 			      des3->key, 0, len_singlekey);
1327 	if (ret)
1328 		goto e_key;
1329 	ret = ccp_set_dm_area(&key, dm_offset + len_singlekey,
1330 			      des3->key, len_singlekey, len_singlekey);
1331 	if (ret)
1332 		goto e_key;
1333 	ret = ccp_set_dm_area(&key, dm_offset,
1334 			      des3->key, 2 * len_singlekey, len_singlekey);
1335 	if (ret)
1336 		goto e_key;
1337 
1338 	/* Copy the key to the SB */
1339 	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
1340 			     CCP_PASSTHRU_BYTESWAP_256BIT);
1341 	if (ret) {
1342 		cmd->engine_error = cmd_q->cmd_error;
1343 		goto e_key;
1344 	}
1345 
1346 	/*
1347 	 * The DES3 context fits in a single (32-byte) KSB entry and
1348 	 * must be in little endian format. Use the 256-bit byte swap
1349 	 * passthru option to convert from big endian to little endian.
1350 	 */
1351 	if (des3->mode != CCP_DES3_MODE_ECB) {
1352 		u32 load_mode;
1353 
1354 		op.sb_ctx = cmd_q->sb_ctx;
1355 
1356 		ret = ccp_init_dm_workarea(&ctx, cmd_q,
1357 					   CCP_DES3_CTX_SB_COUNT * CCP_SB_BYTES,
1358 					   DMA_BIDIRECTIONAL);
1359 		if (ret)
1360 			goto e_key;
1361 
1362 		/* Load the context into the LSB */
1363 		dm_offset = CCP_SB_BYTES - des3->iv_len;
1364 		ret = ccp_set_dm_area(&ctx, dm_offset, des3->iv, 0,
1365 				      des3->iv_len);
1366 		if (ret)
1367 			goto e_ctx;
1368 
1369 		if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0))
1370 			load_mode = CCP_PASSTHRU_BYTESWAP_NOOP;
1371 		else
1372 			load_mode = CCP_PASSTHRU_BYTESWAP_256BIT;
1373 		ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1374 				     load_mode);
1375 		if (ret) {
1376 			cmd->engine_error = cmd_q->cmd_error;
1377 			goto e_ctx;
1378 		}
1379 	}
1380 
1381 	/*
1382 	 * Prepare the input and output data workareas. For in-place
1383 	 * operations we need to set the dma direction to BIDIRECTIONAL
1384 	 * and copy the src workarea to the dst workarea.
1385 	 */
1386 	if (sg_virt(des3->src) == sg_virt(des3->dst))
1387 		in_place = true;
1388 
1389 	ret = ccp_init_data(&src, cmd_q, des3->src, des3->src_len,
1390 			DES3_EDE_BLOCK_SIZE,
1391 			in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1392 	if (ret)
1393 		goto e_ctx;
1394 
1395 	if (in_place)
1396 		dst = src;
1397 	else {
1398 		ret = ccp_init_data(&dst, cmd_q, des3->dst, des3->src_len,
1399 				DES3_EDE_BLOCK_SIZE, DMA_FROM_DEVICE);
1400 		if (ret)
1401 			goto e_src;
1402 	}
1403 
1404 	/* Send data to the CCP DES3 engine */
1405 	while (src.sg_wa.bytes_left) {
1406 		ccp_prepare_data(&src, &dst, &op, DES3_EDE_BLOCK_SIZE, true);
1407 		if (!src.sg_wa.bytes_left) {
1408 			op.eom = 1;
1409 
1410 			/* Since we don't retrieve the context in ECB mode
1411 			 * we have to wait for the operation to complete
1412 			 * on the last piece of data
1413 			 */
1414 			op.soc = 0;
1415 		}
1416 
1417 		ret = cmd_q->ccp->vdata->perform->des3(&op);
1418 		if (ret) {
1419 			cmd->engine_error = cmd_q->cmd_error;
1420 			goto e_dst;
1421 		}
1422 
1423 		ccp_process_data(&src, &dst, &op);
1424 	}
1425 
1426 	if (des3->mode != CCP_DES3_MODE_ECB) {
1427 		/* Retrieve the context and make BE */
1428 		ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1429 				       CCP_PASSTHRU_BYTESWAP_256BIT);
1430 		if (ret) {
1431 			cmd->engine_error = cmd_q->cmd_error;
1432 			goto e_dst;
1433 		}
1434 
1435 		/* ...but we only need the last DES3_EDE_BLOCK_SIZE bytes */
1436 		if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0))
1437 			dm_offset = CCP_SB_BYTES - des3->iv_len;
1438 		else
1439 			dm_offset = 0;
1440 		ccp_get_dm_area(&ctx, dm_offset, des3->iv, 0,
1441 				DES3_EDE_BLOCK_SIZE);
1442 	}
1443 e_dst:
1444 	if (!in_place)
1445 		ccp_free_data(&dst, cmd_q);
1446 
1447 e_src:
1448 	ccp_free_data(&src, cmd_q);
1449 
1450 e_ctx:
1451 	if (des3->mode != CCP_DES3_MODE_ECB)
1452 		ccp_dm_free(&ctx);
1453 
1454 e_key:
1455 	ccp_dm_free(&key);
1456 
1457 	return ret;
1458 }
1459 
1460 static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1461 {
1462 	struct ccp_sha_engine *sha = &cmd->u.sha;
1463 	struct ccp_dm_workarea ctx;
1464 	struct ccp_data src;
1465 	struct ccp_op op;
1466 	unsigned int ioffset, ooffset;
1467 	unsigned int digest_size;
1468 	int sb_count;
1469 	const void *init;
1470 	u64 block_size;
1471 	int ctx_size;
1472 	int ret;
1473 
1474 	switch (sha->type) {
1475 	case CCP_SHA_TYPE_1:
1476 		if (sha->ctx_len < SHA1_DIGEST_SIZE)
1477 			return -EINVAL;
1478 		block_size = SHA1_BLOCK_SIZE;
1479 		break;
1480 	case CCP_SHA_TYPE_224:
1481 		if (sha->ctx_len < SHA224_DIGEST_SIZE)
1482 			return -EINVAL;
1483 		block_size = SHA224_BLOCK_SIZE;
1484 		break;
1485 	case CCP_SHA_TYPE_256:
1486 		if (sha->ctx_len < SHA256_DIGEST_SIZE)
1487 			return -EINVAL;
1488 		block_size = SHA256_BLOCK_SIZE;
1489 		break;
1490 	case CCP_SHA_TYPE_384:
1491 		if (cmd_q->ccp->vdata->version < CCP_VERSION(4, 0)
1492 		    || sha->ctx_len < SHA384_DIGEST_SIZE)
1493 			return -EINVAL;
1494 		block_size = SHA384_BLOCK_SIZE;
1495 		break;
1496 	case CCP_SHA_TYPE_512:
1497 		if (cmd_q->ccp->vdata->version < CCP_VERSION(4, 0)
1498 		    || sha->ctx_len < SHA512_DIGEST_SIZE)
1499 			return -EINVAL;
1500 		block_size = SHA512_BLOCK_SIZE;
1501 		break;
1502 	default:
1503 		return -EINVAL;
1504 	}
1505 
1506 	if (!sha->ctx)
1507 		return -EINVAL;
1508 
1509 	if (!sha->final && (sha->src_len & (block_size - 1)))
1510 		return -EINVAL;
1511 
1512 	/* The version 3 device can't handle zero-length input */
1513 	if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0)) {
1514 
1515 		if (!sha->src_len) {
1516 			unsigned int digest_len;
1517 			const u8 *sha_zero;
1518 
1519 			/* Not final, just return */
1520 			if (!sha->final)
1521 				return 0;
1522 
1523 			/* CCP can't do a zero length sha operation so the
1524 			 * caller must buffer the data.
1525 			 */
1526 			if (sha->msg_bits)
1527 				return -EINVAL;
1528 
1529 			/* The CCP cannot perform zero-length sha operations
1530 			 * so the caller is required to buffer data for the
1531 			 * final operation. However, a sha operation for a
1532 			 * message with a total length of zero is valid so
1533 			 * known values are required to supply the result.
1534 			 */
1535 			switch (sha->type) {
1536 			case CCP_SHA_TYPE_1:
1537 				sha_zero = sha1_zero_message_hash;
1538 				digest_len = SHA1_DIGEST_SIZE;
1539 				break;
1540 			case CCP_SHA_TYPE_224:
1541 				sha_zero = sha224_zero_message_hash;
1542 				digest_len = SHA224_DIGEST_SIZE;
1543 				break;
1544 			case CCP_SHA_TYPE_256:
1545 				sha_zero = sha256_zero_message_hash;
1546 				digest_len = SHA256_DIGEST_SIZE;
1547 				break;
1548 			default:
1549 				return -EINVAL;
1550 			}
1551 
1552 			scatterwalk_map_and_copy((void *)sha_zero, sha->ctx, 0,
1553 						 digest_len, 1);
1554 
1555 			return 0;
1556 		}
1557 	}
1558 
1559 	/* Set variables used throughout */
1560 	switch (sha->type) {
1561 	case CCP_SHA_TYPE_1:
1562 		digest_size = SHA1_DIGEST_SIZE;
1563 		init = (void *) ccp_sha1_init;
1564 		ctx_size = SHA1_DIGEST_SIZE;
1565 		sb_count = 1;
1566 		if (cmd_q->ccp->vdata->version != CCP_VERSION(3, 0))
1567 			ooffset = ioffset = CCP_SB_BYTES - SHA1_DIGEST_SIZE;
1568 		else
1569 			ooffset = ioffset = 0;
1570 		break;
1571 	case CCP_SHA_TYPE_224:
1572 		digest_size = SHA224_DIGEST_SIZE;
1573 		init = (void *) ccp_sha224_init;
1574 		ctx_size = SHA256_DIGEST_SIZE;
1575 		sb_count = 1;
1576 		ioffset = 0;
1577 		if (cmd_q->ccp->vdata->version != CCP_VERSION(3, 0))
1578 			ooffset = CCP_SB_BYTES - SHA224_DIGEST_SIZE;
1579 		else
1580 			ooffset = 0;
1581 		break;
1582 	case CCP_SHA_TYPE_256:
1583 		digest_size = SHA256_DIGEST_SIZE;
1584 		init = (void *) ccp_sha256_init;
1585 		ctx_size = SHA256_DIGEST_SIZE;
1586 		sb_count = 1;
1587 		ooffset = ioffset = 0;
1588 		break;
1589 	case CCP_SHA_TYPE_384:
1590 		digest_size = SHA384_DIGEST_SIZE;
1591 		init = (void *) ccp_sha384_init;
1592 		ctx_size = SHA512_DIGEST_SIZE;
1593 		sb_count = 2;
1594 		ioffset = 0;
1595 		ooffset = 2 * CCP_SB_BYTES - SHA384_DIGEST_SIZE;
1596 		break;
1597 	case CCP_SHA_TYPE_512:
1598 		digest_size = SHA512_DIGEST_SIZE;
1599 		init = (void *) ccp_sha512_init;
1600 		ctx_size = SHA512_DIGEST_SIZE;
1601 		sb_count = 2;
1602 		ooffset = ioffset = 0;
1603 		break;
1604 	default:
1605 		ret = -EINVAL;
1606 		goto e_data;
1607 	}
1608 
1609 	/* For zero-length plaintext the src pointer is ignored;
1610 	 * otherwise both parts must be valid
1611 	 */
1612 	if (sha->src_len && !sha->src)
1613 		return -EINVAL;
1614 
1615 	memset(&op, 0, sizeof(op));
1616 	op.cmd_q = cmd_q;
1617 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
1618 	op.sb_ctx = cmd_q->sb_ctx; /* Pre-allocated */
1619 	op.u.sha.type = sha->type;
1620 	op.u.sha.msg_bits = sha->msg_bits;
1621 
1622 	/* For SHA1/224/256 the context fits in a single (32-byte) SB entry;
1623 	 * SHA384/512 require 2 adjacent SB slots, with the right half in the
1624 	 * first slot, and the left half in the second. Each portion must then
1625 	 * be in little endian format: use the 256-bit byte swap option.
1626 	 */
1627 	ret = ccp_init_dm_workarea(&ctx, cmd_q, sb_count * CCP_SB_BYTES,
1628 				   DMA_BIDIRECTIONAL);
1629 	if (ret)
1630 		return ret;
1631 	if (sha->first) {
1632 		switch (sha->type) {
1633 		case CCP_SHA_TYPE_1:
1634 		case CCP_SHA_TYPE_224:
1635 		case CCP_SHA_TYPE_256:
1636 			memcpy(ctx.address + ioffset, init, ctx_size);
1637 			break;
1638 		case CCP_SHA_TYPE_384:
1639 		case CCP_SHA_TYPE_512:
1640 			memcpy(ctx.address + ctx_size / 2, init,
1641 			       ctx_size / 2);
1642 			memcpy(ctx.address, init + ctx_size / 2,
1643 			       ctx_size / 2);
1644 			break;
1645 		default:
1646 			ret = -EINVAL;
1647 			goto e_ctx;
1648 		}
1649 	} else {
1650 		/* Restore the context */
1651 		ret = ccp_set_dm_area(&ctx, 0, sha->ctx, 0,
1652 				      sb_count * CCP_SB_BYTES);
1653 		if (ret)
1654 			goto e_ctx;
1655 	}
1656 
1657 	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1658 			     CCP_PASSTHRU_BYTESWAP_256BIT);
1659 	if (ret) {
1660 		cmd->engine_error = cmd_q->cmd_error;
1661 		goto e_ctx;
1662 	}
1663 
1664 	if (sha->src) {
1665 		/* Send data to the CCP SHA engine; block_size is set above */
1666 		ret = ccp_init_data(&src, cmd_q, sha->src, sha->src_len,
1667 				    block_size, DMA_TO_DEVICE);
1668 		if (ret)
1669 			goto e_ctx;
1670 
1671 		while (src.sg_wa.bytes_left) {
1672 			ccp_prepare_data(&src, NULL, &op, block_size, false);
1673 			if (sha->final && !src.sg_wa.bytes_left)
1674 				op.eom = 1;
1675 
1676 			ret = cmd_q->ccp->vdata->perform->sha(&op);
1677 			if (ret) {
1678 				cmd->engine_error = cmd_q->cmd_error;
1679 				goto e_data;
1680 			}
1681 
1682 			ccp_process_data(&src, NULL, &op);
1683 		}
1684 	} else {
1685 		op.eom = 1;
1686 		ret = cmd_q->ccp->vdata->perform->sha(&op);
1687 		if (ret) {
1688 			cmd->engine_error = cmd_q->cmd_error;
1689 			goto e_data;
1690 		}
1691 	}
1692 
1693 	/* Retrieve the SHA context - convert from LE to BE using
1694 	 * 32-byte (256-bit) byteswapping to BE
1695 	 */
1696 	ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1697 			       CCP_PASSTHRU_BYTESWAP_256BIT);
1698 	if (ret) {
1699 		cmd->engine_error = cmd_q->cmd_error;
1700 		goto e_data;
1701 	}
1702 
1703 	if (sha->final) {
1704 		/* Finishing up, so get the digest */
1705 		switch (sha->type) {
1706 		case CCP_SHA_TYPE_1:
1707 		case CCP_SHA_TYPE_224:
1708 		case CCP_SHA_TYPE_256:
1709 			ccp_get_dm_area(&ctx, ooffset,
1710 					sha->ctx, 0,
1711 					digest_size);
1712 			break;
1713 		case CCP_SHA_TYPE_384:
1714 		case CCP_SHA_TYPE_512:
1715 			ccp_get_dm_area(&ctx, 0,
1716 					sha->ctx, LSB_ITEM_SIZE - ooffset,
1717 					LSB_ITEM_SIZE);
1718 			ccp_get_dm_area(&ctx, LSB_ITEM_SIZE + ooffset,
1719 					sha->ctx, 0,
1720 					LSB_ITEM_SIZE - ooffset);
1721 			break;
1722 		default:
1723 			ret = -EINVAL;
1724 			goto e_ctx;
1725 		}
1726 	} else {
1727 		/* Stash the context */
1728 		ccp_get_dm_area(&ctx, 0, sha->ctx, 0,
1729 				sb_count * CCP_SB_BYTES);
1730 	}
1731 
1732 	if (sha->final && sha->opad) {
1733 		/* HMAC operation, recursively perform final SHA */
1734 		struct ccp_cmd hmac_cmd;
1735 		struct scatterlist sg;
1736 		u8 *hmac_buf;
1737 
1738 		if (sha->opad_len != block_size) {
1739 			ret = -EINVAL;
1740 			goto e_data;
1741 		}
1742 
1743 		hmac_buf = kmalloc(block_size + digest_size, GFP_KERNEL);
1744 		if (!hmac_buf) {
1745 			ret = -ENOMEM;
1746 			goto e_data;
1747 		}
1748 		sg_init_one(&sg, hmac_buf, block_size + digest_size);
1749 
1750 		scatterwalk_map_and_copy(hmac_buf, sha->opad, 0, block_size, 0);
1751 		switch (sha->type) {
1752 		case CCP_SHA_TYPE_1:
1753 		case CCP_SHA_TYPE_224:
1754 		case CCP_SHA_TYPE_256:
1755 			memcpy(hmac_buf + block_size,
1756 			       ctx.address + ooffset,
1757 			       digest_size);
1758 			break;
1759 		case CCP_SHA_TYPE_384:
1760 		case CCP_SHA_TYPE_512:
1761 			memcpy(hmac_buf + block_size,
1762 			       ctx.address + LSB_ITEM_SIZE + ooffset,
1763 			       LSB_ITEM_SIZE);
1764 			memcpy(hmac_buf + block_size +
1765 			       (LSB_ITEM_SIZE - ooffset),
1766 			       ctx.address,
1767 			       LSB_ITEM_SIZE);
1768 			break;
1769 		default:
1770 			ret = -EINVAL;
1771 			goto e_ctx;
1772 		}
1773 
1774 		memset(&hmac_cmd, 0, sizeof(hmac_cmd));
1775 		hmac_cmd.engine = CCP_ENGINE_SHA;
1776 		hmac_cmd.u.sha.type = sha->type;
1777 		hmac_cmd.u.sha.ctx = sha->ctx;
1778 		hmac_cmd.u.sha.ctx_len = sha->ctx_len;
1779 		hmac_cmd.u.sha.src = &sg;
1780 		hmac_cmd.u.sha.src_len = block_size + digest_size;
1781 		hmac_cmd.u.sha.opad = NULL;
1782 		hmac_cmd.u.sha.opad_len = 0;
1783 		hmac_cmd.u.sha.first = 1;
1784 		hmac_cmd.u.sha.final = 1;
1785 		hmac_cmd.u.sha.msg_bits = (block_size + digest_size) << 3;
1786 
1787 		ret = ccp_run_sha_cmd(cmd_q, &hmac_cmd);
1788 		if (ret)
1789 			cmd->engine_error = hmac_cmd.engine_error;
1790 
1791 		kfree(hmac_buf);
1792 	}
1793 
1794 e_data:
1795 	if (sha->src)
1796 		ccp_free_data(&src, cmd_q);
1797 
1798 e_ctx:
1799 	ccp_dm_free(&ctx);
1800 
1801 	return ret;
1802 }
1803 
1804 static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1805 {
1806 	struct ccp_rsa_engine *rsa = &cmd->u.rsa;
1807 	struct ccp_dm_workarea exp, src, dst;
1808 	struct ccp_op op;
1809 	unsigned int sb_count, i_len, o_len;
1810 	int ret;
1811 
1812 	/* Check against the maximum allowable size, in bits */
1813 	if (rsa->key_size > cmd_q->ccp->vdata->rsamax)
1814 		return -EINVAL;
1815 
1816 	if (!rsa->exp || !rsa->mod || !rsa->src || !rsa->dst)
1817 		return -EINVAL;
1818 
1819 	memset(&op, 0, sizeof(op));
1820 	op.cmd_q = cmd_q;
1821 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
1822 
1823 	/* The RSA modulus must precede the message being acted upon, so
1824 	 * it must be copied to a DMA area where the message and the
1825 	 * modulus can be concatenated.  Therefore the input buffer
1826 	 * length required is twice the output buffer length (which
1827 	 * must be a multiple of 256-bits).  Compute o_len, i_len in bytes.
1828 	 * Buffer sizes must be a multiple of 32 bytes; rounding up may be
1829 	 * required.
1830 	 */
1831 	o_len = 32 * ((rsa->key_size + 255) / 256);
1832 	i_len = o_len * 2;
1833 
1834 	sb_count = 0;
1835 	if (cmd_q->ccp->vdata->version < CCP_VERSION(5, 0)) {
1836 		/* sb_count is the number of storage block slots required
1837 		 * for the modulus.
1838 		 */
1839 		sb_count = o_len / CCP_SB_BYTES;
1840 		op.sb_key = cmd_q->ccp->vdata->perform->sballoc(cmd_q,
1841 								sb_count);
1842 		if (!op.sb_key)
1843 			return -EIO;
1844 	} else {
1845 		/* A version 5 device allows a modulus size that will not fit
1846 		 * in the LSB, so the command will transfer it from memory.
1847 		 * Set the sb key to the default, even though it's not used.
1848 		 */
1849 		op.sb_key = cmd_q->sb_key;
1850 	}
1851 
1852 	/* The RSA exponent must be in little endian format. Reverse its
1853 	 * byte order.
1854 	 */
1855 	ret = ccp_init_dm_workarea(&exp, cmd_q, o_len, DMA_TO_DEVICE);
1856 	if (ret)
1857 		goto e_sb;
1858 
1859 	ret = ccp_reverse_set_dm_area(&exp, 0, rsa->exp, 0, rsa->exp_len);
1860 	if (ret)
1861 		goto e_exp;
1862 
1863 	if (cmd_q->ccp->vdata->version < CCP_VERSION(5, 0)) {
1864 		/* Copy the exponent to the local storage block, using
1865 		 * as many 32-byte blocks as were allocated above. It's
1866 		 * already little endian, so no further change is required.
1867 		 */
1868 		ret = ccp_copy_to_sb(cmd_q, &exp, op.jobid, op.sb_key,
1869 				     CCP_PASSTHRU_BYTESWAP_NOOP);
1870 		if (ret) {
1871 			cmd->engine_error = cmd_q->cmd_error;
1872 			goto e_exp;
1873 		}
1874 	} else {
1875 		/* The exponent can be retrieved from memory via DMA. */
1876 		op.exp.u.dma.address = exp.dma.address;
1877 		op.exp.u.dma.offset = 0;
1878 	}
1879 
1880 	/* Concatenate the modulus and the message. Both the modulus and
1881 	 * the operands must be in little endian format.  Since the input
1882 	 * is in big endian format it must be converted.
1883 	 */
1884 	ret = ccp_init_dm_workarea(&src, cmd_q, i_len, DMA_TO_DEVICE);
1885 	if (ret)
1886 		goto e_exp;
1887 
1888 	ret = ccp_reverse_set_dm_area(&src, 0, rsa->mod, 0, rsa->mod_len);
1889 	if (ret)
1890 		goto e_src;
1891 	ret = ccp_reverse_set_dm_area(&src, o_len, rsa->src, 0, rsa->src_len);
1892 	if (ret)
1893 		goto e_src;
1894 
1895 	/* Prepare the output area for the operation */
1896 	ret = ccp_init_dm_workarea(&dst, cmd_q, o_len, DMA_FROM_DEVICE);
1897 	if (ret)
1898 		goto e_src;
1899 
1900 	op.soc = 1;
1901 	op.src.u.dma.address = src.dma.address;
1902 	op.src.u.dma.offset = 0;
1903 	op.src.u.dma.length = i_len;
1904 	op.dst.u.dma.address = dst.dma.address;
1905 	op.dst.u.dma.offset = 0;
1906 	op.dst.u.dma.length = o_len;
1907 
1908 	op.u.rsa.mod_size = rsa->key_size;
1909 	op.u.rsa.input_len = i_len;
1910 
1911 	ret = cmd_q->ccp->vdata->perform->rsa(&op);
1912 	if (ret) {
1913 		cmd->engine_error = cmd_q->cmd_error;
1914 		goto e_dst;
1915 	}
1916 
1917 	ccp_reverse_get_dm_area(&dst, 0, rsa->dst, 0, rsa->mod_len);
1918 
1919 e_dst:
1920 	ccp_dm_free(&dst);
1921 
1922 e_src:
1923 	ccp_dm_free(&src);
1924 
1925 e_exp:
1926 	ccp_dm_free(&exp);
1927 
1928 e_sb:
1929 	if (sb_count)
1930 		cmd_q->ccp->vdata->perform->sbfree(cmd_q, op.sb_key, sb_count);
1931 
1932 	return ret;
1933 }
1934 
1935 static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q,
1936 				struct ccp_cmd *cmd)
1937 {
1938 	struct ccp_passthru_engine *pt = &cmd->u.passthru;
1939 	struct ccp_dm_workarea mask;
1940 	struct ccp_data src, dst;
1941 	struct ccp_op op;
1942 	bool in_place = false;
1943 	unsigned int i;
1944 	int ret = 0;
1945 
1946 	if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1)))
1947 		return -EINVAL;
1948 
1949 	if (!pt->src || !pt->dst)
1950 		return -EINVAL;
1951 
1952 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
1953 		if (pt->mask_len != CCP_PASSTHRU_MASKSIZE)
1954 			return -EINVAL;
1955 		if (!pt->mask)
1956 			return -EINVAL;
1957 	}
1958 
1959 	BUILD_BUG_ON(CCP_PASSTHRU_SB_COUNT != 1);
1960 
1961 	memset(&op, 0, sizeof(op));
1962 	op.cmd_q = cmd_q;
1963 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
1964 
1965 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
1966 		/* Load the mask */
1967 		op.sb_key = cmd_q->sb_key;
1968 
1969 		ret = ccp_init_dm_workarea(&mask, cmd_q,
1970 					   CCP_PASSTHRU_SB_COUNT *
1971 					   CCP_SB_BYTES,
1972 					   DMA_TO_DEVICE);
1973 		if (ret)
1974 			return ret;
1975 
1976 		ret = ccp_set_dm_area(&mask, 0, pt->mask, 0, pt->mask_len);
1977 		if (ret)
1978 			goto e_mask;
1979 		ret = ccp_copy_to_sb(cmd_q, &mask, op.jobid, op.sb_key,
1980 				     CCP_PASSTHRU_BYTESWAP_NOOP);
1981 		if (ret) {
1982 			cmd->engine_error = cmd_q->cmd_error;
1983 			goto e_mask;
1984 		}
1985 	}
1986 
1987 	/* Prepare the input and output data workareas. For in-place
1988 	 * operations we need to set the dma direction to BIDIRECTIONAL
1989 	 * and copy the src workarea to the dst workarea.
1990 	 */
1991 	if (sg_virt(pt->src) == sg_virt(pt->dst))
1992 		in_place = true;
1993 
1994 	ret = ccp_init_data(&src, cmd_q, pt->src, pt->src_len,
1995 			    CCP_PASSTHRU_MASKSIZE,
1996 			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1997 	if (ret)
1998 		goto e_mask;
1999 
2000 	if (in_place) {
2001 		dst = src;
2002 	} else {
2003 		ret = ccp_init_data(&dst, cmd_q, pt->dst, pt->src_len,
2004 				    CCP_PASSTHRU_MASKSIZE, DMA_FROM_DEVICE);
2005 		if (ret)
2006 			goto e_src;
2007 	}
2008 
2009 	/* Send data to the CCP Passthru engine
2010 	 *   Because the CCP engine works on a single source and destination
2011 	 *   dma address at a time, each entry in the source scatterlist
2012 	 *   (after the dma_map_sg call) must be less than or equal to the
2013 	 *   (remaining) length in the destination scatterlist entry and the
2014 	 *   length must be a multiple of CCP_PASSTHRU_BLOCKSIZE
2015 	 */
2016 	dst.sg_wa.sg_used = 0;
2017 	for (i = 1; i <= src.sg_wa.dma_count; i++) {
2018 		if (!dst.sg_wa.sg ||
2019 		    (dst.sg_wa.sg->length < src.sg_wa.sg->length)) {
2020 			ret = -EINVAL;
2021 			goto e_dst;
2022 		}
2023 
2024 		if (i == src.sg_wa.dma_count) {
2025 			op.eom = 1;
2026 			op.soc = 1;
2027 		}
2028 
2029 		op.src.type = CCP_MEMTYPE_SYSTEM;
2030 		op.src.u.dma.address = sg_dma_address(src.sg_wa.sg);
2031 		op.src.u.dma.offset = 0;
2032 		op.src.u.dma.length = sg_dma_len(src.sg_wa.sg);
2033 
2034 		op.dst.type = CCP_MEMTYPE_SYSTEM;
2035 		op.dst.u.dma.address = sg_dma_address(dst.sg_wa.sg);
2036 		op.dst.u.dma.offset = dst.sg_wa.sg_used;
2037 		op.dst.u.dma.length = op.src.u.dma.length;
2038 
2039 		ret = cmd_q->ccp->vdata->perform->passthru(&op);
2040 		if (ret) {
2041 			cmd->engine_error = cmd_q->cmd_error;
2042 			goto e_dst;
2043 		}
2044 
2045 		dst.sg_wa.sg_used += src.sg_wa.sg->length;
2046 		if (dst.sg_wa.sg_used == dst.sg_wa.sg->length) {
2047 			dst.sg_wa.sg = sg_next(dst.sg_wa.sg);
2048 			dst.sg_wa.sg_used = 0;
2049 		}
2050 		src.sg_wa.sg = sg_next(src.sg_wa.sg);
2051 	}
2052 
2053 e_dst:
2054 	if (!in_place)
2055 		ccp_free_data(&dst, cmd_q);
2056 
2057 e_src:
2058 	ccp_free_data(&src, cmd_q);
2059 
2060 e_mask:
2061 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
2062 		ccp_dm_free(&mask);
2063 
2064 	return ret;
2065 }
2066 
2067 static int ccp_run_passthru_nomap_cmd(struct ccp_cmd_queue *cmd_q,
2068 				      struct ccp_cmd *cmd)
2069 {
2070 	struct ccp_passthru_nomap_engine *pt = &cmd->u.passthru_nomap;
2071 	struct ccp_dm_workarea mask;
2072 	struct ccp_op op;
2073 	int ret;
2074 
2075 	if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1)))
2076 		return -EINVAL;
2077 
2078 	if (!pt->src_dma || !pt->dst_dma)
2079 		return -EINVAL;
2080 
2081 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
2082 		if (pt->mask_len != CCP_PASSTHRU_MASKSIZE)
2083 			return -EINVAL;
2084 		if (!pt->mask)
2085 			return -EINVAL;
2086 	}
2087 
2088 	BUILD_BUG_ON(CCP_PASSTHRU_SB_COUNT != 1);
2089 
2090 	memset(&op, 0, sizeof(op));
2091 	op.cmd_q = cmd_q;
2092 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
2093 
2094 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
2095 		/* Load the mask */
2096 		op.sb_key = cmd_q->sb_key;
2097 
2098 		mask.length = pt->mask_len;
2099 		mask.dma.address = pt->mask;
2100 		mask.dma.length = pt->mask_len;
2101 
2102 		ret = ccp_copy_to_sb(cmd_q, &mask, op.jobid, op.sb_key,
2103 				     CCP_PASSTHRU_BYTESWAP_NOOP);
2104 		if (ret) {
2105 			cmd->engine_error = cmd_q->cmd_error;
2106 			return ret;
2107 		}
2108 	}
2109 
2110 	/* Send data to the CCP Passthru engine */
2111 	op.eom = 1;
2112 	op.soc = 1;
2113 
2114 	op.src.type = CCP_MEMTYPE_SYSTEM;
2115 	op.src.u.dma.address = pt->src_dma;
2116 	op.src.u.dma.offset = 0;
2117 	op.src.u.dma.length = pt->src_len;
2118 
2119 	op.dst.type = CCP_MEMTYPE_SYSTEM;
2120 	op.dst.u.dma.address = pt->dst_dma;
2121 	op.dst.u.dma.offset = 0;
2122 	op.dst.u.dma.length = pt->src_len;
2123 
2124 	ret = cmd_q->ccp->vdata->perform->passthru(&op);
2125 	if (ret)
2126 		cmd->engine_error = cmd_q->cmd_error;
2127 
2128 	return ret;
2129 }
2130 
2131 static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
2132 {
2133 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
2134 	struct ccp_dm_workarea src, dst;
2135 	struct ccp_op op;
2136 	int ret;
2137 	u8 *save;
2138 
2139 	if (!ecc->u.mm.operand_1 ||
2140 	    (ecc->u.mm.operand_1_len > CCP_ECC_MODULUS_BYTES))
2141 		return -EINVAL;
2142 
2143 	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT)
2144 		if (!ecc->u.mm.operand_2 ||
2145 		    (ecc->u.mm.operand_2_len > CCP_ECC_MODULUS_BYTES))
2146 			return -EINVAL;
2147 
2148 	if (!ecc->u.mm.result ||
2149 	    (ecc->u.mm.result_len < CCP_ECC_MODULUS_BYTES))
2150 		return -EINVAL;
2151 
2152 	memset(&op, 0, sizeof(op));
2153 	op.cmd_q = cmd_q;
2154 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
2155 
2156 	/* Concatenate the modulus and the operands. Both the modulus and
2157 	 * the operands must be in little endian format.  Since the input
2158 	 * is in big endian format it must be converted and placed in a
2159 	 * fixed length buffer.
2160 	 */
2161 	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
2162 				   DMA_TO_DEVICE);
2163 	if (ret)
2164 		return ret;
2165 
2166 	/* Save the workarea address since it is updated in order to perform
2167 	 * the concatenation
2168 	 */
2169 	save = src.address;
2170 
2171 	/* Copy the ECC modulus */
2172 	ret = ccp_reverse_set_dm_area(&src, 0, ecc->mod, 0, ecc->mod_len);
2173 	if (ret)
2174 		goto e_src;
2175 	src.address += CCP_ECC_OPERAND_SIZE;
2176 
2177 	/* Copy the first operand */
2178 	ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.mm.operand_1, 0,
2179 				      ecc->u.mm.operand_1_len);
2180 	if (ret)
2181 		goto e_src;
2182 	src.address += CCP_ECC_OPERAND_SIZE;
2183 
2184 	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT) {
2185 		/* Copy the second operand */
2186 		ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.mm.operand_2, 0,
2187 					      ecc->u.mm.operand_2_len);
2188 		if (ret)
2189 			goto e_src;
2190 		src.address += CCP_ECC_OPERAND_SIZE;
2191 	}
2192 
2193 	/* Restore the workarea address */
2194 	src.address = save;
2195 
2196 	/* Prepare the output area for the operation */
2197 	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
2198 				   DMA_FROM_DEVICE);
2199 	if (ret)
2200 		goto e_src;
2201 
2202 	op.soc = 1;
2203 	op.src.u.dma.address = src.dma.address;
2204 	op.src.u.dma.offset = 0;
2205 	op.src.u.dma.length = src.length;
2206 	op.dst.u.dma.address = dst.dma.address;
2207 	op.dst.u.dma.offset = 0;
2208 	op.dst.u.dma.length = dst.length;
2209 
2210 	op.u.ecc.function = cmd->u.ecc.function;
2211 
2212 	ret = cmd_q->ccp->vdata->perform->ecc(&op);
2213 	if (ret) {
2214 		cmd->engine_error = cmd_q->cmd_error;
2215 		goto e_dst;
2216 	}
2217 
2218 	ecc->ecc_result = le16_to_cpup(
2219 		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
2220 	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
2221 		ret = -EIO;
2222 		goto e_dst;
2223 	}
2224 
2225 	/* Save the ECC result */
2226 	ccp_reverse_get_dm_area(&dst, 0, ecc->u.mm.result, 0,
2227 				CCP_ECC_MODULUS_BYTES);
2228 
2229 e_dst:
2230 	ccp_dm_free(&dst);
2231 
2232 e_src:
2233 	ccp_dm_free(&src);
2234 
2235 	return ret;
2236 }
2237 
2238 static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
2239 {
2240 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
2241 	struct ccp_dm_workarea src, dst;
2242 	struct ccp_op op;
2243 	int ret;
2244 	u8 *save;
2245 
2246 	if (!ecc->u.pm.point_1.x ||
2247 	    (ecc->u.pm.point_1.x_len > CCP_ECC_MODULUS_BYTES) ||
2248 	    !ecc->u.pm.point_1.y ||
2249 	    (ecc->u.pm.point_1.y_len > CCP_ECC_MODULUS_BYTES))
2250 		return -EINVAL;
2251 
2252 	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
2253 		if (!ecc->u.pm.point_2.x ||
2254 		    (ecc->u.pm.point_2.x_len > CCP_ECC_MODULUS_BYTES) ||
2255 		    !ecc->u.pm.point_2.y ||
2256 		    (ecc->u.pm.point_2.y_len > CCP_ECC_MODULUS_BYTES))
2257 			return -EINVAL;
2258 	} else {
2259 		if (!ecc->u.pm.domain_a ||
2260 		    (ecc->u.pm.domain_a_len > CCP_ECC_MODULUS_BYTES))
2261 			return -EINVAL;
2262 
2263 		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT)
2264 			if (!ecc->u.pm.scalar ||
2265 			    (ecc->u.pm.scalar_len > CCP_ECC_MODULUS_BYTES))
2266 				return -EINVAL;
2267 	}
2268 
2269 	if (!ecc->u.pm.result.x ||
2270 	    (ecc->u.pm.result.x_len < CCP_ECC_MODULUS_BYTES) ||
2271 	    !ecc->u.pm.result.y ||
2272 	    (ecc->u.pm.result.y_len < CCP_ECC_MODULUS_BYTES))
2273 		return -EINVAL;
2274 
2275 	memset(&op, 0, sizeof(op));
2276 	op.cmd_q = cmd_q;
2277 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
2278 
2279 	/* Concatenate the modulus and the operands. Both the modulus and
2280 	 * the operands must be in little endian format.  Since the input
2281 	 * is in big endian format it must be converted and placed in a
2282 	 * fixed length buffer.
2283 	 */
2284 	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
2285 				   DMA_TO_DEVICE);
2286 	if (ret)
2287 		return ret;
2288 
2289 	/* Save the workarea address since it is updated in order to perform
2290 	 * the concatenation
2291 	 */
2292 	save = src.address;
2293 
2294 	/* Copy the ECC modulus */
2295 	ret = ccp_reverse_set_dm_area(&src, 0, ecc->mod, 0, ecc->mod_len);
2296 	if (ret)
2297 		goto e_src;
2298 	src.address += CCP_ECC_OPERAND_SIZE;
2299 
2300 	/* Copy the first point X and Y coordinate */
2301 	ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_1.x, 0,
2302 				      ecc->u.pm.point_1.x_len);
2303 	if (ret)
2304 		goto e_src;
2305 	src.address += CCP_ECC_OPERAND_SIZE;
2306 	ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_1.y, 0,
2307 				      ecc->u.pm.point_1.y_len);
2308 	if (ret)
2309 		goto e_src;
2310 	src.address += CCP_ECC_OPERAND_SIZE;
2311 
2312 	/* Set the first point Z coordinate to 1 */
2313 	*src.address = 0x01;
2314 	src.address += CCP_ECC_OPERAND_SIZE;
2315 
2316 	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
2317 		/* Copy the second point X and Y coordinate */
2318 		ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_2.x, 0,
2319 					      ecc->u.pm.point_2.x_len);
2320 		if (ret)
2321 			goto e_src;
2322 		src.address += CCP_ECC_OPERAND_SIZE;
2323 		ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_2.y, 0,
2324 					      ecc->u.pm.point_2.y_len);
2325 		if (ret)
2326 			goto e_src;
2327 		src.address += CCP_ECC_OPERAND_SIZE;
2328 
2329 		/* Set the second point Z coordinate to 1 */
2330 		*src.address = 0x01;
2331 		src.address += CCP_ECC_OPERAND_SIZE;
2332 	} else {
2333 		/* Copy the Domain "a" parameter */
2334 		ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.domain_a, 0,
2335 					      ecc->u.pm.domain_a_len);
2336 		if (ret)
2337 			goto e_src;
2338 		src.address += CCP_ECC_OPERAND_SIZE;
2339 
2340 		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT) {
2341 			/* Copy the scalar value */
2342 			ret = ccp_reverse_set_dm_area(&src, 0,
2343 						      ecc->u.pm.scalar, 0,
2344 						      ecc->u.pm.scalar_len);
2345 			if (ret)
2346 				goto e_src;
2347 			src.address += CCP_ECC_OPERAND_SIZE;
2348 		}
2349 	}
2350 
2351 	/* Restore the workarea address */
2352 	src.address = save;
2353 
2354 	/* Prepare the output area for the operation */
2355 	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
2356 				   DMA_FROM_DEVICE);
2357 	if (ret)
2358 		goto e_src;
2359 
2360 	op.soc = 1;
2361 	op.src.u.dma.address = src.dma.address;
2362 	op.src.u.dma.offset = 0;
2363 	op.src.u.dma.length = src.length;
2364 	op.dst.u.dma.address = dst.dma.address;
2365 	op.dst.u.dma.offset = 0;
2366 	op.dst.u.dma.length = dst.length;
2367 
2368 	op.u.ecc.function = cmd->u.ecc.function;
2369 
2370 	ret = cmd_q->ccp->vdata->perform->ecc(&op);
2371 	if (ret) {
2372 		cmd->engine_error = cmd_q->cmd_error;
2373 		goto e_dst;
2374 	}
2375 
2376 	ecc->ecc_result = le16_to_cpup(
2377 		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
2378 	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
2379 		ret = -EIO;
2380 		goto e_dst;
2381 	}
2382 
2383 	/* Save the workarea address since it is updated as we walk through
2384 	 * to copy the point math result
2385 	 */
2386 	save = dst.address;
2387 
2388 	/* Save the ECC result X and Y coordinates */
2389 	ccp_reverse_get_dm_area(&dst, 0, ecc->u.pm.result.x, 0,
2390 				CCP_ECC_MODULUS_BYTES);
2391 	dst.address += CCP_ECC_OUTPUT_SIZE;
2392 	ccp_reverse_get_dm_area(&dst, 0, ecc->u.pm.result.y, 0,
2393 				CCP_ECC_MODULUS_BYTES);
2394 	dst.address += CCP_ECC_OUTPUT_SIZE;
2395 
2396 	/* Restore the workarea address */
2397 	dst.address = save;
2398 
2399 e_dst:
2400 	ccp_dm_free(&dst);
2401 
2402 e_src:
2403 	ccp_dm_free(&src);
2404 
2405 	return ret;
2406 }
2407 
2408 static int ccp_run_ecc_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
2409 {
2410 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
2411 
2412 	ecc->ecc_result = 0;
2413 
2414 	if (!ecc->mod ||
2415 	    (ecc->mod_len > CCP_ECC_MODULUS_BYTES))
2416 		return -EINVAL;
2417 
2418 	switch (ecc->function) {
2419 	case CCP_ECC_FUNCTION_MMUL_384BIT:
2420 	case CCP_ECC_FUNCTION_MADD_384BIT:
2421 	case CCP_ECC_FUNCTION_MINV_384BIT:
2422 		return ccp_run_ecc_mm_cmd(cmd_q, cmd);
2423 
2424 	case CCP_ECC_FUNCTION_PADD_384BIT:
2425 	case CCP_ECC_FUNCTION_PMUL_384BIT:
2426 	case CCP_ECC_FUNCTION_PDBL_384BIT:
2427 		return ccp_run_ecc_pm_cmd(cmd_q, cmd);
2428 
2429 	default:
2430 		return -EINVAL;
2431 	}
2432 }
2433 
2434 int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
2435 {
2436 	int ret;
2437 
2438 	cmd->engine_error = 0;
2439 	cmd_q->cmd_error = 0;
2440 	cmd_q->int_rcvd = 0;
2441 	cmd_q->free_slots = cmd_q->ccp->vdata->perform->get_free_slots(cmd_q);
2442 
2443 	switch (cmd->engine) {
2444 	case CCP_ENGINE_AES:
2445 		ret = ccp_run_aes_cmd(cmd_q, cmd);
2446 		break;
2447 	case CCP_ENGINE_XTS_AES_128:
2448 		ret = ccp_run_xts_aes_cmd(cmd_q, cmd);
2449 		break;
2450 	case CCP_ENGINE_DES3:
2451 		ret = ccp_run_des3_cmd(cmd_q, cmd);
2452 		break;
2453 	case CCP_ENGINE_SHA:
2454 		ret = ccp_run_sha_cmd(cmd_q, cmd);
2455 		break;
2456 	case CCP_ENGINE_RSA:
2457 		ret = ccp_run_rsa_cmd(cmd_q, cmd);
2458 		break;
2459 	case CCP_ENGINE_PASSTHRU:
2460 		if (cmd->flags & CCP_CMD_PASSTHRU_NO_DMA_MAP)
2461 			ret = ccp_run_passthru_nomap_cmd(cmd_q, cmd);
2462 		else
2463 			ret = ccp_run_passthru_cmd(cmd_q, cmd);
2464 		break;
2465 	case CCP_ENGINE_ECC:
2466 		ret = ccp_run_ecc_cmd(cmd_q, cmd);
2467 		break;
2468 	default:
2469 		ret = -EINVAL;
2470 	}
2471 
2472 	return ret;
2473 }
2474