xref: /openbmc/linux/drivers/crypto/ccp/ccp-ops.c (revision 403d026c)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * AMD Cryptographic Coprocessor (CCP) driver
4  *
5  * Copyright (C) 2013-2019 Advanced Micro Devices, Inc.
6  *
7  * Author: Tom Lendacky <thomas.lendacky@amd.com>
8  * Author: Gary R Hook <gary.hook@amd.com>
9  */
10 
11 #include <linux/module.h>
12 #include <linux/kernel.h>
13 #include <linux/interrupt.h>
14 #include <crypto/scatterwalk.h>
15 #include <crypto/des.h>
16 #include <linux/ccp.h>
17 
18 #include "ccp-dev.h"
19 
20 /* SHA initial context values */
21 static const __be32 ccp_sha1_init[SHA1_DIGEST_SIZE / sizeof(__be32)] = {
22 	cpu_to_be32(SHA1_H0), cpu_to_be32(SHA1_H1),
23 	cpu_to_be32(SHA1_H2), cpu_to_be32(SHA1_H3),
24 	cpu_to_be32(SHA1_H4),
25 };
26 
27 static const __be32 ccp_sha224_init[SHA256_DIGEST_SIZE / sizeof(__be32)] = {
28 	cpu_to_be32(SHA224_H0), cpu_to_be32(SHA224_H1),
29 	cpu_to_be32(SHA224_H2), cpu_to_be32(SHA224_H3),
30 	cpu_to_be32(SHA224_H4), cpu_to_be32(SHA224_H5),
31 	cpu_to_be32(SHA224_H6), cpu_to_be32(SHA224_H7),
32 };
33 
34 static const __be32 ccp_sha256_init[SHA256_DIGEST_SIZE / sizeof(__be32)] = {
35 	cpu_to_be32(SHA256_H0), cpu_to_be32(SHA256_H1),
36 	cpu_to_be32(SHA256_H2), cpu_to_be32(SHA256_H3),
37 	cpu_to_be32(SHA256_H4), cpu_to_be32(SHA256_H5),
38 	cpu_to_be32(SHA256_H6), cpu_to_be32(SHA256_H7),
39 };
40 
41 static const __be64 ccp_sha384_init[SHA512_DIGEST_SIZE / sizeof(__be64)] = {
42 	cpu_to_be64(SHA384_H0), cpu_to_be64(SHA384_H1),
43 	cpu_to_be64(SHA384_H2), cpu_to_be64(SHA384_H3),
44 	cpu_to_be64(SHA384_H4), cpu_to_be64(SHA384_H5),
45 	cpu_to_be64(SHA384_H6), cpu_to_be64(SHA384_H7),
46 };
47 
48 static const __be64 ccp_sha512_init[SHA512_DIGEST_SIZE / sizeof(__be64)] = {
49 	cpu_to_be64(SHA512_H0), cpu_to_be64(SHA512_H1),
50 	cpu_to_be64(SHA512_H2), cpu_to_be64(SHA512_H3),
51 	cpu_to_be64(SHA512_H4), cpu_to_be64(SHA512_H5),
52 	cpu_to_be64(SHA512_H6), cpu_to_be64(SHA512_H7),
53 };
54 
/* Produce a job ID for a new command: a freshly generated ID on version 3.0
 * devices, 0 on every other version. The argument is parenthesized so that
 * any pointer expression may be passed safely.
 */
#define	CCP_NEW_JOBID(ccp)	(((ccp)->vdata->version == CCP_VERSION(3, 0)) ? \
					ccp_gen_jobid(ccp) : 0)
57 
58 static u32 ccp_gen_jobid(struct ccp_device *ccp)
59 {
60 	return atomic_inc_return(&ccp->current_id) & CCP_JOBID_MASK;
61 }
62 
63 static void ccp_sg_free(struct ccp_sg_workarea *wa)
64 {
65 	if (wa->dma_count)
66 		dma_unmap_sg(wa->dma_dev, wa->dma_sg, wa->nents, wa->dma_dir);
67 
68 	wa->dma_count = 0;
69 }
70 
71 static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev,
72 				struct scatterlist *sg, u64 len,
73 				enum dma_data_direction dma_dir)
74 {
75 	memset(wa, 0, sizeof(*wa));
76 
77 	wa->sg = sg;
78 	if (!sg)
79 		return 0;
80 
81 	wa->nents = sg_nents_for_len(sg, len);
82 	if (wa->nents < 0)
83 		return wa->nents;
84 
85 	wa->bytes_left = len;
86 	wa->sg_used = 0;
87 
88 	if (len == 0)
89 		return 0;
90 
91 	if (dma_dir == DMA_NONE)
92 		return 0;
93 
94 	wa->dma_sg = sg;
95 	wa->dma_dev = dev;
96 	wa->dma_dir = dma_dir;
97 	wa->dma_count = dma_map_sg(dev, sg, wa->nents, dma_dir);
98 	if (!wa->dma_count)
99 		return -ENOMEM;
100 
101 	return 0;
102 }
103 
104 static void ccp_update_sg_workarea(struct ccp_sg_workarea *wa, unsigned int len)
105 {
106 	unsigned int nbytes = min_t(u64, len, wa->bytes_left);
107 
108 	if (!wa->sg)
109 		return;
110 
111 	wa->sg_used += nbytes;
112 	wa->bytes_left -= nbytes;
113 	if (wa->sg_used == wa->sg->length) {
114 		wa->sg = sg_next(wa->sg);
115 		wa->sg_used = 0;
116 	}
117 }
118 
119 static void ccp_dm_free(struct ccp_dm_workarea *wa)
120 {
121 	if (wa->length <= CCP_DMAPOOL_MAX_SIZE) {
122 		if (wa->address)
123 			dma_pool_free(wa->dma_pool, wa->address,
124 				      wa->dma.address);
125 	} else {
126 		if (wa->dma.address)
127 			dma_unmap_single(wa->dev, wa->dma.address, wa->length,
128 					 wa->dma.dir);
129 		kfree(wa->address);
130 	}
131 
132 	wa->address = NULL;
133 	wa->dma.address = 0;
134 }
135 
136 static int ccp_init_dm_workarea(struct ccp_dm_workarea *wa,
137 				struct ccp_cmd_queue *cmd_q,
138 				unsigned int len,
139 				enum dma_data_direction dir)
140 {
141 	memset(wa, 0, sizeof(*wa));
142 
143 	if (!len)
144 		return 0;
145 
146 	wa->dev = cmd_q->ccp->dev;
147 	wa->length = len;
148 
149 	if (len <= CCP_DMAPOOL_MAX_SIZE) {
150 		wa->dma_pool = cmd_q->dma_pool;
151 
152 		wa->address = dma_pool_zalloc(wa->dma_pool, GFP_KERNEL,
153 					     &wa->dma.address);
154 		if (!wa->address)
155 			return -ENOMEM;
156 
157 		wa->dma.length = CCP_DMAPOOL_MAX_SIZE;
158 
159 	} else {
160 		wa->address = kzalloc(len, GFP_KERNEL);
161 		if (!wa->address)
162 			return -ENOMEM;
163 
164 		wa->dma.address = dma_map_single(wa->dev, wa->address, len,
165 						 dir);
166 		if (dma_mapping_error(wa->dev, wa->dma.address))
167 			return -ENOMEM;
168 
169 		wa->dma.length = len;
170 	}
171 	wa->dma.dir = dir;
172 
173 	return 0;
174 }
175 
176 static int ccp_set_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
177 			   struct scatterlist *sg, unsigned int sg_offset,
178 			   unsigned int len)
179 {
180 	WARN_ON(!wa->address);
181 
182 	if (len > (wa->length - wa_offset))
183 		return -EINVAL;
184 
185 	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
186 				 0);
187 	return 0;
188 }
189 
190 static void ccp_get_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
191 			    struct scatterlist *sg, unsigned int sg_offset,
192 			    unsigned int len)
193 {
194 	WARN_ON(!wa->address);
195 
196 	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
197 				 1);
198 }
199 
200 static int ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa,
201 				   unsigned int wa_offset,
202 				   struct scatterlist *sg,
203 				   unsigned int sg_offset,
204 				   unsigned int len)
205 {
206 	u8 *p, *q;
207 	int	rc;
208 
209 	rc = ccp_set_dm_area(wa, wa_offset, sg, sg_offset, len);
210 	if (rc)
211 		return rc;
212 
213 	p = wa->address + wa_offset;
214 	q = p + len - 1;
215 	while (p < q) {
216 		*p = *p ^ *q;
217 		*q = *p ^ *q;
218 		*p = *p ^ *q;
219 		p++;
220 		q--;
221 	}
222 	return 0;
223 }
224 
225 static void ccp_reverse_get_dm_area(struct ccp_dm_workarea *wa,
226 				    unsigned int wa_offset,
227 				    struct scatterlist *sg,
228 				    unsigned int sg_offset,
229 				    unsigned int len)
230 {
231 	u8 *p, *q;
232 
233 	p = wa->address + wa_offset;
234 	q = p + len - 1;
235 	while (p < q) {
236 		*p = *p ^ *q;
237 		*q = *p ^ *q;
238 		*p = *p ^ *q;
239 		p++;
240 		q--;
241 	}
242 
243 	ccp_get_dm_area(wa, wa_offset, sg, sg_offset, len);
244 }
245 
246 static void ccp_free_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q)
247 {
248 	ccp_dm_free(&data->dm_wa);
249 	ccp_sg_free(&data->sg_wa);
250 }
251 
252 static int ccp_init_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q,
253 			 struct scatterlist *sg, u64 sg_len,
254 			 unsigned int dm_len,
255 			 enum dma_data_direction dir)
256 {
257 	int ret;
258 
259 	memset(data, 0, sizeof(*data));
260 
261 	ret = ccp_init_sg_workarea(&data->sg_wa, cmd_q->ccp->dev, sg, sg_len,
262 				   dir);
263 	if (ret)
264 		goto e_err;
265 
266 	ret = ccp_init_dm_workarea(&data->dm_wa, cmd_q, dm_len, dir);
267 	if (ret)
268 		goto e_err;
269 
270 	return 0;
271 
272 e_err:
273 	ccp_free_data(data, cmd_q);
274 
275 	return ret;
276 }
277 
278 static unsigned int ccp_queue_buf(struct ccp_data *data, unsigned int from)
279 {
280 	struct ccp_sg_workarea *sg_wa = &data->sg_wa;
281 	struct ccp_dm_workarea *dm_wa = &data->dm_wa;
282 	unsigned int buf_count, nbytes;
283 
284 	/* Clear the buffer if setting it */
285 	if (!from)
286 		memset(dm_wa->address, 0, dm_wa->length);
287 
288 	if (!sg_wa->sg)
289 		return 0;
290 
291 	/* Perform the copy operation
292 	 *   nbytes will always be <= UINT_MAX because dm_wa->length is
293 	 *   an unsigned int
294 	 */
295 	nbytes = min_t(u64, sg_wa->bytes_left, dm_wa->length);
296 	scatterwalk_map_and_copy(dm_wa->address, sg_wa->sg, sg_wa->sg_used,
297 				 nbytes, from);
298 
299 	/* Update the structures and generate the count */
300 	buf_count = 0;
301 	while (sg_wa->bytes_left && (buf_count < dm_wa->length)) {
302 		nbytes = min(sg_wa->sg->length - sg_wa->sg_used,
303 			     dm_wa->length - buf_count);
304 		nbytes = min_t(u64, sg_wa->bytes_left, nbytes);
305 
306 		buf_count += nbytes;
307 		ccp_update_sg_workarea(sg_wa, nbytes);
308 	}
309 
310 	return buf_count;
311 }
312 
/* Clear the bounce buffer and fill it from the scatterlist; returns the
 * byte count reported by ccp_queue_buf().
 */
static unsigned int ccp_fill_queue_buf(struct ccp_data *data)
{
	unsigned int filled = ccp_queue_buf(data, 0);

	return filled;
}
317 
/* Copy the bounce buffer out to the scatterlist; returns the byte count
 * reported by ccp_queue_buf().
 */
static unsigned int ccp_empty_queue_buf(struct ccp_data *data)
{
	unsigned int emptied = ccp_queue_buf(data, 1);

	return emptied;
}
322 
323 static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst,
324 			     struct ccp_op *op, unsigned int block_size,
325 			     bool blocksize_op)
326 {
327 	unsigned int sg_src_len, sg_dst_len, op_len;
328 
329 	/* The CCP can only DMA from/to one address each per operation. This
330 	 * requires that we find the smallest DMA area between the source
331 	 * and destination. The resulting len values will always be <= UINT_MAX
332 	 * because the dma length is an unsigned int.
333 	 */
334 	sg_src_len = sg_dma_len(src->sg_wa.sg) - src->sg_wa.sg_used;
335 	sg_src_len = min_t(u64, src->sg_wa.bytes_left, sg_src_len);
336 
337 	if (dst) {
338 		sg_dst_len = sg_dma_len(dst->sg_wa.sg) - dst->sg_wa.sg_used;
339 		sg_dst_len = min_t(u64, src->sg_wa.bytes_left, sg_dst_len);
340 		op_len = min(sg_src_len, sg_dst_len);
341 	} else {
342 		op_len = sg_src_len;
343 	}
344 
345 	/* The data operation length will be at least block_size in length
346 	 * or the smaller of available sg room remaining for the source or
347 	 * the destination
348 	 */
349 	op_len = max(op_len, block_size);
350 
351 	/* Unless we have to buffer data, there's no reason to wait */
352 	op->soc = 0;
353 
354 	if (sg_src_len < block_size) {
355 		/* Not enough data in the sg element, so it
356 		 * needs to be buffered into a blocksize chunk
357 		 */
358 		int cp_len = ccp_fill_queue_buf(src);
359 
360 		op->soc = 1;
361 		op->src.u.dma.address = src->dm_wa.dma.address;
362 		op->src.u.dma.offset = 0;
363 		op->src.u.dma.length = (blocksize_op) ? block_size : cp_len;
364 	} else {
365 		/* Enough data in the sg element, but we need to
366 		 * adjust for any previously copied data
367 		 */
368 		op->src.u.dma.address = sg_dma_address(src->sg_wa.sg);
369 		op->src.u.dma.offset = src->sg_wa.sg_used;
370 		op->src.u.dma.length = op_len & ~(block_size - 1);
371 
372 		ccp_update_sg_workarea(&src->sg_wa, op->src.u.dma.length);
373 	}
374 
375 	if (dst) {
376 		if (sg_dst_len < block_size) {
377 			/* Not enough room in the sg element or we're on the
378 			 * last piece of data (when using padding), so the
379 			 * output needs to be buffered into a blocksize chunk
380 			 */
381 			op->soc = 1;
382 			op->dst.u.dma.address = dst->dm_wa.dma.address;
383 			op->dst.u.dma.offset = 0;
384 			op->dst.u.dma.length = op->src.u.dma.length;
385 		} else {
386 			/* Enough room in the sg element, but we need to
387 			 * adjust for any previously used area
388 			 */
389 			op->dst.u.dma.address = sg_dma_address(dst->sg_wa.sg);
390 			op->dst.u.dma.offset = dst->sg_wa.sg_used;
391 			op->dst.u.dma.length = op->src.u.dma.length;
392 		}
393 	}
394 }
395 
396 static void ccp_process_data(struct ccp_data *src, struct ccp_data *dst,
397 			     struct ccp_op *op)
398 {
399 	op->init = 0;
400 
401 	if (dst) {
402 		if (op->dst.u.dma.address == dst->dm_wa.dma.address)
403 			ccp_empty_queue_buf(dst);
404 		else
405 			ccp_update_sg_workarea(&dst->sg_wa,
406 					       op->dst.u.dma.length);
407 	}
408 }
409 
410 static int ccp_copy_to_from_sb(struct ccp_cmd_queue *cmd_q,
411 			       struct ccp_dm_workarea *wa, u32 jobid, u32 sb,
412 			       u32 byte_swap, bool from)
413 {
414 	struct ccp_op op;
415 
416 	memset(&op, 0, sizeof(op));
417 
418 	op.cmd_q = cmd_q;
419 	op.jobid = jobid;
420 	op.eom = 1;
421 
422 	if (from) {
423 		op.soc = 1;
424 		op.src.type = CCP_MEMTYPE_SB;
425 		op.src.u.sb = sb;
426 		op.dst.type = CCP_MEMTYPE_SYSTEM;
427 		op.dst.u.dma.address = wa->dma.address;
428 		op.dst.u.dma.length = wa->length;
429 	} else {
430 		op.src.type = CCP_MEMTYPE_SYSTEM;
431 		op.src.u.dma.address = wa->dma.address;
432 		op.src.u.dma.length = wa->length;
433 		op.dst.type = CCP_MEMTYPE_SB;
434 		op.dst.u.sb = sb;
435 	}
436 
437 	op.u.passthru.byte_swap = byte_swap;
438 
439 	return cmd_q->ccp->vdata->perform->passthru(&op);
440 }
441 
442 static int ccp_copy_to_sb(struct ccp_cmd_queue *cmd_q,
443 			  struct ccp_dm_workarea *wa, u32 jobid, u32 sb,
444 			  u32 byte_swap)
445 {
446 	return ccp_copy_to_from_sb(cmd_q, wa, jobid, sb, byte_swap, false);
447 }
448 
449 static int ccp_copy_from_sb(struct ccp_cmd_queue *cmd_q,
450 			    struct ccp_dm_workarea *wa, u32 jobid, u32 sb,
451 			    u32 byte_swap)
452 {
453 	return ccp_copy_to_from_sb(cmd_q, wa, jobid, sb, byte_swap, true);
454 }
455 
456 static noinline_for_stack int
457 ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
458 {
459 	struct ccp_aes_engine *aes = &cmd->u.aes;
460 	struct ccp_dm_workarea key, ctx;
461 	struct ccp_data src;
462 	struct ccp_op op;
463 	unsigned int dm_offset;
464 	int ret;
465 
466 	if (!((aes->key_len == AES_KEYSIZE_128) ||
467 	      (aes->key_len == AES_KEYSIZE_192) ||
468 	      (aes->key_len == AES_KEYSIZE_256)))
469 		return -EINVAL;
470 
471 	if (aes->src_len & (AES_BLOCK_SIZE - 1))
472 		return -EINVAL;
473 
474 	if (aes->iv_len != AES_BLOCK_SIZE)
475 		return -EINVAL;
476 
477 	if (!aes->key || !aes->iv || !aes->src)
478 		return -EINVAL;
479 
480 	if (aes->cmac_final) {
481 		if (aes->cmac_key_len != AES_BLOCK_SIZE)
482 			return -EINVAL;
483 
484 		if (!aes->cmac_key)
485 			return -EINVAL;
486 	}
487 
488 	BUILD_BUG_ON(CCP_AES_KEY_SB_COUNT != 1);
489 	BUILD_BUG_ON(CCP_AES_CTX_SB_COUNT != 1);
490 
491 	ret = -EIO;
492 	memset(&op, 0, sizeof(op));
493 	op.cmd_q = cmd_q;
494 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
495 	op.sb_key = cmd_q->sb_key;
496 	op.sb_ctx = cmd_q->sb_ctx;
497 	op.init = 1;
498 	op.u.aes.type = aes->type;
499 	op.u.aes.mode = aes->mode;
500 	op.u.aes.action = aes->action;
501 
502 	/* All supported key sizes fit in a single (32-byte) SB entry
503 	 * and must be in little endian format. Use the 256-bit byte
504 	 * swap passthru option to convert from big endian to little
505 	 * endian.
506 	 */
507 	ret = ccp_init_dm_workarea(&key, cmd_q,
508 				   CCP_AES_KEY_SB_COUNT * CCP_SB_BYTES,
509 				   DMA_TO_DEVICE);
510 	if (ret)
511 		return ret;
512 
513 	dm_offset = CCP_SB_BYTES - aes->key_len;
514 	ret = ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
515 	if (ret)
516 		goto e_key;
517 	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
518 			     CCP_PASSTHRU_BYTESWAP_256BIT);
519 	if (ret) {
520 		cmd->engine_error = cmd_q->cmd_error;
521 		goto e_key;
522 	}
523 
524 	/* The AES context fits in a single (32-byte) SB entry and
525 	 * must be in little endian format. Use the 256-bit byte swap
526 	 * passthru option to convert from big endian to little endian.
527 	 */
528 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
529 				   CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES,
530 				   DMA_BIDIRECTIONAL);
531 	if (ret)
532 		goto e_key;
533 
534 	dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
535 	ret = ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
536 	if (ret)
537 		goto e_ctx;
538 	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
539 			     CCP_PASSTHRU_BYTESWAP_256BIT);
540 	if (ret) {
541 		cmd->engine_error = cmd_q->cmd_error;
542 		goto e_ctx;
543 	}
544 
545 	/* Send data to the CCP AES engine */
546 	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
547 			    AES_BLOCK_SIZE, DMA_TO_DEVICE);
548 	if (ret)
549 		goto e_ctx;
550 
551 	while (src.sg_wa.bytes_left) {
552 		ccp_prepare_data(&src, NULL, &op, AES_BLOCK_SIZE, true);
553 		if (aes->cmac_final && !src.sg_wa.bytes_left) {
554 			op.eom = 1;
555 
556 			/* Push the K1/K2 key to the CCP now */
557 			ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid,
558 					       op.sb_ctx,
559 					       CCP_PASSTHRU_BYTESWAP_256BIT);
560 			if (ret) {
561 				cmd->engine_error = cmd_q->cmd_error;
562 				goto e_src;
563 			}
564 
565 			ret = ccp_set_dm_area(&ctx, 0, aes->cmac_key, 0,
566 					      aes->cmac_key_len);
567 			if (ret)
568 				goto e_src;
569 			ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
570 					     CCP_PASSTHRU_BYTESWAP_256BIT);
571 			if (ret) {
572 				cmd->engine_error = cmd_q->cmd_error;
573 				goto e_src;
574 			}
575 		}
576 
577 		ret = cmd_q->ccp->vdata->perform->aes(&op);
578 		if (ret) {
579 			cmd->engine_error = cmd_q->cmd_error;
580 			goto e_src;
581 		}
582 
583 		ccp_process_data(&src, NULL, &op);
584 	}
585 
586 	/* Retrieve the AES context - convert from LE to BE using
587 	 * 32-byte (256-bit) byteswapping
588 	 */
589 	ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
590 			       CCP_PASSTHRU_BYTESWAP_256BIT);
591 	if (ret) {
592 		cmd->engine_error = cmd_q->cmd_error;
593 		goto e_src;
594 	}
595 
596 	/* ...but we only need AES_BLOCK_SIZE bytes */
597 	dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
598 	ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
599 
600 e_src:
601 	ccp_free_data(&src, cmd_q);
602 
603 e_ctx:
604 	ccp_dm_free(&ctx);
605 
606 e_key:
607 	ccp_dm_free(&key);
608 
609 	return ret;
610 }
611 
612 static noinline_for_stack int
613 ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
614 {
615 	struct ccp_aes_engine *aes = &cmd->u.aes;
616 	struct ccp_dm_workarea key, ctx, final_wa, tag;
617 	struct ccp_data src, dst;
618 	struct ccp_data aad;
619 	struct ccp_op op;
620 
621 	unsigned long long *final;
622 	unsigned int dm_offset;
623 	unsigned int authsize;
624 	unsigned int jobid;
625 	unsigned int ilen;
626 	bool in_place = true; /* Default value */
627 	int ret;
628 
629 	struct scatterlist *p_inp, sg_inp[2];
630 	struct scatterlist *p_tag, sg_tag[2];
631 	struct scatterlist *p_outp, sg_outp[2];
632 	struct scatterlist *p_aad;
633 
634 	if (!aes->iv)
635 		return -EINVAL;
636 
637 	if (!((aes->key_len == AES_KEYSIZE_128) ||
638 		(aes->key_len == AES_KEYSIZE_192) ||
639 		(aes->key_len == AES_KEYSIZE_256)))
640 		return -EINVAL;
641 
642 	if (!aes->key) /* Gotta have a key SGL */
643 		return -EINVAL;
644 
645 	/* Zero defaults to 16 bytes, the maximum size */
646 	authsize = aes->authsize ? aes->authsize : AES_BLOCK_SIZE;
647 	switch (authsize) {
648 	case 16:
649 	case 15:
650 	case 14:
651 	case 13:
652 	case 12:
653 	case 8:
654 	case 4:
655 		break;
656 	default:
657 		return -EINVAL;
658 	}
659 
660 	/* First, decompose the source buffer into AAD & PT,
661 	 * and the destination buffer into AAD, CT & tag, or
662 	 * the input into CT & tag.
663 	 * It is expected that the input and output SGs will
664 	 * be valid, even if the AAD and input lengths are 0.
665 	 */
666 	p_aad = aes->src;
667 	p_inp = scatterwalk_ffwd(sg_inp, aes->src, aes->aad_len);
668 	p_outp = scatterwalk_ffwd(sg_outp, aes->dst, aes->aad_len);
669 	if (aes->action == CCP_AES_ACTION_ENCRYPT) {
670 		ilen = aes->src_len;
671 		p_tag = scatterwalk_ffwd(sg_tag, p_outp, ilen);
672 	} else {
673 		/* Input length for decryption includes tag */
674 		ilen = aes->src_len - authsize;
675 		p_tag = scatterwalk_ffwd(sg_tag, p_inp, ilen);
676 	}
677 
678 	jobid = CCP_NEW_JOBID(cmd_q->ccp);
679 
680 	memset(&op, 0, sizeof(op));
681 	op.cmd_q = cmd_q;
682 	op.jobid = jobid;
683 	op.sb_key = cmd_q->sb_key; /* Pre-allocated */
684 	op.sb_ctx = cmd_q->sb_ctx; /* Pre-allocated */
685 	op.init = 1;
686 	op.u.aes.type = aes->type;
687 
688 	/* Copy the key to the LSB */
689 	ret = ccp_init_dm_workarea(&key, cmd_q,
690 				   CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES,
691 				   DMA_TO_DEVICE);
692 	if (ret)
693 		return ret;
694 
695 	dm_offset = CCP_SB_BYTES - aes->key_len;
696 	ret = ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
697 	if (ret)
698 		goto e_key;
699 	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
700 			     CCP_PASSTHRU_BYTESWAP_256BIT);
701 	if (ret) {
702 		cmd->engine_error = cmd_q->cmd_error;
703 		goto e_key;
704 	}
705 
706 	/* Copy the context (IV) to the LSB.
707 	 * There is an assumption here that the IV is 96 bits in length, plus
708 	 * a nonce of 32 bits. If no IV is present, use a zeroed buffer.
709 	 */
710 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
711 				   CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES,
712 				   DMA_BIDIRECTIONAL);
713 	if (ret)
714 		goto e_key;
715 
716 	dm_offset = CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES - aes->iv_len;
717 	ret = ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
718 	if (ret)
719 		goto e_ctx;
720 
721 	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
722 			     CCP_PASSTHRU_BYTESWAP_256BIT);
723 	if (ret) {
724 		cmd->engine_error = cmd_q->cmd_error;
725 		goto e_ctx;
726 	}
727 
728 	op.init = 1;
729 	if (aes->aad_len > 0) {
730 		/* Step 1: Run a GHASH over the Additional Authenticated Data */
731 		ret = ccp_init_data(&aad, cmd_q, p_aad, aes->aad_len,
732 				    AES_BLOCK_SIZE,
733 				    DMA_TO_DEVICE);
734 		if (ret)
735 			goto e_ctx;
736 
737 		op.u.aes.mode = CCP_AES_MODE_GHASH;
738 		op.u.aes.action = CCP_AES_GHASHAAD;
739 
740 		while (aad.sg_wa.bytes_left) {
741 			ccp_prepare_data(&aad, NULL, &op, AES_BLOCK_SIZE, true);
742 
743 			ret = cmd_q->ccp->vdata->perform->aes(&op);
744 			if (ret) {
745 				cmd->engine_error = cmd_q->cmd_error;
746 				goto e_aad;
747 			}
748 
749 			ccp_process_data(&aad, NULL, &op);
750 			op.init = 0;
751 		}
752 	}
753 
754 	op.u.aes.mode = CCP_AES_MODE_GCTR;
755 	op.u.aes.action = aes->action;
756 
757 	if (ilen > 0) {
758 		/* Step 2: Run a GCTR over the plaintext */
759 		in_place = (sg_virt(p_inp) == sg_virt(p_outp)) ? true : false;
760 
761 		ret = ccp_init_data(&src, cmd_q, p_inp, ilen,
762 				    AES_BLOCK_SIZE,
763 				    in_place ? DMA_BIDIRECTIONAL
764 					     : DMA_TO_DEVICE);
765 		if (ret)
766 			goto e_ctx;
767 
768 		if (in_place) {
769 			dst = src;
770 		} else {
771 			ret = ccp_init_data(&dst, cmd_q, p_outp, ilen,
772 					    AES_BLOCK_SIZE, DMA_FROM_DEVICE);
773 			if (ret)
774 				goto e_src;
775 		}
776 
777 		op.soc = 0;
778 		op.eom = 0;
779 		op.init = 1;
780 		while (src.sg_wa.bytes_left) {
781 			ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true);
782 			if (!src.sg_wa.bytes_left) {
783 				unsigned int nbytes = ilen % AES_BLOCK_SIZE;
784 
785 				if (nbytes) {
786 					op.eom = 1;
787 					op.u.aes.size = (nbytes * 8) - 1;
788 				}
789 			}
790 
791 			ret = cmd_q->ccp->vdata->perform->aes(&op);
792 			if (ret) {
793 				cmd->engine_error = cmd_q->cmd_error;
794 				goto e_dst;
795 			}
796 
797 			ccp_process_data(&src, &dst, &op);
798 			op.init = 0;
799 		}
800 	}
801 
802 	/* Step 3: Update the IV portion of the context with the original IV */
803 	ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
804 			       CCP_PASSTHRU_BYTESWAP_256BIT);
805 	if (ret) {
806 		cmd->engine_error = cmd_q->cmd_error;
807 		goto e_dst;
808 	}
809 
810 	ret = ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
811 	if (ret)
812 		goto e_dst;
813 
814 	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
815 			     CCP_PASSTHRU_BYTESWAP_256BIT);
816 	if (ret) {
817 		cmd->engine_error = cmd_q->cmd_error;
818 		goto e_dst;
819 	}
820 
821 	/* Step 4: Concatenate the lengths of the AAD and source, and
822 	 * hash that 16 byte buffer.
823 	 */
824 	ret = ccp_init_dm_workarea(&final_wa, cmd_q, AES_BLOCK_SIZE,
825 				   DMA_BIDIRECTIONAL);
826 	if (ret)
827 		goto e_dst;
828 	final = (unsigned long long *) final_wa.address;
829 	final[0] = cpu_to_be64(aes->aad_len * 8);
830 	final[1] = cpu_to_be64(ilen * 8);
831 
832 	memset(&op, 0, sizeof(op));
833 	op.cmd_q = cmd_q;
834 	op.jobid = jobid;
835 	op.sb_key = cmd_q->sb_key; /* Pre-allocated */
836 	op.sb_ctx = cmd_q->sb_ctx; /* Pre-allocated */
837 	op.init = 1;
838 	op.u.aes.type = aes->type;
839 	op.u.aes.mode = CCP_AES_MODE_GHASH;
840 	op.u.aes.action = CCP_AES_GHASHFINAL;
841 	op.src.type = CCP_MEMTYPE_SYSTEM;
842 	op.src.u.dma.address = final_wa.dma.address;
843 	op.src.u.dma.length = AES_BLOCK_SIZE;
844 	op.dst.type = CCP_MEMTYPE_SYSTEM;
845 	op.dst.u.dma.address = final_wa.dma.address;
846 	op.dst.u.dma.length = AES_BLOCK_SIZE;
847 	op.eom = 1;
848 	op.u.aes.size = 0;
849 	ret = cmd_q->ccp->vdata->perform->aes(&op);
850 	if (ret)
851 		goto e_dst;
852 
853 	if (aes->action == CCP_AES_ACTION_ENCRYPT) {
854 		/* Put the ciphered tag after the ciphertext. */
855 		ccp_get_dm_area(&final_wa, 0, p_tag, 0, authsize);
856 	} else {
857 		/* Does this ciphered tag match the input? */
858 		ret = ccp_init_dm_workarea(&tag, cmd_q, authsize,
859 					   DMA_BIDIRECTIONAL);
860 		if (ret)
861 			goto e_tag;
862 		ret = ccp_set_dm_area(&tag, 0, p_tag, 0, authsize);
863 		if (ret)
864 			goto e_tag;
865 
866 		ret = crypto_memneq(tag.address, final_wa.address,
867 				    authsize) ? -EBADMSG : 0;
868 		ccp_dm_free(&tag);
869 	}
870 
871 e_tag:
872 	ccp_dm_free(&final_wa);
873 
874 e_dst:
875 	if (ilen > 0 && !in_place)
876 		ccp_free_data(&dst, cmd_q);
877 
878 e_src:
879 	if (ilen > 0)
880 		ccp_free_data(&src, cmd_q);
881 
882 e_aad:
883 	if (aes->aad_len)
884 		ccp_free_data(&aad, cmd_q);
885 
886 e_ctx:
887 	ccp_dm_free(&ctx);
888 
889 e_key:
890 	ccp_dm_free(&key);
891 
892 	return ret;
893 }
894 
895 static noinline_for_stack int
896 ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
897 {
898 	struct ccp_aes_engine *aes = &cmd->u.aes;
899 	struct ccp_dm_workarea key, ctx;
900 	struct ccp_data src, dst;
901 	struct ccp_op op;
902 	unsigned int dm_offset;
903 	bool in_place = false;
904 	int ret;
905 
906 	if (!((aes->key_len == AES_KEYSIZE_128) ||
907 	      (aes->key_len == AES_KEYSIZE_192) ||
908 	      (aes->key_len == AES_KEYSIZE_256)))
909 		return -EINVAL;
910 
911 	if (((aes->mode == CCP_AES_MODE_ECB) ||
912 	     (aes->mode == CCP_AES_MODE_CBC)) &&
913 	    (aes->src_len & (AES_BLOCK_SIZE - 1)))
914 		return -EINVAL;
915 
916 	if (!aes->key || !aes->src || !aes->dst)
917 		return -EINVAL;
918 
919 	if (aes->mode != CCP_AES_MODE_ECB) {
920 		if (aes->iv_len != AES_BLOCK_SIZE)
921 			return -EINVAL;
922 
923 		if (!aes->iv)
924 			return -EINVAL;
925 	}
926 
927 	BUILD_BUG_ON(CCP_AES_KEY_SB_COUNT != 1);
928 	BUILD_BUG_ON(CCP_AES_CTX_SB_COUNT != 1);
929 
930 	ret = -EIO;
931 	memset(&op, 0, sizeof(op));
932 	op.cmd_q = cmd_q;
933 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
934 	op.sb_key = cmd_q->sb_key;
935 	op.sb_ctx = cmd_q->sb_ctx;
936 	op.init = (aes->mode == CCP_AES_MODE_ECB) ? 0 : 1;
937 	op.u.aes.type = aes->type;
938 	op.u.aes.mode = aes->mode;
939 	op.u.aes.action = aes->action;
940 
941 	/* All supported key sizes fit in a single (32-byte) SB entry
942 	 * and must be in little endian format. Use the 256-bit byte
943 	 * swap passthru option to convert from big endian to little
944 	 * endian.
945 	 */
946 	ret = ccp_init_dm_workarea(&key, cmd_q,
947 				   CCP_AES_KEY_SB_COUNT * CCP_SB_BYTES,
948 				   DMA_TO_DEVICE);
949 	if (ret)
950 		return ret;
951 
952 	dm_offset = CCP_SB_BYTES - aes->key_len;
953 	ret = ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
954 	if (ret)
955 		goto e_key;
956 	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
957 			     CCP_PASSTHRU_BYTESWAP_256BIT);
958 	if (ret) {
959 		cmd->engine_error = cmd_q->cmd_error;
960 		goto e_key;
961 	}
962 
963 	/* The AES context fits in a single (32-byte) SB entry and
964 	 * must be in little endian format. Use the 256-bit byte swap
965 	 * passthru option to convert from big endian to little endian.
966 	 */
967 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
968 				   CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES,
969 				   DMA_BIDIRECTIONAL);
970 	if (ret)
971 		goto e_key;
972 
973 	if (aes->mode != CCP_AES_MODE_ECB) {
974 		/* Load the AES context - convert to LE */
975 		dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
976 		ret = ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
977 		if (ret)
978 			goto e_ctx;
979 		ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
980 				     CCP_PASSTHRU_BYTESWAP_256BIT);
981 		if (ret) {
982 			cmd->engine_error = cmd_q->cmd_error;
983 			goto e_ctx;
984 		}
985 	}
986 	switch (aes->mode) {
987 	case CCP_AES_MODE_CFB: /* CFB128 only */
988 	case CCP_AES_MODE_CTR:
989 		op.u.aes.size = AES_BLOCK_SIZE * BITS_PER_BYTE - 1;
990 		break;
991 	default:
992 		op.u.aes.size = 0;
993 	}
994 
995 	/* Prepare the input and output data workareas. For in-place
996 	 * operations we need to set the dma direction to BIDIRECTIONAL
997 	 * and copy the src workarea to the dst workarea.
998 	 */
999 	if (sg_virt(aes->src) == sg_virt(aes->dst))
1000 		in_place = true;
1001 
1002 	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
1003 			    AES_BLOCK_SIZE,
1004 			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1005 	if (ret)
1006 		goto e_ctx;
1007 
1008 	if (in_place) {
1009 		dst = src;
1010 	} else {
1011 		ret = ccp_init_data(&dst, cmd_q, aes->dst, aes->src_len,
1012 				    AES_BLOCK_SIZE, DMA_FROM_DEVICE);
1013 		if (ret)
1014 			goto e_src;
1015 	}
1016 
1017 	/* Send data to the CCP AES engine */
1018 	while (src.sg_wa.bytes_left) {
1019 		ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true);
1020 		if (!src.sg_wa.bytes_left) {
1021 			op.eom = 1;
1022 
1023 			/* Since we don't retrieve the AES context in ECB
1024 			 * mode we have to wait for the operation to complete
1025 			 * on the last piece of data
1026 			 */
1027 			if (aes->mode == CCP_AES_MODE_ECB)
1028 				op.soc = 1;
1029 		}
1030 
1031 		ret = cmd_q->ccp->vdata->perform->aes(&op);
1032 		if (ret) {
1033 			cmd->engine_error = cmd_q->cmd_error;
1034 			goto e_dst;
1035 		}
1036 
1037 		ccp_process_data(&src, &dst, &op);
1038 	}
1039 
1040 	if (aes->mode != CCP_AES_MODE_ECB) {
1041 		/* Retrieve the AES context - convert from LE to BE using
1042 		 * 32-byte (256-bit) byteswapping
1043 		 */
1044 		ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1045 				       CCP_PASSTHRU_BYTESWAP_256BIT);
1046 		if (ret) {
1047 			cmd->engine_error = cmd_q->cmd_error;
1048 			goto e_dst;
1049 		}
1050 
1051 		/* ...but we only need AES_BLOCK_SIZE bytes */
1052 		dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
1053 		ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
1054 	}
1055 
1056 e_dst:
1057 	if (!in_place)
1058 		ccp_free_data(&dst, cmd_q);
1059 
1060 e_src:
1061 	ccp_free_data(&src, cmd_q);
1062 
1063 e_ctx:
1064 	ccp_dm_free(&ctx);
1065 
1066 e_key:
1067 	ccp_dm_free(&key);
1068 
1069 	return ret;
1070 }
1071 
1072 static noinline_for_stack int
1073 ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1074 {
1075 	struct ccp_xts_aes_engine *xts = &cmd->u.xts;
1076 	struct ccp_dm_workarea key, ctx;
1077 	struct ccp_data src, dst;
1078 	struct ccp_op op;
1079 	unsigned int unit_size, dm_offset;
1080 	bool in_place = false;
1081 	unsigned int sb_count;
1082 	enum ccp_aes_type aestype;
1083 	int ret;
1084 
1085 	switch (xts->unit_size) {
1086 	case CCP_XTS_AES_UNIT_SIZE_16:
1087 		unit_size = 16;
1088 		break;
1089 	case CCP_XTS_AES_UNIT_SIZE_512:
1090 		unit_size = 512;
1091 		break;
1092 	case CCP_XTS_AES_UNIT_SIZE_1024:
1093 		unit_size = 1024;
1094 		break;
1095 	case CCP_XTS_AES_UNIT_SIZE_2048:
1096 		unit_size = 2048;
1097 		break;
1098 	case CCP_XTS_AES_UNIT_SIZE_4096:
1099 		unit_size = 4096;
1100 		break;
1101 
1102 	default:
1103 		return -EINVAL;
1104 	}
1105 
1106 	if (xts->key_len == AES_KEYSIZE_128)
1107 		aestype = CCP_AES_TYPE_128;
1108 	else if (xts->key_len == AES_KEYSIZE_256)
1109 		aestype = CCP_AES_TYPE_256;
1110 	else
1111 		return -EINVAL;
1112 
1113 	if (!xts->final && (xts->src_len & (AES_BLOCK_SIZE - 1)))
1114 		return -EINVAL;
1115 
1116 	if (xts->iv_len != AES_BLOCK_SIZE)
1117 		return -EINVAL;
1118 
1119 	if (!xts->key || !xts->iv || !xts->src || !xts->dst)
1120 		return -EINVAL;
1121 
1122 	BUILD_BUG_ON(CCP_XTS_AES_KEY_SB_COUNT != 1);
1123 	BUILD_BUG_ON(CCP_XTS_AES_CTX_SB_COUNT != 1);
1124 
1125 	ret = -EIO;
1126 	memset(&op, 0, sizeof(op));
1127 	op.cmd_q = cmd_q;
1128 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
1129 	op.sb_key = cmd_q->sb_key;
1130 	op.sb_ctx = cmd_q->sb_ctx;
1131 	op.init = 1;
1132 	op.u.xts.type = aestype;
1133 	op.u.xts.action = xts->action;
1134 	op.u.xts.unit_size = xts->unit_size;
1135 
1136 	/* A version 3 device only supports 128-bit keys, which fits into a
1137 	 * single SB entry. A version 5 device uses a 512-bit vector, so two
1138 	 * SB entries.
1139 	 */
1140 	if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0))
1141 		sb_count = CCP_XTS_AES_KEY_SB_COUNT;
1142 	else
1143 		sb_count = CCP5_XTS_AES_KEY_SB_COUNT;
1144 	ret = ccp_init_dm_workarea(&key, cmd_q,
1145 				   sb_count * CCP_SB_BYTES,
1146 				   DMA_TO_DEVICE);
1147 	if (ret)
1148 		return ret;
1149 
1150 	if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0)) {
1151 		/* All supported key sizes must be in little endian format.
1152 		 * Use the 256-bit byte swap passthru option to convert from
1153 		 * big endian to little endian.
1154 		 */
1155 		dm_offset = CCP_SB_BYTES - AES_KEYSIZE_128;
1156 		ret = ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len);
1157 		if (ret)
1158 			goto e_key;
1159 		ret = ccp_set_dm_area(&key, 0, xts->key, xts->key_len, xts->key_len);
1160 		if (ret)
1161 			goto e_key;
1162 	} else {
1163 		/* Version 5 CCPs use a 512-bit space for the key: each portion
1164 		 * occupies 256 bits, or one entire slot, and is zero-padded.
1165 		 */
1166 		unsigned int pad;
1167 
1168 		dm_offset = CCP_SB_BYTES;
1169 		pad = dm_offset - xts->key_len;
1170 		ret = ccp_set_dm_area(&key, pad, xts->key, 0, xts->key_len);
1171 		if (ret)
1172 			goto e_key;
1173 		ret = ccp_set_dm_area(&key, dm_offset + pad, xts->key,
1174 				      xts->key_len, xts->key_len);
1175 		if (ret)
1176 			goto e_key;
1177 	}
1178 	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
1179 			     CCP_PASSTHRU_BYTESWAP_256BIT);
1180 	if (ret) {
1181 		cmd->engine_error = cmd_q->cmd_error;
1182 		goto e_key;
1183 	}
1184 
1185 	/* The AES context fits in a single (32-byte) SB entry and
1186 	 * for XTS is already in little endian format so no byte swapping
1187 	 * is needed.
1188 	 */
1189 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
1190 				   CCP_XTS_AES_CTX_SB_COUNT * CCP_SB_BYTES,
1191 				   DMA_BIDIRECTIONAL);
1192 	if (ret)
1193 		goto e_key;
1194 
1195 	ret = ccp_set_dm_area(&ctx, 0, xts->iv, 0, xts->iv_len);
1196 	if (ret)
1197 		goto e_ctx;
1198 	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1199 			     CCP_PASSTHRU_BYTESWAP_NOOP);
1200 	if (ret) {
1201 		cmd->engine_error = cmd_q->cmd_error;
1202 		goto e_ctx;
1203 	}
1204 
1205 	/* Prepare the input and output data workareas. For in-place
1206 	 * operations we need to set the dma direction to BIDIRECTIONAL
1207 	 * and copy the src workarea to the dst workarea.
1208 	 */
1209 	if (sg_virt(xts->src) == sg_virt(xts->dst))
1210 		in_place = true;
1211 
1212 	ret = ccp_init_data(&src, cmd_q, xts->src, xts->src_len,
1213 			    unit_size,
1214 			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1215 	if (ret)
1216 		goto e_ctx;
1217 
1218 	if (in_place) {
1219 		dst = src;
1220 	} else {
1221 		ret = ccp_init_data(&dst, cmd_q, xts->dst, xts->src_len,
1222 				    unit_size, DMA_FROM_DEVICE);
1223 		if (ret)
1224 			goto e_src;
1225 	}
1226 
1227 	/* Send data to the CCP AES engine */
1228 	while (src.sg_wa.bytes_left) {
1229 		ccp_prepare_data(&src, &dst, &op, unit_size, true);
1230 		if (!src.sg_wa.bytes_left)
1231 			op.eom = 1;
1232 
1233 		ret = cmd_q->ccp->vdata->perform->xts_aes(&op);
1234 		if (ret) {
1235 			cmd->engine_error = cmd_q->cmd_error;
1236 			goto e_dst;
1237 		}
1238 
1239 		ccp_process_data(&src, &dst, &op);
1240 	}
1241 
1242 	/* Retrieve the AES context - convert from LE to BE using
1243 	 * 32-byte (256-bit) byteswapping
1244 	 */
1245 	ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1246 			       CCP_PASSTHRU_BYTESWAP_256BIT);
1247 	if (ret) {
1248 		cmd->engine_error = cmd_q->cmd_error;
1249 		goto e_dst;
1250 	}
1251 
1252 	/* ...but we only need AES_BLOCK_SIZE bytes */
1253 	dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
1254 	ccp_get_dm_area(&ctx, dm_offset, xts->iv, 0, xts->iv_len);
1255 
1256 e_dst:
1257 	if (!in_place)
1258 		ccp_free_data(&dst, cmd_q);
1259 
1260 e_src:
1261 	ccp_free_data(&src, cmd_q);
1262 
1263 e_ctx:
1264 	ccp_dm_free(&ctx);
1265 
1266 e_key:
1267 	ccp_dm_free(&key);
1268 
1269 	return ret;
1270 }
1271 
1272 static noinline_for_stack int
1273 ccp_run_des3_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1274 {
1275 	struct ccp_des3_engine *des3 = &cmd->u.des3;
1276 
1277 	struct ccp_dm_workarea key, ctx;
1278 	struct ccp_data src, dst;
1279 	struct ccp_op op;
1280 	unsigned int dm_offset;
1281 	unsigned int len_singlekey;
1282 	bool in_place = false;
1283 	int ret;
1284 
1285 	/* Error checks */
1286 	if (cmd_q->ccp->vdata->version < CCP_VERSION(5, 0))
1287 		return -EINVAL;
1288 
1289 	if (!cmd_q->ccp->vdata->perform->des3)
1290 		return -EINVAL;
1291 
1292 	if (des3->key_len != DES3_EDE_KEY_SIZE)
1293 		return -EINVAL;
1294 
1295 	if (((des3->mode == CCP_DES3_MODE_ECB) ||
1296 		(des3->mode == CCP_DES3_MODE_CBC)) &&
1297 		(des3->src_len & (DES3_EDE_BLOCK_SIZE - 1)))
1298 		return -EINVAL;
1299 
1300 	if (!des3->key || !des3->src || !des3->dst)
1301 		return -EINVAL;
1302 
1303 	if (des3->mode != CCP_DES3_MODE_ECB) {
1304 		if (des3->iv_len != DES3_EDE_BLOCK_SIZE)
1305 			return -EINVAL;
1306 
1307 		if (!des3->iv)
1308 			return -EINVAL;
1309 	}
1310 
1311 	ret = -EIO;
1312 	/* Zero out all the fields of the command desc */
1313 	memset(&op, 0, sizeof(op));
1314 
1315 	/* Set up the Function field */
1316 	op.cmd_q = cmd_q;
1317 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
1318 	op.sb_key = cmd_q->sb_key;
1319 
1320 	op.init = (des3->mode == CCP_DES3_MODE_ECB) ? 0 : 1;
1321 	op.u.des3.type = des3->type;
1322 	op.u.des3.mode = des3->mode;
1323 	op.u.des3.action = des3->action;
1324 
1325 	/*
1326 	 * All supported key sizes fit in a single (32-byte) KSB entry and
1327 	 * (like AES) must be in little endian format. Use the 256-bit byte
1328 	 * swap passthru option to convert from big endian to little endian.
1329 	 */
1330 	ret = ccp_init_dm_workarea(&key, cmd_q,
1331 				   CCP_DES3_KEY_SB_COUNT * CCP_SB_BYTES,
1332 				   DMA_TO_DEVICE);
1333 	if (ret)
1334 		return ret;
1335 
1336 	/*
1337 	 * The contents of the key triplet are in the reverse order of what
1338 	 * is required by the engine. Copy the 3 pieces individually to put
1339 	 * them where they belong.
1340 	 */
1341 	dm_offset = CCP_SB_BYTES - des3->key_len; /* Basic offset */
1342 
1343 	len_singlekey = des3->key_len / 3;
1344 	ret = ccp_set_dm_area(&key, dm_offset + 2 * len_singlekey,
1345 			      des3->key, 0, len_singlekey);
1346 	if (ret)
1347 		goto e_key;
1348 	ret = ccp_set_dm_area(&key, dm_offset + len_singlekey,
1349 			      des3->key, len_singlekey, len_singlekey);
1350 	if (ret)
1351 		goto e_key;
1352 	ret = ccp_set_dm_area(&key, dm_offset,
1353 			      des3->key, 2 * len_singlekey, len_singlekey);
1354 	if (ret)
1355 		goto e_key;
1356 
1357 	/* Copy the key to the SB */
1358 	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
1359 			     CCP_PASSTHRU_BYTESWAP_256BIT);
1360 	if (ret) {
1361 		cmd->engine_error = cmd_q->cmd_error;
1362 		goto e_key;
1363 	}
1364 
1365 	/*
1366 	 * The DES3 context fits in a single (32-byte) KSB entry and
1367 	 * must be in little endian format. Use the 256-bit byte swap
1368 	 * passthru option to convert from big endian to little endian.
1369 	 */
1370 	if (des3->mode != CCP_DES3_MODE_ECB) {
1371 		op.sb_ctx = cmd_q->sb_ctx;
1372 
1373 		ret = ccp_init_dm_workarea(&ctx, cmd_q,
1374 					   CCP_DES3_CTX_SB_COUNT * CCP_SB_BYTES,
1375 					   DMA_BIDIRECTIONAL);
1376 		if (ret)
1377 			goto e_key;
1378 
1379 		/* Load the context into the LSB */
1380 		dm_offset = CCP_SB_BYTES - des3->iv_len;
1381 		ret = ccp_set_dm_area(&ctx, dm_offset, des3->iv, 0,
1382 				      des3->iv_len);
1383 		if (ret)
1384 			goto e_ctx;
1385 
1386 		ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1387 				     CCP_PASSTHRU_BYTESWAP_256BIT);
1388 		if (ret) {
1389 			cmd->engine_error = cmd_q->cmd_error;
1390 			goto e_ctx;
1391 		}
1392 	}
1393 
1394 	/*
1395 	 * Prepare the input and output data workareas. For in-place
1396 	 * operations we need to set the dma direction to BIDIRECTIONAL
1397 	 * and copy the src workarea to the dst workarea.
1398 	 */
1399 	if (sg_virt(des3->src) == sg_virt(des3->dst))
1400 		in_place = true;
1401 
1402 	ret = ccp_init_data(&src, cmd_q, des3->src, des3->src_len,
1403 			DES3_EDE_BLOCK_SIZE,
1404 			in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1405 	if (ret)
1406 		goto e_ctx;
1407 
1408 	if (in_place)
1409 		dst = src;
1410 	else {
1411 		ret = ccp_init_data(&dst, cmd_q, des3->dst, des3->src_len,
1412 				DES3_EDE_BLOCK_SIZE, DMA_FROM_DEVICE);
1413 		if (ret)
1414 			goto e_src;
1415 	}
1416 
1417 	/* Send data to the CCP DES3 engine */
1418 	while (src.sg_wa.bytes_left) {
1419 		ccp_prepare_data(&src, &dst, &op, DES3_EDE_BLOCK_SIZE, true);
1420 		if (!src.sg_wa.bytes_left) {
1421 			op.eom = 1;
1422 
1423 			/* Since we don't retrieve the context in ECB mode
1424 			 * we have to wait for the operation to complete
1425 			 * on the last piece of data
1426 			 */
1427 			op.soc = 0;
1428 		}
1429 
1430 		ret = cmd_q->ccp->vdata->perform->des3(&op);
1431 		if (ret) {
1432 			cmd->engine_error = cmd_q->cmd_error;
1433 			goto e_dst;
1434 		}
1435 
1436 		ccp_process_data(&src, &dst, &op);
1437 	}
1438 
1439 	if (des3->mode != CCP_DES3_MODE_ECB) {
1440 		/* Retrieve the context and make BE */
1441 		ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1442 				       CCP_PASSTHRU_BYTESWAP_256BIT);
1443 		if (ret) {
1444 			cmd->engine_error = cmd_q->cmd_error;
1445 			goto e_dst;
1446 		}
1447 
1448 		/* ...but we only need the last DES3_EDE_BLOCK_SIZE bytes */
1449 		ccp_get_dm_area(&ctx, dm_offset, des3->iv, 0,
1450 				DES3_EDE_BLOCK_SIZE);
1451 	}
1452 e_dst:
1453 	if (!in_place)
1454 		ccp_free_data(&dst, cmd_q);
1455 
1456 e_src:
1457 	ccp_free_data(&src, cmd_q);
1458 
1459 e_ctx:
1460 	if (des3->mode != CCP_DES3_MODE_ECB)
1461 		ccp_dm_free(&ctx);
1462 
1463 e_key:
1464 	ccp_dm_free(&key);
1465 
1466 	return ret;
1467 }
1468 
1469 static noinline_for_stack int
1470 ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1471 {
1472 	struct ccp_sha_engine *sha = &cmd->u.sha;
1473 	struct ccp_dm_workarea ctx;
1474 	struct ccp_data src;
1475 	struct ccp_op op;
1476 	unsigned int ioffset, ooffset;
1477 	unsigned int digest_size;
1478 	int sb_count;
1479 	const void *init;
1480 	u64 block_size;
1481 	int ctx_size;
1482 	int ret;
1483 
1484 	switch (sha->type) {
1485 	case CCP_SHA_TYPE_1:
1486 		if (sha->ctx_len < SHA1_DIGEST_SIZE)
1487 			return -EINVAL;
1488 		block_size = SHA1_BLOCK_SIZE;
1489 		break;
1490 	case CCP_SHA_TYPE_224:
1491 		if (sha->ctx_len < SHA224_DIGEST_SIZE)
1492 			return -EINVAL;
1493 		block_size = SHA224_BLOCK_SIZE;
1494 		break;
1495 	case CCP_SHA_TYPE_256:
1496 		if (sha->ctx_len < SHA256_DIGEST_SIZE)
1497 			return -EINVAL;
1498 		block_size = SHA256_BLOCK_SIZE;
1499 		break;
1500 	case CCP_SHA_TYPE_384:
1501 		if (cmd_q->ccp->vdata->version < CCP_VERSION(4, 0)
1502 		    || sha->ctx_len < SHA384_DIGEST_SIZE)
1503 			return -EINVAL;
1504 		block_size = SHA384_BLOCK_SIZE;
1505 		break;
1506 	case CCP_SHA_TYPE_512:
1507 		if (cmd_q->ccp->vdata->version < CCP_VERSION(4, 0)
1508 		    || sha->ctx_len < SHA512_DIGEST_SIZE)
1509 			return -EINVAL;
1510 		block_size = SHA512_BLOCK_SIZE;
1511 		break;
1512 	default:
1513 		return -EINVAL;
1514 	}
1515 
1516 	if (!sha->ctx)
1517 		return -EINVAL;
1518 
1519 	if (!sha->final && (sha->src_len & (block_size - 1)))
1520 		return -EINVAL;
1521 
1522 	/* The version 3 device can't handle zero-length input */
1523 	if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0)) {
1524 
1525 		if (!sha->src_len) {
1526 			unsigned int digest_len;
1527 			const u8 *sha_zero;
1528 
1529 			/* Not final, just return */
1530 			if (!sha->final)
1531 				return 0;
1532 
1533 			/* CCP can't do a zero length sha operation so the
1534 			 * caller must buffer the data.
1535 			 */
1536 			if (sha->msg_bits)
1537 				return -EINVAL;
1538 
1539 			/* The CCP cannot perform zero-length sha operations
1540 			 * so the caller is required to buffer data for the
1541 			 * final operation. However, a sha operation for a
1542 			 * message with a total length of zero is valid so
1543 			 * known values are required to supply the result.
1544 			 */
1545 			switch (sha->type) {
1546 			case CCP_SHA_TYPE_1:
1547 				sha_zero = sha1_zero_message_hash;
1548 				digest_len = SHA1_DIGEST_SIZE;
1549 				break;
1550 			case CCP_SHA_TYPE_224:
1551 				sha_zero = sha224_zero_message_hash;
1552 				digest_len = SHA224_DIGEST_SIZE;
1553 				break;
1554 			case CCP_SHA_TYPE_256:
1555 				sha_zero = sha256_zero_message_hash;
1556 				digest_len = SHA256_DIGEST_SIZE;
1557 				break;
1558 			default:
1559 				return -EINVAL;
1560 			}
1561 
1562 			scatterwalk_map_and_copy((void *)sha_zero, sha->ctx, 0,
1563 						 digest_len, 1);
1564 
1565 			return 0;
1566 		}
1567 	}
1568 
1569 	/* Set variables used throughout */
1570 	switch (sha->type) {
1571 	case CCP_SHA_TYPE_1:
1572 		digest_size = SHA1_DIGEST_SIZE;
1573 		init = (void *) ccp_sha1_init;
1574 		ctx_size = SHA1_DIGEST_SIZE;
1575 		sb_count = 1;
1576 		if (cmd_q->ccp->vdata->version != CCP_VERSION(3, 0))
1577 			ooffset = ioffset = CCP_SB_BYTES - SHA1_DIGEST_SIZE;
1578 		else
1579 			ooffset = ioffset = 0;
1580 		break;
1581 	case CCP_SHA_TYPE_224:
1582 		digest_size = SHA224_DIGEST_SIZE;
1583 		init = (void *) ccp_sha224_init;
1584 		ctx_size = SHA256_DIGEST_SIZE;
1585 		sb_count = 1;
1586 		ioffset = 0;
1587 		if (cmd_q->ccp->vdata->version != CCP_VERSION(3, 0))
1588 			ooffset = CCP_SB_BYTES - SHA224_DIGEST_SIZE;
1589 		else
1590 			ooffset = 0;
1591 		break;
1592 	case CCP_SHA_TYPE_256:
1593 		digest_size = SHA256_DIGEST_SIZE;
1594 		init = (void *) ccp_sha256_init;
1595 		ctx_size = SHA256_DIGEST_SIZE;
1596 		sb_count = 1;
1597 		ooffset = ioffset = 0;
1598 		break;
1599 	case CCP_SHA_TYPE_384:
1600 		digest_size = SHA384_DIGEST_SIZE;
1601 		init = (void *) ccp_sha384_init;
1602 		ctx_size = SHA512_DIGEST_SIZE;
1603 		sb_count = 2;
1604 		ioffset = 0;
1605 		ooffset = 2 * CCP_SB_BYTES - SHA384_DIGEST_SIZE;
1606 		break;
1607 	case CCP_SHA_TYPE_512:
1608 		digest_size = SHA512_DIGEST_SIZE;
1609 		init = (void *) ccp_sha512_init;
1610 		ctx_size = SHA512_DIGEST_SIZE;
1611 		sb_count = 2;
1612 		ooffset = ioffset = 0;
1613 		break;
1614 	default:
1615 		ret = -EINVAL;
1616 		goto e_data;
1617 	}
1618 
1619 	/* For zero-length plaintext the src pointer is ignored;
1620 	 * otherwise both parts must be valid
1621 	 */
1622 	if (sha->src_len && !sha->src)
1623 		return -EINVAL;
1624 
1625 	memset(&op, 0, sizeof(op));
1626 	op.cmd_q = cmd_q;
1627 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
1628 	op.sb_ctx = cmd_q->sb_ctx; /* Pre-allocated */
1629 	op.u.sha.type = sha->type;
1630 	op.u.sha.msg_bits = sha->msg_bits;
1631 
1632 	/* For SHA1/224/256 the context fits in a single (32-byte) SB entry;
1633 	 * SHA384/512 require 2 adjacent SB slots, with the right half in the
1634 	 * first slot, and the left half in the second. Each portion must then
1635 	 * be in little endian format: use the 256-bit byte swap option.
1636 	 */
1637 	ret = ccp_init_dm_workarea(&ctx, cmd_q, sb_count * CCP_SB_BYTES,
1638 				   DMA_BIDIRECTIONAL);
1639 	if (ret)
1640 		return ret;
1641 	if (sha->first) {
1642 		switch (sha->type) {
1643 		case CCP_SHA_TYPE_1:
1644 		case CCP_SHA_TYPE_224:
1645 		case CCP_SHA_TYPE_256:
1646 			memcpy(ctx.address + ioffset, init, ctx_size);
1647 			break;
1648 		case CCP_SHA_TYPE_384:
1649 		case CCP_SHA_TYPE_512:
1650 			memcpy(ctx.address + ctx_size / 2, init,
1651 			       ctx_size / 2);
1652 			memcpy(ctx.address, init + ctx_size / 2,
1653 			       ctx_size / 2);
1654 			break;
1655 		default:
1656 			ret = -EINVAL;
1657 			goto e_ctx;
1658 		}
1659 	} else {
1660 		/* Restore the context */
1661 		ret = ccp_set_dm_area(&ctx, 0, sha->ctx, 0,
1662 				      sb_count * CCP_SB_BYTES);
1663 		if (ret)
1664 			goto e_ctx;
1665 	}
1666 
1667 	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1668 			     CCP_PASSTHRU_BYTESWAP_256BIT);
1669 	if (ret) {
1670 		cmd->engine_error = cmd_q->cmd_error;
1671 		goto e_ctx;
1672 	}
1673 
1674 	if (sha->src) {
1675 		/* Send data to the CCP SHA engine; block_size is set above */
1676 		ret = ccp_init_data(&src, cmd_q, sha->src, sha->src_len,
1677 				    block_size, DMA_TO_DEVICE);
1678 		if (ret)
1679 			goto e_ctx;
1680 
1681 		while (src.sg_wa.bytes_left) {
1682 			ccp_prepare_data(&src, NULL, &op, block_size, false);
1683 			if (sha->final && !src.sg_wa.bytes_left)
1684 				op.eom = 1;
1685 
1686 			ret = cmd_q->ccp->vdata->perform->sha(&op);
1687 			if (ret) {
1688 				cmd->engine_error = cmd_q->cmd_error;
1689 				goto e_data;
1690 			}
1691 
1692 			ccp_process_data(&src, NULL, &op);
1693 		}
1694 	} else {
1695 		op.eom = 1;
1696 		ret = cmd_q->ccp->vdata->perform->sha(&op);
1697 		if (ret) {
1698 			cmd->engine_error = cmd_q->cmd_error;
1699 			goto e_data;
1700 		}
1701 	}
1702 
1703 	/* Retrieve the SHA context - convert from LE to BE using
1704 	 * 32-byte (256-bit) byteswapping to BE
1705 	 */
1706 	ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1707 			       CCP_PASSTHRU_BYTESWAP_256BIT);
1708 	if (ret) {
1709 		cmd->engine_error = cmd_q->cmd_error;
1710 		goto e_data;
1711 	}
1712 
1713 	if (sha->final) {
1714 		/* Finishing up, so get the digest */
1715 		switch (sha->type) {
1716 		case CCP_SHA_TYPE_1:
1717 		case CCP_SHA_TYPE_224:
1718 		case CCP_SHA_TYPE_256:
1719 			ccp_get_dm_area(&ctx, ooffset,
1720 					sha->ctx, 0,
1721 					digest_size);
1722 			break;
1723 		case CCP_SHA_TYPE_384:
1724 		case CCP_SHA_TYPE_512:
1725 			ccp_get_dm_area(&ctx, 0,
1726 					sha->ctx, LSB_ITEM_SIZE - ooffset,
1727 					LSB_ITEM_SIZE);
1728 			ccp_get_dm_area(&ctx, LSB_ITEM_SIZE + ooffset,
1729 					sha->ctx, 0,
1730 					LSB_ITEM_SIZE - ooffset);
1731 			break;
1732 		default:
1733 			ret = -EINVAL;
1734 			goto e_ctx;
1735 		}
1736 	} else {
1737 		/* Stash the context */
1738 		ccp_get_dm_area(&ctx, 0, sha->ctx, 0,
1739 				sb_count * CCP_SB_BYTES);
1740 	}
1741 
1742 	if (sha->final && sha->opad) {
1743 		/* HMAC operation, recursively perform final SHA */
1744 		struct ccp_cmd hmac_cmd;
1745 		struct scatterlist sg;
1746 		u8 *hmac_buf;
1747 
1748 		if (sha->opad_len != block_size) {
1749 			ret = -EINVAL;
1750 			goto e_data;
1751 		}
1752 
1753 		hmac_buf = kmalloc(block_size + digest_size, GFP_KERNEL);
1754 		if (!hmac_buf) {
1755 			ret = -ENOMEM;
1756 			goto e_data;
1757 		}
1758 		sg_init_one(&sg, hmac_buf, block_size + digest_size);
1759 
1760 		scatterwalk_map_and_copy(hmac_buf, sha->opad, 0, block_size, 0);
1761 		switch (sha->type) {
1762 		case CCP_SHA_TYPE_1:
1763 		case CCP_SHA_TYPE_224:
1764 		case CCP_SHA_TYPE_256:
1765 			memcpy(hmac_buf + block_size,
1766 			       ctx.address + ooffset,
1767 			       digest_size);
1768 			break;
1769 		case CCP_SHA_TYPE_384:
1770 		case CCP_SHA_TYPE_512:
1771 			memcpy(hmac_buf + block_size,
1772 			       ctx.address + LSB_ITEM_SIZE + ooffset,
1773 			       LSB_ITEM_SIZE);
1774 			memcpy(hmac_buf + block_size +
1775 			       (LSB_ITEM_SIZE - ooffset),
1776 			       ctx.address,
1777 			       LSB_ITEM_SIZE);
1778 			break;
1779 		default:
1780 			kfree(hmac_buf);
1781 			ret = -EINVAL;
1782 			goto e_data;
1783 		}
1784 
1785 		memset(&hmac_cmd, 0, sizeof(hmac_cmd));
1786 		hmac_cmd.engine = CCP_ENGINE_SHA;
1787 		hmac_cmd.u.sha.type = sha->type;
1788 		hmac_cmd.u.sha.ctx = sha->ctx;
1789 		hmac_cmd.u.sha.ctx_len = sha->ctx_len;
1790 		hmac_cmd.u.sha.src = &sg;
1791 		hmac_cmd.u.sha.src_len = block_size + digest_size;
1792 		hmac_cmd.u.sha.opad = NULL;
1793 		hmac_cmd.u.sha.opad_len = 0;
1794 		hmac_cmd.u.sha.first = 1;
1795 		hmac_cmd.u.sha.final = 1;
1796 		hmac_cmd.u.sha.msg_bits = (block_size + digest_size) << 3;
1797 
1798 		ret = ccp_run_sha_cmd(cmd_q, &hmac_cmd);
1799 		if (ret)
1800 			cmd->engine_error = hmac_cmd.engine_error;
1801 
1802 		kfree(hmac_buf);
1803 	}
1804 
1805 e_data:
1806 	if (sha->src)
1807 		ccp_free_data(&src, cmd_q);
1808 
1809 e_ctx:
1810 	ccp_dm_free(&ctx);
1811 
1812 	return ret;
1813 }
1814 
1815 static noinline_for_stack int
1816 ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1817 {
1818 	struct ccp_rsa_engine *rsa = &cmd->u.rsa;
1819 	struct ccp_dm_workarea exp, src, dst;
1820 	struct ccp_op op;
1821 	unsigned int sb_count, i_len, o_len;
1822 	int ret;
1823 
1824 	/* Check against the maximum allowable size, in bits */
1825 	if (rsa->key_size > cmd_q->ccp->vdata->rsamax)
1826 		return -EINVAL;
1827 
1828 	if (!rsa->exp || !rsa->mod || !rsa->src || !rsa->dst)
1829 		return -EINVAL;
1830 
1831 	memset(&op, 0, sizeof(op));
1832 	op.cmd_q = cmd_q;
1833 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
1834 
1835 	/* The RSA modulus must precede the message being acted upon, so
1836 	 * it must be copied to a DMA area where the message and the
1837 	 * modulus can be concatenated.  Therefore the input buffer
1838 	 * length required is twice the output buffer length (which
1839 	 * must be a multiple of 256-bits).  Compute o_len, i_len in bytes.
1840 	 * Buffer sizes must be a multiple of 32 bytes; rounding up may be
1841 	 * required.
1842 	 */
1843 	o_len = 32 * ((rsa->key_size + 255) / 256);
1844 	i_len = o_len * 2;
1845 
1846 	sb_count = 0;
1847 	if (cmd_q->ccp->vdata->version < CCP_VERSION(5, 0)) {
1848 		/* sb_count is the number of storage block slots required
1849 		 * for the modulus.
1850 		 */
1851 		sb_count = o_len / CCP_SB_BYTES;
1852 		op.sb_key = cmd_q->ccp->vdata->perform->sballoc(cmd_q,
1853 								sb_count);
1854 		if (!op.sb_key)
1855 			return -EIO;
1856 	} else {
1857 		/* A version 5 device allows a modulus size that will not fit
1858 		 * in the LSB, so the command will transfer it from memory.
1859 		 * Set the sb key to the default, even though it's not used.
1860 		 */
1861 		op.sb_key = cmd_q->sb_key;
1862 	}
1863 
1864 	/* The RSA exponent must be in little endian format. Reverse its
1865 	 * byte order.
1866 	 */
1867 	ret = ccp_init_dm_workarea(&exp, cmd_q, o_len, DMA_TO_DEVICE);
1868 	if (ret)
1869 		goto e_sb;
1870 
1871 	ret = ccp_reverse_set_dm_area(&exp, 0, rsa->exp, 0, rsa->exp_len);
1872 	if (ret)
1873 		goto e_exp;
1874 
1875 	if (cmd_q->ccp->vdata->version < CCP_VERSION(5, 0)) {
1876 		/* Copy the exponent to the local storage block, using
1877 		 * as many 32-byte blocks as were allocated above. It's
1878 		 * already little endian, so no further change is required.
1879 		 */
1880 		ret = ccp_copy_to_sb(cmd_q, &exp, op.jobid, op.sb_key,
1881 				     CCP_PASSTHRU_BYTESWAP_NOOP);
1882 		if (ret) {
1883 			cmd->engine_error = cmd_q->cmd_error;
1884 			goto e_exp;
1885 		}
1886 	} else {
1887 		/* The exponent can be retrieved from memory via DMA. */
1888 		op.exp.u.dma.address = exp.dma.address;
1889 		op.exp.u.dma.offset = 0;
1890 	}
1891 
1892 	/* Concatenate the modulus and the message. Both the modulus and
1893 	 * the operands must be in little endian format.  Since the input
1894 	 * is in big endian format it must be converted.
1895 	 */
1896 	ret = ccp_init_dm_workarea(&src, cmd_q, i_len, DMA_TO_DEVICE);
1897 	if (ret)
1898 		goto e_exp;
1899 
1900 	ret = ccp_reverse_set_dm_area(&src, 0, rsa->mod, 0, rsa->mod_len);
1901 	if (ret)
1902 		goto e_src;
1903 	ret = ccp_reverse_set_dm_area(&src, o_len, rsa->src, 0, rsa->src_len);
1904 	if (ret)
1905 		goto e_src;
1906 
1907 	/* Prepare the output area for the operation */
1908 	ret = ccp_init_dm_workarea(&dst, cmd_q, o_len, DMA_FROM_DEVICE);
1909 	if (ret)
1910 		goto e_src;
1911 
1912 	op.soc = 1;
1913 	op.src.u.dma.address = src.dma.address;
1914 	op.src.u.dma.offset = 0;
1915 	op.src.u.dma.length = i_len;
1916 	op.dst.u.dma.address = dst.dma.address;
1917 	op.dst.u.dma.offset = 0;
1918 	op.dst.u.dma.length = o_len;
1919 
1920 	op.u.rsa.mod_size = rsa->key_size;
1921 	op.u.rsa.input_len = i_len;
1922 
1923 	ret = cmd_q->ccp->vdata->perform->rsa(&op);
1924 	if (ret) {
1925 		cmd->engine_error = cmd_q->cmd_error;
1926 		goto e_dst;
1927 	}
1928 
1929 	ccp_reverse_get_dm_area(&dst, 0, rsa->dst, 0, rsa->mod_len);
1930 
1931 e_dst:
1932 	ccp_dm_free(&dst);
1933 
1934 e_src:
1935 	ccp_dm_free(&src);
1936 
1937 e_exp:
1938 	ccp_dm_free(&exp);
1939 
1940 e_sb:
1941 	if (sb_count)
1942 		cmd_q->ccp->vdata->perform->sbfree(cmd_q, op.sb_key, sb_count);
1943 
1944 	return ret;
1945 }
1946 
1947 static noinline_for_stack int
1948 ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1949 {
1950 	struct ccp_passthru_engine *pt = &cmd->u.passthru;
1951 	struct ccp_dm_workarea mask;
1952 	struct ccp_data src, dst;
1953 	struct ccp_op op;
1954 	bool in_place = false;
1955 	unsigned int i;
1956 	int ret = 0;
1957 
1958 	if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1)))
1959 		return -EINVAL;
1960 
1961 	if (!pt->src || !pt->dst)
1962 		return -EINVAL;
1963 
1964 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
1965 		if (pt->mask_len != CCP_PASSTHRU_MASKSIZE)
1966 			return -EINVAL;
1967 		if (!pt->mask)
1968 			return -EINVAL;
1969 	}
1970 
1971 	BUILD_BUG_ON(CCP_PASSTHRU_SB_COUNT != 1);
1972 
1973 	memset(&op, 0, sizeof(op));
1974 	op.cmd_q = cmd_q;
1975 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
1976 
1977 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
1978 		/* Load the mask */
1979 		op.sb_key = cmd_q->sb_key;
1980 
1981 		ret = ccp_init_dm_workarea(&mask, cmd_q,
1982 					   CCP_PASSTHRU_SB_COUNT *
1983 					   CCP_SB_BYTES,
1984 					   DMA_TO_DEVICE);
1985 		if (ret)
1986 			return ret;
1987 
1988 		ret = ccp_set_dm_area(&mask, 0, pt->mask, 0, pt->mask_len);
1989 		if (ret)
1990 			goto e_mask;
1991 		ret = ccp_copy_to_sb(cmd_q, &mask, op.jobid, op.sb_key,
1992 				     CCP_PASSTHRU_BYTESWAP_NOOP);
1993 		if (ret) {
1994 			cmd->engine_error = cmd_q->cmd_error;
1995 			goto e_mask;
1996 		}
1997 	}
1998 
1999 	/* Prepare the input and output data workareas. For in-place
2000 	 * operations we need to set the dma direction to BIDIRECTIONAL
2001 	 * and copy the src workarea to the dst workarea.
2002 	 */
2003 	if (sg_virt(pt->src) == sg_virt(pt->dst))
2004 		in_place = true;
2005 
2006 	ret = ccp_init_data(&src, cmd_q, pt->src, pt->src_len,
2007 			    CCP_PASSTHRU_MASKSIZE,
2008 			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
2009 	if (ret)
2010 		goto e_mask;
2011 
2012 	if (in_place) {
2013 		dst = src;
2014 	} else {
2015 		ret = ccp_init_data(&dst, cmd_q, pt->dst, pt->src_len,
2016 				    CCP_PASSTHRU_MASKSIZE, DMA_FROM_DEVICE);
2017 		if (ret)
2018 			goto e_src;
2019 	}
2020 
2021 	/* Send data to the CCP Passthru engine
2022 	 *   Because the CCP engine works on a single source and destination
2023 	 *   dma address at a time, each entry in the source scatterlist
2024 	 *   (after the dma_map_sg call) must be less than or equal to the
2025 	 *   (remaining) length in the destination scatterlist entry and the
2026 	 *   length must be a multiple of CCP_PASSTHRU_BLOCKSIZE
2027 	 */
2028 	dst.sg_wa.sg_used = 0;
2029 	for (i = 1; i <= src.sg_wa.dma_count; i++) {
2030 		if (!dst.sg_wa.sg ||
2031 		    (dst.sg_wa.sg->length < src.sg_wa.sg->length)) {
2032 			ret = -EINVAL;
2033 			goto e_dst;
2034 		}
2035 
2036 		if (i == src.sg_wa.dma_count) {
2037 			op.eom = 1;
2038 			op.soc = 1;
2039 		}
2040 
2041 		op.src.type = CCP_MEMTYPE_SYSTEM;
2042 		op.src.u.dma.address = sg_dma_address(src.sg_wa.sg);
2043 		op.src.u.dma.offset = 0;
2044 		op.src.u.dma.length = sg_dma_len(src.sg_wa.sg);
2045 
2046 		op.dst.type = CCP_MEMTYPE_SYSTEM;
2047 		op.dst.u.dma.address = sg_dma_address(dst.sg_wa.sg);
2048 		op.dst.u.dma.offset = dst.sg_wa.sg_used;
2049 		op.dst.u.dma.length = op.src.u.dma.length;
2050 
2051 		ret = cmd_q->ccp->vdata->perform->passthru(&op);
2052 		if (ret) {
2053 			cmd->engine_error = cmd_q->cmd_error;
2054 			goto e_dst;
2055 		}
2056 
2057 		dst.sg_wa.sg_used += src.sg_wa.sg->length;
2058 		if (dst.sg_wa.sg_used == dst.sg_wa.sg->length) {
2059 			dst.sg_wa.sg = sg_next(dst.sg_wa.sg);
2060 			dst.sg_wa.sg_used = 0;
2061 		}
2062 		src.sg_wa.sg = sg_next(src.sg_wa.sg);
2063 	}
2064 
2065 e_dst:
2066 	if (!in_place)
2067 		ccp_free_data(&dst, cmd_q);
2068 
2069 e_src:
2070 	ccp_free_data(&src, cmd_q);
2071 
2072 e_mask:
2073 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
2074 		ccp_dm_free(&mask);
2075 
2076 	return ret;
2077 }
2078 
2079 static noinline_for_stack int
2080 ccp_run_passthru_nomap_cmd(struct ccp_cmd_queue *cmd_q,
2081 				      struct ccp_cmd *cmd)
2082 {
2083 	struct ccp_passthru_nomap_engine *pt = &cmd->u.passthru_nomap;
2084 	struct ccp_dm_workarea mask;
2085 	struct ccp_op op;
2086 	int ret;
2087 
2088 	if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1)))
2089 		return -EINVAL;
2090 
2091 	if (!pt->src_dma || !pt->dst_dma)
2092 		return -EINVAL;
2093 
2094 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
2095 		if (pt->mask_len != CCP_PASSTHRU_MASKSIZE)
2096 			return -EINVAL;
2097 		if (!pt->mask)
2098 			return -EINVAL;
2099 	}
2100 
2101 	BUILD_BUG_ON(CCP_PASSTHRU_SB_COUNT != 1);
2102 
2103 	memset(&op, 0, sizeof(op));
2104 	op.cmd_q = cmd_q;
2105 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
2106 
2107 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
2108 		/* Load the mask */
2109 		op.sb_key = cmd_q->sb_key;
2110 
2111 		mask.length = pt->mask_len;
2112 		mask.dma.address = pt->mask;
2113 		mask.dma.length = pt->mask_len;
2114 
2115 		ret = ccp_copy_to_sb(cmd_q, &mask, op.jobid, op.sb_key,
2116 				     CCP_PASSTHRU_BYTESWAP_NOOP);
2117 		if (ret) {
2118 			cmd->engine_error = cmd_q->cmd_error;
2119 			return ret;
2120 		}
2121 	}
2122 
2123 	/* Send data to the CCP Passthru engine */
2124 	op.eom = 1;
2125 	op.soc = 1;
2126 
2127 	op.src.type = CCP_MEMTYPE_SYSTEM;
2128 	op.src.u.dma.address = pt->src_dma;
2129 	op.src.u.dma.offset = 0;
2130 	op.src.u.dma.length = pt->src_len;
2131 
2132 	op.dst.type = CCP_MEMTYPE_SYSTEM;
2133 	op.dst.u.dma.address = pt->dst_dma;
2134 	op.dst.u.dma.offset = 0;
2135 	op.dst.u.dma.length = pt->src_len;
2136 
2137 	ret = cmd_q->ccp->vdata->perform->passthru(&op);
2138 	if (ret)
2139 		cmd->engine_error = cmd_q->cmd_error;
2140 
2141 	return ret;
2142 }
2143 
2144 static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
2145 {
2146 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
2147 	struct ccp_dm_workarea src, dst;
2148 	struct ccp_op op;
2149 	int ret;
2150 	u8 *save;
2151 
2152 	if (!ecc->u.mm.operand_1 ||
2153 	    (ecc->u.mm.operand_1_len > CCP_ECC_MODULUS_BYTES))
2154 		return -EINVAL;
2155 
2156 	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT)
2157 		if (!ecc->u.mm.operand_2 ||
2158 		    (ecc->u.mm.operand_2_len > CCP_ECC_MODULUS_BYTES))
2159 			return -EINVAL;
2160 
2161 	if (!ecc->u.mm.result ||
2162 	    (ecc->u.mm.result_len < CCP_ECC_MODULUS_BYTES))
2163 		return -EINVAL;
2164 
2165 	memset(&op, 0, sizeof(op));
2166 	op.cmd_q = cmd_q;
2167 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
2168 
2169 	/* Concatenate the modulus and the operands. Both the modulus and
2170 	 * the operands must be in little endian format.  Since the input
2171 	 * is in big endian format it must be converted and placed in a
2172 	 * fixed length buffer.
2173 	 */
2174 	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
2175 				   DMA_TO_DEVICE);
2176 	if (ret)
2177 		return ret;
2178 
2179 	/* Save the workarea address since it is updated in order to perform
2180 	 * the concatenation
2181 	 */
2182 	save = src.address;
2183 
2184 	/* Copy the ECC modulus */
2185 	ret = ccp_reverse_set_dm_area(&src, 0, ecc->mod, 0, ecc->mod_len);
2186 	if (ret)
2187 		goto e_src;
2188 	src.address += CCP_ECC_OPERAND_SIZE;
2189 
2190 	/* Copy the first operand */
2191 	ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.mm.operand_1, 0,
2192 				      ecc->u.mm.operand_1_len);
2193 	if (ret)
2194 		goto e_src;
2195 	src.address += CCP_ECC_OPERAND_SIZE;
2196 
2197 	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT) {
2198 		/* Copy the second operand */
2199 		ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.mm.operand_2, 0,
2200 					      ecc->u.mm.operand_2_len);
2201 		if (ret)
2202 			goto e_src;
2203 		src.address += CCP_ECC_OPERAND_SIZE;
2204 	}
2205 
2206 	/* Restore the workarea address */
2207 	src.address = save;
2208 
2209 	/* Prepare the output area for the operation */
2210 	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
2211 				   DMA_FROM_DEVICE);
2212 	if (ret)
2213 		goto e_src;
2214 
2215 	op.soc = 1;
2216 	op.src.u.dma.address = src.dma.address;
2217 	op.src.u.dma.offset = 0;
2218 	op.src.u.dma.length = src.length;
2219 	op.dst.u.dma.address = dst.dma.address;
2220 	op.dst.u.dma.offset = 0;
2221 	op.dst.u.dma.length = dst.length;
2222 
2223 	op.u.ecc.function = cmd->u.ecc.function;
2224 
2225 	ret = cmd_q->ccp->vdata->perform->ecc(&op);
2226 	if (ret) {
2227 		cmd->engine_error = cmd_q->cmd_error;
2228 		goto e_dst;
2229 	}
2230 
2231 	ecc->ecc_result = le16_to_cpup(
2232 		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
2233 	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
2234 		ret = -EIO;
2235 		goto e_dst;
2236 	}
2237 
2238 	/* Save the ECC result */
2239 	ccp_reverse_get_dm_area(&dst, 0, ecc->u.mm.result, 0,
2240 				CCP_ECC_MODULUS_BYTES);
2241 
2242 e_dst:
2243 	ccp_dm_free(&dst);
2244 
2245 e_src:
2246 	ccp_dm_free(&src);
2247 
2248 	return ret;
2249 }
2250 
2251 static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
2252 {
2253 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
2254 	struct ccp_dm_workarea src, dst;
2255 	struct ccp_op op;
2256 	int ret;
2257 	u8 *save;
2258 
2259 	if (!ecc->u.pm.point_1.x ||
2260 	    (ecc->u.pm.point_1.x_len > CCP_ECC_MODULUS_BYTES) ||
2261 	    !ecc->u.pm.point_1.y ||
2262 	    (ecc->u.pm.point_1.y_len > CCP_ECC_MODULUS_BYTES))
2263 		return -EINVAL;
2264 
2265 	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
2266 		if (!ecc->u.pm.point_2.x ||
2267 		    (ecc->u.pm.point_2.x_len > CCP_ECC_MODULUS_BYTES) ||
2268 		    !ecc->u.pm.point_2.y ||
2269 		    (ecc->u.pm.point_2.y_len > CCP_ECC_MODULUS_BYTES))
2270 			return -EINVAL;
2271 	} else {
2272 		if (!ecc->u.pm.domain_a ||
2273 		    (ecc->u.pm.domain_a_len > CCP_ECC_MODULUS_BYTES))
2274 			return -EINVAL;
2275 
2276 		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT)
2277 			if (!ecc->u.pm.scalar ||
2278 			    (ecc->u.pm.scalar_len > CCP_ECC_MODULUS_BYTES))
2279 				return -EINVAL;
2280 	}
2281 
2282 	if (!ecc->u.pm.result.x ||
2283 	    (ecc->u.pm.result.x_len < CCP_ECC_MODULUS_BYTES) ||
2284 	    !ecc->u.pm.result.y ||
2285 	    (ecc->u.pm.result.y_len < CCP_ECC_MODULUS_BYTES))
2286 		return -EINVAL;
2287 
2288 	memset(&op, 0, sizeof(op));
2289 	op.cmd_q = cmd_q;
2290 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
2291 
2292 	/* Concatenate the modulus and the operands. Both the modulus and
2293 	 * the operands must be in little endian format.  Since the input
2294 	 * is in big endian format it must be converted and placed in a
2295 	 * fixed length buffer.
2296 	 */
2297 	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
2298 				   DMA_TO_DEVICE);
2299 	if (ret)
2300 		return ret;
2301 
2302 	/* Save the workarea address since it is updated in order to perform
2303 	 * the concatenation
2304 	 */
2305 	save = src.address;
2306 
2307 	/* Copy the ECC modulus */
2308 	ret = ccp_reverse_set_dm_area(&src, 0, ecc->mod, 0, ecc->mod_len);
2309 	if (ret)
2310 		goto e_src;
2311 	src.address += CCP_ECC_OPERAND_SIZE;
2312 
2313 	/* Copy the first point X and Y coordinate */
2314 	ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_1.x, 0,
2315 				      ecc->u.pm.point_1.x_len);
2316 	if (ret)
2317 		goto e_src;
2318 	src.address += CCP_ECC_OPERAND_SIZE;
2319 	ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_1.y, 0,
2320 				      ecc->u.pm.point_1.y_len);
2321 	if (ret)
2322 		goto e_src;
2323 	src.address += CCP_ECC_OPERAND_SIZE;
2324 
2325 	/* Set the first point Z coordinate to 1 */
2326 	*src.address = 0x01;
2327 	src.address += CCP_ECC_OPERAND_SIZE;
2328 
2329 	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
2330 		/* Copy the second point X and Y coordinate */
2331 		ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_2.x, 0,
2332 					      ecc->u.pm.point_2.x_len);
2333 		if (ret)
2334 			goto e_src;
2335 		src.address += CCP_ECC_OPERAND_SIZE;
2336 		ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_2.y, 0,
2337 					      ecc->u.pm.point_2.y_len);
2338 		if (ret)
2339 			goto e_src;
2340 		src.address += CCP_ECC_OPERAND_SIZE;
2341 
2342 		/* Set the second point Z coordinate to 1 */
2343 		*src.address = 0x01;
2344 		src.address += CCP_ECC_OPERAND_SIZE;
2345 	} else {
2346 		/* Copy the Domain "a" parameter */
2347 		ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.domain_a, 0,
2348 					      ecc->u.pm.domain_a_len);
2349 		if (ret)
2350 			goto e_src;
2351 		src.address += CCP_ECC_OPERAND_SIZE;
2352 
2353 		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT) {
2354 			/* Copy the scalar value */
2355 			ret = ccp_reverse_set_dm_area(&src, 0,
2356 						      ecc->u.pm.scalar, 0,
2357 						      ecc->u.pm.scalar_len);
2358 			if (ret)
2359 				goto e_src;
2360 			src.address += CCP_ECC_OPERAND_SIZE;
2361 		}
2362 	}
2363 
2364 	/* Restore the workarea address */
2365 	src.address = save;
2366 
2367 	/* Prepare the output area for the operation */
2368 	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
2369 				   DMA_FROM_DEVICE);
2370 	if (ret)
2371 		goto e_src;
2372 
2373 	op.soc = 1;
2374 	op.src.u.dma.address = src.dma.address;
2375 	op.src.u.dma.offset = 0;
2376 	op.src.u.dma.length = src.length;
2377 	op.dst.u.dma.address = dst.dma.address;
2378 	op.dst.u.dma.offset = 0;
2379 	op.dst.u.dma.length = dst.length;
2380 
2381 	op.u.ecc.function = cmd->u.ecc.function;
2382 
2383 	ret = cmd_q->ccp->vdata->perform->ecc(&op);
2384 	if (ret) {
2385 		cmd->engine_error = cmd_q->cmd_error;
2386 		goto e_dst;
2387 	}
2388 
2389 	ecc->ecc_result = le16_to_cpup(
2390 		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
2391 	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
2392 		ret = -EIO;
2393 		goto e_dst;
2394 	}
2395 
2396 	/* Save the workarea address since it is updated as we walk through
2397 	 * to copy the point math result
2398 	 */
2399 	save = dst.address;
2400 
2401 	/* Save the ECC result X and Y coordinates */
2402 	ccp_reverse_get_dm_area(&dst, 0, ecc->u.pm.result.x, 0,
2403 				CCP_ECC_MODULUS_BYTES);
2404 	dst.address += CCP_ECC_OUTPUT_SIZE;
2405 	ccp_reverse_get_dm_area(&dst, 0, ecc->u.pm.result.y, 0,
2406 				CCP_ECC_MODULUS_BYTES);
2407 	dst.address += CCP_ECC_OUTPUT_SIZE;
2408 
2409 	/* Restore the workarea address */
2410 	dst.address = save;
2411 
2412 e_dst:
2413 	ccp_dm_free(&dst);
2414 
2415 e_src:
2416 	ccp_dm_free(&src);
2417 
2418 	return ret;
2419 }
2420 
2421 static noinline_for_stack int
2422 ccp_run_ecc_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
2423 {
2424 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
2425 
2426 	ecc->ecc_result = 0;
2427 
2428 	if (!ecc->mod ||
2429 	    (ecc->mod_len > CCP_ECC_MODULUS_BYTES))
2430 		return -EINVAL;
2431 
2432 	switch (ecc->function) {
2433 	case CCP_ECC_FUNCTION_MMUL_384BIT:
2434 	case CCP_ECC_FUNCTION_MADD_384BIT:
2435 	case CCP_ECC_FUNCTION_MINV_384BIT:
2436 		return ccp_run_ecc_mm_cmd(cmd_q, cmd);
2437 
2438 	case CCP_ECC_FUNCTION_PADD_384BIT:
2439 	case CCP_ECC_FUNCTION_PMUL_384BIT:
2440 	case CCP_ECC_FUNCTION_PDBL_384BIT:
2441 		return ccp_run_ecc_pm_cmd(cmd_q, cmd);
2442 
2443 	default:
2444 		return -EINVAL;
2445 	}
2446 }
2447 
2448 int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
2449 {
2450 	int ret;
2451 
2452 	cmd->engine_error = 0;
2453 	cmd_q->cmd_error = 0;
2454 	cmd_q->int_rcvd = 0;
2455 	cmd_q->free_slots = cmd_q->ccp->vdata->perform->get_free_slots(cmd_q);
2456 
2457 	switch (cmd->engine) {
2458 	case CCP_ENGINE_AES:
2459 		switch (cmd->u.aes.mode) {
2460 		case CCP_AES_MODE_CMAC:
2461 			ret = ccp_run_aes_cmac_cmd(cmd_q, cmd);
2462 			break;
2463 		case CCP_AES_MODE_GCM:
2464 			ret = ccp_run_aes_gcm_cmd(cmd_q, cmd);
2465 			break;
2466 		default:
2467 			ret = ccp_run_aes_cmd(cmd_q, cmd);
2468 			break;
2469 		}
2470 		break;
2471 	case CCP_ENGINE_XTS_AES_128:
2472 		ret = ccp_run_xts_aes_cmd(cmd_q, cmd);
2473 		break;
2474 	case CCP_ENGINE_DES3:
2475 		ret = ccp_run_des3_cmd(cmd_q, cmd);
2476 		break;
2477 	case CCP_ENGINE_SHA:
2478 		ret = ccp_run_sha_cmd(cmd_q, cmd);
2479 		break;
2480 	case CCP_ENGINE_RSA:
2481 		ret = ccp_run_rsa_cmd(cmd_q, cmd);
2482 		break;
2483 	case CCP_ENGINE_PASSTHRU:
2484 		if (cmd->flags & CCP_CMD_PASSTHRU_NO_DMA_MAP)
2485 			ret = ccp_run_passthru_nomap_cmd(cmd_q, cmd);
2486 		else
2487 			ret = ccp_run_passthru_cmd(cmd_q, cmd);
2488 		break;
2489 	case CCP_ENGINE_ECC:
2490 		ret = ccp_run_ecc_cmd(cmd_q, cmd);
2491 		break;
2492 	default:
2493 		ret = -EINVAL;
2494 	}
2495 
2496 	return ret;
2497 }
2498