xref: /openbmc/linux/drivers/crypto/ccp/ccp-ops.c (revision bfb5eb08)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * AMD Cryptographic Coprocessor (CCP) driver
4  *
5  * Copyright (C) 2013-2019 Advanced Micro Devices, Inc.
6  *
7  * Author: Tom Lendacky <thomas.lendacky@amd.com>
8  * Author: Gary R Hook <gary.hook@amd.com>
9  */
10 
11 #include <linux/module.h>
12 #include <linux/kernel.h>
13 #include <linux/pci.h>
14 #include <linux/interrupt.h>
15 #include <crypto/scatterwalk.h>
16 #include <crypto/des.h>
17 #include <linux/ccp.h>
18 
19 #include "ccp-dev.h"
20 
21 /* SHA initial context values */
22 static const __be32 ccp_sha1_init[SHA1_DIGEST_SIZE / sizeof(__be32)] = {
23 	cpu_to_be32(SHA1_H0), cpu_to_be32(SHA1_H1),
24 	cpu_to_be32(SHA1_H2), cpu_to_be32(SHA1_H3),
25 	cpu_to_be32(SHA1_H4),
26 };
27 
28 static const __be32 ccp_sha224_init[SHA256_DIGEST_SIZE / sizeof(__be32)] = {
29 	cpu_to_be32(SHA224_H0), cpu_to_be32(SHA224_H1),
30 	cpu_to_be32(SHA224_H2), cpu_to_be32(SHA224_H3),
31 	cpu_to_be32(SHA224_H4), cpu_to_be32(SHA224_H5),
32 	cpu_to_be32(SHA224_H6), cpu_to_be32(SHA224_H7),
33 };
34 
35 static const __be32 ccp_sha256_init[SHA256_DIGEST_SIZE / sizeof(__be32)] = {
36 	cpu_to_be32(SHA256_H0), cpu_to_be32(SHA256_H1),
37 	cpu_to_be32(SHA256_H2), cpu_to_be32(SHA256_H3),
38 	cpu_to_be32(SHA256_H4), cpu_to_be32(SHA256_H5),
39 	cpu_to_be32(SHA256_H6), cpu_to_be32(SHA256_H7),
40 };
41 
42 static const __be64 ccp_sha384_init[SHA512_DIGEST_SIZE / sizeof(__be64)] = {
43 	cpu_to_be64(SHA384_H0), cpu_to_be64(SHA384_H1),
44 	cpu_to_be64(SHA384_H2), cpu_to_be64(SHA384_H3),
45 	cpu_to_be64(SHA384_H4), cpu_to_be64(SHA384_H5),
46 	cpu_to_be64(SHA384_H6), cpu_to_be64(SHA384_H7),
47 };
48 
49 static const __be64 ccp_sha512_init[SHA512_DIGEST_SIZE / sizeof(__be64)] = {
50 	cpu_to_be64(SHA512_H0), cpu_to_be64(SHA512_H1),
51 	cpu_to_be64(SHA512_H2), cpu_to_be64(SHA512_H3),
52 	cpu_to_be64(SHA512_H4), cpu_to_be64(SHA512_H5),
53 	cpu_to_be64(SHA512_H6), cpu_to_be64(SHA512_H7),
54 };
55 
/*
 * Produce a job id for a new operation: version 3.0 devices get a freshly
 * generated id from ccp_gen_jobid(), every other version uses 0.
 * The argument is parenthesised so that any pointer expression is accepted;
 * note that it is still evaluated twice on version 3.0 devices.
 */
#define	CCP_NEW_JOBID(ccp)	(((ccp)->vdata->version == CCP_VERSION(3, 0)) ? \
					ccp_gen_jobid(ccp) : 0)
58 
59 static u32 ccp_gen_jobid(struct ccp_device *ccp)
60 {
61 	return atomic_inc_return(&ccp->current_id) & CCP_JOBID_MASK;
62 }
63 
64 static void ccp_sg_free(struct ccp_sg_workarea *wa)
65 {
66 	if (wa->dma_count)
67 		dma_unmap_sg(wa->dma_dev, wa->dma_sg, wa->nents, wa->dma_dir);
68 
69 	wa->dma_count = 0;
70 }
71 
72 static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev,
73 				struct scatterlist *sg, u64 len,
74 				enum dma_data_direction dma_dir)
75 {
76 	memset(wa, 0, sizeof(*wa));
77 
78 	wa->sg = sg;
79 	if (!sg)
80 		return 0;
81 
82 	wa->nents = sg_nents_for_len(sg, len);
83 	if (wa->nents < 0)
84 		return wa->nents;
85 
86 	wa->bytes_left = len;
87 	wa->sg_used = 0;
88 
89 	if (len == 0)
90 		return 0;
91 
92 	if (dma_dir == DMA_NONE)
93 		return 0;
94 
95 	wa->dma_sg = sg;
96 	wa->dma_dev = dev;
97 	wa->dma_dir = dma_dir;
98 	wa->dma_count = dma_map_sg(dev, sg, wa->nents, dma_dir);
99 	if (!wa->dma_count)
100 		return -ENOMEM;
101 
102 	return 0;
103 }
104 
105 static void ccp_update_sg_workarea(struct ccp_sg_workarea *wa, unsigned int len)
106 {
107 	unsigned int nbytes = min_t(u64, len, wa->bytes_left);
108 
109 	if (!wa->sg)
110 		return;
111 
112 	wa->sg_used += nbytes;
113 	wa->bytes_left -= nbytes;
114 	if (wa->sg_used == wa->sg->length) {
115 		wa->sg = sg_next(wa->sg);
116 		wa->sg_used = 0;
117 	}
118 }
119 
120 static void ccp_dm_free(struct ccp_dm_workarea *wa)
121 {
122 	if (wa->length <= CCP_DMAPOOL_MAX_SIZE) {
123 		if (wa->address)
124 			dma_pool_free(wa->dma_pool, wa->address,
125 				      wa->dma.address);
126 	} else {
127 		if (wa->dma.address)
128 			dma_unmap_single(wa->dev, wa->dma.address, wa->length,
129 					 wa->dma.dir);
130 		kfree(wa->address);
131 	}
132 
133 	wa->address = NULL;
134 	wa->dma.address = 0;
135 }
136 
137 static int ccp_init_dm_workarea(struct ccp_dm_workarea *wa,
138 				struct ccp_cmd_queue *cmd_q,
139 				unsigned int len,
140 				enum dma_data_direction dir)
141 {
142 	memset(wa, 0, sizeof(*wa));
143 
144 	if (!len)
145 		return 0;
146 
147 	wa->dev = cmd_q->ccp->dev;
148 	wa->length = len;
149 
150 	if (len <= CCP_DMAPOOL_MAX_SIZE) {
151 		wa->dma_pool = cmd_q->dma_pool;
152 
153 		wa->address = dma_pool_zalloc(wa->dma_pool, GFP_KERNEL,
154 					     &wa->dma.address);
155 		if (!wa->address)
156 			return -ENOMEM;
157 
158 		wa->dma.length = CCP_DMAPOOL_MAX_SIZE;
159 
160 	} else {
161 		wa->address = kzalloc(len, GFP_KERNEL);
162 		if (!wa->address)
163 			return -ENOMEM;
164 
165 		wa->dma.address = dma_map_single(wa->dev, wa->address, len,
166 						 dir);
167 		if (dma_mapping_error(wa->dev, wa->dma.address))
168 			return -ENOMEM;
169 
170 		wa->dma.length = len;
171 	}
172 	wa->dma.dir = dir;
173 
174 	return 0;
175 }
176 
177 static int ccp_set_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
178 			   struct scatterlist *sg, unsigned int sg_offset,
179 			   unsigned int len)
180 {
181 	WARN_ON(!wa->address);
182 
183 	if (len > (wa->length - wa_offset))
184 		return -EINVAL;
185 
186 	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
187 				 0);
188 	return 0;
189 }
190 
191 static void ccp_get_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
192 			    struct scatterlist *sg, unsigned int sg_offset,
193 			    unsigned int len)
194 {
195 	WARN_ON(!wa->address);
196 
197 	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
198 				 1);
199 }
200 
201 static int ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa,
202 				   unsigned int wa_offset,
203 				   struct scatterlist *sg,
204 				   unsigned int sg_offset,
205 				   unsigned int len)
206 {
207 	u8 *p, *q;
208 	int	rc;
209 
210 	rc = ccp_set_dm_area(wa, wa_offset, sg, sg_offset, len);
211 	if (rc)
212 		return rc;
213 
214 	p = wa->address + wa_offset;
215 	q = p + len - 1;
216 	while (p < q) {
217 		*p = *p ^ *q;
218 		*q = *p ^ *q;
219 		*p = *p ^ *q;
220 		p++;
221 		q--;
222 	}
223 	return 0;
224 }
225 
226 static void ccp_reverse_get_dm_area(struct ccp_dm_workarea *wa,
227 				    unsigned int wa_offset,
228 				    struct scatterlist *sg,
229 				    unsigned int sg_offset,
230 				    unsigned int len)
231 {
232 	u8 *p, *q;
233 
234 	p = wa->address + wa_offset;
235 	q = p + len - 1;
236 	while (p < q) {
237 		*p = *p ^ *q;
238 		*q = *p ^ *q;
239 		*p = *p ^ *q;
240 		p++;
241 		q--;
242 	}
243 
244 	ccp_get_dm_area(wa, wa_offset, sg, sg_offset, len);
245 }
246 
247 static void ccp_free_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q)
248 {
249 	ccp_dm_free(&data->dm_wa);
250 	ccp_sg_free(&data->sg_wa);
251 }
252 
253 static int ccp_init_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q,
254 			 struct scatterlist *sg, u64 sg_len,
255 			 unsigned int dm_len,
256 			 enum dma_data_direction dir)
257 {
258 	int ret;
259 
260 	memset(data, 0, sizeof(*data));
261 
262 	ret = ccp_init_sg_workarea(&data->sg_wa, cmd_q->ccp->dev, sg, sg_len,
263 				   dir);
264 	if (ret)
265 		goto e_err;
266 
267 	ret = ccp_init_dm_workarea(&data->dm_wa, cmd_q, dm_len, dir);
268 	if (ret)
269 		goto e_err;
270 
271 	return 0;
272 
273 e_err:
274 	ccp_free_data(data, cmd_q);
275 
276 	return ret;
277 }
278 
279 static unsigned int ccp_queue_buf(struct ccp_data *data, unsigned int from)
280 {
281 	struct ccp_sg_workarea *sg_wa = &data->sg_wa;
282 	struct ccp_dm_workarea *dm_wa = &data->dm_wa;
283 	unsigned int buf_count, nbytes;
284 
285 	/* Clear the buffer if setting it */
286 	if (!from)
287 		memset(dm_wa->address, 0, dm_wa->length);
288 
289 	if (!sg_wa->sg)
290 		return 0;
291 
292 	/* Perform the copy operation
293 	 *   nbytes will always be <= UINT_MAX because dm_wa->length is
294 	 *   an unsigned int
295 	 */
296 	nbytes = min_t(u64, sg_wa->bytes_left, dm_wa->length);
297 	scatterwalk_map_and_copy(dm_wa->address, sg_wa->sg, sg_wa->sg_used,
298 				 nbytes, from);
299 
300 	/* Update the structures and generate the count */
301 	buf_count = 0;
302 	while (sg_wa->bytes_left && (buf_count < dm_wa->length)) {
303 		nbytes = min(sg_wa->sg->length - sg_wa->sg_used,
304 			     dm_wa->length - buf_count);
305 		nbytes = min_t(u64, sg_wa->bytes_left, nbytes);
306 
307 		buf_count += nbytes;
308 		ccp_update_sg_workarea(sg_wa, nbytes);
309 	}
310 
311 	return buf_count;
312 }
313 
/* Load the bounce buffer from the scatterlist; returns the bytes consumed */
static unsigned int ccp_fill_queue_buf(struct ccp_data *data)
{
	const unsigned int from_buffer = 0;

	return ccp_queue_buf(data, from_buffer);
}
318 
/* Drain the bounce buffer into the scatterlist; returns the bytes written */
static unsigned int ccp_empty_queue_buf(struct ccp_data *data)
{
	const unsigned int from_buffer = 1;

	return ccp_queue_buf(data, from_buffer);
}
323 
324 static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst,
325 			     struct ccp_op *op, unsigned int block_size,
326 			     bool blocksize_op)
327 {
328 	unsigned int sg_src_len, sg_dst_len, op_len;
329 
330 	/* The CCP can only DMA from/to one address each per operation. This
331 	 * requires that we find the smallest DMA area between the source
332 	 * and destination. The resulting len values will always be <= UINT_MAX
333 	 * because the dma length is an unsigned int.
334 	 */
335 	sg_src_len = sg_dma_len(src->sg_wa.sg) - src->sg_wa.sg_used;
336 	sg_src_len = min_t(u64, src->sg_wa.bytes_left, sg_src_len);
337 
338 	if (dst) {
339 		sg_dst_len = sg_dma_len(dst->sg_wa.sg) - dst->sg_wa.sg_used;
340 		sg_dst_len = min_t(u64, src->sg_wa.bytes_left, sg_dst_len);
341 		op_len = min(sg_src_len, sg_dst_len);
342 	} else {
343 		op_len = sg_src_len;
344 	}
345 
346 	/* The data operation length will be at least block_size in length
347 	 * or the smaller of available sg room remaining for the source or
348 	 * the destination
349 	 */
350 	op_len = max(op_len, block_size);
351 
352 	/* Unless we have to buffer data, there's no reason to wait */
353 	op->soc = 0;
354 
355 	if (sg_src_len < block_size) {
356 		/* Not enough data in the sg element, so it
357 		 * needs to be buffered into a blocksize chunk
358 		 */
359 		int cp_len = ccp_fill_queue_buf(src);
360 
361 		op->soc = 1;
362 		op->src.u.dma.address = src->dm_wa.dma.address;
363 		op->src.u.dma.offset = 0;
364 		op->src.u.dma.length = (blocksize_op) ? block_size : cp_len;
365 	} else {
366 		/* Enough data in the sg element, but we need to
367 		 * adjust for any previously copied data
368 		 */
369 		op->src.u.dma.address = sg_dma_address(src->sg_wa.sg);
370 		op->src.u.dma.offset = src->sg_wa.sg_used;
371 		op->src.u.dma.length = op_len & ~(block_size - 1);
372 
373 		ccp_update_sg_workarea(&src->sg_wa, op->src.u.dma.length);
374 	}
375 
376 	if (dst) {
377 		if (sg_dst_len < block_size) {
378 			/* Not enough room in the sg element or we're on the
379 			 * last piece of data (when using padding), so the
380 			 * output needs to be buffered into a blocksize chunk
381 			 */
382 			op->soc = 1;
383 			op->dst.u.dma.address = dst->dm_wa.dma.address;
384 			op->dst.u.dma.offset = 0;
385 			op->dst.u.dma.length = op->src.u.dma.length;
386 		} else {
387 			/* Enough room in the sg element, but we need to
388 			 * adjust for any previously used area
389 			 */
390 			op->dst.u.dma.address = sg_dma_address(dst->sg_wa.sg);
391 			op->dst.u.dma.offset = dst->sg_wa.sg_used;
392 			op->dst.u.dma.length = op->src.u.dma.length;
393 		}
394 	}
395 }
396 
397 static void ccp_process_data(struct ccp_data *src, struct ccp_data *dst,
398 			     struct ccp_op *op)
399 {
400 	op->init = 0;
401 
402 	if (dst) {
403 		if (op->dst.u.dma.address == dst->dm_wa.dma.address)
404 			ccp_empty_queue_buf(dst);
405 		else
406 			ccp_update_sg_workarea(&dst->sg_wa,
407 					       op->dst.u.dma.length);
408 	}
409 }
410 
411 static int ccp_copy_to_from_sb(struct ccp_cmd_queue *cmd_q,
412 			       struct ccp_dm_workarea *wa, u32 jobid, u32 sb,
413 			       u32 byte_swap, bool from)
414 {
415 	struct ccp_op op;
416 
417 	memset(&op, 0, sizeof(op));
418 
419 	op.cmd_q = cmd_q;
420 	op.jobid = jobid;
421 	op.eom = 1;
422 
423 	if (from) {
424 		op.soc = 1;
425 		op.src.type = CCP_MEMTYPE_SB;
426 		op.src.u.sb = sb;
427 		op.dst.type = CCP_MEMTYPE_SYSTEM;
428 		op.dst.u.dma.address = wa->dma.address;
429 		op.dst.u.dma.length = wa->length;
430 	} else {
431 		op.src.type = CCP_MEMTYPE_SYSTEM;
432 		op.src.u.dma.address = wa->dma.address;
433 		op.src.u.dma.length = wa->length;
434 		op.dst.type = CCP_MEMTYPE_SB;
435 		op.dst.u.sb = sb;
436 	}
437 
438 	op.u.passthru.byte_swap = byte_swap;
439 
440 	return cmd_q->ccp->vdata->perform->passthru(&op);
441 }
442 
443 static int ccp_copy_to_sb(struct ccp_cmd_queue *cmd_q,
444 			  struct ccp_dm_workarea *wa, u32 jobid, u32 sb,
445 			  u32 byte_swap)
446 {
447 	return ccp_copy_to_from_sb(cmd_q, wa, jobid, sb, byte_swap, false);
448 }
449 
450 static int ccp_copy_from_sb(struct ccp_cmd_queue *cmd_q,
451 			    struct ccp_dm_workarea *wa, u32 jobid, u32 sb,
452 			    u32 byte_swap)
453 {
454 	return ccp_copy_to_from_sb(cmd_q, wa, jobid, sb, byte_swap, true);
455 }
456 
457 static noinline_for_stack int
458 ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
459 {
460 	struct ccp_aes_engine *aes = &cmd->u.aes;
461 	struct ccp_dm_workarea key, ctx;
462 	struct ccp_data src;
463 	struct ccp_op op;
464 	unsigned int dm_offset;
465 	int ret;
466 
467 	if (!((aes->key_len == AES_KEYSIZE_128) ||
468 	      (aes->key_len == AES_KEYSIZE_192) ||
469 	      (aes->key_len == AES_KEYSIZE_256)))
470 		return -EINVAL;
471 
472 	if (aes->src_len & (AES_BLOCK_SIZE - 1))
473 		return -EINVAL;
474 
475 	if (aes->iv_len != AES_BLOCK_SIZE)
476 		return -EINVAL;
477 
478 	if (!aes->key || !aes->iv || !aes->src)
479 		return -EINVAL;
480 
481 	if (aes->cmac_final) {
482 		if (aes->cmac_key_len != AES_BLOCK_SIZE)
483 			return -EINVAL;
484 
485 		if (!aes->cmac_key)
486 			return -EINVAL;
487 	}
488 
489 	BUILD_BUG_ON(CCP_AES_KEY_SB_COUNT != 1);
490 	BUILD_BUG_ON(CCP_AES_CTX_SB_COUNT != 1);
491 
492 	ret = -EIO;
493 	memset(&op, 0, sizeof(op));
494 	op.cmd_q = cmd_q;
495 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
496 	op.sb_key = cmd_q->sb_key;
497 	op.sb_ctx = cmd_q->sb_ctx;
498 	op.init = 1;
499 	op.u.aes.type = aes->type;
500 	op.u.aes.mode = aes->mode;
501 	op.u.aes.action = aes->action;
502 
503 	/* All supported key sizes fit in a single (32-byte) SB entry
504 	 * and must be in little endian format. Use the 256-bit byte
505 	 * swap passthru option to convert from big endian to little
506 	 * endian.
507 	 */
508 	ret = ccp_init_dm_workarea(&key, cmd_q,
509 				   CCP_AES_KEY_SB_COUNT * CCP_SB_BYTES,
510 				   DMA_TO_DEVICE);
511 	if (ret)
512 		return ret;
513 
514 	dm_offset = CCP_SB_BYTES - aes->key_len;
515 	ret = ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
516 	if (ret)
517 		goto e_key;
518 	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
519 			     CCP_PASSTHRU_BYTESWAP_256BIT);
520 	if (ret) {
521 		cmd->engine_error = cmd_q->cmd_error;
522 		goto e_key;
523 	}
524 
525 	/* The AES context fits in a single (32-byte) SB entry and
526 	 * must be in little endian format. Use the 256-bit byte swap
527 	 * passthru option to convert from big endian to little endian.
528 	 */
529 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
530 				   CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES,
531 				   DMA_BIDIRECTIONAL);
532 	if (ret)
533 		goto e_key;
534 
535 	dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
536 	ret = ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
537 	if (ret)
538 		goto e_ctx;
539 	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
540 			     CCP_PASSTHRU_BYTESWAP_256BIT);
541 	if (ret) {
542 		cmd->engine_error = cmd_q->cmd_error;
543 		goto e_ctx;
544 	}
545 
546 	/* Send data to the CCP AES engine */
547 	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
548 			    AES_BLOCK_SIZE, DMA_TO_DEVICE);
549 	if (ret)
550 		goto e_ctx;
551 
552 	while (src.sg_wa.bytes_left) {
553 		ccp_prepare_data(&src, NULL, &op, AES_BLOCK_SIZE, true);
554 		if (aes->cmac_final && !src.sg_wa.bytes_left) {
555 			op.eom = 1;
556 
557 			/* Push the K1/K2 key to the CCP now */
558 			ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid,
559 					       op.sb_ctx,
560 					       CCP_PASSTHRU_BYTESWAP_256BIT);
561 			if (ret) {
562 				cmd->engine_error = cmd_q->cmd_error;
563 				goto e_src;
564 			}
565 
566 			ret = ccp_set_dm_area(&ctx, 0, aes->cmac_key, 0,
567 					      aes->cmac_key_len);
568 			if (ret)
569 				goto e_src;
570 			ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
571 					     CCP_PASSTHRU_BYTESWAP_256BIT);
572 			if (ret) {
573 				cmd->engine_error = cmd_q->cmd_error;
574 				goto e_src;
575 			}
576 		}
577 
578 		ret = cmd_q->ccp->vdata->perform->aes(&op);
579 		if (ret) {
580 			cmd->engine_error = cmd_q->cmd_error;
581 			goto e_src;
582 		}
583 
584 		ccp_process_data(&src, NULL, &op);
585 	}
586 
587 	/* Retrieve the AES context - convert from LE to BE using
588 	 * 32-byte (256-bit) byteswapping
589 	 */
590 	ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
591 			       CCP_PASSTHRU_BYTESWAP_256BIT);
592 	if (ret) {
593 		cmd->engine_error = cmd_q->cmd_error;
594 		goto e_src;
595 	}
596 
597 	/* ...but we only need AES_BLOCK_SIZE bytes */
598 	dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
599 	ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
600 
601 e_src:
602 	ccp_free_data(&src, cmd_q);
603 
604 e_ctx:
605 	ccp_dm_free(&ctx);
606 
607 e_key:
608 	ccp_dm_free(&key);
609 
610 	return ret;
611 }
612 
613 static noinline_for_stack int
614 ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
615 {
616 	struct ccp_aes_engine *aes = &cmd->u.aes;
617 	struct ccp_dm_workarea key, ctx, final_wa, tag;
618 	struct ccp_data src, dst;
619 	struct ccp_data aad;
620 	struct ccp_op op;
621 
622 	unsigned long long *final;
623 	unsigned int dm_offset;
624 	unsigned int jobid;
625 	unsigned int ilen;
626 	bool in_place = true; /* Default value */
627 	int ret;
628 
629 	struct scatterlist *p_inp, sg_inp[2];
630 	struct scatterlist *p_tag, sg_tag[2];
631 	struct scatterlist *p_outp, sg_outp[2];
632 	struct scatterlist *p_aad;
633 
634 	if (!aes->iv)
635 		return -EINVAL;
636 
637 	if (!((aes->key_len == AES_KEYSIZE_128) ||
638 		(aes->key_len == AES_KEYSIZE_192) ||
639 		(aes->key_len == AES_KEYSIZE_256)))
640 		return -EINVAL;
641 
642 	if (!aes->key) /* Gotta have a key SGL */
643 		return -EINVAL;
644 
645 	/* First, decompose the source buffer into AAD & PT,
646 	 * and the destination buffer into AAD, CT & tag, or
647 	 * the input into CT & tag.
648 	 * It is expected that the input and output SGs will
649 	 * be valid, even if the AAD and input lengths are 0.
650 	 */
651 	p_aad = aes->src;
652 	p_inp = scatterwalk_ffwd(sg_inp, aes->src, aes->aad_len);
653 	p_outp = scatterwalk_ffwd(sg_outp, aes->dst, aes->aad_len);
654 	if (aes->action == CCP_AES_ACTION_ENCRYPT) {
655 		ilen = aes->src_len;
656 		p_tag = scatterwalk_ffwd(sg_tag, p_outp, ilen);
657 	} else {
658 		/* Input length for decryption includes tag */
659 		ilen = aes->src_len - AES_BLOCK_SIZE;
660 		p_tag = scatterwalk_ffwd(sg_tag, p_inp, ilen);
661 	}
662 
663 	jobid = CCP_NEW_JOBID(cmd_q->ccp);
664 
665 	memset(&op, 0, sizeof(op));
666 	op.cmd_q = cmd_q;
667 	op.jobid = jobid;
668 	op.sb_key = cmd_q->sb_key; /* Pre-allocated */
669 	op.sb_ctx = cmd_q->sb_ctx; /* Pre-allocated */
670 	op.init = 1;
671 	op.u.aes.type = aes->type;
672 
673 	/* Copy the key to the LSB */
674 	ret = ccp_init_dm_workarea(&key, cmd_q,
675 				   CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES,
676 				   DMA_TO_DEVICE);
677 	if (ret)
678 		return ret;
679 
680 	dm_offset = CCP_SB_BYTES - aes->key_len;
681 	ret = ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
682 	if (ret)
683 		goto e_key;
684 	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
685 			     CCP_PASSTHRU_BYTESWAP_256BIT);
686 	if (ret) {
687 		cmd->engine_error = cmd_q->cmd_error;
688 		goto e_key;
689 	}
690 
691 	/* Copy the context (IV) to the LSB.
692 	 * There is an assumption here that the IV is 96 bits in length, plus
693 	 * a nonce of 32 bits. If no IV is present, use a zeroed buffer.
694 	 */
695 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
696 				   CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES,
697 				   DMA_BIDIRECTIONAL);
698 	if (ret)
699 		goto e_key;
700 
701 	dm_offset = CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES - aes->iv_len;
702 	ret = ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
703 	if (ret)
704 		goto e_ctx;
705 
706 	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
707 			     CCP_PASSTHRU_BYTESWAP_256BIT);
708 	if (ret) {
709 		cmd->engine_error = cmd_q->cmd_error;
710 		goto e_ctx;
711 	}
712 
713 	op.init = 1;
714 	if (aes->aad_len > 0) {
715 		/* Step 1: Run a GHASH over the Additional Authenticated Data */
716 		ret = ccp_init_data(&aad, cmd_q, p_aad, aes->aad_len,
717 				    AES_BLOCK_SIZE,
718 				    DMA_TO_DEVICE);
719 		if (ret)
720 			goto e_ctx;
721 
722 		op.u.aes.mode = CCP_AES_MODE_GHASH;
723 		op.u.aes.action = CCP_AES_GHASHAAD;
724 
725 		while (aad.sg_wa.bytes_left) {
726 			ccp_prepare_data(&aad, NULL, &op, AES_BLOCK_SIZE, true);
727 
728 			ret = cmd_q->ccp->vdata->perform->aes(&op);
729 			if (ret) {
730 				cmd->engine_error = cmd_q->cmd_error;
731 				goto e_aad;
732 			}
733 
734 			ccp_process_data(&aad, NULL, &op);
735 			op.init = 0;
736 		}
737 	}
738 
739 	op.u.aes.mode = CCP_AES_MODE_GCTR;
740 	op.u.aes.action = aes->action;
741 
742 	if (ilen > 0) {
743 		/* Step 2: Run a GCTR over the plaintext */
744 		in_place = (sg_virt(p_inp) == sg_virt(p_outp)) ? true : false;
745 
746 		ret = ccp_init_data(&src, cmd_q, p_inp, ilen,
747 				    AES_BLOCK_SIZE,
748 				    in_place ? DMA_BIDIRECTIONAL
749 					     : DMA_TO_DEVICE);
750 		if (ret)
751 			goto e_ctx;
752 
753 		if (in_place) {
754 			dst = src;
755 		} else {
756 			ret = ccp_init_data(&dst, cmd_q, p_outp, ilen,
757 					    AES_BLOCK_SIZE, DMA_FROM_DEVICE);
758 			if (ret)
759 				goto e_src;
760 		}
761 
762 		op.soc = 0;
763 		op.eom = 0;
764 		op.init = 1;
765 		while (src.sg_wa.bytes_left) {
766 			ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true);
767 			if (!src.sg_wa.bytes_left) {
768 				unsigned int nbytes = aes->src_len
769 						      % AES_BLOCK_SIZE;
770 
771 				if (nbytes) {
772 					op.eom = 1;
773 					op.u.aes.size = (nbytes * 8) - 1;
774 				}
775 			}
776 
777 			ret = cmd_q->ccp->vdata->perform->aes(&op);
778 			if (ret) {
779 				cmd->engine_error = cmd_q->cmd_error;
780 				goto e_dst;
781 			}
782 
783 			ccp_process_data(&src, &dst, &op);
784 			op.init = 0;
785 		}
786 	}
787 
788 	/* Step 3: Update the IV portion of the context with the original IV */
789 	ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
790 			       CCP_PASSTHRU_BYTESWAP_256BIT);
791 	if (ret) {
792 		cmd->engine_error = cmd_q->cmd_error;
793 		goto e_dst;
794 	}
795 
796 	ret = ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
797 	if (ret)
798 		goto e_dst;
799 
800 	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
801 			     CCP_PASSTHRU_BYTESWAP_256BIT);
802 	if (ret) {
803 		cmd->engine_error = cmd_q->cmd_error;
804 		goto e_dst;
805 	}
806 
807 	/* Step 4: Concatenate the lengths of the AAD and source, and
808 	 * hash that 16 byte buffer.
809 	 */
810 	ret = ccp_init_dm_workarea(&final_wa, cmd_q, AES_BLOCK_SIZE,
811 				   DMA_BIDIRECTIONAL);
812 	if (ret)
813 		goto e_dst;
814 	final = (unsigned long long *) final_wa.address;
815 	final[0] = cpu_to_be64(aes->aad_len * 8);
816 	final[1] = cpu_to_be64(ilen * 8);
817 
818 	memset(&op, 0, sizeof(op));
819 	op.cmd_q = cmd_q;
820 	op.jobid = jobid;
821 	op.sb_key = cmd_q->sb_key; /* Pre-allocated */
822 	op.sb_ctx = cmd_q->sb_ctx; /* Pre-allocated */
823 	op.init = 1;
824 	op.u.aes.type = aes->type;
825 	op.u.aes.mode = CCP_AES_MODE_GHASH;
826 	op.u.aes.action = CCP_AES_GHASHFINAL;
827 	op.src.type = CCP_MEMTYPE_SYSTEM;
828 	op.src.u.dma.address = final_wa.dma.address;
829 	op.src.u.dma.length = AES_BLOCK_SIZE;
830 	op.dst.type = CCP_MEMTYPE_SYSTEM;
831 	op.dst.u.dma.address = final_wa.dma.address;
832 	op.dst.u.dma.length = AES_BLOCK_SIZE;
833 	op.eom = 1;
834 	op.u.aes.size = 0;
835 	ret = cmd_q->ccp->vdata->perform->aes(&op);
836 	if (ret)
837 		goto e_dst;
838 
839 	if (aes->action == CCP_AES_ACTION_ENCRYPT) {
840 		/* Put the ciphered tag after the ciphertext. */
841 		ccp_get_dm_area(&final_wa, 0, p_tag, 0, AES_BLOCK_SIZE);
842 	} else {
843 		/* Does this ciphered tag match the input? */
844 		ret = ccp_init_dm_workarea(&tag, cmd_q, AES_BLOCK_SIZE,
845 					   DMA_BIDIRECTIONAL);
846 		if (ret)
847 			goto e_tag;
848 		ret = ccp_set_dm_area(&tag, 0, p_tag, 0, AES_BLOCK_SIZE);
849 		if (ret)
850 			goto e_tag;
851 
852 		ret = crypto_memneq(tag.address, final_wa.address,
853 				    AES_BLOCK_SIZE) ? -EBADMSG : 0;
854 		ccp_dm_free(&tag);
855 	}
856 
857 e_tag:
858 	ccp_dm_free(&final_wa);
859 
860 e_dst:
861 	if (aes->src_len && !in_place)
862 		ccp_free_data(&dst, cmd_q);
863 
864 e_src:
865 	if (aes->src_len)
866 		ccp_free_data(&src, cmd_q);
867 
868 e_aad:
869 	if (aes->aad_len)
870 		ccp_free_data(&aad, cmd_q);
871 
872 e_ctx:
873 	ccp_dm_free(&ctx);
874 
875 e_key:
876 	ccp_dm_free(&key);
877 
878 	return ret;
879 }
880 
881 static noinline_for_stack int
882 ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
883 {
884 	struct ccp_aes_engine *aes = &cmd->u.aes;
885 	struct ccp_dm_workarea key, ctx;
886 	struct ccp_data src, dst;
887 	struct ccp_op op;
888 	unsigned int dm_offset;
889 	bool in_place = false;
890 	int ret;
891 
892 	if (!((aes->key_len == AES_KEYSIZE_128) ||
893 	      (aes->key_len == AES_KEYSIZE_192) ||
894 	      (aes->key_len == AES_KEYSIZE_256)))
895 		return -EINVAL;
896 
897 	if (((aes->mode == CCP_AES_MODE_ECB) ||
898 	     (aes->mode == CCP_AES_MODE_CBC)) &&
899 	    (aes->src_len & (AES_BLOCK_SIZE - 1)))
900 		return -EINVAL;
901 
902 	if (!aes->key || !aes->src || !aes->dst)
903 		return -EINVAL;
904 
905 	if (aes->mode != CCP_AES_MODE_ECB) {
906 		if (aes->iv_len != AES_BLOCK_SIZE)
907 			return -EINVAL;
908 
909 		if (!aes->iv)
910 			return -EINVAL;
911 	}
912 
913 	BUILD_BUG_ON(CCP_AES_KEY_SB_COUNT != 1);
914 	BUILD_BUG_ON(CCP_AES_CTX_SB_COUNT != 1);
915 
916 	ret = -EIO;
917 	memset(&op, 0, sizeof(op));
918 	op.cmd_q = cmd_q;
919 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
920 	op.sb_key = cmd_q->sb_key;
921 	op.sb_ctx = cmd_q->sb_ctx;
922 	op.init = (aes->mode == CCP_AES_MODE_ECB) ? 0 : 1;
923 	op.u.aes.type = aes->type;
924 	op.u.aes.mode = aes->mode;
925 	op.u.aes.action = aes->action;
926 
927 	/* All supported key sizes fit in a single (32-byte) SB entry
928 	 * and must be in little endian format. Use the 256-bit byte
929 	 * swap passthru option to convert from big endian to little
930 	 * endian.
931 	 */
932 	ret = ccp_init_dm_workarea(&key, cmd_q,
933 				   CCP_AES_KEY_SB_COUNT * CCP_SB_BYTES,
934 				   DMA_TO_DEVICE);
935 	if (ret)
936 		return ret;
937 
938 	dm_offset = CCP_SB_BYTES - aes->key_len;
939 	ret = ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
940 	if (ret)
941 		goto e_key;
942 	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
943 			     CCP_PASSTHRU_BYTESWAP_256BIT);
944 	if (ret) {
945 		cmd->engine_error = cmd_q->cmd_error;
946 		goto e_key;
947 	}
948 
949 	/* The AES context fits in a single (32-byte) SB entry and
950 	 * must be in little endian format. Use the 256-bit byte swap
951 	 * passthru option to convert from big endian to little endian.
952 	 */
953 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
954 				   CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES,
955 				   DMA_BIDIRECTIONAL);
956 	if (ret)
957 		goto e_key;
958 
959 	if (aes->mode != CCP_AES_MODE_ECB) {
960 		/* Load the AES context - convert to LE */
961 		dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
962 		ret = ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
963 		if (ret)
964 			goto e_ctx;
965 		ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
966 				     CCP_PASSTHRU_BYTESWAP_256BIT);
967 		if (ret) {
968 			cmd->engine_error = cmd_q->cmd_error;
969 			goto e_ctx;
970 		}
971 	}
972 	switch (aes->mode) {
973 	case CCP_AES_MODE_CFB: /* CFB128 only */
974 	case CCP_AES_MODE_CTR:
975 		op.u.aes.size = AES_BLOCK_SIZE * BITS_PER_BYTE - 1;
976 		break;
977 	default:
978 		op.u.aes.size = 0;
979 	}
980 
981 	/* Prepare the input and output data workareas. For in-place
982 	 * operations we need to set the dma direction to BIDIRECTIONAL
983 	 * and copy the src workarea to the dst workarea.
984 	 */
985 	if (sg_virt(aes->src) == sg_virt(aes->dst))
986 		in_place = true;
987 
988 	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
989 			    AES_BLOCK_SIZE,
990 			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
991 	if (ret)
992 		goto e_ctx;
993 
994 	if (in_place) {
995 		dst = src;
996 	} else {
997 		ret = ccp_init_data(&dst, cmd_q, aes->dst, aes->src_len,
998 				    AES_BLOCK_SIZE, DMA_FROM_DEVICE);
999 		if (ret)
1000 			goto e_src;
1001 	}
1002 
1003 	/* Send data to the CCP AES engine */
1004 	while (src.sg_wa.bytes_left) {
1005 		ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true);
1006 		if (!src.sg_wa.bytes_left) {
1007 			op.eom = 1;
1008 
1009 			/* Since we don't retrieve the AES context in ECB
1010 			 * mode we have to wait for the operation to complete
1011 			 * on the last piece of data
1012 			 */
1013 			if (aes->mode == CCP_AES_MODE_ECB)
1014 				op.soc = 1;
1015 		}
1016 
1017 		ret = cmd_q->ccp->vdata->perform->aes(&op);
1018 		if (ret) {
1019 			cmd->engine_error = cmd_q->cmd_error;
1020 			goto e_dst;
1021 		}
1022 
1023 		ccp_process_data(&src, &dst, &op);
1024 	}
1025 
1026 	if (aes->mode != CCP_AES_MODE_ECB) {
1027 		/* Retrieve the AES context - convert from LE to BE using
1028 		 * 32-byte (256-bit) byteswapping
1029 		 */
1030 		ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1031 				       CCP_PASSTHRU_BYTESWAP_256BIT);
1032 		if (ret) {
1033 			cmd->engine_error = cmd_q->cmd_error;
1034 			goto e_dst;
1035 		}
1036 
1037 		/* ...but we only need AES_BLOCK_SIZE bytes */
1038 		dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
1039 		ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
1040 	}
1041 
1042 e_dst:
1043 	if (!in_place)
1044 		ccp_free_data(&dst, cmd_q);
1045 
1046 e_src:
1047 	ccp_free_data(&src, cmd_q);
1048 
1049 e_ctx:
1050 	ccp_dm_free(&ctx);
1051 
1052 e_key:
1053 	ccp_dm_free(&key);
1054 
1055 	return ret;
1056 }
1057 
1058 static noinline_for_stack int
1059 ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1060 {
1061 	struct ccp_xts_aes_engine *xts = &cmd->u.xts;
1062 	struct ccp_dm_workarea key, ctx;
1063 	struct ccp_data src, dst;
1064 	struct ccp_op op;
1065 	unsigned int unit_size, dm_offset;
1066 	bool in_place = false;
1067 	unsigned int sb_count;
1068 	enum ccp_aes_type aestype;
1069 	int ret;
1070 
1071 	switch (xts->unit_size) {
1072 	case CCP_XTS_AES_UNIT_SIZE_16:
1073 		unit_size = 16;
1074 		break;
1075 	case CCP_XTS_AES_UNIT_SIZE_512:
1076 		unit_size = 512;
1077 		break;
1078 	case CCP_XTS_AES_UNIT_SIZE_1024:
1079 		unit_size = 1024;
1080 		break;
1081 	case CCP_XTS_AES_UNIT_SIZE_2048:
1082 		unit_size = 2048;
1083 		break;
1084 	case CCP_XTS_AES_UNIT_SIZE_4096:
1085 		unit_size = 4096;
1086 		break;
1087 
1088 	default:
1089 		return -EINVAL;
1090 	}
1091 
1092 	if (xts->key_len == AES_KEYSIZE_128)
1093 		aestype = CCP_AES_TYPE_128;
1094 	else if (xts->key_len == AES_KEYSIZE_256)
1095 		aestype = CCP_AES_TYPE_256;
1096 	else
1097 		return -EINVAL;
1098 
1099 	if (!xts->final && (xts->src_len & (AES_BLOCK_SIZE - 1)))
1100 		return -EINVAL;
1101 
1102 	if (xts->iv_len != AES_BLOCK_SIZE)
1103 		return -EINVAL;
1104 
1105 	if (!xts->key || !xts->iv || !xts->src || !xts->dst)
1106 		return -EINVAL;
1107 
1108 	BUILD_BUG_ON(CCP_XTS_AES_KEY_SB_COUNT != 1);
1109 	BUILD_BUG_ON(CCP_XTS_AES_CTX_SB_COUNT != 1);
1110 
1111 	ret = -EIO;
1112 	memset(&op, 0, sizeof(op));
1113 	op.cmd_q = cmd_q;
1114 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
1115 	op.sb_key = cmd_q->sb_key;
1116 	op.sb_ctx = cmd_q->sb_ctx;
1117 	op.init = 1;
1118 	op.u.xts.type = aestype;
1119 	op.u.xts.action = xts->action;
1120 	op.u.xts.unit_size = xts->unit_size;
1121 
1122 	/* A version 3 device only supports 128-bit keys, which fits into a
1123 	 * single SB entry. A version 5 device uses a 512-bit vector, so two
1124 	 * SB entries.
1125 	 */
1126 	if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0))
1127 		sb_count = CCP_XTS_AES_KEY_SB_COUNT;
1128 	else
1129 		sb_count = CCP5_XTS_AES_KEY_SB_COUNT;
1130 	ret = ccp_init_dm_workarea(&key, cmd_q,
1131 				   sb_count * CCP_SB_BYTES,
1132 				   DMA_TO_DEVICE);
1133 	if (ret)
1134 		return ret;
1135 
1136 	if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0)) {
1137 		/* All supported key sizes must be in little endian format.
1138 		 * Use the 256-bit byte swap passthru option to convert from
1139 		 * big endian to little endian.
1140 		 */
1141 		dm_offset = CCP_SB_BYTES - AES_KEYSIZE_128;
1142 		ret = ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len);
1143 		if (ret)
1144 			goto e_key;
1145 		ret = ccp_set_dm_area(&key, 0, xts->key, xts->key_len, xts->key_len);
1146 		if (ret)
1147 			goto e_key;
1148 	} else {
1149 		/* Version 5 CCPs use a 512-bit space for the key: each portion
1150 		 * occupies 256 bits, or one entire slot, and is zero-padded.
1151 		 */
1152 		unsigned int pad;
1153 
1154 		dm_offset = CCP_SB_BYTES;
1155 		pad = dm_offset - xts->key_len;
1156 		ret = ccp_set_dm_area(&key, pad, xts->key, 0, xts->key_len);
1157 		if (ret)
1158 			goto e_key;
1159 		ret = ccp_set_dm_area(&key, dm_offset + pad, xts->key,
1160 				      xts->key_len, xts->key_len);
1161 		if (ret)
1162 			goto e_key;
1163 	}
1164 	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
1165 			     CCP_PASSTHRU_BYTESWAP_256BIT);
1166 	if (ret) {
1167 		cmd->engine_error = cmd_q->cmd_error;
1168 		goto e_key;
1169 	}
1170 
1171 	/* The AES context fits in a single (32-byte) SB entry and
1172 	 * for XTS is already in little endian format so no byte swapping
1173 	 * is needed.
1174 	 */
1175 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
1176 				   CCP_XTS_AES_CTX_SB_COUNT * CCP_SB_BYTES,
1177 				   DMA_BIDIRECTIONAL);
1178 	if (ret)
1179 		goto e_key;
1180 
1181 	ret = ccp_set_dm_area(&ctx, 0, xts->iv, 0, xts->iv_len);
1182 	if (ret)
1183 		goto e_ctx;
1184 	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1185 			     CCP_PASSTHRU_BYTESWAP_NOOP);
1186 	if (ret) {
1187 		cmd->engine_error = cmd_q->cmd_error;
1188 		goto e_ctx;
1189 	}
1190 
1191 	/* Prepare the input and output data workareas. For in-place
1192 	 * operations we need to set the dma direction to BIDIRECTIONAL
1193 	 * and copy the src workarea to the dst workarea.
1194 	 */
1195 	if (sg_virt(xts->src) == sg_virt(xts->dst))
1196 		in_place = true;
1197 
1198 	ret = ccp_init_data(&src, cmd_q, xts->src, xts->src_len,
1199 			    unit_size,
1200 			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1201 	if (ret)
1202 		goto e_ctx;
1203 
1204 	if (in_place) {
1205 		dst = src;
1206 	} else {
1207 		ret = ccp_init_data(&dst, cmd_q, xts->dst, xts->src_len,
1208 				    unit_size, DMA_FROM_DEVICE);
1209 		if (ret)
1210 			goto e_src;
1211 	}
1212 
1213 	/* Send data to the CCP AES engine */
1214 	while (src.sg_wa.bytes_left) {
1215 		ccp_prepare_data(&src, &dst, &op, unit_size, true);
1216 		if (!src.sg_wa.bytes_left)
1217 			op.eom = 1;
1218 
1219 		ret = cmd_q->ccp->vdata->perform->xts_aes(&op);
1220 		if (ret) {
1221 			cmd->engine_error = cmd_q->cmd_error;
1222 			goto e_dst;
1223 		}
1224 
1225 		ccp_process_data(&src, &dst, &op);
1226 	}
1227 
1228 	/* Retrieve the AES context - convert from LE to BE using
1229 	 * 32-byte (256-bit) byteswapping
1230 	 */
1231 	ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1232 			       CCP_PASSTHRU_BYTESWAP_256BIT);
1233 	if (ret) {
1234 		cmd->engine_error = cmd_q->cmd_error;
1235 		goto e_dst;
1236 	}
1237 
1238 	/* ...but we only need AES_BLOCK_SIZE bytes */
1239 	dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
1240 	ccp_get_dm_area(&ctx, dm_offset, xts->iv, 0, xts->iv_len);
1241 
1242 e_dst:
1243 	if (!in_place)
1244 		ccp_free_data(&dst, cmd_q);
1245 
1246 e_src:
1247 	ccp_free_data(&src, cmd_q);
1248 
1249 e_ctx:
1250 	ccp_dm_free(&ctx);
1251 
1252 e_key:
1253 	ccp_dm_free(&key);
1254 
1255 	return ret;
1256 }
1257 
1258 static noinline_for_stack int
1259 ccp_run_des3_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1260 {
1261 	struct ccp_des3_engine *des3 = &cmd->u.des3;
1262 
1263 	struct ccp_dm_workarea key, ctx;
1264 	struct ccp_data src, dst;
1265 	struct ccp_op op;
1266 	unsigned int dm_offset;
1267 	unsigned int len_singlekey;
1268 	bool in_place = false;
1269 	int ret;
1270 
1271 	/* Error checks */
1272 	if (cmd_q->ccp->vdata->version < CCP_VERSION(5, 0))
1273 		return -EINVAL;
1274 
1275 	if (!cmd_q->ccp->vdata->perform->des3)
1276 		return -EINVAL;
1277 
1278 	if (des3->key_len != DES3_EDE_KEY_SIZE)
1279 		return -EINVAL;
1280 
1281 	if (((des3->mode == CCP_DES3_MODE_ECB) ||
1282 		(des3->mode == CCP_DES3_MODE_CBC)) &&
1283 		(des3->src_len & (DES3_EDE_BLOCK_SIZE - 1)))
1284 		return -EINVAL;
1285 
1286 	if (!des3->key || !des3->src || !des3->dst)
1287 		return -EINVAL;
1288 
1289 	if (des3->mode != CCP_DES3_MODE_ECB) {
1290 		if (des3->iv_len != DES3_EDE_BLOCK_SIZE)
1291 			return -EINVAL;
1292 
1293 		if (!des3->iv)
1294 			return -EINVAL;
1295 	}
1296 
1297 	ret = -EIO;
1298 	/* Zero out all the fields of the command desc */
1299 	memset(&op, 0, sizeof(op));
1300 
1301 	/* Set up the Function field */
1302 	op.cmd_q = cmd_q;
1303 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
1304 	op.sb_key = cmd_q->sb_key;
1305 
1306 	op.init = (des3->mode == CCP_DES3_MODE_ECB) ? 0 : 1;
1307 	op.u.des3.type = des3->type;
1308 	op.u.des3.mode = des3->mode;
1309 	op.u.des3.action = des3->action;
1310 
1311 	/*
1312 	 * All supported key sizes fit in a single (32-byte) KSB entry and
1313 	 * (like AES) must be in little endian format. Use the 256-bit byte
1314 	 * swap passthru option to convert from big endian to little endian.
1315 	 */
1316 	ret = ccp_init_dm_workarea(&key, cmd_q,
1317 				   CCP_DES3_KEY_SB_COUNT * CCP_SB_BYTES,
1318 				   DMA_TO_DEVICE);
1319 	if (ret)
1320 		return ret;
1321 
1322 	/*
1323 	 * The contents of the key triplet are in the reverse order of what
1324 	 * is required by the engine. Copy the 3 pieces individually to put
1325 	 * them where they belong.
1326 	 */
1327 	dm_offset = CCP_SB_BYTES - des3->key_len; /* Basic offset */
1328 
1329 	len_singlekey = des3->key_len / 3;
1330 	ret = ccp_set_dm_area(&key, dm_offset + 2 * len_singlekey,
1331 			      des3->key, 0, len_singlekey);
1332 	if (ret)
1333 		goto e_key;
1334 	ret = ccp_set_dm_area(&key, dm_offset + len_singlekey,
1335 			      des3->key, len_singlekey, len_singlekey);
1336 	if (ret)
1337 		goto e_key;
1338 	ret = ccp_set_dm_area(&key, dm_offset,
1339 			      des3->key, 2 * len_singlekey, len_singlekey);
1340 	if (ret)
1341 		goto e_key;
1342 
1343 	/* Copy the key to the SB */
1344 	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
1345 			     CCP_PASSTHRU_BYTESWAP_256BIT);
1346 	if (ret) {
1347 		cmd->engine_error = cmd_q->cmd_error;
1348 		goto e_key;
1349 	}
1350 
1351 	/*
1352 	 * The DES3 context fits in a single (32-byte) KSB entry and
1353 	 * must be in little endian format. Use the 256-bit byte swap
1354 	 * passthru option to convert from big endian to little endian.
1355 	 */
1356 	if (des3->mode != CCP_DES3_MODE_ECB) {
1357 		op.sb_ctx = cmd_q->sb_ctx;
1358 
1359 		ret = ccp_init_dm_workarea(&ctx, cmd_q,
1360 					   CCP_DES3_CTX_SB_COUNT * CCP_SB_BYTES,
1361 					   DMA_BIDIRECTIONAL);
1362 		if (ret)
1363 			goto e_key;
1364 
1365 		/* Load the context into the LSB */
1366 		dm_offset = CCP_SB_BYTES - des3->iv_len;
1367 		ret = ccp_set_dm_area(&ctx, dm_offset, des3->iv, 0,
1368 				      des3->iv_len);
1369 		if (ret)
1370 			goto e_ctx;
1371 
1372 		ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1373 				     CCP_PASSTHRU_BYTESWAP_256BIT);
1374 		if (ret) {
1375 			cmd->engine_error = cmd_q->cmd_error;
1376 			goto e_ctx;
1377 		}
1378 	}
1379 
1380 	/*
1381 	 * Prepare the input and output data workareas. For in-place
1382 	 * operations we need to set the dma direction to BIDIRECTIONAL
1383 	 * and copy the src workarea to the dst workarea.
1384 	 */
1385 	if (sg_virt(des3->src) == sg_virt(des3->dst))
1386 		in_place = true;
1387 
1388 	ret = ccp_init_data(&src, cmd_q, des3->src, des3->src_len,
1389 			DES3_EDE_BLOCK_SIZE,
1390 			in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1391 	if (ret)
1392 		goto e_ctx;
1393 
1394 	if (in_place)
1395 		dst = src;
1396 	else {
1397 		ret = ccp_init_data(&dst, cmd_q, des3->dst, des3->src_len,
1398 				DES3_EDE_BLOCK_SIZE, DMA_FROM_DEVICE);
1399 		if (ret)
1400 			goto e_src;
1401 	}
1402 
1403 	/* Send data to the CCP DES3 engine */
1404 	while (src.sg_wa.bytes_left) {
1405 		ccp_prepare_data(&src, &dst, &op, DES3_EDE_BLOCK_SIZE, true);
1406 		if (!src.sg_wa.bytes_left) {
1407 			op.eom = 1;
1408 
1409 			/* Since we don't retrieve the context in ECB mode
1410 			 * we have to wait for the operation to complete
1411 			 * on the last piece of data
1412 			 */
1413 			op.soc = 0;
1414 		}
1415 
1416 		ret = cmd_q->ccp->vdata->perform->des3(&op);
1417 		if (ret) {
1418 			cmd->engine_error = cmd_q->cmd_error;
1419 			goto e_dst;
1420 		}
1421 
1422 		ccp_process_data(&src, &dst, &op);
1423 	}
1424 
1425 	if (des3->mode != CCP_DES3_MODE_ECB) {
1426 		/* Retrieve the context and make BE */
1427 		ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1428 				       CCP_PASSTHRU_BYTESWAP_256BIT);
1429 		if (ret) {
1430 			cmd->engine_error = cmd_q->cmd_error;
1431 			goto e_dst;
1432 		}
1433 
1434 		/* ...but we only need the last DES3_EDE_BLOCK_SIZE bytes */
1435 		ccp_get_dm_area(&ctx, dm_offset, des3->iv, 0,
1436 				DES3_EDE_BLOCK_SIZE);
1437 	}
1438 e_dst:
1439 	if (!in_place)
1440 		ccp_free_data(&dst, cmd_q);
1441 
1442 e_src:
1443 	ccp_free_data(&src, cmd_q);
1444 
1445 e_ctx:
1446 	if (des3->mode != CCP_DES3_MODE_ECB)
1447 		ccp_dm_free(&ctx);
1448 
1449 e_key:
1450 	ccp_dm_free(&key);
1451 
1452 	return ret;
1453 }
1454 
1455 static noinline_for_stack int
1456 ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1457 {
1458 	struct ccp_sha_engine *sha = &cmd->u.sha;
1459 	struct ccp_dm_workarea ctx;
1460 	struct ccp_data src;
1461 	struct ccp_op op;
1462 	unsigned int ioffset, ooffset;
1463 	unsigned int digest_size;
1464 	int sb_count;
1465 	const void *init;
1466 	u64 block_size;
1467 	int ctx_size;
1468 	int ret;
1469 
1470 	switch (sha->type) {
1471 	case CCP_SHA_TYPE_1:
1472 		if (sha->ctx_len < SHA1_DIGEST_SIZE)
1473 			return -EINVAL;
1474 		block_size = SHA1_BLOCK_SIZE;
1475 		break;
1476 	case CCP_SHA_TYPE_224:
1477 		if (sha->ctx_len < SHA224_DIGEST_SIZE)
1478 			return -EINVAL;
1479 		block_size = SHA224_BLOCK_SIZE;
1480 		break;
1481 	case CCP_SHA_TYPE_256:
1482 		if (sha->ctx_len < SHA256_DIGEST_SIZE)
1483 			return -EINVAL;
1484 		block_size = SHA256_BLOCK_SIZE;
1485 		break;
1486 	case CCP_SHA_TYPE_384:
1487 		if (cmd_q->ccp->vdata->version < CCP_VERSION(4, 0)
1488 		    || sha->ctx_len < SHA384_DIGEST_SIZE)
1489 			return -EINVAL;
1490 		block_size = SHA384_BLOCK_SIZE;
1491 		break;
1492 	case CCP_SHA_TYPE_512:
1493 		if (cmd_q->ccp->vdata->version < CCP_VERSION(4, 0)
1494 		    || sha->ctx_len < SHA512_DIGEST_SIZE)
1495 			return -EINVAL;
1496 		block_size = SHA512_BLOCK_SIZE;
1497 		break;
1498 	default:
1499 		return -EINVAL;
1500 	}
1501 
1502 	if (!sha->ctx)
1503 		return -EINVAL;
1504 
1505 	if (!sha->final && (sha->src_len & (block_size - 1)))
1506 		return -EINVAL;
1507 
1508 	/* The version 3 device can't handle zero-length input */
1509 	if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0)) {
1510 
1511 		if (!sha->src_len) {
1512 			unsigned int digest_len;
1513 			const u8 *sha_zero;
1514 
1515 			/* Not final, just return */
1516 			if (!sha->final)
1517 				return 0;
1518 
1519 			/* CCP can't do a zero length sha operation so the
1520 			 * caller must buffer the data.
1521 			 */
1522 			if (sha->msg_bits)
1523 				return -EINVAL;
1524 
1525 			/* The CCP cannot perform zero-length sha operations
1526 			 * so the caller is required to buffer data for the
1527 			 * final operation. However, a sha operation for a
1528 			 * message with a total length of zero is valid so
1529 			 * known values are required to supply the result.
1530 			 */
1531 			switch (sha->type) {
1532 			case CCP_SHA_TYPE_1:
1533 				sha_zero = sha1_zero_message_hash;
1534 				digest_len = SHA1_DIGEST_SIZE;
1535 				break;
1536 			case CCP_SHA_TYPE_224:
1537 				sha_zero = sha224_zero_message_hash;
1538 				digest_len = SHA224_DIGEST_SIZE;
1539 				break;
1540 			case CCP_SHA_TYPE_256:
1541 				sha_zero = sha256_zero_message_hash;
1542 				digest_len = SHA256_DIGEST_SIZE;
1543 				break;
1544 			default:
1545 				return -EINVAL;
1546 			}
1547 
1548 			scatterwalk_map_and_copy((void *)sha_zero, sha->ctx, 0,
1549 						 digest_len, 1);
1550 
1551 			return 0;
1552 		}
1553 	}
1554 
1555 	/* Set variables used throughout */
1556 	switch (sha->type) {
1557 	case CCP_SHA_TYPE_1:
1558 		digest_size = SHA1_DIGEST_SIZE;
1559 		init = (void *) ccp_sha1_init;
1560 		ctx_size = SHA1_DIGEST_SIZE;
1561 		sb_count = 1;
1562 		if (cmd_q->ccp->vdata->version != CCP_VERSION(3, 0))
1563 			ooffset = ioffset = CCP_SB_BYTES - SHA1_DIGEST_SIZE;
1564 		else
1565 			ooffset = ioffset = 0;
1566 		break;
1567 	case CCP_SHA_TYPE_224:
1568 		digest_size = SHA224_DIGEST_SIZE;
1569 		init = (void *) ccp_sha224_init;
1570 		ctx_size = SHA256_DIGEST_SIZE;
1571 		sb_count = 1;
1572 		ioffset = 0;
1573 		if (cmd_q->ccp->vdata->version != CCP_VERSION(3, 0))
1574 			ooffset = CCP_SB_BYTES - SHA224_DIGEST_SIZE;
1575 		else
1576 			ooffset = 0;
1577 		break;
1578 	case CCP_SHA_TYPE_256:
1579 		digest_size = SHA256_DIGEST_SIZE;
1580 		init = (void *) ccp_sha256_init;
1581 		ctx_size = SHA256_DIGEST_SIZE;
1582 		sb_count = 1;
1583 		ooffset = ioffset = 0;
1584 		break;
1585 	case CCP_SHA_TYPE_384:
1586 		digest_size = SHA384_DIGEST_SIZE;
1587 		init = (void *) ccp_sha384_init;
1588 		ctx_size = SHA512_DIGEST_SIZE;
1589 		sb_count = 2;
1590 		ioffset = 0;
1591 		ooffset = 2 * CCP_SB_BYTES - SHA384_DIGEST_SIZE;
1592 		break;
1593 	case CCP_SHA_TYPE_512:
1594 		digest_size = SHA512_DIGEST_SIZE;
1595 		init = (void *) ccp_sha512_init;
1596 		ctx_size = SHA512_DIGEST_SIZE;
1597 		sb_count = 2;
1598 		ooffset = ioffset = 0;
1599 		break;
1600 	default:
1601 		ret = -EINVAL;
1602 		goto e_data;
1603 	}
1604 
1605 	/* For zero-length plaintext the src pointer is ignored;
1606 	 * otherwise both parts must be valid
1607 	 */
1608 	if (sha->src_len && !sha->src)
1609 		return -EINVAL;
1610 
1611 	memset(&op, 0, sizeof(op));
1612 	op.cmd_q = cmd_q;
1613 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
1614 	op.sb_ctx = cmd_q->sb_ctx; /* Pre-allocated */
1615 	op.u.sha.type = sha->type;
1616 	op.u.sha.msg_bits = sha->msg_bits;
1617 
1618 	/* For SHA1/224/256 the context fits in a single (32-byte) SB entry;
1619 	 * SHA384/512 require 2 adjacent SB slots, with the right half in the
1620 	 * first slot, and the left half in the second. Each portion must then
1621 	 * be in little endian format: use the 256-bit byte swap option.
1622 	 */
1623 	ret = ccp_init_dm_workarea(&ctx, cmd_q, sb_count * CCP_SB_BYTES,
1624 				   DMA_BIDIRECTIONAL);
1625 	if (ret)
1626 		return ret;
1627 	if (sha->first) {
1628 		switch (sha->type) {
1629 		case CCP_SHA_TYPE_1:
1630 		case CCP_SHA_TYPE_224:
1631 		case CCP_SHA_TYPE_256:
1632 			memcpy(ctx.address + ioffset, init, ctx_size);
1633 			break;
1634 		case CCP_SHA_TYPE_384:
1635 		case CCP_SHA_TYPE_512:
1636 			memcpy(ctx.address + ctx_size / 2, init,
1637 			       ctx_size / 2);
1638 			memcpy(ctx.address, init + ctx_size / 2,
1639 			       ctx_size / 2);
1640 			break;
1641 		default:
1642 			ret = -EINVAL;
1643 			goto e_ctx;
1644 		}
1645 	} else {
1646 		/* Restore the context */
1647 		ret = ccp_set_dm_area(&ctx, 0, sha->ctx, 0,
1648 				      sb_count * CCP_SB_BYTES);
1649 		if (ret)
1650 			goto e_ctx;
1651 	}
1652 
1653 	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1654 			     CCP_PASSTHRU_BYTESWAP_256BIT);
1655 	if (ret) {
1656 		cmd->engine_error = cmd_q->cmd_error;
1657 		goto e_ctx;
1658 	}
1659 
1660 	if (sha->src) {
1661 		/* Send data to the CCP SHA engine; block_size is set above */
1662 		ret = ccp_init_data(&src, cmd_q, sha->src, sha->src_len,
1663 				    block_size, DMA_TO_DEVICE);
1664 		if (ret)
1665 			goto e_ctx;
1666 
1667 		while (src.sg_wa.bytes_left) {
1668 			ccp_prepare_data(&src, NULL, &op, block_size, false);
1669 			if (sha->final && !src.sg_wa.bytes_left)
1670 				op.eom = 1;
1671 
1672 			ret = cmd_q->ccp->vdata->perform->sha(&op);
1673 			if (ret) {
1674 				cmd->engine_error = cmd_q->cmd_error;
1675 				goto e_data;
1676 			}
1677 
1678 			ccp_process_data(&src, NULL, &op);
1679 		}
1680 	} else {
1681 		op.eom = 1;
1682 		ret = cmd_q->ccp->vdata->perform->sha(&op);
1683 		if (ret) {
1684 			cmd->engine_error = cmd_q->cmd_error;
1685 			goto e_data;
1686 		}
1687 	}
1688 
1689 	/* Retrieve the SHA context - convert from LE to BE using
1690 	 * 32-byte (256-bit) byteswapping to BE
1691 	 */
1692 	ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1693 			       CCP_PASSTHRU_BYTESWAP_256BIT);
1694 	if (ret) {
1695 		cmd->engine_error = cmd_q->cmd_error;
1696 		goto e_data;
1697 	}
1698 
1699 	if (sha->final) {
1700 		/* Finishing up, so get the digest */
1701 		switch (sha->type) {
1702 		case CCP_SHA_TYPE_1:
1703 		case CCP_SHA_TYPE_224:
1704 		case CCP_SHA_TYPE_256:
1705 			ccp_get_dm_area(&ctx, ooffset,
1706 					sha->ctx, 0,
1707 					digest_size);
1708 			break;
1709 		case CCP_SHA_TYPE_384:
1710 		case CCP_SHA_TYPE_512:
1711 			ccp_get_dm_area(&ctx, 0,
1712 					sha->ctx, LSB_ITEM_SIZE - ooffset,
1713 					LSB_ITEM_SIZE);
1714 			ccp_get_dm_area(&ctx, LSB_ITEM_SIZE + ooffset,
1715 					sha->ctx, 0,
1716 					LSB_ITEM_SIZE - ooffset);
1717 			break;
1718 		default:
1719 			ret = -EINVAL;
1720 			goto e_ctx;
1721 		}
1722 	} else {
1723 		/* Stash the context */
1724 		ccp_get_dm_area(&ctx, 0, sha->ctx, 0,
1725 				sb_count * CCP_SB_BYTES);
1726 	}
1727 
1728 	if (sha->final && sha->opad) {
1729 		/* HMAC operation, recursively perform final SHA */
1730 		struct ccp_cmd hmac_cmd;
1731 		struct scatterlist sg;
1732 		u8 *hmac_buf;
1733 
1734 		if (sha->opad_len != block_size) {
1735 			ret = -EINVAL;
1736 			goto e_data;
1737 		}
1738 
1739 		hmac_buf = kmalloc(block_size + digest_size, GFP_KERNEL);
1740 		if (!hmac_buf) {
1741 			ret = -ENOMEM;
1742 			goto e_data;
1743 		}
1744 		sg_init_one(&sg, hmac_buf, block_size + digest_size);
1745 
1746 		scatterwalk_map_and_copy(hmac_buf, sha->opad, 0, block_size, 0);
1747 		switch (sha->type) {
1748 		case CCP_SHA_TYPE_1:
1749 		case CCP_SHA_TYPE_224:
1750 		case CCP_SHA_TYPE_256:
1751 			memcpy(hmac_buf + block_size,
1752 			       ctx.address + ooffset,
1753 			       digest_size);
1754 			break;
1755 		case CCP_SHA_TYPE_384:
1756 		case CCP_SHA_TYPE_512:
1757 			memcpy(hmac_buf + block_size,
1758 			       ctx.address + LSB_ITEM_SIZE + ooffset,
1759 			       LSB_ITEM_SIZE);
1760 			memcpy(hmac_buf + block_size +
1761 			       (LSB_ITEM_SIZE - ooffset),
1762 			       ctx.address,
1763 			       LSB_ITEM_SIZE);
1764 			break;
1765 		default:
1766 			ret = -EINVAL;
1767 			goto e_ctx;
1768 		}
1769 
1770 		memset(&hmac_cmd, 0, sizeof(hmac_cmd));
1771 		hmac_cmd.engine = CCP_ENGINE_SHA;
1772 		hmac_cmd.u.sha.type = sha->type;
1773 		hmac_cmd.u.sha.ctx = sha->ctx;
1774 		hmac_cmd.u.sha.ctx_len = sha->ctx_len;
1775 		hmac_cmd.u.sha.src = &sg;
1776 		hmac_cmd.u.sha.src_len = block_size + digest_size;
1777 		hmac_cmd.u.sha.opad = NULL;
1778 		hmac_cmd.u.sha.opad_len = 0;
1779 		hmac_cmd.u.sha.first = 1;
1780 		hmac_cmd.u.sha.final = 1;
1781 		hmac_cmd.u.sha.msg_bits = (block_size + digest_size) << 3;
1782 
1783 		ret = ccp_run_sha_cmd(cmd_q, &hmac_cmd);
1784 		if (ret)
1785 			cmd->engine_error = hmac_cmd.engine_error;
1786 
1787 		kfree(hmac_buf);
1788 	}
1789 
1790 e_data:
1791 	if (sha->src)
1792 		ccp_free_data(&src, cmd_q);
1793 
1794 e_ctx:
1795 	ccp_dm_free(&ctx);
1796 
1797 	return ret;
1798 }
1799 
1800 static noinline_for_stack int
1801 ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1802 {
1803 	struct ccp_rsa_engine *rsa = &cmd->u.rsa;
1804 	struct ccp_dm_workarea exp, src, dst;
1805 	struct ccp_op op;
1806 	unsigned int sb_count, i_len, o_len;
1807 	int ret;
1808 
1809 	/* Check against the maximum allowable size, in bits */
1810 	if (rsa->key_size > cmd_q->ccp->vdata->rsamax)
1811 		return -EINVAL;
1812 
1813 	if (!rsa->exp || !rsa->mod || !rsa->src || !rsa->dst)
1814 		return -EINVAL;
1815 
1816 	memset(&op, 0, sizeof(op));
1817 	op.cmd_q = cmd_q;
1818 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
1819 
1820 	/* The RSA modulus must precede the message being acted upon, so
1821 	 * it must be copied to a DMA area where the message and the
1822 	 * modulus can be concatenated.  Therefore the input buffer
1823 	 * length required is twice the output buffer length (which
1824 	 * must be a multiple of 256-bits).  Compute o_len, i_len in bytes.
1825 	 * Buffer sizes must be a multiple of 32 bytes; rounding up may be
1826 	 * required.
1827 	 */
1828 	o_len = 32 * ((rsa->key_size + 255) / 256);
1829 	i_len = o_len * 2;
1830 
1831 	sb_count = 0;
1832 	if (cmd_q->ccp->vdata->version < CCP_VERSION(5, 0)) {
1833 		/* sb_count is the number of storage block slots required
1834 		 * for the modulus.
1835 		 */
1836 		sb_count = o_len / CCP_SB_BYTES;
1837 		op.sb_key = cmd_q->ccp->vdata->perform->sballoc(cmd_q,
1838 								sb_count);
1839 		if (!op.sb_key)
1840 			return -EIO;
1841 	} else {
1842 		/* A version 5 device allows a modulus size that will not fit
1843 		 * in the LSB, so the command will transfer it from memory.
1844 		 * Set the sb key to the default, even though it's not used.
1845 		 */
1846 		op.sb_key = cmd_q->sb_key;
1847 	}
1848 
1849 	/* The RSA exponent must be in little endian format. Reverse its
1850 	 * byte order.
1851 	 */
1852 	ret = ccp_init_dm_workarea(&exp, cmd_q, o_len, DMA_TO_DEVICE);
1853 	if (ret)
1854 		goto e_sb;
1855 
1856 	ret = ccp_reverse_set_dm_area(&exp, 0, rsa->exp, 0, rsa->exp_len);
1857 	if (ret)
1858 		goto e_exp;
1859 
1860 	if (cmd_q->ccp->vdata->version < CCP_VERSION(5, 0)) {
1861 		/* Copy the exponent to the local storage block, using
1862 		 * as many 32-byte blocks as were allocated above. It's
1863 		 * already little endian, so no further change is required.
1864 		 */
1865 		ret = ccp_copy_to_sb(cmd_q, &exp, op.jobid, op.sb_key,
1866 				     CCP_PASSTHRU_BYTESWAP_NOOP);
1867 		if (ret) {
1868 			cmd->engine_error = cmd_q->cmd_error;
1869 			goto e_exp;
1870 		}
1871 	} else {
1872 		/* The exponent can be retrieved from memory via DMA. */
1873 		op.exp.u.dma.address = exp.dma.address;
1874 		op.exp.u.dma.offset = 0;
1875 	}
1876 
1877 	/* Concatenate the modulus and the message. Both the modulus and
1878 	 * the operands must be in little endian format.  Since the input
1879 	 * is in big endian format it must be converted.
1880 	 */
1881 	ret = ccp_init_dm_workarea(&src, cmd_q, i_len, DMA_TO_DEVICE);
1882 	if (ret)
1883 		goto e_exp;
1884 
1885 	ret = ccp_reverse_set_dm_area(&src, 0, rsa->mod, 0, rsa->mod_len);
1886 	if (ret)
1887 		goto e_src;
1888 	ret = ccp_reverse_set_dm_area(&src, o_len, rsa->src, 0, rsa->src_len);
1889 	if (ret)
1890 		goto e_src;
1891 
1892 	/* Prepare the output area for the operation */
1893 	ret = ccp_init_dm_workarea(&dst, cmd_q, o_len, DMA_FROM_DEVICE);
1894 	if (ret)
1895 		goto e_src;
1896 
1897 	op.soc = 1;
1898 	op.src.u.dma.address = src.dma.address;
1899 	op.src.u.dma.offset = 0;
1900 	op.src.u.dma.length = i_len;
1901 	op.dst.u.dma.address = dst.dma.address;
1902 	op.dst.u.dma.offset = 0;
1903 	op.dst.u.dma.length = o_len;
1904 
1905 	op.u.rsa.mod_size = rsa->key_size;
1906 	op.u.rsa.input_len = i_len;
1907 
1908 	ret = cmd_q->ccp->vdata->perform->rsa(&op);
1909 	if (ret) {
1910 		cmd->engine_error = cmd_q->cmd_error;
1911 		goto e_dst;
1912 	}
1913 
1914 	ccp_reverse_get_dm_area(&dst, 0, rsa->dst, 0, rsa->mod_len);
1915 
1916 e_dst:
1917 	ccp_dm_free(&dst);
1918 
1919 e_src:
1920 	ccp_dm_free(&src);
1921 
1922 e_exp:
1923 	ccp_dm_free(&exp);
1924 
1925 e_sb:
1926 	if (sb_count)
1927 		cmd_q->ccp->vdata->perform->sbfree(cmd_q, op.sb_key, sb_count);
1928 
1929 	return ret;
1930 }
1931 
1932 static noinline_for_stack int
1933 ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1934 {
1935 	struct ccp_passthru_engine *pt = &cmd->u.passthru;
1936 	struct ccp_dm_workarea mask;
1937 	struct ccp_data src, dst;
1938 	struct ccp_op op;
1939 	bool in_place = false;
1940 	unsigned int i;
1941 	int ret = 0;
1942 
1943 	if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1)))
1944 		return -EINVAL;
1945 
1946 	if (!pt->src || !pt->dst)
1947 		return -EINVAL;
1948 
1949 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
1950 		if (pt->mask_len != CCP_PASSTHRU_MASKSIZE)
1951 			return -EINVAL;
1952 		if (!pt->mask)
1953 			return -EINVAL;
1954 	}
1955 
1956 	BUILD_BUG_ON(CCP_PASSTHRU_SB_COUNT != 1);
1957 
1958 	memset(&op, 0, sizeof(op));
1959 	op.cmd_q = cmd_q;
1960 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
1961 
1962 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
1963 		/* Load the mask */
1964 		op.sb_key = cmd_q->sb_key;
1965 
1966 		ret = ccp_init_dm_workarea(&mask, cmd_q,
1967 					   CCP_PASSTHRU_SB_COUNT *
1968 					   CCP_SB_BYTES,
1969 					   DMA_TO_DEVICE);
1970 		if (ret)
1971 			return ret;
1972 
1973 		ret = ccp_set_dm_area(&mask, 0, pt->mask, 0, pt->mask_len);
1974 		if (ret)
1975 			goto e_mask;
1976 		ret = ccp_copy_to_sb(cmd_q, &mask, op.jobid, op.sb_key,
1977 				     CCP_PASSTHRU_BYTESWAP_NOOP);
1978 		if (ret) {
1979 			cmd->engine_error = cmd_q->cmd_error;
1980 			goto e_mask;
1981 		}
1982 	}
1983 
1984 	/* Prepare the input and output data workareas. For in-place
1985 	 * operations we need to set the dma direction to BIDIRECTIONAL
1986 	 * and copy the src workarea to the dst workarea.
1987 	 */
1988 	if (sg_virt(pt->src) == sg_virt(pt->dst))
1989 		in_place = true;
1990 
1991 	ret = ccp_init_data(&src, cmd_q, pt->src, pt->src_len,
1992 			    CCP_PASSTHRU_MASKSIZE,
1993 			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1994 	if (ret)
1995 		goto e_mask;
1996 
1997 	if (in_place) {
1998 		dst = src;
1999 	} else {
2000 		ret = ccp_init_data(&dst, cmd_q, pt->dst, pt->src_len,
2001 				    CCP_PASSTHRU_MASKSIZE, DMA_FROM_DEVICE);
2002 		if (ret)
2003 			goto e_src;
2004 	}
2005 
2006 	/* Send data to the CCP Passthru engine
2007 	 *   Because the CCP engine works on a single source and destination
2008 	 *   dma address at a time, each entry in the source scatterlist
2009 	 *   (after the dma_map_sg call) must be less than or equal to the
2010 	 *   (remaining) length in the destination scatterlist entry and the
2011 	 *   length must be a multiple of CCP_PASSTHRU_BLOCKSIZE
2012 	 */
2013 	dst.sg_wa.sg_used = 0;
2014 	for (i = 1; i <= src.sg_wa.dma_count; i++) {
2015 		if (!dst.sg_wa.sg ||
2016 		    (dst.sg_wa.sg->length < src.sg_wa.sg->length)) {
2017 			ret = -EINVAL;
2018 			goto e_dst;
2019 		}
2020 
2021 		if (i == src.sg_wa.dma_count) {
2022 			op.eom = 1;
2023 			op.soc = 1;
2024 		}
2025 
2026 		op.src.type = CCP_MEMTYPE_SYSTEM;
2027 		op.src.u.dma.address = sg_dma_address(src.sg_wa.sg);
2028 		op.src.u.dma.offset = 0;
2029 		op.src.u.dma.length = sg_dma_len(src.sg_wa.sg);
2030 
2031 		op.dst.type = CCP_MEMTYPE_SYSTEM;
2032 		op.dst.u.dma.address = sg_dma_address(dst.sg_wa.sg);
2033 		op.dst.u.dma.offset = dst.sg_wa.sg_used;
2034 		op.dst.u.dma.length = op.src.u.dma.length;
2035 
2036 		ret = cmd_q->ccp->vdata->perform->passthru(&op);
2037 		if (ret) {
2038 			cmd->engine_error = cmd_q->cmd_error;
2039 			goto e_dst;
2040 		}
2041 
2042 		dst.sg_wa.sg_used += src.sg_wa.sg->length;
2043 		if (dst.sg_wa.sg_used == dst.sg_wa.sg->length) {
2044 			dst.sg_wa.sg = sg_next(dst.sg_wa.sg);
2045 			dst.sg_wa.sg_used = 0;
2046 		}
2047 		src.sg_wa.sg = sg_next(src.sg_wa.sg);
2048 	}
2049 
2050 e_dst:
2051 	if (!in_place)
2052 		ccp_free_data(&dst, cmd_q);
2053 
2054 e_src:
2055 	ccp_free_data(&src, cmd_q);
2056 
2057 e_mask:
2058 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
2059 		ccp_dm_free(&mask);
2060 
2061 	return ret;
2062 }
2063 
2064 static noinline_for_stack int
2065 ccp_run_passthru_nomap_cmd(struct ccp_cmd_queue *cmd_q,
2066 				      struct ccp_cmd *cmd)
2067 {
2068 	struct ccp_passthru_nomap_engine *pt = &cmd->u.passthru_nomap;
2069 	struct ccp_dm_workarea mask;
2070 	struct ccp_op op;
2071 	int ret;
2072 
2073 	if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1)))
2074 		return -EINVAL;
2075 
2076 	if (!pt->src_dma || !pt->dst_dma)
2077 		return -EINVAL;
2078 
2079 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
2080 		if (pt->mask_len != CCP_PASSTHRU_MASKSIZE)
2081 			return -EINVAL;
2082 		if (!pt->mask)
2083 			return -EINVAL;
2084 	}
2085 
2086 	BUILD_BUG_ON(CCP_PASSTHRU_SB_COUNT != 1);
2087 
2088 	memset(&op, 0, sizeof(op));
2089 	op.cmd_q = cmd_q;
2090 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
2091 
2092 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
2093 		/* Load the mask */
2094 		op.sb_key = cmd_q->sb_key;
2095 
2096 		mask.length = pt->mask_len;
2097 		mask.dma.address = pt->mask;
2098 		mask.dma.length = pt->mask_len;
2099 
2100 		ret = ccp_copy_to_sb(cmd_q, &mask, op.jobid, op.sb_key,
2101 				     CCP_PASSTHRU_BYTESWAP_NOOP);
2102 		if (ret) {
2103 			cmd->engine_error = cmd_q->cmd_error;
2104 			return ret;
2105 		}
2106 	}
2107 
2108 	/* Send data to the CCP Passthru engine */
2109 	op.eom = 1;
2110 	op.soc = 1;
2111 
2112 	op.src.type = CCP_MEMTYPE_SYSTEM;
2113 	op.src.u.dma.address = pt->src_dma;
2114 	op.src.u.dma.offset = 0;
2115 	op.src.u.dma.length = pt->src_len;
2116 
2117 	op.dst.type = CCP_MEMTYPE_SYSTEM;
2118 	op.dst.u.dma.address = pt->dst_dma;
2119 	op.dst.u.dma.offset = 0;
2120 	op.dst.u.dma.length = pt->src_len;
2121 
2122 	ret = cmd_q->ccp->vdata->perform->passthru(&op);
2123 	if (ret)
2124 		cmd->engine_error = cmd_q->cmd_error;
2125 
2126 	return ret;
2127 }
2128 
2129 static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
2130 {
2131 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
2132 	struct ccp_dm_workarea src, dst;
2133 	struct ccp_op op;
2134 	int ret;
2135 	u8 *save;
2136 
2137 	if (!ecc->u.mm.operand_1 ||
2138 	    (ecc->u.mm.operand_1_len > CCP_ECC_MODULUS_BYTES))
2139 		return -EINVAL;
2140 
2141 	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT)
2142 		if (!ecc->u.mm.operand_2 ||
2143 		    (ecc->u.mm.operand_2_len > CCP_ECC_MODULUS_BYTES))
2144 			return -EINVAL;
2145 
2146 	if (!ecc->u.mm.result ||
2147 	    (ecc->u.mm.result_len < CCP_ECC_MODULUS_BYTES))
2148 		return -EINVAL;
2149 
2150 	memset(&op, 0, sizeof(op));
2151 	op.cmd_q = cmd_q;
2152 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
2153 
2154 	/* Concatenate the modulus and the operands. Both the modulus and
2155 	 * the operands must be in little endian format.  Since the input
2156 	 * is in big endian format it must be converted and placed in a
2157 	 * fixed length buffer.
2158 	 */
2159 	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
2160 				   DMA_TO_DEVICE);
2161 	if (ret)
2162 		return ret;
2163 
2164 	/* Save the workarea address since it is updated in order to perform
2165 	 * the concatenation
2166 	 */
2167 	save = src.address;
2168 
2169 	/* Copy the ECC modulus */
2170 	ret = ccp_reverse_set_dm_area(&src, 0, ecc->mod, 0, ecc->mod_len);
2171 	if (ret)
2172 		goto e_src;
2173 	src.address += CCP_ECC_OPERAND_SIZE;
2174 
2175 	/* Copy the first operand */
2176 	ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.mm.operand_1, 0,
2177 				      ecc->u.mm.operand_1_len);
2178 	if (ret)
2179 		goto e_src;
2180 	src.address += CCP_ECC_OPERAND_SIZE;
2181 
2182 	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT) {
2183 		/* Copy the second operand */
2184 		ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.mm.operand_2, 0,
2185 					      ecc->u.mm.operand_2_len);
2186 		if (ret)
2187 			goto e_src;
2188 		src.address += CCP_ECC_OPERAND_SIZE;
2189 	}
2190 
2191 	/* Restore the workarea address */
2192 	src.address = save;
2193 
2194 	/* Prepare the output area for the operation */
2195 	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
2196 				   DMA_FROM_DEVICE);
2197 	if (ret)
2198 		goto e_src;
2199 
2200 	op.soc = 1;
2201 	op.src.u.dma.address = src.dma.address;
2202 	op.src.u.dma.offset = 0;
2203 	op.src.u.dma.length = src.length;
2204 	op.dst.u.dma.address = dst.dma.address;
2205 	op.dst.u.dma.offset = 0;
2206 	op.dst.u.dma.length = dst.length;
2207 
2208 	op.u.ecc.function = cmd->u.ecc.function;
2209 
2210 	ret = cmd_q->ccp->vdata->perform->ecc(&op);
2211 	if (ret) {
2212 		cmd->engine_error = cmd_q->cmd_error;
2213 		goto e_dst;
2214 	}
2215 
2216 	ecc->ecc_result = le16_to_cpup(
2217 		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
2218 	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
2219 		ret = -EIO;
2220 		goto e_dst;
2221 	}
2222 
2223 	/* Save the ECC result */
2224 	ccp_reverse_get_dm_area(&dst, 0, ecc->u.mm.result, 0,
2225 				CCP_ECC_MODULUS_BYTES);
2226 
2227 e_dst:
2228 	ccp_dm_free(&dst);
2229 
2230 e_src:
2231 	ccp_dm_free(&src);
2232 
2233 	return ret;
2234 }
2235 
2236 static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
2237 {
2238 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
2239 	struct ccp_dm_workarea src, dst;
2240 	struct ccp_op op;
2241 	int ret;
2242 	u8 *save;
2243 
2244 	if (!ecc->u.pm.point_1.x ||
2245 	    (ecc->u.pm.point_1.x_len > CCP_ECC_MODULUS_BYTES) ||
2246 	    !ecc->u.pm.point_1.y ||
2247 	    (ecc->u.pm.point_1.y_len > CCP_ECC_MODULUS_BYTES))
2248 		return -EINVAL;
2249 
2250 	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
2251 		if (!ecc->u.pm.point_2.x ||
2252 		    (ecc->u.pm.point_2.x_len > CCP_ECC_MODULUS_BYTES) ||
2253 		    !ecc->u.pm.point_2.y ||
2254 		    (ecc->u.pm.point_2.y_len > CCP_ECC_MODULUS_BYTES))
2255 			return -EINVAL;
2256 	} else {
2257 		if (!ecc->u.pm.domain_a ||
2258 		    (ecc->u.pm.domain_a_len > CCP_ECC_MODULUS_BYTES))
2259 			return -EINVAL;
2260 
2261 		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT)
2262 			if (!ecc->u.pm.scalar ||
2263 			    (ecc->u.pm.scalar_len > CCP_ECC_MODULUS_BYTES))
2264 				return -EINVAL;
2265 	}
2266 
2267 	if (!ecc->u.pm.result.x ||
2268 	    (ecc->u.pm.result.x_len < CCP_ECC_MODULUS_BYTES) ||
2269 	    !ecc->u.pm.result.y ||
2270 	    (ecc->u.pm.result.y_len < CCP_ECC_MODULUS_BYTES))
2271 		return -EINVAL;
2272 
2273 	memset(&op, 0, sizeof(op));
2274 	op.cmd_q = cmd_q;
2275 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
2276 
2277 	/* Concatenate the modulus and the operands. Both the modulus and
2278 	 * the operands must be in little endian format.  Since the input
2279 	 * is in big endian format it must be converted and placed in a
2280 	 * fixed length buffer.
2281 	 */
2282 	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
2283 				   DMA_TO_DEVICE);
2284 	if (ret)
2285 		return ret;
2286 
2287 	/* Save the workarea address since it is updated in order to perform
2288 	 * the concatenation
2289 	 */
2290 	save = src.address;
2291 
2292 	/* Copy the ECC modulus */
2293 	ret = ccp_reverse_set_dm_area(&src, 0, ecc->mod, 0, ecc->mod_len);
2294 	if (ret)
2295 		goto e_src;
2296 	src.address += CCP_ECC_OPERAND_SIZE;
2297 
2298 	/* Copy the first point X and Y coordinate */
2299 	ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_1.x, 0,
2300 				      ecc->u.pm.point_1.x_len);
2301 	if (ret)
2302 		goto e_src;
2303 	src.address += CCP_ECC_OPERAND_SIZE;
2304 	ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_1.y, 0,
2305 				      ecc->u.pm.point_1.y_len);
2306 	if (ret)
2307 		goto e_src;
2308 	src.address += CCP_ECC_OPERAND_SIZE;
2309 
2310 	/* Set the first point Z coordinate to 1 */
2311 	*src.address = 0x01;
2312 	src.address += CCP_ECC_OPERAND_SIZE;
2313 
2314 	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
2315 		/* Copy the second point X and Y coordinate */
2316 		ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_2.x, 0,
2317 					      ecc->u.pm.point_2.x_len);
2318 		if (ret)
2319 			goto e_src;
2320 		src.address += CCP_ECC_OPERAND_SIZE;
2321 		ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_2.y, 0,
2322 					      ecc->u.pm.point_2.y_len);
2323 		if (ret)
2324 			goto e_src;
2325 		src.address += CCP_ECC_OPERAND_SIZE;
2326 
2327 		/* Set the second point Z coordinate to 1 */
2328 		*src.address = 0x01;
2329 		src.address += CCP_ECC_OPERAND_SIZE;
2330 	} else {
2331 		/* Copy the Domain "a" parameter */
2332 		ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.domain_a, 0,
2333 					      ecc->u.pm.domain_a_len);
2334 		if (ret)
2335 			goto e_src;
2336 		src.address += CCP_ECC_OPERAND_SIZE;
2337 
2338 		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT) {
2339 			/* Copy the scalar value */
2340 			ret = ccp_reverse_set_dm_area(&src, 0,
2341 						      ecc->u.pm.scalar, 0,
2342 						      ecc->u.pm.scalar_len);
2343 			if (ret)
2344 				goto e_src;
2345 			src.address += CCP_ECC_OPERAND_SIZE;
2346 		}
2347 	}
2348 
2349 	/* Restore the workarea address */
2350 	src.address = save;
2351 
2352 	/* Prepare the output area for the operation */
2353 	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
2354 				   DMA_FROM_DEVICE);
2355 	if (ret)
2356 		goto e_src;
2357 
2358 	op.soc = 1;
2359 	op.src.u.dma.address = src.dma.address;
2360 	op.src.u.dma.offset = 0;
2361 	op.src.u.dma.length = src.length;
2362 	op.dst.u.dma.address = dst.dma.address;
2363 	op.dst.u.dma.offset = 0;
2364 	op.dst.u.dma.length = dst.length;
2365 
2366 	op.u.ecc.function = cmd->u.ecc.function;
2367 
2368 	ret = cmd_q->ccp->vdata->perform->ecc(&op);
2369 	if (ret) {
2370 		cmd->engine_error = cmd_q->cmd_error;
2371 		goto e_dst;
2372 	}
2373 
2374 	ecc->ecc_result = le16_to_cpup(
2375 		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
2376 	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
2377 		ret = -EIO;
2378 		goto e_dst;
2379 	}
2380 
2381 	/* Save the workarea address since it is updated as we walk through
2382 	 * to copy the point math result
2383 	 */
2384 	save = dst.address;
2385 
2386 	/* Save the ECC result X and Y coordinates */
2387 	ccp_reverse_get_dm_area(&dst, 0, ecc->u.pm.result.x, 0,
2388 				CCP_ECC_MODULUS_BYTES);
2389 	dst.address += CCP_ECC_OUTPUT_SIZE;
2390 	ccp_reverse_get_dm_area(&dst, 0, ecc->u.pm.result.y, 0,
2391 				CCP_ECC_MODULUS_BYTES);
2392 	dst.address += CCP_ECC_OUTPUT_SIZE;
2393 
2394 	/* Restore the workarea address */
2395 	dst.address = save;
2396 
2397 e_dst:
2398 	ccp_dm_free(&dst);
2399 
2400 e_src:
2401 	ccp_dm_free(&src);
2402 
2403 	return ret;
2404 }
2405 
2406 static noinline_for_stack int
2407 ccp_run_ecc_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
2408 {
2409 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
2410 
2411 	ecc->ecc_result = 0;
2412 
2413 	if (!ecc->mod ||
2414 	    (ecc->mod_len > CCP_ECC_MODULUS_BYTES))
2415 		return -EINVAL;
2416 
2417 	switch (ecc->function) {
2418 	case CCP_ECC_FUNCTION_MMUL_384BIT:
2419 	case CCP_ECC_FUNCTION_MADD_384BIT:
2420 	case CCP_ECC_FUNCTION_MINV_384BIT:
2421 		return ccp_run_ecc_mm_cmd(cmd_q, cmd);
2422 
2423 	case CCP_ECC_FUNCTION_PADD_384BIT:
2424 	case CCP_ECC_FUNCTION_PMUL_384BIT:
2425 	case CCP_ECC_FUNCTION_PDBL_384BIT:
2426 		return ccp_run_ecc_pm_cmd(cmd_q, cmd);
2427 
2428 	default:
2429 		return -EINVAL;
2430 	}
2431 }
2432 
2433 int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
2434 {
2435 	int ret;
2436 
2437 	cmd->engine_error = 0;
2438 	cmd_q->cmd_error = 0;
2439 	cmd_q->int_rcvd = 0;
2440 	cmd_q->free_slots = cmd_q->ccp->vdata->perform->get_free_slots(cmd_q);
2441 
2442 	switch (cmd->engine) {
2443 	case CCP_ENGINE_AES:
2444 		switch (cmd->u.aes.mode) {
2445 		case CCP_AES_MODE_CMAC:
2446 			ret = ccp_run_aes_cmac_cmd(cmd_q, cmd);
2447 			break;
2448 		case CCP_AES_MODE_GCM:
2449 			ret = ccp_run_aes_gcm_cmd(cmd_q, cmd);
2450 			break;
2451 		default:
2452 			ret = ccp_run_aes_cmd(cmd_q, cmd);
2453 			break;
2454 		}
2455 		break;
2456 	case CCP_ENGINE_XTS_AES_128:
2457 		ret = ccp_run_xts_aes_cmd(cmd_q, cmd);
2458 		break;
2459 	case CCP_ENGINE_DES3:
2460 		ret = ccp_run_des3_cmd(cmd_q, cmd);
2461 		break;
2462 	case CCP_ENGINE_SHA:
2463 		ret = ccp_run_sha_cmd(cmd_q, cmd);
2464 		break;
2465 	case CCP_ENGINE_RSA:
2466 		ret = ccp_run_rsa_cmd(cmd_q, cmd);
2467 		break;
2468 	case CCP_ENGINE_PASSTHRU:
2469 		if (cmd->flags & CCP_CMD_PASSTHRU_NO_DMA_MAP)
2470 			ret = ccp_run_passthru_nomap_cmd(cmd_q, cmd);
2471 		else
2472 			ret = ccp_run_passthru_cmd(cmd_q, cmd);
2473 		break;
2474 	case CCP_ENGINE_ECC:
2475 		ret = ccp_run_ecc_cmd(cmd_q, cmd);
2476 		break;
2477 	default:
2478 		ret = -EINVAL;
2479 	}
2480 
2481 	return ret;
2482 }
2483