xref: /openbmc/linux/drivers/crypto/ccp/ccp-ops.c (revision 4c5a116a)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * AMD Cryptographic Coprocessor (CCP) driver
4  *
5  * Copyright (C) 2013-2019 Advanced Micro Devices, Inc.
6  *
7  * Author: Tom Lendacky <thomas.lendacky@amd.com>
8  * Author: Gary R Hook <gary.hook@amd.com>
9  */
10 
11 #include <linux/module.h>
12 #include <linux/kernel.h>
13 #include <linux/interrupt.h>
14 #include <crypto/scatterwalk.h>
15 #include <crypto/des.h>
16 #include <linux/ccp.h>
17 
18 #include "ccp-dev.h"
19 
20 /* SHA initial context values */
21 static const __be32 ccp_sha1_init[SHA1_DIGEST_SIZE / sizeof(__be32)] = {
22 	cpu_to_be32(SHA1_H0), cpu_to_be32(SHA1_H1),
23 	cpu_to_be32(SHA1_H2), cpu_to_be32(SHA1_H3),
24 	cpu_to_be32(SHA1_H4),
25 };
26 
27 static const __be32 ccp_sha224_init[SHA256_DIGEST_SIZE / sizeof(__be32)] = {
28 	cpu_to_be32(SHA224_H0), cpu_to_be32(SHA224_H1),
29 	cpu_to_be32(SHA224_H2), cpu_to_be32(SHA224_H3),
30 	cpu_to_be32(SHA224_H4), cpu_to_be32(SHA224_H5),
31 	cpu_to_be32(SHA224_H6), cpu_to_be32(SHA224_H7),
32 };
33 
34 static const __be32 ccp_sha256_init[SHA256_DIGEST_SIZE / sizeof(__be32)] = {
35 	cpu_to_be32(SHA256_H0), cpu_to_be32(SHA256_H1),
36 	cpu_to_be32(SHA256_H2), cpu_to_be32(SHA256_H3),
37 	cpu_to_be32(SHA256_H4), cpu_to_be32(SHA256_H5),
38 	cpu_to_be32(SHA256_H6), cpu_to_be32(SHA256_H7),
39 };
40 
41 static const __be64 ccp_sha384_init[SHA512_DIGEST_SIZE / sizeof(__be64)] = {
42 	cpu_to_be64(SHA384_H0), cpu_to_be64(SHA384_H1),
43 	cpu_to_be64(SHA384_H2), cpu_to_be64(SHA384_H3),
44 	cpu_to_be64(SHA384_H4), cpu_to_be64(SHA384_H5),
45 	cpu_to_be64(SHA384_H6), cpu_to_be64(SHA384_H7),
46 };
47 
48 static const __be64 ccp_sha512_init[SHA512_DIGEST_SIZE / sizeof(__be64)] = {
49 	cpu_to_be64(SHA512_H0), cpu_to_be64(SHA512_H1),
50 	cpu_to_be64(SHA512_H2), cpu_to_be64(SHA512_H3),
51 	cpu_to_be64(SHA512_H4), cpu_to_be64(SHA512_H5),
52 	cpu_to_be64(SHA512_H6), cpu_to_be64(SHA512_H7),
53 };
54 
55 #define	CCP_NEW_JOBID(ccp)	((ccp->vdata->version == CCP_VERSION(3, 0)) ? \
56 					ccp_gen_jobid(ccp) : 0)
57 
58 static u32 ccp_gen_jobid(struct ccp_device *ccp)
59 {
60 	return atomic_inc_return(&ccp->current_id) & CCP_JOBID_MASK;
61 }
62 
63 static void ccp_sg_free(struct ccp_sg_workarea *wa)
64 {
65 	if (wa->dma_count)
66 		dma_unmap_sg(wa->dma_dev, wa->dma_sg_head, wa->nents, wa->dma_dir);
67 
68 	wa->dma_count = 0;
69 }
70 
71 static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev,
72 				struct scatterlist *sg, u64 len,
73 				enum dma_data_direction dma_dir)
74 {
75 	memset(wa, 0, sizeof(*wa));
76 
77 	wa->sg = sg;
78 	if (!sg)
79 		return 0;
80 
81 	wa->nents = sg_nents_for_len(sg, len);
82 	if (wa->nents < 0)
83 		return wa->nents;
84 
85 	wa->bytes_left = len;
86 	wa->sg_used = 0;
87 
88 	if (len == 0)
89 		return 0;
90 
91 	if (dma_dir == DMA_NONE)
92 		return 0;
93 
94 	wa->dma_sg = sg;
95 	wa->dma_sg_head = sg;
96 	wa->dma_dev = dev;
97 	wa->dma_dir = dma_dir;
98 	wa->dma_count = dma_map_sg(dev, sg, wa->nents, dma_dir);
99 	if (!wa->dma_count)
100 		return -ENOMEM;
101 
102 	return 0;
103 }
104 
105 static void ccp_update_sg_workarea(struct ccp_sg_workarea *wa, unsigned int len)
106 {
107 	unsigned int nbytes = min_t(u64, len, wa->bytes_left);
108 	unsigned int sg_combined_len = 0;
109 
110 	if (!wa->sg)
111 		return;
112 
113 	wa->sg_used += nbytes;
114 	wa->bytes_left -= nbytes;
115 	if (wa->sg_used == sg_dma_len(wa->dma_sg)) {
116 		/* Advance to the next DMA scatterlist entry */
117 		wa->dma_sg = sg_next(wa->dma_sg);
118 
119 		/* In the case that the DMA mapped scatterlist has entries
120 		 * that have been merged, the non-DMA mapped scatterlist
121 		 * must be advanced multiple times for each merged entry.
122 		 * This ensures that the current non-DMA mapped entry
123 		 * corresponds to the current DMA mapped entry.
124 		 */
125 		do {
126 			sg_combined_len += wa->sg->length;
127 			wa->sg = sg_next(wa->sg);
128 		} while (wa->sg_used > sg_combined_len);
129 
130 		wa->sg_used = 0;
131 	}
132 }
133 
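/* Illustrative sketch (not compiled): the scatterlist workarea lifecycle
 * used throughout this file. The helper name is hypothetical; the calls
 * are the static helpers defined above. A chunk never crosses a DMA-mapped
 * entry boundary, mirroring what ccp_prepare_data() does below.
 */
#if 0
static int example_sg_workarea_walk(struct ccp_cmd_queue *cmd_q,
				    struct scatterlist *sg, u64 len)
{
	struct ccp_sg_workarea wa;
	int ret;

	/* Map the scatterlist for DMA and record how much data remains */
	ret = ccp_init_sg_workarea(&wa, cmd_q->ccp->dev, sg, len,
				   DMA_TO_DEVICE);
	if (ret)
		return ret;

	while (wa.bytes_left) {
		/* Limit each chunk to what is left in the current entry */
		unsigned int chunk = min_t(u64, wa.bytes_left,
					   sg_dma_len(wa.dma_sg) - wa.sg_used);

		/* ...submit a CCP operation covering 'chunk' bytes here... */
		ccp_update_sg_workarea(&wa, chunk);
	}

	ccp_sg_free(&wa);
	return 0;
}
#endif
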
134 static void ccp_dm_free(struct ccp_dm_workarea *wa)
135 {
136 	if (wa->length <= CCP_DMAPOOL_MAX_SIZE) {
137 		if (wa->address)
138 			dma_pool_free(wa->dma_pool, wa->address,
139 				      wa->dma.address);
140 	} else {
141 		if (wa->dma.address)
142 			dma_unmap_single(wa->dev, wa->dma.address, wa->length,
143 					 wa->dma.dir);
144 		kfree(wa->address);
145 	}
146 
147 	wa->address = NULL;
148 	wa->dma.address = 0;
149 }
150 
151 static int ccp_init_dm_workarea(struct ccp_dm_workarea *wa,
152 				struct ccp_cmd_queue *cmd_q,
153 				unsigned int len,
154 				enum dma_data_direction dir)
155 {
156 	memset(wa, 0, sizeof(*wa));
157 
158 	if (!len)
159 		return 0;
160 
161 	wa->dev = cmd_q->ccp->dev;
162 	wa->length = len;
163 
164 	if (len <= CCP_DMAPOOL_MAX_SIZE) {
165 		wa->dma_pool = cmd_q->dma_pool;
166 
167 		wa->address = dma_pool_zalloc(wa->dma_pool, GFP_KERNEL,
168 					     &wa->dma.address);
169 		if (!wa->address)
170 			return -ENOMEM;
171 
172 		wa->dma.length = CCP_DMAPOOL_MAX_SIZE;
173 
174 	} else {
175 		wa->address = kzalloc(len, GFP_KERNEL);
176 		if (!wa->address)
177 			return -ENOMEM;
178 
179 		wa->dma.address = dma_map_single(wa->dev, wa->address, len,
180 						 dir);
181 		if (dma_mapping_error(wa->dev, wa->dma.address))
182 			return -ENOMEM;
183 
184 		wa->dma.length = len;
185 	}
186 	wa->dma.dir = dir;
187 
188 	return 0;
189 }
190 
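/* Note on the allocation policy above: requests up to CCP_DMAPOOL_MAX_SIZE
 * are served from the per-queue dma_pool and record the full pool block
 * size in dma.length, while larger requests fall back to kzalloc() plus
 * dma_map_single(). In both cases wa->length holds the requested size, and
 * that is the length the ccp_copy_to_from_sb() passthru copies use.
 */
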
191 static int ccp_set_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
192 			   struct scatterlist *sg, unsigned int sg_offset,
193 			   unsigned int len)
194 {
195 	WARN_ON(!wa->address);
196 
197 	if (len > (wa->length - wa_offset))
198 		return -EINVAL;
199 
200 	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
201 				 0);
202 	return 0;
203 }
204 
205 static void ccp_get_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
206 			    struct scatterlist *sg, unsigned int sg_offset,
207 			    unsigned int len)
208 {
209 	WARN_ON(!wa->address);
210 
211 	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
212 				 1);
213 }
214 
215 static int ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa,
216 				   unsigned int wa_offset,
217 				   struct scatterlist *sg,
218 				   unsigned int sg_offset,
219 				   unsigned int len)
220 {
221 	u8 *p, *q;
222 	int	rc;
223 
224 	rc = ccp_set_dm_area(wa, wa_offset, sg, sg_offset, len);
225 	if (rc)
226 		return rc;
227 
228 	p = wa->address + wa_offset;
229 	q = p + len - 1;
230 	while (p < q) {
231 		*p = *p ^ *q;
232 		*q = *p ^ *q;
233 		*p = *p ^ *q;
234 		p++;
235 		q--;
236 	}
237 	return 0;
238 }
239 
240 static void ccp_reverse_get_dm_area(struct ccp_dm_workarea *wa,
241 				    unsigned int wa_offset,
242 				    struct scatterlist *sg,
243 				    unsigned int sg_offset,
244 				    unsigned int len)
245 {
246 	u8 *p, *q;
247 
248 	p = wa->address + wa_offset;
249 	q = p + len - 1;
250 	while (p < q) {
251 		*p = *p ^ *q;
252 		*q = *p ^ *q;
253 		*p = *p ^ *q;
254 		p++;
255 		q--;
256 	}
257 
258 	ccp_get_dm_area(wa, wa_offset, sg, sg_offset, len);
259 }
260 
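/* Worked example for the reversal helpers above: the RSA engine expects
 * little-endian operands while callers supply big-endian byte strings.
 * For a 4-byte operand {0x01, 0x02, 0x03, 0x04} in the source scatterlist,
 * ccp_reverse_set_dm_area() stores {0x04, 0x03, 0x02, 0x01} in the
 * workarea, and ccp_reverse_get_dm_area() undoes the swap when copying a
 * result back out. The in-place XOR swap is safe because the loop stops
 * before p and q can alias.
 */
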
261 static void ccp_free_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q)
262 {
263 	ccp_dm_free(&data->dm_wa);
264 	ccp_sg_free(&data->sg_wa);
265 }
266 
267 static int ccp_init_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q,
268 			 struct scatterlist *sg, u64 sg_len,
269 			 unsigned int dm_len,
270 			 enum dma_data_direction dir)
271 {
272 	int ret;
273 
274 	memset(data, 0, sizeof(*data));
275 
276 	ret = ccp_init_sg_workarea(&data->sg_wa, cmd_q->ccp->dev, sg, sg_len,
277 				   dir);
278 	if (ret)
279 		goto e_err;
280 
281 	ret = ccp_init_dm_workarea(&data->dm_wa, cmd_q, dm_len, dir);
282 	if (ret)
283 		goto e_err;
284 
285 	return 0;
286 
287 e_err:
288 	ccp_free_data(data, cmd_q);
289 
290 	return ret;
291 }
292 
293 static unsigned int ccp_queue_buf(struct ccp_data *data, unsigned int from)
294 {
295 	struct ccp_sg_workarea *sg_wa = &data->sg_wa;
296 	struct ccp_dm_workarea *dm_wa = &data->dm_wa;
297 	unsigned int buf_count, nbytes;
298 
299 	/* Clear the buffer if setting it */
300 	if (!from)
301 		memset(dm_wa->address, 0, dm_wa->length);
302 
303 	if (!sg_wa->sg)
304 		return 0;
305 
306 	/* Perform the copy operation
307 	 *   nbytes will always be <= UINT_MAX because dm_wa->length is
308 	 *   an unsigned int
309 	 */
310 	nbytes = min_t(u64, sg_wa->bytes_left, dm_wa->length);
311 	scatterwalk_map_and_copy(dm_wa->address, sg_wa->sg, sg_wa->sg_used,
312 				 nbytes, from);
313 
314 	/* Update the structures and generate the count */
315 	buf_count = 0;
316 	while (sg_wa->bytes_left && (buf_count < dm_wa->length)) {
317 		nbytes = min(sg_dma_len(sg_wa->dma_sg) - sg_wa->sg_used,
318 			     dm_wa->length - buf_count);
319 		nbytes = min_t(u64, sg_wa->bytes_left, nbytes);
320 
321 		buf_count += nbytes;
322 		ccp_update_sg_workarea(sg_wa, nbytes);
323 	}
324 
325 	return buf_count;
326 }
327 
328 static unsigned int ccp_fill_queue_buf(struct ccp_data *data)
329 {
330 	return ccp_queue_buf(data, 0);
331 }
332 
333 static unsigned int ccp_empty_queue_buf(struct ccp_data *data)
334 {
335 	return ccp_queue_buf(data, 1);
336 }
337 
338 static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst,
339 			     struct ccp_op *op, unsigned int block_size,
340 			     bool blocksize_op)
341 {
342 	unsigned int sg_src_len, sg_dst_len, op_len;
343 
344 	/* The CCP can only DMA from/to a single source and a single destination
345 	 * address per operation. This requires that we find the smallest DMA
346 	 * area between the source and destination. The resulting len values
347 	 * will always be <= UINT_MAX because the dma length is an unsigned int.
348 	 */
349 	sg_src_len = sg_dma_len(src->sg_wa.dma_sg) - src->sg_wa.sg_used;
350 	sg_src_len = min_t(u64, src->sg_wa.bytes_left, sg_src_len);
351 
352 	if (dst) {
353 		sg_dst_len = sg_dma_len(dst->sg_wa.dma_sg) - dst->sg_wa.sg_used;
354 		sg_dst_len = min_t(u64, src->sg_wa.bytes_left, sg_dst_len);
355 		op_len = min(sg_src_len, sg_dst_len);
356 	} else {
357 		op_len = sg_src_len;
358 	}
359 
360 	/* The data operation length will be the smaller of the sg room
361 	 * remaining for the source or the destination, but never less
362 	 * than block_size
363 	 */
364 	op_len = max(op_len, block_size);
365 
366 	/* Unless we have to buffer data, there's no reason to wait */
367 	op->soc = 0;
368 
369 	if (sg_src_len < block_size) {
370 		/* Not enough data in the sg element, so it
371 		 * needs to be buffered into a blocksize chunk
372 		 */
373 		int cp_len = ccp_fill_queue_buf(src);
374 
375 		op->soc = 1;
376 		op->src.u.dma.address = src->dm_wa.dma.address;
377 		op->src.u.dma.offset = 0;
378 		op->src.u.dma.length = (blocksize_op) ? block_size : cp_len;
379 	} else {
380 		/* Enough data in the sg element, but we need to
381 		 * adjust for any previously copied data
382 		 */
383 		op->src.u.dma.address = sg_dma_address(src->sg_wa.dma_sg);
384 		op->src.u.dma.offset = src->sg_wa.sg_used;
385 		op->src.u.dma.length = op_len & ~(block_size - 1);
386 
387 		ccp_update_sg_workarea(&src->sg_wa, op->src.u.dma.length);
388 	}
389 
390 	if (dst) {
391 		if (sg_dst_len < block_size) {
392 			/* Not enough room in the sg element or we're on the
393 			 * last piece of data (when using padding), so the
394 			 * output needs to be buffered into a blocksize chunk
395 			 */
396 			op->soc = 1;
397 			op->dst.u.dma.address = dst->dm_wa.dma.address;
398 			op->dst.u.dma.offset = 0;
399 			op->dst.u.dma.length = op->src.u.dma.length;
400 		} else {
401 			/* Enough room in the sg element, but we need to
402 			 * adjust for any previously used area
403 			 */
404 			op->dst.u.dma.address = sg_dma_address(dst->sg_wa.dma_sg);
405 			op->dst.u.dma.offset = dst->sg_wa.sg_used;
406 			op->dst.u.dma.length = op->src.u.dma.length;
407 		}
408 	}
409 }
410 
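/* Worked example for the length selection above: with a 16-byte block_size,
 * 20 bytes left in the current source DMA entry and 48 bytes of room in the
 * destination entry, op_len starts as min(20, 48) = 20; since the source
 * entry holds at least a full block it is used directly and the DMA length
 * is rounded down to 20 & ~15 = 16 bytes. Had fewer than 16 bytes remained
 * in the source entry, they would instead have been staged into the
 * workarea buffer by ccp_fill_queue_buf() and submitted with op->soc set.
 */
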
411 static void ccp_process_data(struct ccp_data *src, struct ccp_data *dst,
412 			     struct ccp_op *op)
413 {
414 	op->init = 0;
415 
416 	if (dst) {
417 		if (op->dst.u.dma.address == dst->dm_wa.dma.address)
418 			ccp_empty_queue_buf(dst);
419 		else
420 			ccp_update_sg_workarea(&dst->sg_wa,
421 					       op->dst.u.dma.length);
422 	}
423 }
424 
425 static int ccp_copy_to_from_sb(struct ccp_cmd_queue *cmd_q,
426 			       struct ccp_dm_workarea *wa, u32 jobid, u32 sb,
427 			       u32 byte_swap, bool from)
428 {
429 	struct ccp_op op;
430 
431 	memset(&op, 0, sizeof(op));
432 
433 	op.cmd_q = cmd_q;
434 	op.jobid = jobid;
435 	op.eom = 1;
436 
437 	if (from) {
438 		op.soc = 1;
439 		op.src.type = CCP_MEMTYPE_SB;
440 		op.src.u.sb = sb;
441 		op.dst.type = CCP_MEMTYPE_SYSTEM;
442 		op.dst.u.dma.address = wa->dma.address;
443 		op.dst.u.dma.length = wa->length;
444 	} else {
445 		op.src.type = CCP_MEMTYPE_SYSTEM;
446 		op.src.u.dma.address = wa->dma.address;
447 		op.src.u.dma.length = wa->length;
448 		op.dst.type = CCP_MEMTYPE_SB;
449 		op.dst.u.sb = sb;
450 	}
451 
452 	op.u.passthru.byte_swap = byte_swap;
453 
454 	return cmd_q->ccp->vdata->perform->passthru(&op);
455 }
456 
457 static int ccp_copy_to_sb(struct ccp_cmd_queue *cmd_q,
458 			  struct ccp_dm_workarea *wa, u32 jobid, u32 sb,
459 			  u32 byte_swap)
460 {
461 	return ccp_copy_to_from_sb(cmd_q, wa, jobid, sb, byte_swap, false);
462 }
463 
464 static int ccp_copy_from_sb(struct ccp_cmd_queue *cmd_q,
465 			    struct ccp_dm_workarea *wa, u32 jobid, u32 sb,
466 			    u32 byte_swap)
467 {
468 	return ccp_copy_to_from_sb(cmd_q, wa, jobid, sb, byte_swap, true);
469 }
470 
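/* Illustrative sketch (not compiled) of the usual storage-block pattern:
 * stage a big-endian key in a DMA workarea, right-aligned in a 32-byte
 * slot, then push it to the queue's reserved SB entry with a 256-bit
 * byteswap. The helper name is hypothetical.
 */
#if 0
static int example_sb_key_load(struct ccp_cmd_queue *cmd_q, u32 jobid,
			       struct scatterlist *key_sg,
			       unsigned int key_len)
{
	struct ccp_dm_workarea key;
	int ret;

	ret = ccp_init_dm_workarea(&key, cmd_q, CCP_SB_BYTES, DMA_TO_DEVICE);
	if (ret)
		return ret;

	ret = ccp_set_dm_area(&key, CCP_SB_BYTES - key_len, key_sg, 0,
			      key_len);
	if (!ret)
		ret = ccp_copy_to_sb(cmd_q, &key, jobid, cmd_q->sb_key,
				     CCP_PASSTHRU_BYTESWAP_256BIT);

	ccp_dm_free(&key);
	return ret;
}
#endif
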
471 static noinline_for_stack int
472 ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
473 {
474 	struct ccp_aes_engine *aes = &cmd->u.aes;
475 	struct ccp_dm_workarea key, ctx;
476 	struct ccp_data src;
477 	struct ccp_op op;
478 	unsigned int dm_offset;
479 	int ret;
480 
481 	if (!((aes->key_len == AES_KEYSIZE_128) ||
482 	      (aes->key_len == AES_KEYSIZE_192) ||
483 	      (aes->key_len == AES_KEYSIZE_256)))
484 		return -EINVAL;
485 
486 	if (aes->src_len & (AES_BLOCK_SIZE - 1))
487 		return -EINVAL;
488 
489 	if (aes->iv_len != AES_BLOCK_SIZE)
490 		return -EINVAL;
491 
492 	if (!aes->key || !aes->iv || !aes->src)
493 		return -EINVAL;
494 
495 	if (aes->cmac_final) {
496 		if (aes->cmac_key_len != AES_BLOCK_SIZE)
497 			return -EINVAL;
498 
499 		if (!aes->cmac_key)
500 			return -EINVAL;
501 	}
502 
503 	BUILD_BUG_ON(CCP_AES_KEY_SB_COUNT != 1);
504 	BUILD_BUG_ON(CCP_AES_CTX_SB_COUNT != 1);
505 
506 	ret = -EIO;
507 	memset(&op, 0, sizeof(op));
508 	op.cmd_q = cmd_q;
509 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
510 	op.sb_key = cmd_q->sb_key;
511 	op.sb_ctx = cmd_q->sb_ctx;
512 	op.init = 1;
513 	op.u.aes.type = aes->type;
514 	op.u.aes.mode = aes->mode;
515 	op.u.aes.action = aes->action;
516 
517 	/* All supported key sizes fit in a single (32-byte) SB entry
518 	 * and must be in little endian format. Use the 256-bit byte
519 	 * swap passthru option to convert from big endian to little
520 	 * endian.
521 	 */
522 	ret = ccp_init_dm_workarea(&key, cmd_q,
523 				   CCP_AES_KEY_SB_COUNT * CCP_SB_BYTES,
524 				   DMA_TO_DEVICE);
525 	if (ret)
526 		return ret;
527 
528 	dm_offset = CCP_SB_BYTES - aes->key_len;
529 	ret = ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
530 	if (ret)
531 		goto e_key;
532 	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
533 			     CCP_PASSTHRU_BYTESWAP_256BIT);
534 	if (ret) {
535 		cmd->engine_error = cmd_q->cmd_error;
536 		goto e_key;
537 	}
538 
539 	/* The AES context fits in a single (32-byte) SB entry and
540 	 * must be in little endian format. Use the 256-bit byte swap
541 	 * passthru option to convert from big endian to little endian.
542 	 */
543 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
544 				   CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES,
545 				   DMA_BIDIRECTIONAL);
546 	if (ret)
547 		goto e_key;
548 
549 	dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
550 	ret = ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
551 	if (ret)
552 		goto e_ctx;
553 	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
554 			     CCP_PASSTHRU_BYTESWAP_256BIT);
555 	if (ret) {
556 		cmd->engine_error = cmd_q->cmd_error;
557 		goto e_ctx;
558 	}
559 
560 	/* Send data to the CCP AES engine */
561 	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
562 			    AES_BLOCK_SIZE, DMA_TO_DEVICE);
563 	if (ret)
564 		goto e_ctx;
565 
566 	while (src.sg_wa.bytes_left) {
567 		ccp_prepare_data(&src, NULL, &op, AES_BLOCK_SIZE, true);
568 		if (aes->cmac_final && !src.sg_wa.bytes_left) {
569 			op.eom = 1;
570 
571 			/* Push the K1/K2 key to the CCP now */
572 			ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid,
573 					       op.sb_ctx,
574 					       CCP_PASSTHRU_BYTESWAP_256BIT);
575 			if (ret) {
576 				cmd->engine_error = cmd_q->cmd_error;
577 				goto e_src;
578 			}
579 
580 			ret = ccp_set_dm_area(&ctx, 0, aes->cmac_key, 0,
581 					      aes->cmac_key_len);
582 			if (ret)
583 				goto e_src;
584 			ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
585 					     CCP_PASSTHRU_BYTESWAP_256BIT);
586 			if (ret) {
587 				cmd->engine_error = cmd_q->cmd_error;
588 				goto e_src;
589 			}
590 		}
591 
592 		ret = cmd_q->ccp->vdata->perform->aes(&op);
593 		if (ret) {
594 			cmd->engine_error = cmd_q->cmd_error;
595 			goto e_src;
596 		}
597 
598 		ccp_process_data(&src, NULL, &op);
599 	}
600 
601 	/* Retrieve the AES context - convert from LE to BE using
602 	 * 32-byte (256-bit) byteswapping
603 	 */
604 	ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
605 			       CCP_PASSTHRU_BYTESWAP_256BIT);
606 	if (ret) {
607 		cmd->engine_error = cmd_q->cmd_error;
608 		goto e_src;
609 	}
610 
611 	/* ...but we only need AES_BLOCK_SIZE bytes */
612 	dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
613 	ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
614 
615 e_src:
616 	ccp_free_data(&src, cmd_q);
617 
618 e_ctx:
619 	ccp_dm_free(&ctx);
620 
621 e_key:
622 	ccp_dm_free(&key);
623 
624 	return ret;
625 }
626 
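/* Illustrative sketch (not compiled) of how a caller such as ccp-crypto
 * might describe a single-shot AES-128 CMAC request for the routine above;
 * field values are examples only, and submission would normally go through
 * ccp_enqueue_cmd() rather than calling this file directly. The helper
 * name is hypothetical.
 */
#if 0
static void example_build_cmac_cmd(struct ccp_cmd *cmd,
				   struct scatterlist *key_sg,
				   struct scatterlist *iv_sg,
				   struct scatterlist *src_sg,
				   struct scatterlist *subkey_sg,
				   unsigned int src_len)
{
	memset(cmd, 0, sizeof(*cmd));
	cmd->engine = CCP_ENGINE_AES;
	cmd->u.aes.type = CCP_AES_TYPE_128;
	cmd->u.aes.mode = CCP_AES_MODE_CMAC;
	cmd->u.aes.action = CCP_AES_ACTION_ENCRYPT;
	cmd->u.aes.key = key_sg;
	cmd->u.aes.key_len = AES_KEYSIZE_128;
	cmd->u.aes.iv = iv_sg;		/* running MAC, zeroed for the first pass */
	cmd->u.aes.iv_len = AES_BLOCK_SIZE;
	cmd->u.aes.src = src_sg;	/* length must be a block multiple */
	cmd->u.aes.src_len = src_len;
	cmd->u.aes.cmac_final = 1;
	cmd->u.aes.cmac_key = subkey_sg;	/* K1 or K2 subkey for the last block */
	cmd->u.aes.cmac_key_len = AES_BLOCK_SIZE;
}
#endif
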
627 static noinline_for_stack int
628 ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
629 {
630 	struct ccp_aes_engine *aes = &cmd->u.aes;
631 	struct ccp_dm_workarea key, ctx, final_wa, tag;
632 	struct ccp_data src, dst;
633 	struct ccp_data aad;
634 	struct ccp_op op;
635 	unsigned int dm_offset;
636 	unsigned int authsize;
637 	unsigned int jobid;
638 	unsigned int ilen;
639 	bool in_place = true; /* Default value */
640 	__be64 *final;
641 	int ret;
642 
643 	struct scatterlist *p_inp, sg_inp[2];
644 	struct scatterlist *p_tag, sg_tag[2];
645 	struct scatterlist *p_outp, sg_outp[2];
646 	struct scatterlist *p_aad;
647 
648 	if (!aes->iv)
649 		return -EINVAL;
650 
651 	if (!((aes->key_len == AES_KEYSIZE_128) ||
652 		(aes->key_len == AES_KEYSIZE_192) ||
653 		(aes->key_len == AES_KEYSIZE_256)))
654 		return -EINVAL;
655 
656 	if (!aes->key) /* Gotta have a key SGL */
657 		return -EINVAL;
658 
659 	/* Zero defaults to 16 bytes, the maximum size */
660 	authsize = aes->authsize ? aes->authsize : AES_BLOCK_SIZE;
661 	switch (authsize) {
662 	case 16:
663 	case 15:
664 	case 14:
665 	case 13:
666 	case 12:
667 	case 8:
668 	case 4:
669 		break;
670 	default:
671 		return -EINVAL;
672 	}
673 
674 	/* First, decompose the source buffer into AAD & PT and the
675 	 * destination buffer into AAD, CT & tag; for decryption, the
676 	 * input is instead decomposed into AAD, CT & tag.
677 	 * It is expected that the input and output SGs will be valid,
678 	 * even if the AAD and input lengths are 0.
679 	 */
680 	p_aad = aes->src;
681 	p_inp = scatterwalk_ffwd(sg_inp, aes->src, aes->aad_len);
682 	p_outp = scatterwalk_ffwd(sg_outp, aes->dst, aes->aad_len);
683 	if (aes->action == CCP_AES_ACTION_ENCRYPT) {
684 		ilen = aes->src_len;
685 		p_tag = scatterwalk_ffwd(sg_tag, p_outp, ilen);
686 	} else {
687 		/* Input length for decryption includes tag */
688 		ilen = aes->src_len - authsize;
689 		p_tag = scatterwalk_ffwd(sg_tag, p_inp, ilen);
690 	}
691 
692 	jobid = CCP_NEW_JOBID(cmd_q->ccp);
693 
694 	memset(&op, 0, sizeof(op));
695 	op.cmd_q = cmd_q;
696 	op.jobid = jobid;
697 	op.sb_key = cmd_q->sb_key; /* Pre-allocated */
698 	op.sb_ctx = cmd_q->sb_ctx; /* Pre-allocated */
699 	op.init = 1;
700 	op.u.aes.type = aes->type;
701 
702 	/* Copy the key to the LSB */
703 	ret = ccp_init_dm_workarea(&key, cmd_q,
704 				   CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES,
705 				   DMA_TO_DEVICE);
706 	if (ret)
707 		return ret;
708 
709 	dm_offset = CCP_SB_BYTES - aes->key_len;
710 	ret = ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
711 	if (ret)
712 		goto e_key;
713 	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
714 			     CCP_PASSTHRU_BYTESWAP_256BIT);
715 	if (ret) {
716 		cmd->engine_error = cmd_q->cmd_error;
717 		goto e_key;
718 	}
719 
720 	/* Copy the context (IV) to the LSB.
721 	 * There is an assumption here that the IV is 96 bits, followed by
722 	 * a 32-bit counter. If no IV is present, use a zeroed buffer.
723 	 */
724 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
725 				   CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES,
726 				   DMA_BIDIRECTIONAL);
727 	if (ret)
728 		goto e_key;
729 
730 	dm_offset = CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES - aes->iv_len;
731 	ret = ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
732 	if (ret)
733 		goto e_ctx;
734 
735 	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
736 			     CCP_PASSTHRU_BYTESWAP_256BIT);
737 	if (ret) {
738 		cmd->engine_error = cmd_q->cmd_error;
739 		goto e_ctx;
740 	}
741 
742 	op.init = 1;
743 	if (aes->aad_len > 0) {
744 		/* Step 1: Run a GHASH over the Additional Authenticated Data */
745 		ret = ccp_init_data(&aad, cmd_q, p_aad, aes->aad_len,
746 				    AES_BLOCK_SIZE,
747 				    DMA_TO_DEVICE);
748 		if (ret)
749 			goto e_ctx;
750 
751 		op.u.aes.mode = CCP_AES_MODE_GHASH;
752 		op.u.aes.action = CCP_AES_GHASHAAD;
753 
754 		while (aad.sg_wa.bytes_left) {
755 			ccp_prepare_data(&aad, NULL, &op, AES_BLOCK_SIZE, true);
756 
757 			ret = cmd_q->ccp->vdata->perform->aes(&op);
758 			if (ret) {
759 				cmd->engine_error = cmd_q->cmd_error;
760 				goto e_aad;
761 			}
762 
763 			ccp_process_data(&aad, NULL, &op);
764 			op.init = 0;
765 		}
766 	}
767 
768 	op.u.aes.mode = CCP_AES_MODE_GCTR;
769 	op.u.aes.action = aes->action;
770 
771 	if (ilen > 0) {
772 		/* Step 2: Run a GCTR over the plaintext */
773 		in_place = (sg_virt(p_inp) == sg_virt(p_outp)) ? true : false;
774 
775 		ret = ccp_init_data(&src, cmd_q, p_inp, ilen,
776 				    AES_BLOCK_SIZE,
777 				    in_place ? DMA_BIDIRECTIONAL
778 					     : DMA_TO_DEVICE);
779 		if (ret)
780 			goto e_ctx;
781 
782 		if (in_place) {
783 			dst = src;
784 		} else {
785 			ret = ccp_init_data(&dst, cmd_q, p_outp, ilen,
786 					    AES_BLOCK_SIZE, DMA_FROM_DEVICE);
787 			if (ret)
788 				goto e_src;
789 		}
790 
791 		op.soc = 0;
792 		op.eom = 0;
793 		op.init = 1;
794 		while (src.sg_wa.bytes_left) {
795 			ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true);
796 			if (!src.sg_wa.bytes_left) {
797 				unsigned int nbytes = ilen % AES_BLOCK_SIZE;
798 
799 				if (nbytes) {
800 					op.eom = 1;
801 					op.u.aes.size = (nbytes * 8) - 1;
802 				}
803 			}
804 
805 			ret = cmd_q->ccp->vdata->perform->aes(&op);
806 			if (ret) {
807 				cmd->engine_error = cmd_q->cmd_error;
808 				goto e_dst;
809 			}
810 
811 			ccp_process_data(&src, &dst, &op);
812 			op.init = 0;
813 		}
814 	}
815 
816 	/* Step 3: Update the IV portion of the context with the original IV */
817 	ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
818 			       CCP_PASSTHRU_BYTESWAP_256BIT);
819 	if (ret) {
820 		cmd->engine_error = cmd_q->cmd_error;
821 		goto e_dst;
822 	}
823 
824 	ret = ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
825 	if (ret)
826 		goto e_dst;
827 
828 	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
829 			     CCP_PASSTHRU_BYTESWAP_256BIT);
830 	if (ret) {
831 		cmd->engine_error = cmd_q->cmd_error;
832 		goto e_dst;
833 	}
834 
835 	/* Step 4: Concatenate the lengths of the AAD and source, and
836 	 * hash that 16-byte buffer.
837 	 */
838 	ret = ccp_init_dm_workarea(&final_wa, cmd_q, AES_BLOCK_SIZE,
839 				   DMA_BIDIRECTIONAL);
840 	if (ret)
841 		goto e_dst;
842 	final = (__be64 *)final_wa.address;
843 	final[0] = cpu_to_be64(aes->aad_len * 8);
844 	final[1] = cpu_to_be64(ilen * 8);
845 
846 	memset(&op, 0, sizeof(op));
847 	op.cmd_q = cmd_q;
848 	op.jobid = jobid;
849 	op.sb_key = cmd_q->sb_key; /* Pre-allocated */
850 	op.sb_ctx = cmd_q->sb_ctx; /* Pre-allocated */
851 	op.init = 1;
852 	op.u.aes.type = aes->type;
853 	op.u.aes.mode = CCP_AES_MODE_GHASH;
854 	op.u.aes.action = CCP_AES_GHASHFINAL;
855 	op.src.type = CCP_MEMTYPE_SYSTEM;
856 	op.src.u.dma.address = final_wa.dma.address;
857 	op.src.u.dma.length = AES_BLOCK_SIZE;
858 	op.dst.type = CCP_MEMTYPE_SYSTEM;
859 	op.dst.u.dma.address = final_wa.dma.address;
860 	op.dst.u.dma.length = AES_BLOCK_SIZE;
861 	op.eom = 1;
862 	op.u.aes.size = 0;
863 	ret = cmd_q->ccp->vdata->perform->aes(&op);
864 	if (ret)
865 		goto e_dst;
866 
867 	if (aes->action == CCP_AES_ACTION_ENCRYPT) {
868 		/* Put the ciphered tag after the ciphertext. */
869 		ccp_get_dm_area(&final_wa, 0, p_tag, 0, authsize);
870 	} else {
871 		/* Does this ciphered tag match the input? */
872 		ret = ccp_init_dm_workarea(&tag, cmd_q, authsize,
873 					   DMA_BIDIRECTIONAL);
874 		if (ret)
875 			goto e_tag;
876 		ret = ccp_set_dm_area(&tag, 0, p_tag, 0, authsize);
877 		if (ret)
878 			goto e_tag;
879 
880 		ret = crypto_memneq(tag.address, final_wa.address,
881 				    authsize) ? -EBADMSG : 0;
882 		ccp_dm_free(&tag);
883 	}
884 
885 e_tag:
886 	ccp_dm_free(&final_wa);
887 
888 e_dst:
889 	if (ilen > 0 && !in_place)
890 		ccp_free_data(&dst, cmd_q);
891 
892 e_src:
893 	if (ilen > 0)
894 		ccp_free_data(&src, cmd_q);
895 
896 e_aad:
897 	if (aes->aad_len)
898 		ccp_free_data(&aad, cmd_q);
899 
900 e_ctx:
901 	ccp_dm_free(&ctx);
902 
903 e_key:
904 	ccp_dm_free(&key);
905 
906 	return ret;
907 }
908 
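/* Worked example for the buffer decomposition at the top of the routine
 * above: for an encryption request with aad_len = 16, src_len = 32 and the
 * default 16-byte authsize, p_aad covers the first 16 bytes of src, p_inp
 * covers the 32 plaintext bytes that follow, and p_tag sits 32 bytes into
 * the forwarded destination so the computed tag lands right after the
 * ciphertext. For decryption, src_len includes the tag, so ilen becomes
 * src_len - authsize and p_tag points at the trailing authsize bytes of
 * the input instead.
 */
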
909 static noinline_for_stack int
910 ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
911 {
912 	struct ccp_aes_engine *aes = &cmd->u.aes;
913 	struct ccp_dm_workarea key, ctx;
914 	struct ccp_data src, dst;
915 	struct ccp_op op;
916 	unsigned int dm_offset;
917 	bool in_place = false;
918 	int ret;
919 
920 	if (!((aes->key_len == AES_KEYSIZE_128) ||
921 	      (aes->key_len == AES_KEYSIZE_192) ||
922 	      (aes->key_len == AES_KEYSIZE_256)))
923 		return -EINVAL;
924 
925 	if (((aes->mode == CCP_AES_MODE_ECB) ||
926 	     (aes->mode == CCP_AES_MODE_CBC)) &&
927 	    (aes->src_len & (AES_BLOCK_SIZE - 1)))
928 		return -EINVAL;
929 
930 	if (!aes->key || !aes->src || !aes->dst)
931 		return -EINVAL;
932 
933 	if (aes->mode != CCP_AES_MODE_ECB) {
934 		if (aes->iv_len != AES_BLOCK_SIZE)
935 			return -EINVAL;
936 
937 		if (!aes->iv)
938 			return -EINVAL;
939 	}
940 
941 	BUILD_BUG_ON(CCP_AES_KEY_SB_COUNT != 1);
942 	BUILD_BUG_ON(CCP_AES_CTX_SB_COUNT != 1);
943 
944 	ret = -EIO;
945 	memset(&op, 0, sizeof(op));
946 	op.cmd_q = cmd_q;
947 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
948 	op.sb_key = cmd_q->sb_key;
949 	op.sb_ctx = cmd_q->sb_ctx;
950 	op.init = (aes->mode == CCP_AES_MODE_ECB) ? 0 : 1;
951 	op.u.aes.type = aes->type;
952 	op.u.aes.mode = aes->mode;
953 	op.u.aes.action = aes->action;
954 
955 	/* All supported key sizes fit in a single (32-byte) SB entry
956 	 * and must be in little endian format. Use the 256-bit byte
957 	 * swap passthru option to convert from big endian to little
958 	 * endian.
959 	 */
960 	ret = ccp_init_dm_workarea(&key, cmd_q,
961 				   CCP_AES_KEY_SB_COUNT * CCP_SB_BYTES,
962 				   DMA_TO_DEVICE);
963 	if (ret)
964 		return ret;
965 
966 	dm_offset = CCP_SB_BYTES - aes->key_len;
967 	ret = ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
968 	if (ret)
969 		goto e_key;
970 	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
971 			     CCP_PASSTHRU_BYTESWAP_256BIT);
972 	if (ret) {
973 		cmd->engine_error = cmd_q->cmd_error;
974 		goto e_key;
975 	}
976 
977 	/* The AES context fits in a single (32-byte) SB entry and
978 	 * must be in little endian format. Use the 256-bit byte swap
979 	 * passthru option to convert from big endian to little endian.
980 	 */
981 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
982 				   CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES,
983 				   DMA_BIDIRECTIONAL);
984 	if (ret)
985 		goto e_key;
986 
987 	if (aes->mode != CCP_AES_MODE_ECB) {
988 		/* Load the AES context - convert to LE */
989 		dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
990 		ret = ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
991 		if (ret)
992 			goto e_ctx;
993 		ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
994 				     CCP_PASSTHRU_BYTESWAP_256BIT);
995 		if (ret) {
996 			cmd->engine_error = cmd_q->cmd_error;
997 			goto e_ctx;
998 		}
999 	}
1000 	switch (aes->mode) {
1001 	case CCP_AES_MODE_CFB: /* CFB128 only */
1002 	case CCP_AES_MODE_CTR:
1003 		op.u.aes.size = AES_BLOCK_SIZE * BITS_PER_BYTE - 1;
1004 		break;
1005 	default:
1006 		op.u.aes.size = 0;
1007 	}
1008 
1009 	/* Prepare the input and output data workareas. For in-place
1010 	 * operations we need to set the dma direction to BIDIRECTIONAL
1011 	 * and copy the src workarea to the dst workarea.
1012 	 */
1013 	if (sg_virt(aes->src) == sg_virt(aes->dst))
1014 		in_place = true;
1015 
1016 	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
1017 			    AES_BLOCK_SIZE,
1018 			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1019 	if (ret)
1020 		goto e_ctx;
1021 
1022 	if (in_place) {
1023 		dst = src;
1024 	} else {
1025 		ret = ccp_init_data(&dst, cmd_q, aes->dst, aes->src_len,
1026 				    AES_BLOCK_SIZE, DMA_FROM_DEVICE);
1027 		if (ret)
1028 			goto e_src;
1029 	}
1030 
1031 	/* Send data to the CCP AES engine */
1032 	while (src.sg_wa.bytes_left) {
1033 		ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true);
1034 		if (!src.sg_wa.bytes_left) {
1035 			op.eom = 1;
1036 
1037 			/* Since we don't retrieve the AES context in ECB
1038 			 * mode we have to wait for the operation to complete
1039 			 * on the last piece of data
1040 			 */
1041 			if (aes->mode == CCP_AES_MODE_ECB)
1042 				op.soc = 1;
1043 		}
1044 
1045 		ret = cmd_q->ccp->vdata->perform->aes(&op);
1046 		if (ret) {
1047 			cmd->engine_error = cmd_q->cmd_error;
1048 			goto e_dst;
1049 		}
1050 
1051 		ccp_process_data(&src, &dst, &op);
1052 	}
1053 
1054 	if (aes->mode != CCP_AES_MODE_ECB) {
1055 		/* Retrieve the AES context - convert from LE to BE using
1056 		 * 32-byte (256-bit) byteswapping
1057 		 */
1058 		ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1059 				       CCP_PASSTHRU_BYTESWAP_256BIT);
1060 		if (ret) {
1061 			cmd->engine_error = cmd_q->cmd_error;
1062 			goto e_dst;
1063 		}
1064 
1065 		/* ...but we only need AES_BLOCK_SIZE bytes */
1066 		dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
1067 		ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
1068 	}
1069 
1070 e_dst:
1071 	if (!in_place)
1072 		ccp_free_data(&dst, cmd_q);
1073 
1074 e_src:
1075 	ccp_free_data(&src, cmd_q);
1076 
1077 e_ctx:
1078 	ccp_dm_free(&ctx);
1079 
1080 e_key:
1081 	ccp_dm_free(&key);
1082 
1083 	return ret;
1084 }
1085 
1086 static noinline_for_stack int
1087 ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1088 {
1089 	struct ccp_xts_aes_engine *xts = &cmd->u.xts;
1090 	struct ccp_dm_workarea key, ctx;
1091 	struct ccp_data src, dst;
1092 	struct ccp_op op;
1093 	unsigned int unit_size, dm_offset;
1094 	bool in_place = false;
1095 	unsigned int sb_count;
1096 	enum ccp_aes_type aestype;
1097 	int ret;
1098 
1099 	switch (xts->unit_size) {
1100 	case CCP_XTS_AES_UNIT_SIZE_16:
1101 		unit_size = 16;
1102 		break;
1103 	case CCP_XTS_AES_UNIT_SIZE_512:
1104 		unit_size = 512;
1105 		break;
1106 	case CCP_XTS_AES_UNIT_SIZE_1024:
1107 		unit_size = 1024;
1108 		break;
1109 	case CCP_XTS_AES_UNIT_SIZE_2048:
1110 		unit_size = 2048;
1111 		break;
1112 	case CCP_XTS_AES_UNIT_SIZE_4096:
1113 		unit_size = 4096;
1114 		break;
1115 
1116 	default:
1117 		return -EINVAL;
1118 	}
1119 
1120 	if (xts->key_len == AES_KEYSIZE_128)
1121 		aestype = CCP_AES_TYPE_128;
1122 	else if (xts->key_len == AES_KEYSIZE_256)
1123 		aestype = CCP_AES_TYPE_256;
1124 	else
1125 		return -EINVAL;
1126 
1127 	if (!xts->final && (xts->src_len & (AES_BLOCK_SIZE - 1)))
1128 		return -EINVAL;
1129 
1130 	if (xts->iv_len != AES_BLOCK_SIZE)
1131 		return -EINVAL;
1132 
1133 	if (!xts->key || !xts->iv || !xts->src || !xts->dst)
1134 		return -EINVAL;
1135 
1136 	BUILD_BUG_ON(CCP_XTS_AES_KEY_SB_COUNT != 1);
1137 	BUILD_BUG_ON(CCP_XTS_AES_CTX_SB_COUNT != 1);
1138 
1139 	ret = -EIO;
1140 	memset(&op, 0, sizeof(op));
1141 	op.cmd_q = cmd_q;
1142 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
1143 	op.sb_key = cmd_q->sb_key;
1144 	op.sb_ctx = cmd_q->sb_ctx;
1145 	op.init = 1;
1146 	op.u.xts.type = aestype;
1147 	op.u.xts.action = xts->action;
1148 	op.u.xts.unit_size = xts->unit_size;
1149 
1150 	/* A version 3 device only supports 128-bit keys, which fit into a
1151 	 * single SB entry. A version 5 device uses a 512-bit vector, so two
1152 	 * SB entries.
1153 	 */
1154 	if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0))
1155 		sb_count = CCP_XTS_AES_KEY_SB_COUNT;
1156 	else
1157 		sb_count = CCP5_XTS_AES_KEY_SB_COUNT;
1158 	ret = ccp_init_dm_workarea(&key, cmd_q,
1159 				   sb_count * CCP_SB_BYTES,
1160 				   DMA_TO_DEVICE);
1161 	if (ret)
1162 		return ret;
1163 
1164 	if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0)) {
1165 		/* All supported key sizes must be in little endian format.
1166 		 * Use the 256-bit byte swap passthru option to convert from
1167 		 * big endian to little endian.
1168 		 */
1169 		dm_offset = CCP_SB_BYTES - AES_KEYSIZE_128;
1170 		ret = ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len);
1171 		if (ret)
1172 			goto e_key;
1173 		ret = ccp_set_dm_area(&key, 0, xts->key, xts->key_len, xts->key_len);
1174 		if (ret)
1175 			goto e_key;
1176 	} else {
1177 		/* Version 5 CCPs use a 512-bit space for the key: each portion
1178 		 * occupies 256 bits, or one entire slot, and is zero-padded.
1179 		 */
1180 		unsigned int pad;
1181 
1182 		dm_offset = CCP_SB_BYTES;
1183 		pad = dm_offset - xts->key_len;
1184 		ret = ccp_set_dm_area(&key, pad, xts->key, 0, xts->key_len);
1185 		if (ret)
1186 			goto e_key;
1187 		ret = ccp_set_dm_area(&key, dm_offset + pad, xts->key,
1188 				      xts->key_len, xts->key_len);
1189 		if (ret)
1190 			goto e_key;
1191 	}
1192 	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
1193 			     CCP_PASSTHRU_BYTESWAP_256BIT);
1194 	if (ret) {
1195 		cmd->engine_error = cmd_q->cmd_error;
1196 		goto e_key;
1197 	}
1198 
1199 	/* The AES context fits in a single (32-byte) SB entry and
1200 	 * for XTS is already in little endian format so no byte swapping
1201 	 * is needed.
1202 	 */
1203 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
1204 				   CCP_XTS_AES_CTX_SB_COUNT * CCP_SB_BYTES,
1205 				   DMA_BIDIRECTIONAL);
1206 	if (ret)
1207 		goto e_key;
1208 
1209 	ret = ccp_set_dm_area(&ctx, 0, xts->iv, 0, xts->iv_len);
1210 	if (ret)
1211 		goto e_ctx;
1212 	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1213 			     CCP_PASSTHRU_BYTESWAP_NOOP);
1214 	if (ret) {
1215 		cmd->engine_error = cmd_q->cmd_error;
1216 		goto e_ctx;
1217 	}
1218 
1219 	/* Prepare the input and output data workareas. For in-place
1220 	 * operations we need to set the dma direction to BIDIRECTIONAL
1221 	 * and copy the src workarea to the dst workarea.
1222 	 */
1223 	if (sg_virt(xts->src) == sg_virt(xts->dst))
1224 		in_place = true;
1225 
1226 	ret = ccp_init_data(&src, cmd_q, xts->src, xts->src_len,
1227 			    unit_size,
1228 			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1229 	if (ret)
1230 		goto e_ctx;
1231 
1232 	if (in_place) {
1233 		dst = src;
1234 	} else {
1235 		ret = ccp_init_data(&dst, cmd_q, xts->dst, xts->src_len,
1236 				    unit_size, DMA_FROM_DEVICE);
1237 		if (ret)
1238 			goto e_src;
1239 	}
1240 
1241 	/* Send data to the CCP AES engine */
1242 	while (src.sg_wa.bytes_left) {
1243 		ccp_prepare_data(&src, &dst, &op, unit_size, true);
1244 		if (!src.sg_wa.bytes_left)
1245 			op.eom = 1;
1246 
1247 		ret = cmd_q->ccp->vdata->perform->xts_aes(&op);
1248 		if (ret) {
1249 			cmd->engine_error = cmd_q->cmd_error;
1250 			goto e_dst;
1251 		}
1252 
1253 		ccp_process_data(&src, &dst, &op);
1254 	}
1255 
1256 	/* Retrieve the AES context - convert from LE to BE using
1257 	 * 32-byte (256-bit) byteswapping
1258 	 */
1259 	ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1260 			       CCP_PASSTHRU_BYTESWAP_256BIT);
1261 	if (ret) {
1262 		cmd->engine_error = cmd_q->cmd_error;
1263 		goto e_dst;
1264 	}
1265 
1266 	/* ...but we only need AES_BLOCK_SIZE bytes */
1267 	dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
1268 	ccp_get_dm_area(&ctx, dm_offset, xts->iv, 0, xts->iv_len);
1269 
1270 e_dst:
1271 	if (!in_place)
1272 		ccp_free_data(&dst, cmd_q);
1273 
1274 e_src:
1275 	ccp_free_data(&src, cmd_q);
1276 
1277 e_ctx:
1278 	ccp_dm_free(&ctx);
1279 
1280 e_key:
1281 	ccp_dm_free(&key);
1282 
1283 	return ret;
1284 }
1285 
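/* Worked example for the XTS key layout above: XTS uses two AES keys back
 * to back, and xts->key_len is the size of one of them. On a version 3
 * device (AES-128 only) the first 16-byte half is written at offset 16 and
 * the second at offset 0, filling a single 32-byte SB entry. On a version
 * 5 device each half gets its own 32-byte slot: a 256-bit half fills its
 * slot completely, while a 128-bit half is placed at the end of its slot
 * with the leading bytes left zero.
 */
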
1286 static noinline_for_stack int
1287 ccp_run_des3_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1288 {
1289 	struct ccp_des3_engine *des3 = &cmd->u.des3;
1290 
1291 	struct ccp_dm_workarea key, ctx;
1292 	struct ccp_data src, dst;
1293 	struct ccp_op op;
1294 	unsigned int dm_offset;
1295 	unsigned int len_singlekey;
1296 	bool in_place = false;
1297 	int ret;
1298 
1299 	/* Error checks */
1300 	if (cmd_q->ccp->vdata->version < CCP_VERSION(5, 0))
1301 		return -EINVAL;
1302 
1303 	if (!cmd_q->ccp->vdata->perform->des3)
1304 		return -EINVAL;
1305 
1306 	if (des3->key_len != DES3_EDE_KEY_SIZE)
1307 		return -EINVAL;
1308 
1309 	if (((des3->mode == CCP_DES3_MODE_ECB) ||
1310 		(des3->mode == CCP_DES3_MODE_CBC)) &&
1311 		(des3->src_len & (DES3_EDE_BLOCK_SIZE - 1)))
1312 		return -EINVAL;
1313 
1314 	if (!des3->key || !des3->src || !des3->dst)
1315 		return -EINVAL;
1316 
1317 	if (des3->mode != CCP_DES3_MODE_ECB) {
1318 		if (des3->iv_len != DES3_EDE_BLOCK_SIZE)
1319 			return -EINVAL;
1320 
1321 		if (!des3->iv)
1322 			return -EINVAL;
1323 	}
1324 
1325 	/* Zero out all the fields of the command desc */
1326 	memset(&op, 0, sizeof(op));
1327 
1328 	/* Set up the Function field */
1329 	op.cmd_q = cmd_q;
1330 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
1331 	op.sb_key = cmd_q->sb_key;
1332 
1333 	op.init = (des3->mode == CCP_DES3_MODE_ECB) ? 0 : 1;
1334 	op.u.des3.type = des3->type;
1335 	op.u.des3.mode = des3->mode;
1336 	op.u.des3.action = des3->action;
1337 
1338 	/*
1339 	 * All supported key sizes fit in a single (32-byte) KSB entry and
1340 	 * (like AES) must be in little endian format. Use the 256-bit byte
1341 	 * swap passthru option to convert from big endian to little endian.
1342 	 */
1343 	ret = ccp_init_dm_workarea(&key, cmd_q,
1344 				   CCP_DES3_KEY_SB_COUNT * CCP_SB_BYTES,
1345 				   DMA_TO_DEVICE);
1346 	if (ret)
1347 		return ret;
1348 
1349 	/*
1350 	 * The contents of the key triplet are in the reverse order of what
1351 	 * is required by the engine. Copy the 3 pieces individually to put
1352 	 * them where they belong.
1353 	 */
1354 	dm_offset = CCP_SB_BYTES - des3->key_len; /* Basic offset */
1355 
1356 	len_singlekey = des3->key_len / 3;
1357 	ret = ccp_set_dm_area(&key, dm_offset + 2 * len_singlekey,
1358 			      des3->key, 0, len_singlekey);
1359 	if (ret)
1360 		goto e_key;
1361 	ret = ccp_set_dm_area(&key, dm_offset + len_singlekey,
1362 			      des3->key, len_singlekey, len_singlekey);
1363 	if (ret)
1364 		goto e_key;
1365 	ret = ccp_set_dm_area(&key, dm_offset,
1366 			      des3->key, 2 * len_singlekey, len_singlekey);
1367 	if (ret)
1368 		goto e_key;
1369 
1370 	/* Copy the key to the SB */
1371 	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
1372 			     CCP_PASSTHRU_BYTESWAP_256BIT);
1373 	if (ret) {
1374 		cmd->engine_error = cmd_q->cmd_error;
1375 		goto e_key;
1376 	}
1377 
1378 	/*
1379 	 * The DES3 context fits in a single (32-byte) KSB entry and
1380 	 * must be in little endian format. Use the 256-bit byte swap
1381 	 * passthru option to convert from big endian to little endian.
1382 	 */
1383 	if (des3->mode != CCP_DES3_MODE_ECB) {
1384 		op.sb_ctx = cmd_q->sb_ctx;
1385 
1386 		ret = ccp_init_dm_workarea(&ctx, cmd_q,
1387 					   CCP_DES3_CTX_SB_COUNT * CCP_SB_BYTES,
1388 					   DMA_BIDIRECTIONAL);
1389 		if (ret)
1390 			goto e_key;
1391 
1392 		/* Load the context into the LSB */
1393 		dm_offset = CCP_SB_BYTES - des3->iv_len;
1394 		ret = ccp_set_dm_area(&ctx, dm_offset, des3->iv, 0,
1395 				      des3->iv_len);
1396 		if (ret)
1397 			goto e_ctx;
1398 
1399 		ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1400 				     CCP_PASSTHRU_BYTESWAP_256BIT);
1401 		if (ret) {
1402 			cmd->engine_error = cmd_q->cmd_error;
1403 			goto e_ctx;
1404 		}
1405 	}
1406 
1407 	/*
1408 	 * Prepare the input and output data workareas. For in-place
1409 	 * operations we need to set the dma direction to BIDIRECTIONAL
1410 	 * and copy the src workarea to the dst workarea.
1411 	 */
1412 	if (sg_virt(des3->src) == sg_virt(des3->dst))
1413 		in_place = true;
1414 
1415 	ret = ccp_init_data(&src, cmd_q, des3->src, des3->src_len,
1416 			DES3_EDE_BLOCK_SIZE,
1417 			in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1418 	if (ret)
1419 		goto e_ctx;
1420 
1421 	if (in_place)
1422 		dst = src;
1423 	else {
1424 		ret = ccp_init_data(&dst, cmd_q, des3->dst, des3->src_len,
1425 				DES3_EDE_BLOCK_SIZE, DMA_FROM_DEVICE);
1426 		if (ret)
1427 			goto e_src;
1428 	}
1429 
1430 	/* Send data to the CCP DES3 engine */
1431 	while (src.sg_wa.bytes_left) {
1432 		ccp_prepare_data(&src, &dst, &op, DES3_EDE_BLOCK_SIZE, true);
1433 		if (!src.sg_wa.bytes_left) {
1434 			op.eom = 1;
1435 
1436 			/* Since we don't retrieve the context in ECB mode
1437 			 * we have to wait for the operation to complete
1438 			 * on the last piece of data
1439 			 */
1440 			op.soc = 0;
1441 		}
1442 
1443 		ret = cmd_q->ccp->vdata->perform->des3(&op);
1444 		if (ret) {
1445 			cmd->engine_error = cmd_q->cmd_error;
1446 			goto e_dst;
1447 		}
1448 
1449 		ccp_process_data(&src, &dst, &op);
1450 	}
1451 
1452 	if (des3->mode != CCP_DES3_MODE_ECB) {
1453 		/* Retrieve the context and make BE */
1454 		ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1455 				       CCP_PASSTHRU_BYTESWAP_256BIT);
1456 		if (ret) {
1457 			cmd->engine_error = cmd_q->cmd_error;
1458 			goto e_dst;
1459 		}
1460 
1461 		/* ...but we only need the last DES3_EDE_BLOCK_SIZE bytes */
1462 		ccp_get_dm_area(&ctx, dm_offset, des3->iv, 0,
1463 				DES3_EDE_BLOCK_SIZE);
1464 	}
1465 e_dst:
1466 	if (!in_place)
1467 		ccp_free_data(&dst, cmd_q);
1468 
1469 e_src:
1470 	ccp_free_data(&src, cmd_q);
1471 
1472 e_ctx:
1473 	if (des3->mode != CCP_DES3_MODE_ECB)
1474 		ccp_dm_free(&ctx);
1475 
1476 e_key:
1477 	ccp_dm_free(&key);
1478 
1479 	return ret;
1480 }
1481 
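/* Worked example for the key reordering above: with a 24-byte 3DES key
 * K1 || K2 || K3 and a 32-byte SB entry, dm_offset is 8 and len_singlekey
 * is 8, so K1 ends up at offset 24, K2 at offset 16 and K3 at offset 8,
 * with bytes 0-7 left zero. The engine therefore sees the triplet in the
 * reverse of the caller's order, as its key format requires.
 */
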
1482 static noinline_for_stack int
1483 ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1484 {
1485 	struct ccp_sha_engine *sha = &cmd->u.sha;
1486 	struct ccp_dm_workarea ctx;
1487 	struct ccp_data src;
1488 	struct ccp_op op;
1489 	unsigned int ioffset, ooffset;
1490 	unsigned int digest_size;
1491 	int sb_count;
1492 	const void *init;
1493 	u64 block_size;
1494 	int ctx_size;
1495 	int ret;
1496 
1497 	switch (sha->type) {
1498 	case CCP_SHA_TYPE_1:
1499 		if (sha->ctx_len < SHA1_DIGEST_SIZE)
1500 			return -EINVAL;
1501 		block_size = SHA1_BLOCK_SIZE;
1502 		break;
1503 	case CCP_SHA_TYPE_224:
1504 		if (sha->ctx_len < SHA224_DIGEST_SIZE)
1505 			return -EINVAL;
1506 		block_size = SHA224_BLOCK_SIZE;
1507 		break;
1508 	case CCP_SHA_TYPE_256:
1509 		if (sha->ctx_len < SHA256_DIGEST_SIZE)
1510 			return -EINVAL;
1511 		block_size = SHA256_BLOCK_SIZE;
1512 		break;
1513 	case CCP_SHA_TYPE_384:
1514 		if (cmd_q->ccp->vdata->version < CCP_VERSION(4, 0)
1515 		    || sha->ctx_len < SHA384_DIGEST_SIZE)
1516 			return -EINVAL;
1517 		block_size = SHA384_BLOCK_SIZE;
1518 		break;
1519 	case CCP_SHA_TYPE_512:
1520 		if (cmd_q->ccp->vdata->version < CCP_VERSION(4, 0)
1521 		    || sha->ctx_len < SHA512_DIGEST_SIZE)
1522 			return -EINVAL;
1523 		block_size = SHA512_BLOCK_SIZE;
1524 		break;
1525 	default:
1526 		return -EINVAL;
1527 	}
1528 
1529 	if (!sha->ctx)
1530 		return -EINVAL;
1531 
1532 	if (!sha->final && (sha->src_len & (block_size - 1)))
1533 		return -EINVAL;
1534 
1535 	/* The version 3 device can't handle zero-length input */
1536 	if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0)) {
1537 
1538 		if (!sha->src_len) {
1539 			unsigned int digest_len;
1540 			const u8 *sha_zero;
1541 
1542 			/* Not final, just return */
1543 			if (!sha->final)
1544 				return 0;
1545 
1546 			/* CCP can't do a zero length sha operation so the
1547 			 * caller must buffer the data.
1548 			 */
1549 			if (sha->msg_bits)
1550 				return -EINVAL;
1551 
1552 			/* The CCP cannot perform zero-length sha operations
1553 			 * so the caller is required to buffer data for the
1554 			 * final operation. However, a sha operation for a
1555 			 * message with a total length of zero is valid so
1556 			 * known values are required to supply the result.
1557 			 */
1558 			switch (sha->type) {
1559 			case CCP_SHA_TYPE_1:
1560 				sha_zero = sha1_zero_message_hash;
1561 				digest_len = SHA1_DIGEST_SIZE;
1562 				break;
1563 			case CCP_SHA_TYPE_224:
1564 				sha_zero = sha224_zero_message_hash;
1565 				digest_len = SHA224_DIGEST_SIZE;
1566 				break;
1567 			case CCP_SHA_TYPE_256:
1568 				sha_zero = sha256_zero_message_hash;
1569 				digest_len = SHA256_DIGEST_SIZE;
1570 				break;
1571 			default:
1572 				return -EINVAL;
1573 			}
1574 
1575 			scatterwalk_map_and_copy((void *)sha_zero, sha->ctx, 0,
1576 						 digest_len, 1);
1577 
1578 			return 0;
1579 		}
1580 	}
1581 
1582 	/* Set variables used throughout */
1583 	switch (sha->type) {
1584 	case CCP_SHA_TYPE_1:
1585 		digest_size = SHA1_DIGEST_SIZE;
1586 		init = (void *) ccp_sha1_init;
1587 		ctx_size = SHA1_DIGEST_SIZE;
1588 		sb_count = 1;
1589 		if (cmd_q->ccp->vdata->version != CCP_VERSION(3, 0))
1590 			ooffset = ioffset = CCP_SB_BYTES - SHA1_DIGEST_SIZE;
1591 		else
1592 			ooffset = ioffset = 0;
1593 		break;
1594 	case CCP_SHA_TYPE_224:
1595 		digest_size = SHA224_DIGEST_SIZE;
1596 		init = (void *) ccp_sha224_init;
1597 		ctx_size = SHA256_DIGEST_SIZE;
1598 		sb_count = 1;
1599 		ioffset = 0;
1600 		if (cmd_q->ccp->vdata->version != CCP_VERSION(3, 0))
1601 			ooffset = CCP_SB_BYTES - SHA224_DIGEST_SIZE;
1602 		else
1603 			ooffset = 0;
1604 		break;
1605 	case CCP_SHA_TYPE_256:
1606 		digest_size = SHA256_DIGEST_SIZE;
1607 		init = (void *) ccp_sha256_init;
1608 		ctx_size = SHA256_DIGEST_SIZE;
1609 		sb_count = 1;
1610 		ooffset = ioffset = 0;
1611 		break;
1612 	case CCP_SHA_TYPE_384:
1613 		digest_size = SHA384_DIGEST_SIZE;
1614 		init = (void *) ccp_sha384_init;
1615 		ctx_size = SHA512_DIGEST_SIZE;
1616 		sb_count = 2;
1617 		ioffset = 0;
1618 		ooffset = 2 * CCP_SB_BYTES - SHA384_DIGEST_SIZE;
1619 		break;
1620 	case CCP_SHA_TYPE_512:
1621 		digest_size = SHA512_DIGEST_SIZE;
1622 		init = (void *) ccp_sha512_init;
1623 		ctx_size = SHA512_DIGEST_SIZE;
1624 		sb_count = 2;
1625 		ooffset = ioffset = 0;
1626 		break;
1627 	default:
1628 		ret = -EINVAL;
1629 		goto e_data;
1630 	}
1631 
1632 	/* For zero-length plaintext the src pointer is ignored;
1633 	 * otherwise both parts must be valid
1634 	 */
1635 	if (sha->src_len && !sha->src)
1636 		return -EINVAL;
1637 
1638 	memset(&op, 0, sizeof(op));
1639 	op.cmd_q = cmd_q;
1640 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
1641 	op.sb_ctx = cmd_q->sb_ctx; /* Pre-allocated */
1642 	op.u.sha.type = sha->type;
1643 	op.u.sha.msg_bits = sha->msg_bits;
1644 
1645 	/* For SHA1/224/256 the context fits in a single (32-byte) SB entry;
1646 	 * SHA384/512 require 2 adjacent SB slots, with the right half in the
1647 	 * first slot, and the left half in the second. Each portion must then
1648 	 * be in little endian format: use the 256-bit byte swap option.
1649 	 */
1650 	ret = ccp_init_dm_workarea(&ctx, cmd_q, sb_count * CCP_SB_BYTES,
1651 				   DMA_BIDIRECTIONAL);
1652 	if (ret)
1653 		return ret;
1654 	if (sha->first) {
1655 		switch (sha->type) {
1656 		case CCP_SHA_TYPE_1:
1657 		case CCP_SHA_TYPE_224:
1658 		case CCP_SHA_TYPE_256:
1659 			memcpy(ctx.address + ioffset, init, ctx_size);
1660 			break;
1661 		case CCP_SHA_TYPE_384:
1662 		case CCP_SHA_TYPE_512:
1663 			memcpy(ctx.address + ctx_size / 2, init,
1664 			       ctx_size / 2);
1665 			memcpy(ctx.address, init + ctx_size / 2,
1666 			       ctx_size / 2);
1667 			break;
1668 		default:
1669 			ret = -EINVAL;
1670 			goto e_ctx;
1671 		}
1672 	} else {
1673 		/* Restore the context */
1674 		ret = ccp_set_dm_area(&ctx, 0, sha->ctx, 0,
1675 				      sb_count * CCP_SB_BYTES);
1676 		if (ret)
1677 			goto e_ctx;
1678 	}
1679 
1680 	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1681 			     CCP_PASSTHRU_BYTESWAP_256BIT);
1682 	if (ret) {
1683 		cmd->engine_error = cmd_q->cmd_error;
1684 		goto e_ctx;
1685 	}
1686 
1687 	if (sha->src) {
1688 		/* Send data to the CCP SHA engine; block_size is set above */
1689 		ret = ccp_init_data(&src, cmd_q, sha->src, sha->src_len,
1690 				    block_size, DMA_TO_DEVICE);
1691 		if (ret)
1692 			goto e_ctx;
1693 
1694 		while (src.sg_wa.bytes_left) {
1695 			ccp_prepare_data(&src, NULL, &op, block_size, false);
1696 			if (sha->final && !src.sg_wa.bytes_left)
1697 				op.eom = 1;
1698 
1699 			ret = cmd_q->ccp->vdata->perform->sha(&op);
1700 			if (ret) {
1701 				cmd->engine_error = cmd_q->cmd_error;
1702 				goto e_data;
1703 			}
1704 
1705 			ccp_process_data(&src, NULL, &op);
1706 		}
1707 	} else {
1708 		op.eom = 1;
1709 		ret = cmd_q->ccp->vdata->perform->sha(&op);
1710 		if (ret) {
1711 			cmd->engine_error = cmd_q->cmd_error;
1712 			goto e_data;
1713 		}
1714 	}
1715 
1716 	/* Retrieve the SHA context - convert from LE to BE using
1717 	 * 32-byte (256-bit) byteswapping
1718 	 */
1719 	ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1720 			       CCP_PASSTHRU_BYTESWAP_256BIT);
1721 	if (ret) {
1722 		cmd->engine_error = cmd_q->cmd_error;
1723 		goto e_data;
1724 	}
1725 
1726 	if (sha->final) {
1727 		/* Finishing up, so get the digest */
1728 		switch (sha->type) {
1729 		case CCP_SHA_TYPE_1:
1730 		case CCP_SHA_TYPE_224:
1731 		case CCP_SHA_TYPE_256:
1732 			ccp_get_dm_area(&ctx, ooffset,
1733 					sha->ctx, 0,
1734 					digest_size);
1735 			break;
1736 		case CCP_SHA_TYPE_384:
1737 		case CCP_SHA_TYPE_512:
1738 			ccp_get_dm_area(&ctx, 0,
1739 					sha->ctx, LSB_ITEM_SIZE - ooffset,
1740 					LSB_ITEM_SIZE);
1741 			ccp_get_dm_area(&ctx, LSB_ITEM_SIZE + ooffset,
1742 					sha->ctx, 0,
1743 					LSB_ITEM_SIZE - ooffset);
1744 			break;
1745 		default:
1746 			ret = -EINVAL;
1747 			goto e_ctx;
1748 		}
1749 	} else {
1750 		/* Stash the context */
1751 		ccp_get_dm_area(&ctx, 0, sha->ctx, 0,
1752 				sb_count * CCP_SB_BYTES);
1753 	}
1754 
1755 	if (sha->final && sha->opad) {
1756 		/* HMAC operation, recursively perform final SHA */
1757 		struct ccp_cmd hmac_cmd;
1758 		struct scatterlist sg;
1759 		u8 *hmac_buf;
1760 
1761 		if (sha->opad_len != block_size) {
1762 			ret = -EINVAL;
1763 			goto e_data;
1764 		}
1765 
1766 		hmac_buf = kmalloc(block_size + digest_size, GFP_KERNEL);
1767 		if (!hmac_buf) {
1768 			ret = -ENOMEM;
1769 			goto e_data;
1770 		}
1771 		sg_init_one(&sg, hmac_buf, block_size + digest_size);
1772 
1773 		scatterwalk_map_and_copy(hmac_buf, sha->opad, 0, block_size, 0);
1774 		switch (sha->type) {
1775 		case CCP_SHA_TYPE_1:
1776 		case CCP_SHA_TYPE_224:
1777 		case CCP_SHA_TYPE_256:
1778 			memcpy(hmac_buf + block_size,
1779 			       ctx.address + ooffset,
1780 			       digest_size);
1781 			break;
1782 		case CCP_SHA_TYPE_384:
1783 		case CCP_SHA_TYPE_512:
1784 			memcpy(hmac_buf + block_size,
1785 			       ctx.address + LSB_ITEM_SIZE + ooffset,
1786 			       LSB_ITEM_SIZE);
1787 			memcpy(hmac_buf + block_size +
1788 			       (LSB_ITEM_SIZE - ooffset),
1789 			       ctx.address,
1790 			       LSB_ITEM_SIZE);
1791 			break;
1792 		default:
1793 			kfree(hmac_buf);
1794 			ret = -EINVAL;
1795 			goto e_data;
1796 		}
1797 
1798 		memset(&hmac_cmd, 0, sizeof(hmac_cmd));
1799 		hmac_cmd.engine = CCP_ENGINE_SHA;
1800 		hmac_cmd.u.sha.type = sha->type;
1801 		hmac_cmd.u.sha.ctx = sha->ctx;
1802 		hmac_cmd.u.sha.ctx_len = sha->ctx_len;
1803 		hmac_cmd.u.sha.src = &sg;
1804 		hmac_cmd.u.sha.src_len = block_size + digest_size;
1805 		hmac_cmd.u.sha.opad = NULL;
1806 		hmac_cmd.u.sha.opad_len = 0;
1807 		hmac_cmd.u.sha.first = 1;
1808 		hmac_cmd.u.sha.final = 1;
1809 		hmac_cmd.u.sha.msg_bits = (block_size + digest_size) << 3;
1810 
1811 		ret = ccp_run_sha_cmd(cmd_q, &hmac_cmd);
1812 		if (ret)
1813 			cmd->engine_error = hmac_cmd.engine_error;
1814 
1815 		kfree(hmac_buf);
1816 	}
1817 
1818 e_data:
1819 	if (sha->src)
1820 		ccp_free_data(&src, cmd_q);
1821 
1822 e_ctx:
1823 	ccp_dm_free(&ctx);
1824 
1825 	return ret;
1826 }
1827 
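/* Note on the HMAC tail above: when sha->opad is supplied on the final
 * update, the routine builds a temporary buffer holding the caller's
 * opad block followed by the just-computed inner digest (block_size +
 * digest_size bytes) and recursively runs one more first+final SHA pass
 * over it, leaving the finished HMAC value in sha->ctx. For SHA-384/512
 * the two LSB slots are first recombined into normal digest order before
 * being copied into that buffer.
 */
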
1828 static noinline_for_stack int
1829 ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1830 {
1831 	struct ccp_rsa_engine *rsa = &cmd->u.rsa;
1832 	struct ccp_dm_workarea exp, src, dst;
1833 	struct ccp_op op;
1834 	unsigned int sb_count, i_len, o_len;
1835 	int ret;
1836 
1837 	/* Check against the maximum allowable size, in bits */
1838 	if (rsa->key_size > cmd_q->ccp->vdata->rsamax)
1839 		return -EINVAL;
1840 
1841 	if (!rsa->exp || !rsa->mod || !rsa->src || !rsa->dst)
1842 		return -EINVAL;
1843 
1844 	memset(&op, 0, sizeof(op));
1845 	op.cmd_q = cmd_q;
1846 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
1847 
1848 	/* The RSA modulus must precede the message being acted upon, so
1849 	 * it must be copied to a DMA area where the message and the
1850 	 * modulus can be concatenated.  Therefore the input buffer
1851 	 * length required is twice the output buffer length (which
1852 	 * must be a multiple of 256 bits).  Compute o_len, i_len in bytes.
1853 	 * Buffer sizes must be a multiple of 32 bytes; rounding up may be
1854 	 * required.
1855 	 */
1856 	o_len = 32 * ((rsa->key_size + 255) / 256);
1857 	i_len = o_len * 2;
1858 
1859 	sb_count = 0;
1860 	if (cmd_q->ccp->vdata->version < CCP_VERSION(5, 0)) {
1861 		/* sb_count is the number of storage block slots required
1862 		 * for the modulus.
1863 		 */
1864 		sb_count = o_len / CCP_SB_BYTES;
1865 		op.sb_key = cmd_q->ccp->vdata->perform->sballoc(cmd_q,
1866 								sb_count);
1867 		if (!op.sb_key)
1868 			return -EIO;
1869 	} else {
1870 		/* A version 5 device allows a modulus size that will not fit
1871 		 * in the LSB, so the command will transfer it from memory.
1872 		 * Set the sb key to the default, even though it's not used.
1873 		 */
1874 		op.sb_key = cmd_q->sb_key;
1875 	}
1876 
1877 	/* The RSA exponent must be in little endian format. Reverse its
1878 	 * byte order.
1879 	 */
1880 	ret = ccp_init_dm_workarea(&exp, cmd_q, o_len, DMA_TO_DEVICE);
1881 	if (ret)
1882 		goto e_sb;
1883 
1884 	ret = ccp_reverse_set_dm_area(&exp, 0, rsa->exp, 0, rsa->exp_len);
1885 	if (ret)
1886 		goto e_exp;
1887 
1888 	if (cmd_q->ccp->vdata->version < CCP_VERSION(5, 0)) {
1889 		/* Copy the exponent to the local storage block, using
1890 		 * as many 32-byte blocks as were allocated above. It's
1891 		 * already little endian, so no further change is required.
1892 		 */
1893 		ret = ccp_copy_to_sb(cmd_q, &exp, op.jobid, op.sb_key,
1894 				     CCP_PASSTHRU_BYTESWAP_NOOP);
1895 		if (ret) {
1896 			cmd->engine_error = cmd_q->cmd_error;
1897 			goto e_exp;
1898 		}
1899 	} else {
1900 		/* The exponent can be retrieved from memory via DMA. */
1901 		op.exp.u.dma.address = exp.dma.address;
1902 		op.exp.u.dma.offset = 0;
1903 	}
1904 
1905 	/* Concatenate the modulus and the message. Both the modulus and
1906 	 * the operands must be in little endian format.  Since the input
1907 	 * is in big endian format it must be converted.
1908 	 */
1909 	ret = ccp_init_dm_workarea(&src, cmd_q, i_len, DMA_TO_DEVICE);
1910 	if (ret)
1911 		goto e_exp;
1912 
1913 	ret = ccp_reverse_set_dm_area(&src, 0, rsa->mod, 0, rsa->mod_len);
1914 	if (ret)
1915 		goto e_src;
1916 	ret = ccp_reverse_set_dm_area(&src, o_len, rsa->src, 0, rsa->src_len);
1917 	if (ret)
1918 		goto e_src;
1919 
1920 	/* Prepare the output area for the operation */
1921 	ret = ccp_init_dm_workarea(&dst, cmd_q, o_len, DMA_FROM_DEVICE);
1922 	if (ret)
1923 		goto e_src;
1924 
1925 	op.soc = 1;
1926 	op.src.u.dma.address = src.dma.address;
1927 	op.src.u.dma.offset = 0;
1928 	op.src.u.dma.length = i_len;
1929 	op.dst.u.dma.address = dst.dma.address;
1930 	op.dst.u.dma.offset = 0;
1931 	op.dst.u.dma.length = o_len;
1932 
1933 	op.u.rsa.mod_size = rsa->key_size;
1934 	op.u.rsa.input_len = i_len;
1935 
1936 	ret = cmd_q->ccp->vdata->perform->rsa(&op);
1937 	if (ret) {
1938 		cmd->engine_error = cmd_q->cmd_error;
1939 		goto e_dst;
1940 	}
1941 
1942 	ccp_reverse_get_dm_area(&dst, 0, rsa->dst, 0, rsa->mod_len);
1943 
1944 e_dst:
1945 	ccp_dm_free(&dst);
1946 
1947 e_src:
1948 	ccp_dm_free(&src);
1949 
1950 e_exp:
1951 	ccp_dm_free(&exp);
1952 
1953 e_sb:
1954 	if (sb_count)
1955 		cmd_q->ccp->vdata->perform->sbfree(cmd_q, op.sb_key, sb_count);
1956 
1957 	return ret;
1958 }
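
/* Minimal sketch (hypothetical helper, compiled out) of the ccp_cmd fields
 * that ccp_run_rsa_cmd() above consumes; it assumes the caller has already
 * built scatterlists for the exponent, modulus, input and output buffers.
 */
#if 0
static void example_fill_rsa_cmd(struct ccp_cmd *cmd, unsigned int key_bits,
				 struct scatterlist *exp, unsigned int exp_len,
				 struct scatterlist *mod, unsigned int mod_len,
				 struct scatterlist *src, unsigned int src_len,
				 struct scatterlist *dst)
{
	memset(cmd, 0, sizeof(*cmd));
	cmd->engine = CCP_ENGINE_RSA;
	cmd->u.rsa.key_size = key_bits;	/* key size in bits */
	cmd->u.rsa.exp = exp;
	cmd->u.rsa.exp_len = exp_len;
	cmd->u.rsa.mod = mod;
	cmd->u.rsa.mod_len = mod_len;
	cmd->u.rsa.src = src;
	cmd->u.rsa.src_len = src_len;
	cmd->u.rsa.dst = dst;
}
#endif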
1959 
1960 static noinline_for_stack int
1961 ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1962 {
1963 	struct ccp_passthru_engine *pt = &cmd->u.passthru;
1964 	struct ccp_dm_workarea mask;
1965 	struct ccp_data src, dst;
1966 	struct ccp_op op;
1967 	bool in_place = false;
1968 	unsigned int i;
1969 	int ret = 0;
1970 
1971 	if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1)))
1972 		return -EINVAL;
1973 
1974 	if (!pt->src || !pt->dst)
1975 		return -EINVAL;
1976 
1977 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
1978 		if (pt->mask_len != CCP_PASSTHRU_MASKSIZE)
1979 			return -EINVAL;
1980 		if (!pt->mask)
1981 			return -EINVAL;
1982 	}
1983 
1984 	BUILD_BUG_ON(CCP_PASSTHRU_SB_COUNT != 1);
1985 
1986 	memset(&op, 0, sizeof(op));
1987 	op.cmd_q = cmd_q;
1988 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
1989 
1990 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
1991 		/* Load the mask */
1992 		op.sb_key = cmd_q->sb_key;
1993 
1994 		ret = ccp_init_dm_workarea(&mask, cmd_q,
1995 					   CCP_PASSTHRU_SB_COUNT *
1996 					   CCP_SB_BYTES,
1997 					   DMA_TO_DEVICE);
1998 		if (ret)
1999 			return ret;
2000 
2001 		ret = ccp_set_dm_area(&mask, 0, pt->mask, 0, pt->mask_len);
2002 		if (ret)
2003 			goto e_mask;
2004 		ret = ccp_copy_to_sb(cmd_q, &mask, op.jobid, op.sb_key,
2005 				     CCP_PASSTHRU_BYTESWAP_NOOP);
2006 		if (ret) {
2007 			cmd->engine_error = cmd_q->cmd_error;
2008 			goto e_mask;
2009 		}
2010 	}
2011 
2012 	/* Prepare the input and output data workareas. For in-place
2013 	 * operations we need to set the dma direction to BIDIRECTIONAL
2014 	 * and copy the src workarea to the dst workarea.
2015 	 */
2016 	if (sg_virt(pt->src) == sg_virt(pt->dst))
2017 		in_place = true;
2018 
2019 	ret = ccp_init_data(&src, cmd_q, pt->src, pt->src_len,
2020 			    CCP_PASSTHRU_MASKSIZE,
2021 			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
2022 	if (ret)
2023 		goto e_mask;
2024 
2025 	if (in_place) {
2026 		dst = src;
2027 	} else {
2028 		ret = ccp_init_data(&dst, cmd_q, pt->dst, pt->src_len,
2029 				    CCP_PASSTHRU_MASKSIZE, DMA_FROM_DEVICE);
2030 		if (ret)
2031 			goto e_src;
2032 	}
2033 
2034 	/* Send data to the CCP Passthru engine
2035 	 *   Because the CCP engine works on a single source and destination
2036 	 *   dma address at a time, the length of each entry in the source
2037 	 *   scatterlist (after the dma_map_sg call) must be less than or
2038 	 *   equal to the (remaining) length of the destination scatterlist
2039 	 *   entry, and the length must be a multiple of CCP_PASSTHRU_BLOCKSIZE.
2040 	 */
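	/* For instance, two 4 KiB source entries feeding a single 8 KiB
	 * destination entry are handled below as two passthru operations
	 * writing at destination offsets 0 and 4096.
	 */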
2041 	dst.sg_wa.sg_used = 0;
2042 	for (i = 1; i <= src.sg_wa.dma_count; i++) {
2043 		if (!dst.sg_wa.sg ||
2044 		    (sg_dma_len(dst.sg_wa.sg) < sg_dma_len(src.sg_wa.sg))) {
2045 			ret = -EINVAL;
2046 			goto e_dst;
2047 		}
2048 
2049 		if (i == src.sg_wa.dma_count) {
2050 			op.eom = 1;
2051 			op.soc = 1;
2052 		}
2053 
2054 		op.src.type = CCP_MEMTYPE_SYSTEM;
2055 		op.src.u.dma.address = sg_dma_address(src.sg_wa.sg);
2056 		op.src.u.dma.offset = 0;
2057 		op.src.u.dma.length = sg_dma_len(src.sg_wa.sg);
2058 
2059 		op.dst.type = CCP_MEMTYPE_SYSTEM;
2060 		op.dst.u.dma.address = sg_dma_address(dst.sg_wa.sg);
2061 		op.dst.u.dma.offset = dst.sg_wa.sg_used;
2062 		op.dst.u.dma.length = op.src.u.dma.length;
2063 
2064 		ret = cmd_q->ccp->vdata->perform->passthru(&op);
2065 		if (ret) {
2066 			cmd->engine_error = cmd_q->cmd_error;
2067 			goto e_dst;
2068 		}
2069 
2070 		dst.sg_wa.sg_used += sg_dma_len(src.sg_wa.sg);
2071 		if (dst.sg_wa.sg_used == sg_dma_len(dst.sg_wa.sg)) {
2072 			dst.sg_wa.sg = sg_next(dst.sg_wa.sg);
2073 			dst.sg_wa.sg_used = 0;
2074 		}
2075 		src.sg_wa.sg = sg_next(src.sg_wa.sg);
2076 	}
2077 
2078 e_dst:
2079 	if (!in_place)
2080 		ccp_free_data(&dst, cmd_q);
2081 
2082 e_src:
2083 	ccp_free_data(&src, cmd_q);
2084 
2085 e_mask:
2086 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
2087 		ccp_dm_free(&mask);
2088 
2089 	return ret;
2090 }
2091 
2092 static noinline_for_stack int
2093 ccp_run_passthru_nomap_cmd(struct ccp_cmd_queue *cmd_q,
2094 				      struct ccp_cmd *cmd)
2095 {
2096 	struct ccp_passthru_nomap_engine *pt = &cmd->u.passthru_nomap;
2097 	struct ccp_dm_workarea mask;
2098 	struct ccp_op op;
2099 	int ret;
2100 
2101 	if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1)))
2102 		return -EINVAL;
2103 
2104 	if (!pt->src_dma || !pt->dst_dma)
2105 		return -EINVAL;
2106 
2107 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
2108 		if (pt->mask_len != CCP_PASSTHRU_MASKSIZE)
2109 			return -EINVAL;
2110 		if (!pt->mask)
2111 			return -EINVAL;
2112 	}
2113 
2114 	BUILD_BUG_ON(CCP_PASSTHRU_SB_COUNT != 1);
2115 
2116 	memset(&op, 0, sizeof(op));
2117 	op.cmd_q = cmd_q;
2118 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
2119 
2120 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
2121 		/* Load the mask */
2122 		op.sb_key = cmd_q->sb_key;
2123 
2124 		mask.length = pt->mask_len;
2125 		mask.dma.address = pt->mask;
2126 		mask.dma.length = pt->mask_len;
2127 
2128 		ret = ccp_copy_to_sb(cmd_q, &mask, op.jobid, op.sb_key,
2129 				     CCP_PASSTHRU_BYTESWAP_NOOP);
2130 		if (ret) {
2131 			cmd->engine_error = cmd_q->cmd_error;
2132 			return ret;
2133 		}
2134 	}
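	/* Note that no scatterlist mapping takes place in this variant: the
	 * mask above, as well as pt->src_dma and pt->dst_dma below, are DMA
	 * addresses the caller is assumed to have mapped already (e.g. via
	 * dma_map_single() or a coherent allocation).
	 */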
2135 
2136 	/* Send data to the CCP Passthru engine */
2137 	op.eom = 1;
2138 	op.soc = 1;
2139 
2140 	op.src.type = CCP_MEMTYPE_SYSTEM;
2141 	op.src.u.dma.address = pt->src_dma;
2142 	op.src.u.dma.offset = 0;
2143 	op.src.u.dma.length = pt->src_len;
2144 
2145 	op.dst.type = CCP_MEMTYPE_SYSTEM;
2146 	op.dst.u.dma.address = pt->dst_dma;
2147 	op.dst.u.dma.offset = 0;
2148 	op.dst.u.dma.length = pt->src_len;
2149 
2150 	ret = cmd_q->ccp->vdata->perform->passthru(&op);
2151 	if (ret)
2152 		cmd->engine_error = cmd_q->cmd_error;
2153 
2154 	return ret;
2155 }
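
/* Minimal sketch (hypothetical, compiled out) of a no-DMA-map passthru
 * command as dispatched by ccp_run_cmd() below; the src and dst DMA
 * addresses and the byte count are assumed to be prepared by the caller.
 */
#if 0
static void example_fill_passthru_nomap_cmd(struct ccp_cmd *cmd,
					    dma_addr_t src, dma_addr_t dst,
					    u32 byte_count)
{
	memset(cmd, 0, sizeof(*cmd));
	cmd->engine = CCP_ENGINE_PASSTHRU;
	cmd->flags = CCP_CMD_PASSTHRU_NO_DMA_MAP;
	cmd->u.passthru_nomap.bit_mod = CCP_PASSTHRU_BITWISE_NOOP;
	cmd->u.passthru_nomap.src_dma = src;
	cmd->u.passthru_nomap.dst_dma = dst;
	cmd->u.passthru_nomap.src_len = byte_count;
	cmd->u.passthru_nomap.final = 1;
}
#endif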
2156 
2157 static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
2158 {
2159 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
2160 	struct ccp_dm_workarea src, dst;
2161 	struct ccp_op op;
2162 	int ret;
2163 	u8 *save;
2164 
2165 	if (!ecc->u.mm.operand_1 ||
2166 	    (ecc->u.mm.operand_1_len > CCP_ECC_MODULUS_BYTES))
2167 		return -EINVAL;
2168 
2169 	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT)
2170 		if (!ecc->u.mm.operand_2 ||
2171 		    (ecc->u.mm.operand_2_len > CCP_ECC_MODULUS_BYTES))
2172 			return -EINVAL;
2173 
2174 	if (!ecc->u.mm.result ||
2175 	    (ecc->u.mm.result_len < CCP_ECC_MODULUS_BYTES))
2176 		return -EINVAL;
2177 
2178 	memset(&op, 0, sizeof(op));
2179 	op.cmd_q = cmd_q;
2180 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
2181 
2182 	/* Concatenate the modulus and the operands. Both the modulus and
2183 	 * the operands must be in little endian format.  Since the input
2184 	 * is in big endian format it must be converted and placed in a
2185 	 * fixed length buffer.
2186 	 */
2187 	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
2188 				   DMA_TO_DEVICE);
2189 	if (ret)
2190 		return ret;
2191 
2192 	/* Save the workarea address since it is updated in order to perform
2193 	 * the concatenation
2194 	 */
2195 	save = src.address;
2196 
2197 	/* Copy the ECC modulus */
2198 	ret = ccp_reverse_set_dm_area(&src, 0, ecc->mod, 0, ecc->mod_len);
2199 	if (ret)
2200 		goto e_src;
2201 	src.address += CCP_ECC_OPERAND_SIZE;
2202 
2203 	/* Copy the first operand */
2204 	ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.mm.operand_1, 0,
2205 				      ecc->u.mm.operand_1_len);
2206 	if (ret)
2207 		goto e_src;
2208 	src.address += CCP_ECC_OPERAND_SIZE;
2209 
2210 	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT) {
2211 		/* Copy the second operand */
2212 		ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.mm.operand_2, 0,
2213 					      ecc->u.mm.operand_2_len);
2214 		if (ret)
2215 			goto e_src;
2216 		src.address += CCP_ECC_OPERAND_SIZE;
2217 	}
2218 
2219 	/* Restore the workarea address */
2220 	src.address = save;
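	/* The source buffer is now a sequence of fixed CCP_ECC_OPERAND_SIZE
	 * byte slots in little endian byte order: the modulus, operand 1
	 * and, except for the modular inverse function, operand 2.
	 */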
2221 
2222 	/* Prepare the output area for the operation */
2223 	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
2224 				   DMA_FROM_DEVICE);
2225 	if (ret)
2226 		goto e_src;
2227 
2228 	op.soc = 1;
2229 	op.src.u.dma.address = src.dma.address;
2230 	op.src.u.dma.offset = 0;
2231 	op.src.u.dma.length = src.length;
2232 	op.dst.u.dma.address = dst.dma.address;
2233 	op.dst.u.dma.offset = 0;
2234 	op.dst.u.dma.length = dst.length;
2235 
2236 	op.u.ecc.function = cmd->u.ecc.function;
2237 
2238 	ret = cmd_q->ccp->vdata->perform->ecc(&op);
2239 	if (ret) {
2240 		cmd->engine_error = cmd_q->cmd_error;
2241 		goto e_dst;
2242 	}
2243 
2244 	ecc->ecc_result = le16_to_cpup(
2245 		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
2246 	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
2247 		ret = -EIO;
2248 		goto e_dst;
2249 	}
2250 
2251 	/* Save the ECC result */
2252 	ccp_reverse_get_dm_area(&dst, 0, ecc->u.mm.result, 0,
2253 				CCP_ECC_MODULUS_BYTES);
2254 
2255 e_dst:
2256 	ccp_dm_free(&dst);
2257 
2258 e_src:
2259 	ccp_dm_free(&src);
2260 
2261 	return ret;
2262 }
2263 
2264 static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
2265 {
2266 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
2267 	struct ccp_dm_workarea src, dst;
2268 	struct ccp_op op;
2269 	int ret;
2270 	u8 *save;
2271 
2272 	if (!ecc->u.pm.point_1.x ||
2273 	    (ecc->u.pm.point_1.x_len > CCP_ECC_MODULUS_BYTES) ||
2274 	    !ecc->u.pm.point_1.y ||
2275 	    (ecc->u.pm.point_1.y_len > CCP_ECC_MODULUS_BYTES))
2276 		return -EINVAL;
2277 
2278 	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
2279 		if (!ecc->u.pm.point_2.x ||
2280 		    (ecc->u.pm.point_2.x_len > CCP_ECC_MODULUS_BYTES) ||
2281 		    !ecc->u.pm.point_2.y ||
2282 		    (ecc->u.pm.point_2.y_len > CCP_ECC_MODULUS_BYTES))
2283 			return -EINVAL;
2284 	} else {
2285 		if (!ecc->u.pm.domain_a ||
2286 		    (ecc->u.pm.domain_a_len > CCP_ECC_MODULUS_BYTES))
2287 			return -EINVAL;
2288 
2289 		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT)
2290 			if (!ecc->u.pm.scalar ||
2291 			    (ecc->u.pm.scalar_len > CCP_ECC_MODULUS_BYTES))
2292 				return -EINVAL;
2293 	}
2294 
2295 	if (!ecc->u.pm.result.x ||
2296 	    (ecc->u.pm.result.x_len < CCP_ECC_MODULUS_BYTES) ||
2297 	    !ecc->u.pm.result.y ||
2298 	    (ecc->u.pm.result.y_len < CCP_ECC_MODULUS_BYTES))
2299 		return -EINVAL;
2300 
2301 	memset(&op, 0, sizeof(op));
2302 	op.cmd_q = cmd_q;
2303 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
2304 
2305 	/* Concatenate the modulus and the operands. Both the modulus and
2306 	 * the operands must be in little endian format.  Since the input
2307 	 * is in big endian format it must be converted and placed in a
2308 	 * fixed length buffer.
2309 	 */
2310 	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
2311 				   DMA_TO_DEVICE);
2312 	if (ret)
2313 		return ret;
2314 
2315 	/* Save the workarea address since it is updated in order to perform
2316 	 * the concatenation
2317 	 */
2318 	save = src.address;
2319 
2320 	/* Copy the ECC modulus */
2321 	ret = ccp_reverse_set_dm_area(&src, 0, ecc->mod, 0, ecc->mod_len);
2322 	if (ret)
2323 		goto e_src;
2324 	src.address += CCP_ECC_OPERAND_SIZE;
2325 
2326 	/* Copy the first point X and Y coordinate */
2327 	ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_1.x, 0,
2328 				      ecc->u.pm.point_1.x_len);
2329 	if (ret)
2330 		goto e_src;
2331 	src.address += CCP_ECC_OPERAND_SIZE;
2332 	ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_1.y, 0,
2333 				      ecc->u.pm.point_1.y_len);
2334 	if (ret)
2335 		goto e_src;
2336 	src.address += CCP_ECC_OPERAND_SIZE;
2337 
2338 	/* Set the first point Z coordinate to 1 */
2339 	*src.address = 0x01;
2340 	src.address += CCP_ECC_OPERAND_SIZE;
2341 
2342 	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
2343 		/* Copy the second point X and Y coordinate */
2344 		ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_2.x, 0,
2345 					      ecc->u.pm.point_2.x_len);
2346 		if (ret)
2347 			goto e_src;
2348 		src.address += CCP_ECC_OPERAND_SIZE;
2349 		ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_2.y, 0,
2350 					      ecc->u.pm.point_2.y_len);
2351 		if (ret)
2352 			goto e_src;
2353 		src.address += CCP_ECC_OPERAND_SIZE;
2354 
2355 		/* Set the second point Z coordinate to 1 */
2356 		*src.address = 0x01;
2357 		src.address += CCP_ECC_OPERAND_SIZE;
2358 	} else {
2359 		/* Copy the Domain "a" parameter */
2360 		ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.domain_a, 0,
2361 					      ecc->u.pm.domain_a_len);
2362 		if (ret)
2363 			goto e_src;
2364 		src.address += CCP_ECC_OPERAND_SIZE;
2365 
2366 		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT) {
2367 			/* Copy the scalar value */
2368 			ret = ccp_reverse_set_dm_area(&src, 0,
2369 						      ecc->u.pm.scalar, 0,
2370 						      ecc->u.pm.scalar_len);
2371 			if (ret)
2372 				goto e_src;
2373 			src.address += CCP_ECC_OPERAND_SIZE;
2374 		}
2375 	}
2376 
2377 	/* Restore the workarea address */
2378 	src.address = save;
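	/* The source buffer is now a sequence of fixed CCP_ECC_OPERAND_SIZE
	 * byte slots: the modulus, point 1 as (X, Y, Z = 1) and then either
	 * point 2 as (X, Y, Z = 1) for a point add, or the domain "a"
	 * parameter followed, for a point multiply, by the scalar.
	 */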
2379 
2380 	/* Prepare the output area for the operation */
2381 	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
2382 				   DMA_FROM_DEVICE);
2383 	if (ret)
2384 		goto e_src;
2385 
2386 	op.soc = 1;
2387 	op.src.u.dma.address = src.dma.address;
2388 	op.src.u.dma.offset = 0;
2389 	op.src.u.dma.length = src.length;
2390 	op.dst.u.dma.address = dst.dma.address;
2391 	op.dst.u.dma.offset = 0;
2392 	op.dst.u.dma.length = dst.length;
2393 
2394 	op.u.ecc.function = cmd->u.ecc.function;
2395 
2396 	ret = cmd_q->ccp->vdata->perform->ecc(&op);
2397 	if (ret) {
2398 		cmd->engine_error = cmd_q->cmd_error;
2399 		goto e_dst;
2400 	}
2401 
2402 	ecc->ecc_result = le16_to_cpup(
2403 		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
2404 	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
2405 		ret = -EIO;
2406 		goto e_dst;
2407 	}
2408 
2409 	/* Save the workarea address since it is updated as we walk through
2410 	 * to copy the point math result
2411 	 */
2412 	save = dst.address;
2413 
2414 	/* Save the ECC result X and Y coordinates */
2415 	ccp_reverse_get_dm_area(&dst, 0, ecc->u.pm.result.x, 0,
2416 				CCP_ECC_MODULUS_BYTES);
2417 	dst.address += CCP_ECC_OUTPUT_SIZE;
2418 	ccp_reverse_get_dm_area(&dst, 0, ecc->u.pm.result.y, 0,
2419 				CCP_ECC_MODULUS_BYTES);
2420 	dst.address += CCP_ECC_OUTPUT_SIZE;
2421 
2422 	/* Restore the workarea address */
2423 	dst.address = save;
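	/* The X and Y coordinates occupy consecutive CCP_ECC_OUTPUT_SIZE byte
	 * slots of the output buffer and are byte-reversed back to big endian
	 * as they are copied out above.
	 */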
2424 
2425 e_dst:
2426 	ccp_dm_free(&dst);
2427 
2428 e_src:
2429 	ccp_dm_free(&src);
2430 
2431 	return ret;
2432 }
2433 
2434 static noinline_for_stack int
2435 ccp_run_ecc_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
2436 {
2437 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
2438 
2439 	ecc->ecc_result = 0;
2440 
2441 	if (!ecc->mod ||
2442 	    (ecc->mod_len > CCP_ECC_MODULUS_BYTES))
2443 		return -EINVAL;
2444 
2445 	switch (ecc->function) {
2446 	case CCP_ECC_FUNCTION_MMUL_384BIT:
2447 	case CCP_ECC_FUNCTION_MADD_384BIT:
2448 	case CCP_ECC_FUNCTION_MINV_384BIT:
2449 		return ccp_run_ecc_mm_cmd(cmd_q, cmd);
2450 
2451 	case CCP_ECC_FUNCTION_PADD_384BIT:
2452 	case CCP_ECC_FUNCTION_PMUL_384BIT:
2453 	case CCP_ECC_FUNCTION_PDBL_384BIT:
2454 		return ccp_run_ecc_pm_cmd(cmd_q, cmd);
2455 
2456 	default:
2457 		return -EINVAL;
2458 	}
2459 }
2460 
2461 int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
2462 {
2463 	int ret;
2464 
2465 	cmd->engine_error = 0;
2466 	cmd_q->cmd_error = 0;
2467 	cmd_q->int_rcvd = 0;
2468 	cmd_q->free_slots = cmd_q->ccp->vdata->perform->get_free_slots(cmd_q);
2469 
2470 	switch (cmd->engine) {
2471 	case CCP_ENGINE_AES:
2472 		switch (cmd->u.aes.mode) {
2473 		case CCP_AES_MODE_CMAC:
2474 			ret = ccp_run_aes_cmac_cmd(cmd_q, cmd);
2475 			break;
2476 		case CCP_AES_MODE_GCM:
2477 			ret = ccp_run_aes_gcm_cmd(cmd_q, cmd);
2478 			break;
2479 		default:
2480 			ret = ccp_run_aes_cmd(cmd_q, cmd);
2481 			break;
2482 		}
2483 		break;
2484 	case CCP_ENGINE_XTS_AES_128:
2485 		ret = ccp_run_xts_aes_cmd(cmd_q, cmd);
2486 		break;
2487 	case CCP_ENGINE_DES3:
2488 		ret = ccp_run_des3_cmd(cmd_q, cmd);
2489 		break;
2490 	case CCP_ENGINE_SHA:
2491 		ret = ccp_run_sha_cmd(cmd_q, cmd);
2492 		break;
2493 	case CCP_ENGINE_RSA:
2494 		ret = ccp_run_rsa_cmd(cmd_q, cmd);
2495 		break;
2496 	case CCP_ENGINE_PASSTHRU:
2497 		if (cmd->flags & CCP_CMD_PASSTHRU_NO_DMA_MAP)
2498 			ret = ccp_run_passthru_nomap_cmd(cmd_q, cmd);
2499 		else
2500 			ret = ccp_run_passthru_cmd(cmd_q, cmd);
2501 		break;
2502 	case CCP_ENGINE_ECC:
2503 		ret = ccp_run_ecc_cmd(cmd_q, cmd);
2504 		break;
2505 	default:
2506 		ret = -EINVAL;
2507 	}
2508 
2509 	return ret;
2510 }
2511