xref: /openbmc/linux/drivers/crypto/ccp/ccp-ops.c (revision c0ecca6604b80e438b032578634c6e133c7028f6)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * AMD Cryptographic Coprocessor (CCP) driver
4  *
5  * Copyright (C) 2013-2019 Advanced Micro Devices, Inc.
6  *
7  * Author: Tom Lendacky <thomas.lendacky@amd.com>
8  * Author: Gary R Hook <gary.hook@amd.com>
9  */
10 
11 #include <linux/dma-mapping.h>
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/interrupt.h>
15 #include <crypto/scatterwalk.h>
16 #include <crypto/des.h>
17 #include <linux/ccp.h>
18 
19 #include "ccp-dev.h"
20 
/* SHA initial context values
 *
 * Every word is converted to big endian at build time with
 * cpu_to_be32()/cpu_to_be64().
 *
 * The SHA-224 table is sized with SHA256_DIGEST_SIZE and the SHA-384 table
 * with SHA512_DIGEST_SIZE: each of them lists eight initial values
 * (H0..H7), the same number as its SHA-256/SHA-512 counterpart.
 */
static const __be32 ccp_sha1_init[SHA1_DIGEST_SIZE / sizeof(__be32)] = {
	cpu_to_be32(SHA1_H0), cpu_to_be32(SHA1_H1),
	cpu_to_be32(SHA1_H2), cpu_to_be32(SHA1_H3),
	cpu_to_be32(SHA1_H4),
};

static const __be32 ccp_sha224_init[SHA256_DIGEST_SIZE / sizeof(__be32)] = {
	cpu_to_be32(SHA224_H0), cpu_to_be32(SHA224_H1),
	cpu_to_be32(SHA224_H2), cpu_to_be32(SHA224_H3),
	cpu_to_be32(SHA224_H4), cpu_to_be32(SHA224_H5),
	cpu_to_be32(SHA224_H6), cpu_to_be32(SHA224_H7),
};

static const __be32 ccp_sha256_init[SHA256_DIGEST_SIZE / sizeof(__be32)] = {
	cpu_to_be32(SHA256_H0), cpu_to_be32(SHA256_H1),
	cpu_to_be32(SHA256_H2), cpu_to_be32(SHA256_H3),
	cpu_to_be32(SHA256_H4), cpu_to_be32(SHA256_H5),
	cpu_to_be32(SHA256_H6), cpu_to_be32(SHA256_H7),
};

static const __be64 ccp_sha384_init[SHA512_DIGEST_SIZE / sizeof(__be64)] = {
	cpu_to_be64(SHA384_H0), cpu_to_be64(SHA384_H1),
	cpu_to_be64(SHA384_H2), cpu_to_be64(SHA384_H3),
	cpu_to_be64(SHA384_H4), cpu_to_be64(SHA384_H5),
	cpu_to_be64(SHA384_H6), cpu_to_be64(SHA384_H7),
};

static const __be64 ccp_sha512_init[SHA512_DIGEST_SIZE / sizeof(__be64)] = {
	cpu_to_be64(SHA512_H0), cpu_to_be64(SHA512_H1),
	cpu_to_be64(SHA512_H2), cpu_to_be64(SHA512_H3),
	cpu_to_be64(SHA512_H4), cpu_to_be64(SHA512_H5),
	cpu_to_be64(SHA512_H6), cpu_to_be64(SHA512_H7),
};
55 
/* Produce a job id for a new command: version 3.0 devices get a freshly
 * generated id from ccp_gen_jobid(), every other version uses 0.
 *
 * The argument is parenthesized so that any pointer expression (for
 * example "&dev") expands correctly. Note that it is evaluated twice.
 */
#define	CCP_NEW_JOBID(ccp)	(((ccp)->vdata->version == CCP_VERSION(3, 0)) ? \
					ccp_gen_jobid(ccp) : 0)
58 
59 static u32 ccp_gen_jobid(struct ccp_device *ccp)
60 {
61 	return atomic_inc_return(&ccp->current_id) & CCP_JOBID_MASK;
62 }
63 
64 static void ccp_sg_free(struct ccp_sg_workarea *wa)
65 {
66 	if (wa->dma_count)
67 		dma_unmap_sg(wa->dma_dev, wa->dma_sg_head, wa->nents, wa->dma_dir);
68 
69 	wa->dma_count = 0;
70 }
71 
72 static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev,
73 				struct scatterlist *sg, u64 len,
74 				enum dma_data_direction dma_dir)
75 {
76 	memset(wa, 0, sizeof(*wa));
77 
78 	wa->sg = sg;
79 	if (!sg)
80 		return 0;
81 
82 	wa->nents = sg_nents_for_len(sg, len);
83 	if (wa->nents < 0)
84 		return wa->nents;
85 
86 	wa->bytes_left = len;
87 	wa->sg_used = 0;
88 
89 	if (len == 0)
90 		return 0;
91 
92 	if (dma_dir == DMA_NONE)
93 		return 0;
94 
95 	wa->dma_sg = sg;
96 	wa->dma_sg_head = sg;
97 	wa->dma_dev = dev;
98 	wa->dma_dir = dma_dir;
99 	wa->dma_count = dma_map_sg(dev, sg, wa->nents, dma_dir);
100 	if (!wa->dma_count)
101 		return -ENOMEM;
102 
103 	return 0;
104 }
105 
/* Account for @len bytes having been consumed from a scatterlist workarea.
 *
 * No more than wa->bytes_left bytes are consumed. Once the current DMA
 * entry has been used up exactly, both the DMA cursor (wa->dma_sg) and
 * the non-DMA cursor (wa->sg) are advanced and wa->sg_used restarts at 0.
 * Does nothing when the workarea has no scatterlist.
 */
static void ccp_update_sg_workarea(struct ccp_sg_workarea *wa, unsigned int len)
{
	unsigned int nbytes = min_t(u64, len, wa->bytes_left);
	unsigned int sg_combined_len = 0;

	if (!wa->sg)
		return;

	wa->sg_used += nbytes;
	wa->bytes_left -= nbytes;
	if (wa->sg_used == sg_dma_len(wa->dma_sg)) {
		/* Advance to the next DMA scatterlist entry */
		wa->dma_sg = sg_next(wa->dma_sg);

		/* In the case that the DMA mapped scatterlist has entries
		 * that have been merged, the non-DMA mapped scatterlist
		 * must be advanced multiple times for each merged entry.
		 * This ensures that the current non-DMA mapped entry
		 * corresponds to the current DMA mapped entry.
		 */
		do {
			sg_combined_len += wa->sg->length;
			wa->sg = sg_next(wa->sg);
		} while (wa->sg_used > sg_combined_len);

		wa->sg_used = 0;
	}
}
134 
135 static void ccp_dm_free(struct ccp_dm_workarea *wa)
136 {
137 	if (wa->length <= CCP_DMAPOOL_MAX_SIZE) {
138 		if (wa->address)
139 			dma_pool_free(wa->dma_pool, wa->address,
140 				      wa->dma.address);
141 	} else {
142 		if (wa->dma.address)
143 			dma_unmap_single(wa->dev, wa->dma.address, wa->length,
144 					 wa->dma.dir);
145 		kfree(wa->address);
146 	}
147 
148 	wa->address = NULL;
149 	wa->dma.address = 0;
150 }
151 
152 static int ccp_init_dm_workarea(struct ccp_dm_workarea *wa,
153 				struct ccp_cmd_queue *cmd_q,
154 				unsigned int len,
155 				enum dma_data_direction dir)
156 {
157 	memset(wa, 0, sizeof(*wa));
158 
159 	if (!len)
160 		return 0;
161 
162 	wa->dev = cmd_q->ccp->dev;
163 	wa->length = len;
164 
165 	if (len <= CCP_DMAPOOL_MAX_SIZE) {
166 		wa->dma_pool = cmd_q->dma_pool;
167 
168 		wa->address = dma_pool_zalloc(wa->dma_pool, GFP_KERNEL,
169 					     &wa->dma.address);
170 		if (!wa->address)
171 			return -ENOMEM;
172 
173 		wa->dma.length = CCP_DMAPOOL_MAX_SIZE;
174 
175 	} else {
176 		wa->address = kzalloc(len, GFP_KERNEL);
177 		if (!wa->address)
178 			return -ENOMEM;
179 
180 		wa->dma.address = dma_map_single(wa->dev, wa->address, len,
181 						 dir);
182 		if (dma_mapping_error(wa->dev, wa->dma.address))
183 			return -ENOMEM;
184 
185 		wa->dma.length = len;
186 	}
187 	wa->dma.dir = dir;
188 
189 	return 0;
190 }
191 
192 static int ccp_set_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
193 			   struct scatterlist *sg, unsigned int sg_offset,
194 			   unsigned int len)
195 {
196 	WARN_ON(!wa->address);
197 
198 	if (len > (wa->length - wa_offset))
199 		return -EINVAL;
200 
201 	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
202 				 0);
203 	return 0;
204 }
205 
206 static void ccp_get_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
207 			    struct scatterlist *sg, unsigned int sg_offset,
208 			    unsigned int len)
209 {
210 	WARN_ON(!wa->address);
211 
212 	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
213 				 1);
214 }
215 
216 static int ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa,
217 				   unsigned int wa_offset,
218 				   struct scatterlist *sg,
219 				   unsigned int sg_offset,
220 				   unsigned int len)
221 {
222 	u8 *p, *q;
223 	int	rc;
224 
225 	rc = ccp_set_dm_area(wa, wa_offset, sg, sg_offset, len);
226 	if (rc)
227 		return rc;
228 
229 	p = wa->address + wa_offset;
230 	q = p + len - 1;
231 	while (p < q) {
232 		*p = *p ^ *q;
233 		*q = *p ^ *q;
234 		*p = *p ^ *q;
235 		p++;
236 		q--;
237 	}
238 	return 0;
239 }
240 
241 static void ccp_reverse_get_dm_area(struct ccp_dm_workarea *wa,
242 				    unsigned int wa_offset,
243 				    struct scatterlist *sg,
244 				    unsigned int sg_offset,
245 				    unsigned int len)
246 {
247 	u8 *p, *q;
248 
249 	p = wa->address + wa_offset;
250 	q = p + len - 1;
251 	while (p < q) {
252 		*p = *p ^ *q;
253 		*q = *p ^ *q;
254 		*p = *p ^ *q;
255 		p++;
256 		q--;
257 	}
258 
259 	ccp_get_dm_area(wa, wa_offset, sg, sg_offset, len);
260 }
261 
262 static void ccp_free_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q)
263 {
264 	ccp_dm_free(&data->dm_wa);
265 	ccp_sg_free(&data->sg_wa);
266 }
267 
268 static int ccp_init_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q,
269 			 struct scatterlist *sg, u64 sg_len,
270 			 unsigned int dm_len,
271 			 enum dma_data_direction dir)
272 {
273 	int ret;
274 
275 	memset(data, 0, sizeof(*data));
276 
277 	ret = ccp_init_sg_workarea(&data->sg_wa, cmd_q->ccp->dev, sg, sg_len,
278 				   dir);
279 	if (ret)
280 		goto e_err;
281 
282 	ret = ccp_init_dm_workarea(&data->dm_wa, cmd_q, dm_len, dir);
283 	if (ret)
284 		goto e_err;
285 
286 	return 0;
287 
288 e_err:
289 	ccp_free_data(data, cmd_q);
290 
291 	return ret;
292 }
293 
/* Move data between the scatterlist and the DMA buffer of @data.
 *
 * @from == 0: the buffer is zeroed and then filled from the scatterlist.
 * @from != 0: @from is passed on as the "out" flag of
 *             scatterwalk_map_and_copy(), i.e. the buffer is written out
 *             to the scatterlist.
 *
 * Returns the number of bytes by which the scatterlist workarea was
 * advanced, or 0 when the workarea has no scatterlist.
 */
static unsigned int ccp_queue_buf(struct ccp_data *data, unsigned int from)
{
	struct ccp_sg_workarea *sg_wa = &data->sg_wa;
	struct ccp_dm_workarea *dm_wa = &data->dm_wa;
	unsigned int buf_count, nbytes;

	/* Clear the buffer if setting it */
	if (!from)
		memset(dm_wa->address, 0, dm_wa->length);

	if (!sg_wa->sg)
		return 0;

	/* Perform the copy operation
	 *   nbytes will always be <= UINT_MAX because dm_wa->length is
	 *   an unsigned int
	 */
	nbytes = min_t(u64, sg_wa->bytes_left, dm_wa->length);
	scatterwalk_map_and_copy(dm_wa->address, sg_wa->sg, sg_wa->sg_used,
				 nbytes, from);

	/* Update the structures and generate the count */
	buf_count = 0;
	while (sg_wa->bytes_left && (buf_count < dm_wa->length)) {
		/* Step by whichever is smallest: the rest of the current DMA
		 * entry, the room left in the buffer, or the data left.
		 */
		nbytes = min(sg_dma_len(sg_wa->dma_sg) - sg_wa->sg_used,
			     dm_wa->length - buf_count);
		nbytes = min_t(u64, sg_wa->bytes_left, nbytes);

		buf_count += nbytes;
		ccp_update_sg_workarea(sg_wa, nbytes);
	}

	return buf_count;
}
328 
/* Fill the DMA buffer of @data from its scatterlist; returns the byte
 * count reported by ccp_queue_buf().
 */
static unsigned int ccp_fill_queue_buf(struct ccp_data *data)
{
	const unsigned int from = 0;

	return ccp_queue_buf(data, from);
}
333 
/* Drain the DMA buffer of @data out to its scatterlist; returns the byte
 * count reported by ccp_queue_buf().
 */
static unsigned int ccp_empty_queue_buf(struct ccp_data *data)
{
	const unsigned int from = 1;

	return ccp_queue_buf(data, from);
}
338 
/* Fill in the source (and optional destination) DMA fields of @op for the
 * next chunk of data.
 *
 * @src:          source data; its scatterlist workarea is advanced here
 * @dst:          destination data, or NULL when there is no output
 * @op:           operation whose src/dst DMA fields and soc flag are set
 * @block_size:   minimum operation length; the "& ~(block_size - 1)"
 *                rounding below assumes it is a power of two
 * @blocksize_op: when the source has to be buffered, use @block_size as
 *                the source length instead of the number of bytes copied
 *
 * The destination scatterlist workarea is not advanced by this function.
 */
static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst,
			     struct ccp_op *op, unsigned int block_size,
			     bool blocksize_op)
{
	unsigned int sg_src_len, sg_dst_len, op_len;

	/* The CCP can only DMA from/to one address each per operation. This
	 * requires that we find the smallest DMA area between the source
	 * and destination. The resulting len values will always be <= UINT_MAX
	 * because the dma length is an unsigned int.
	 */
	sg_src_len = sg_dma_len(src->sg_wa.dma_sg) - src->sg_wa.sg_used;
	sg_src_len = min_t(u64, src->sg_wa.bytes_left, sg_src_len);

	if (dst) {
		sg_dst_len = sg_dma_len(dst->sg_wa.dma_sg) - dst->sg_wa.sg_used;
		/* NOTE(review): clamped with the source's bytes_left, not the
		 * destination's - presumably intentional; confirm.
		 */
		sg_dst_len = min_t(u64, src->sg_wa.bytes_left, sg_dst_len);
		op_len = min(sg_src_len, sg_dst_len);
	} else {
		op_len = sg_src_len;
	}

	/* The data operation length will be at least block_size in length
	 * or the smaller of available sg room remaining for the source or
	 * the destination
	 */
	op_len = max(op_len, block_size);

	/* Unless we have to buffer data, there's no reason to wait */
	op->soc = 0;

	if (sg_src_len < block_size) {
		/* Not enough data in the sg element, so it
		 * needs to be buffered into a blocksize chunk
		 */
		int cp_len = ccp_fill_queue_buf(src);

		op->soc = 1;
		op->src.u.dma.address = src->dm_wa.dma.address;
		op->src.u.dma.offset = 0;
		op->src.u.dma.length = (blocksize_op) ? block_size : cp_len;
	} else {
		/* Enough data in the sg element, but we need to
		 * adjust for any previously copied data
		 */
		op->src.u.dma.address = sg_dma_address(src->sg_wa.dma_sg);
		op->src.u.dma.offset = src->sg_wa.sg_used;
		op->src.u.dma.length = op_len & ~(block_size - 1);

		ccp_update_sg_workarea(&src->sg_wa, op->src.u.dma.length);
	}

	if (dst) {
		if (sg_dst_len < block_size) {
			/* Not enough room in the sg element or we're on the
			 * last piece of data (when using padding), so the
			 * output needs to be buffered into a blocksize chunk
			 */
			op->soc = 1;
			op->dst.u.dma.address = dst->dm_wa.dma.address;
			op->dst.u.dma.offset = 0;
			op->dst.u.dma.length = op->src.u.dma.length;
		} else {
			/* Enough room in the sg element, but we need to
			 * adjust for any previously used area
			 */
			op->dst.u.dma.address = sg_dma_address(dst->sg_wa.dma_sg);
			op->dst.u.dma.offset = dst->sg_wa.sg_used;
			op->dst.u.dma.length = op->src.u.dma.length;
		}
	}
}
411 
412 static void ccp_process_data(struct ccp_data *src, struct ccp_data *dst,
413 			     struct ccp_op *op)
414 {
415 	op->init = 0;
416 
417 	if (dst) {
418 		if (op->dst.u.dma.address == dst->dm_wa.dma.address)
419 			ccp_empty_queue_buf(dst);
420 		else
421 			ccp_update_sg_workarea(&dst->sg_wa,
422 					       op->dst.u.dma.length);
423 	}
424 }
425 
426 static int ccp_copy_to_from_sb(struct ccp_cmd_queue *cmd_q,
427 			       struct ccp_dm_workarea *wa, u32 jobid, u32 sb,
428 			       u32 byte_swap, bool from)
429 {
430 	struct ccp_op op;
431 
432 	memset(&op, 0, sizeof(op));
433 
434 	op.cmd_q = cmd_q;
435 	op.jobid = jobid;
436 	op.eom = 1;
437 
438 	if (from) {
439 		op.soc = 1;
440 		op.src.type = CCP_MEMTYPE_SB;
441 		op.src.u.sb = sb;
442 		op.dst.type = CCP_MEMTYPE_SYSTEM;
443 		op.dst.u.dma.address = wa->dma.address;
444 		op.dst.u.dma.length = wa->length;
445 	} else {
446 		op.src.type = CCP_MEMTYPE_SYSTEM;
447 		op.src.u.dma.address = wa->dma.address;
448 		op.src.u.dma.length = wa->length;
449 		op.dst.type = CCP_MEMTYPE_SB;
450 		op.dst.u.sb = sb;
451 	}
452 
453 	op.u.passthru.byte_swap = byte_swap;
454 
455 	return cmd_q->ccp->vdata->perform->passthru(&op);
456 }
457 
458 static int ccp_copy_to_sb(struct ccp_cmd_queue *cmd_q,
459 			  struct ccp_dm_workarea *wa, u32 jobid, u32 sb,
460 			  u32 byte_swap)
461 {
462 	return ccp_copy_to_from_sb(cmd_q, wa, jobid, sb, byte_swap, false);
463 }
464 
465 static int ccp_copy_from_sb(struct ccp_cmd_queue *cmd_q,
466 			    struct ccp_dm_workarea *wa, u32 jobid, u32 sb,
467 			    u32 byte_swap)
468 {
469 	return ccp_copy_to_from_sb(cmd_q, wa, jobid, sb, byte_swap, true);
470 }
471 
/* Run an AES-CMAC command on @cmd_q.
 *
 * Validates the request, loads the key and the IV into the queue's key and
 * context storage block (SB) entries using byte-swapping passthru copies,
 * feeds the source data to the AES engine in AES_BLOCK_SIZE units and
 * finally copies the resulting context back into aes->iv.
 *
 * Returns 0 on success or a negative errno. When a device operation
 * fails, cmd->engine_error is set from cmd_q->cmd_error.
 */
static noinline_for_stack int
ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
	struct ccp_aes_engine *aes = &cmd->u.aes;
	struct ccp_dm_workarea key, ctx;
	struct ccp_data src;
	struct ccp_op op;
	unsigned int dm_offset;
	int ret;

	if (!((aes->key_len == AES_KEYSIZE_128) ||
	      (aes->key_len == AES_KEYSIZE_192) ||
	      (aes->key_len == AES_KEYSIZE_256)))
		return -EINVAL;

	/* The source length must be a multiple of the AES block size */
	if (aes->src_len & (AES_BLOCK_SIZE - 1))
		return -EINVAL;

	if (aes->iv_len != AES_BLOCK_SIZE)
		return -EINVAL;

	if (!aes->key || !aes->iv || !aes->src)
		return -EINVAL;

	if (aes->cmac_final) {
		if (aes->cmac_key_len != AES_BLOCK_SIZE)
			return -EINVAL;

		if (!aes->cmac_key)
			return -EINVAL;
	}

	BUILD_BUG_ON(CCP_AES_KEY_SB_COUNT != 1);
	BUILD_BUG_ON(CCP_AES_CTX_SB_COUNT != 1);

	/* NOTE(review): this value is overwritten by the
	 * ccp_init_dm_workarea() call below before it is ever read.
	 */
	ret = -EIO;
	memset(&op, 0, sizeof(op));
	op.cmd_q = cmd_q;
	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
	op.sb_key = cmd_q->sb_key;
	op.sb_ctx = cmd_q->sb_ctx;
	op.init = 1;
	op.u.aes.type = aes->type;
	op.u.aes.mode = aes->mode;
	op.u.aes.action = aes->action;

	/* All supported key sizes fit in a single (32-byte) SB entry
	 * and must be in little endian format. Use the 256-bit byte
	 * swap passthru option to convert from big endian to little
	 * endian.
	 */
	ret = ccp_init_dm_workarea(&key, cmd_q,
				   CCP_AES_KEY_SB_COUNT * CCP_SB_BYTES,
				   DMA_TO_DEVICE);
	if (ret)
		return ret;

	/* Place the key at the end of the SB-sized buffer */
	dm_offset = CCP_SB_BYTES - aes->key_len;
	ret = ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
	if (ret)
		goto e_key;
	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
			     CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_key;
	}

	/* The AES context fits in a single (32-byte) SB entry and
	 * must be in little endian format. Use the 256-bit byte swap
	 * passthru option to convert from big endian to little endian.
	 */
	ret = ccp_init_dm_workarea(&ctx, cmd_q,
				   CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES,
				   DMA_BIDIRECTIONAL);
	if (ret)
		goto e_key;

	dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
	ret = ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
	if (ret)
		goto e_ctx;
	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
			     CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_ctx;
	}

	/* Send data to the CCP AES engine */
	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
			    AES_BLOCK_SIZE, DMA_TO_DEVICE);
	if (ret)
		goto e_ctx;

	while (src.sg_wa.bytes_left) {
		ccp_prepare_data(&src, NULL, &op, AES_BLOCK_SIZE, true);
		/* Last chunk of a final request: flag end-of-message and
		 * place the CMAC key into the context first.
		 */
		if (aes->cmac_final && !src.sg_wa.bytes_left) {
			op.eom = 1;

			/* Push the K1/K2 key to the CCP now */
			ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid,
					       op.sb_ctx,
					       CCP_PASSTHRU_BYTESWAP_256BIT);
			if (ret) {
				cmd->engine_error = cmd_q->cmd_error;
				goto e_src;
			}

			ret = ccp_set_dm_area(&ctx, 0, aes->cmac_key, 0,
					      aes->cmac_key_len);
			if (ret)
				goto e_src;
			ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
					     CCP_PASSTHRU_BYTESWAP_256BIT);
			if (ret) {
				cmd->engine_error = cmd_q->cmd_error;
				goto e_src;
			}
		}

		ret = cmd_q->ccp->vdata->perform->aes(&op);
		if (ret) {
			cmd->engine_error = cmd_q->cmd_error;
			goto e_src;
		}

		ccp_process_data(&src, NULL, &op);
	}

	/* Retrieve the AES context - convert from LE to BE using
	 * 32-byte (256-bit) byteswapping
	 */
	ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
			       CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_src;
	}

	/* ...but we only need AES_BLOCK_SIZE bytes */
	dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
	ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);

	/* The success path falls through the labels below as well, so
	 * every resource is released in reverse order of acquisition.
	 */
e_src:
	ccp_free_data(&src, cmd_q);

e_ctx:
	ccp_dm_free(&ctx);

e_key:
	ccp_dm_free(&key);

	return ret;
}
627 
628 static noinline_for_stack int
629 ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
630 {
631 	struct ccp_aes_engine *aes = &cmd->u.aes;
632 	struct ccp_dm_workarea key, ctx, final_wa, tag;
633 	struct ccp_data src, dst;
634 	struct ccp_data aad;
635 	struct ccp_op op;
636 	unsigned int dm_offset;
637 	unsigned int authsize;
638 	unsigned int jobid;
639 	unsigned int ilen;
640 	bool in_place = true; /* Default value */
641 	__be64 *final;
642 	int ret;
643 
644 	struct scatterlist *p_inp, sg_inp[2];
645 	struct scatterlist *p_tag, sg_tag[2];
646 	struct scatterlist *p_outp, sg_outp[2];
647 	struct scatterlist *p_aad;
648 
649 	if (!aes->iv)
650 		return -EINVAL;
651 
652 	if (!((aes->key_len == AES_KEYSIZE_128) ||
653 		(aes->key_len == AES_KEYSIZE_192) ||
654 		(aes->key_len == AES_KEYSIZE_256)))
655 		return -EINVAL;
656 
657 	if (!aes->key) /* Gotta have a key SGL */
658 		return -EINVAL;
659 
660 	/* Zero defaults to 16 bytes, the maximum size */
661 	authsize = aes->authsize ? aes->authsize : AES_BLOCK_SIZE;
662 	switch (authsize) {
663 	case 16:
664 	case 15:
665 	case 14:
666 	case 13:
667 	case 12:
668 	case 8:
669 	case 4:
670 		break;
671 	default:
672 		return -EINVAL;
673 	}
674 
675 	/* First, decompose the source buffer into AAD & PT,
676 	 * and the destination buffer into AAD, CT & tag, or
677 	 * the input into CT & tag.
678 	 * It is expected that the input and output SGs will
679 	 * be valid, even if the AAD and input lengths are 0.
680 	 */
681 	p_aad = aes->src;
682 	p_inp = scatterwalk_ffwd(sg_inp, aes->src, aes->aad_len);
683 	p_outp = scatterwalk_ffwd(sg_outp, aes->dst, aes->aad_len);
684 	if (aes->action == CCP_AES_ACTION_ENCRYPT) {
685 		ilen = aes->src_len;
686 		p_tag = scatterwalk_ffwd(sg_tag, p_outp, ilen);
687 	} else {
688 		/* Input length for decryption includes tag */
689 		ilen = aes->src_len - authsize;
690 		p_tag = scatterwalk_ffwd(sg_tag, p_inp, ilen);
691 	}
692 
693 	jobid = CCP_NEW_JOBID(cmd_q->ccp);
694 
695 	memset(&op, 0, sizeof(op));
696 	op.cmd_q = cmd_q;
697 	op.jobid = jobid;
698 	op.sb_key = cmd_q->sb_key; /* Pre-allocated */
699 	op.sb_ctx = cmd_q->sb_ctx; /* Pre-allocated */
700 	op.init = 1;
701 	op.u.aes.type = aes->type;
702 
703 	/* Copy the key to the LSB */
704 	ret = ccp_init_dm_workarea(&key, cmd_q,
705 				   CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES,
706 				   DMA_TO_DEVICE);
707 	if (ret)
708 		return ret;
709 
710 	dm_offset = CCP_SB_BYTES - aes->key_len;
711 	ret = ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
712 	if (ret)
713 		goto e_key;
714 	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
715 			     CCP_PASSTHRU_BYTESWAP_256BIT);
716 	if (ret) {
717 		cmd->engine_error = cmd_q->cmd_error;
718 		goto e_key;
719 	}
720 
721 	/* Copy the context (IV) to the LSB.
722 	 * There is an assumption here that the IV is 96 bits in length, plus
723 	 * a nonce of 32 bits. If no IV is present, use a zeroed buffer.
724 	 */
725 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
726 				   CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES,
727 				   DMA_BIDIRECTIONAL);
728 	if (ret)
729 		goto e_key;
730 
731 	dm_offset = CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES - aes->iv_len;
732 	ret = ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
733 	if (ret)
734 		goto e_ctx;
735 
736 	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
737 			     CCP_PASSTHRU_BYTESWAP_256BIT);
738 	if (ret) {
739 		cmd->engine_error = cmd_q->cmd_error;
740 		goto e_ctx;
741 	}
742 
743 	op.init = 1;
744 	if (aes->aad_len > 0) {
745 		/* Step 1: Run a GHASH over the Additional Authenticated Data */
746 		ret = ccp_init_data(&aad, cmd_q, p_aad, aes->aad_len,
747 				    AES_BLOCK_SIZE,
748 				    DMA_TO_DEVICE);
749 		if (ret)
750 			goto e_ctx;
751 
752 		op.u.aes.mode = CCP_AES_MODE_GHASH;
753 		op.u.aes.action = CCP_AES_GHASHAAD;
754 
755 		while (aad.sg_wa.bytes_left) {
756 			ccp_prepare_data(&aad, NULL, &op, AES_BLOCK_SIZE, true);
757 
758 			ret = cmd_q->ccp->vdata->perform->aes(&op);
759 			if (ret) {
760 				cmd->engine_error = cmd_q->cmd_error;
761 				goto e_aad;
762 			}
763 
764 			ccp_process_data(&aad, NULL, &op);
765 			op.init = 0;
766 		}
767 	}
768 
769 	op.u.aes.mode = CCP_AES_MODE_GCTR;
770 	op.u.aes.action = aes->action;
771 
772 	if (ilen > 0) {
773 		/* Step 2: Run a GCTR over the plaintext */
774 		in_place = (sg_virt(p_inp) == sg_virt(p_outp)) ? true : false;
775 
776 		ret = ccp_init_data(&src, cmd_q, p_inp, ilen,
777 				    AES_BLOCK_SIZE,
778 				    in_place ? DMA_BIDIRECTIONAL
779 					     : DMA_TO_DEVICE);
780 		if (ret)
781 			goto e_ctx;
782 
783 		if (in_place) {
784 			dst = src;
785 		} else {
786 			ret = ccp_init_data(&dst, cmd_q, p_outp, ilen,
787 					    AES_BLOCK_SIZE, DMA_FROM_DEVICE);
788 			if (ret)
789 				goto e_src;
790 		}
791 
792 		op.soc = 0;
793 		op.eom = 0;
794 		op.init = 1;
795 		while (src.sg_wa.bytes_left) {
796 			ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true);
797 			if (!src.sg_wa.bytes_left) {
798 				unsigned int nbytes = ilen % AES_BLOCK_SIZE;
799 
800 				if (nbytes) {
801 					op.eom = 1;
802 					op.u.aes.size = (nbytes * 8) - 1;
803 				}
804 			}
805 
806 			ret = cmd_q->ccp->vdata->perform->aes(&op);
807 			if (ret) {
808 				cmd->engine_error = cmd_q->cmd_error;
809 				goto e_dst;
810 			}
811 
812 			ccp_process_data(&src, &dst, &op);
813 			op.init = 0;
814 		}
815 	}
816 
817 	/* Step 3: Update the IV portion of the context with the original IV */
818 	ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
819 			       CCP_PASSTHRU_BYTESWAP_256BIT);
820 	if (ret) {
821 		cmd->engine_error = cmd_q->cmd_error;
822 		goto e_dst;
823 	}
824 
825 	ret = ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
826 	if (ret)
827 		goto e_dst;
828 
829 	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
830 			     CCP_PASSTHRU_BYTESWAP_256BIT);
831 	if (ret) {
832 		cmd->engine_error = cmd_q->cmd_error;
833 		goto e_dst;
834 	}
835 
836 	/* Step 4: Concatenate the lengths of the AAD and source, and
837 	 * hash that 16 byte buffer.
838 	 */
839 	ret = ccp_init_dm_workarea(&final_wa, cmd_q, AES_BLOCK_SIZE,
840 				   DMA_BIDIRECTIONAL);
841 	if (ret)
842 		goto e_dst;
843 	final = (__be64 *)final_wa.address;
844 	final[0] = cpu_to_be64(aes->aad_len * 8);
845 	final[1] = cpu_to_be64(ilen * 8);
846 
847 	memset(&op, 0, sizeof(op));
848 	op.cmd_q = cmd_q;
849 	op.jobid = jobid;
850 	op.sb_key = cmd_q->sb_key; /* Pre-allocated */
851 	op.sb_ctx = cmd_q->sb_ctx; /* Pre-allocated */
852 	op.init = 1;
853 	op.u.aes.type = aes->type;
854 	op.u.aes.mode = CCP_AES_MODE_GHASH;
855 	op.u.aes.action = CCP_AES_GHASHFINAL;
856 	op.src.type = CCP_MEMTYPE_SYSTEM;
857 	op.src.u.dma.address = final_wa.dma.address;
858 	op.src.u.dma.length = AES_BLOCK_SIZE;
859 	op.dst.type = CCP_MEMTYPE_SYSTEM;
860 	op.dst.u.dma.address = final_wa.dma.address;
861 	op.dst.u.dma.length = AES_BLOCK_SIZE;
862 	op.eom = 1;
863 	op.u.aes.size = 0;
864 	ret = cmd_q->ccp->vdata->perform->aes(&op);
865 	if (ret)
866 		goto e_dst;
867 
868 	if (aes->action == CCP_AES_ACTION_ENCRYPT) {
869 		/* Put the ciphered tag after the ciphertext. */
870 		ccp_get_dm_area(&final_wa, 0, p_tag, 0, authsize);
871 	} else {
872 		/* Does this ciphered tag match the input? */
873 		ret = ccp_init_dm_workarea(&tag, cmd_q, authsize,
874 					   DMA_BIDIRECTIONAL);
875 		if (ret)
876 			goto e_tag;
877 		ret = ccp_set_dm_area(&tag, 0, p_tag, 0, authsize);
878 		if (ret)
879 			goto e_tag;
880 
881 		ret = crypto_memneq(tag.address, final_wa.address,
882 				    authsize) ? -EBADMSG : 0;
883 		ccp_dm_free(&tag);
884 	}
885 
886 e_tag:
887 	ccp_dm_free(&final_wa);
888 
889 e_dst:
890 	if (ilen > 0 && !in_place)
891 		ccp_free_data(&dst, cmd_q);
892 
893 e_src:
894 	if (ilen > 0)
895 		ccp_free_data(&src, cmd_q);
896 
897 e_aad:
898 	if (aes->aad_len)
899 		ccp_free_data(&aad, cmd_q);
900 
901 e_ctx:
902 	ccp_dm_free(&ctx);
903 
904 e_key:
905 	ccp_dm_free(&key);
906 
907 	return ret;
908 }
909 
/* Run an AES command (ECB, CBC, CFB, CTR, ...) on @cmd_q.
 *
 * Validates the request, loads the key (and, for every mode except ECB,
 * the IV) into the queue's storage block (SB) entries, pushes the source
 * data through the AES engine in AES_BLOCK_SIZE units and, for every mode
 * except ECB, copies the resulting context back into aes->iv.
 *
 * Returns 0 on success or a negative errno. When a device operation
 * fails, cmd->engine_error is set from cmd_q->cmd_error.
 */
static noinline_for_stack int
ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
	struct ccp_aes_engine *aes = &cmd->u.aes;
	struct ccp_dm_workarea key, ctx;
	struct ccp_data src, dst;
	struct ccp_op op;
	unsigned int dm_offset;
	bool in_place = false;
	int ret;

	if (!((aes->key_len == AES_KEYSIZE_128) ||
	      (aes->key_len == AES_KEYSIZE_192) ||
	      (aes->key_len == AES_KEYSIZE_256)))
		return -EINVAL;

	/* ECB and CBC only accept whole blocks */
	if (((aes->mode == CCP_AES_MODE_ECB) ||
	     (aes->mode == CCP_AES_MODE_CBC)) &&
	    (aes->src_len & (AES_BLOCK_SIZE - 1)))
		return -EINVAL;

	if (!aes->key || !aes->src || !aes->dst)
		return -EINVAL;

	/* Every mode except ECB needs a block-sized IV */
	if (aes->mode != CCP_AES_MODE_ECB) {
		if (aes->iv_len != AES_BLOCK_SIZE)
			return -EINVAL;

		if (!aes->iv)
			return -EINVAL;
	}

	BUILD_BUG_ON(CCP_AES_KEY_SB_COUNT != 1);
	BUILD_BUG_ON(CCP_AES_CTX_SB_COUNT != 1);

	/* NOTE(review): this value is overwritten by the
	 * ccp_init_dm_workarea() call below before it is ever read.
	 */
	ret = -EIO;
	memset(&op, 0, sizeof(op));
	op.cmd_q = cmd_q;
	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
	op.sb_key = cmd_q->sb_key;
	op.sb_ctx = cmd_q->sb_ctx;
	op.init = (aes->mode == CCP_AES_MODE_ECB) ? 0 : 1;
	op.u.aes.type = aes->type;
	op.u.aes.mode = aes->mode;
	op.u.aes.action = aes->action;

	/* All supported key sizes fit in a single (32-byte) SB entry
	 * and must be in little endian format. Use the 256-bit byte
	 * swap passthru option to convert from big endian to little
	 * endian.
	 */
	ret = ccp_init_dm_workarea(&key, cmd_q,
				   CCP_AES_KEY_SB_COUNT * CCP_SB_BYTES,
				   DMA_TO_DEVICE);
	if (ret)
		return ret;

	/* Place the key at the end of the SB-sized buffer */
	dm_offset = CCP_SB_BYTES - aes->key_len;
	ret = ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
	if (ret)
		goto e_key;
	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
			     CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_key;
	}

	/* The AES context fits in a single (32-byte) SB entry and
	 * must be in little endian format. Use the 256-bit byte swap
	 * passthru option to convert from big endian to little endian.
	 */
	ret = ccp_init_dm_workarea(&ctx, cmd_q,
				   CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES,
				   DMA_BIDIRECTIONAL);
	if (ret)
		goto e_key;

	if (aes->mode != CCP_AES_MODE_ECB) {
		/* Load the AES context - convert to LE */
		dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
		ret = ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
		if (ret)
			goto e_ctx;
		ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
				     CCP_PASSTHRU_BYTESWAP_256BIT);
		if (ret) {
			cmd->engine_error = cmd_q->cmd_error;
			goto e_ctx;
		}
	}
	switch (aes->mode) {
	case CCP_AES_MODE_CFB: /* CFB128 only */
	case CCP_AES_MODE_CTR:
		op.u.aes.size = AES_BLOCK_SIZE * BITS_PER_BYTE - 1;
		break;
	default:
		op.u.aes.size = 0;
	}

	/* Prepare the input and output data workareas. For in-place
	 * operations we need to set the dma direction to BIDIRECTIONAL
	 * and copy the src workarea to the dst workarea.
	 */
	if (sg_virt(aes->src) == sg_virt(aes->dst))
		in_place = true;

	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
			    AES_BLOCK_SIZE,
			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
	if (ret)
		goto e_ctx;

	if (in_place) {
		dst = src;
	} else {
		ret = ccp_init_data(&dst, cmd_q, aes->dst, aes->src_len,
				    AES_BLOCK_SIZE, DMA_FROM_DEVICE);
		if (ret)
			goto e_src;
	}

	/* Send data to the CCP AES engine */
	while (src.sg_wa.bytes_left) {
		ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true);
		if (!src.sg_wa.bytes_left) {
			op.eom = 1;

			/* Since we don't retrieve the AES context in ECB
			 * mode we have to wait for the operation to complete
			 * on the last piece of data
			 */
			if (aes->mode == CCP_AES_MODE_ECB)
				op.soc = 1;
		}

		ret = cmd_q->ccp->vdata->perform->aes(&op);
		if (ret) {
			cmd->engine_error = cmd_q->cmd_error;
			goto e_dst;
		}

		ccp_process_data(&src, &dst, &op);
	}

	if (aes->mode != CCP_AES_MODE_ECB) {
		/* Retrieve the AES context - convert from LE to BE using
		 * 32-byte (256-bit) byteswapping
		 */
		ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
				       CCP_PASSTHRU_BYTESWAP_256BIT);
		if (ret) {
			cmd->engine_error = cmd_q->cmd_error;
			goto e_dst;
		}

		/* ...but we only need AES_BLOCK_SIZE bytes */
		dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
		ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
	}

	/* The success path falls through the labels below as well. For an
	 * in-place operation dst is a copy of src, so only src is freed.
	 */
e_dst:
	if (!in_place)
		ccp_free_data(&dst, cmd_q);

e_src:
	ccp_free_data(&src, cmd_q);

e_ctx:
	ccp_dm_free(&ctx);

e_key:
	ccp_dm_free(&key);

	return ret;
}
1086 
1087 static noinline_for_stack int
1088 ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1089 {
1090 	struct ccp_xts_aes_engine *xts = &cmd->u.xts;
1091 	struct ccp_dm_workarea key, ctx;
1092 	struct ccp_data src, dst;
1093 	struct ccp_op op;
1094 	unsigned int unit_size, dm_offset;
1095 	bool in_place = false;
1096 	unsigned int sb_count;
1097 	enum ccp_aes_type aestype;
1098 	int ret;
1099 
1100 	switch (xts->unit_size) {
1101 	case CCP_XTS_AES_UNIT_SIZE_16:
1102 		unit_size = 16;
1103 		break;
1104 	case CCP_XTS_AES_UNIT_SIZE_512:
1105 		unit_size = 512;
1106 		break;
1107 	case CCP_XTS_AES_UNIT_SIZE_1024:
1108 		unit_size = 1024;
1109 		break;
1110 	case CCP_XTS_AES_UNIT_SIZE_2048:
1111 		unit_size = 2048;
1112 		break;
1113 	case CCP_XTS_AES_UNIT_SIZE_4096:
1114 		unit_size = 4096;
1115 		break;
1116 
1117 	default:
1118 		return -EINVAL;
1119 	}
1120 
1121 	if (xts->key_len == AES_KEYSIZE_128)
1122 		aestype = CCP_AES_TYPE_128;
1123 	else if (xts->key_len == AES_KEYSIZE_256)
1124 		aestype = CCP_AES_TYPE_256;
1125 	else
1126 		return -EINVAL;
1127 
1128 	if (!xts->final && (xts->src_len & (AES_BLOCK_SIZE - 1)))
1129 		return -EINVAL;
1130 
1131 	if (xts->iv_len != AES_BLOCK_SIZE)
1132 		return -EINVAL;
1133 
1134 	if (!xts->key || !xts->iv || !xts->src || !xts->dst)
1135 		return -EINVAL;
1136 
1137 	BUILD_BUG_ON(CCP_XTS_AES_KEY_SB_COUNT != 1);
1138 	BUILD_BUG_ON(CCP_XTS_AES_CTX_SB_COUNT != 1);
1139 
1140 	ret = -EIO;
1141 	memset(&op, 0, sizeof(op));
1142 	op.cmd_q = cmd_q;
1143 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
1144 	op.sb_key = cmd_q->sb_key;
1145 	op.sb_ctx = cmd_q->sb_ctx;
1146 	op.init = 1;
1147 	op.u.xts.type = aestype;
1148 	op.u.xts.action = xts->action;
1149 	op.u.xts.unit_size = xts->unit_size;
1150 
1151 	/* A version 3 device only supports 128-bit keys, which fits into a
1152 	 * single SB entry. A version 5 device uses a 512-bit vector, so two
1153 	 * SB entries.
1154 	 */
1155 	if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0))
1156 		sb_count = CCP_XTS_AES_KEY_SB_COUNT;
1157 	else
1158 		sb_count = CCP5_XTS_AES_KEY_SB_COUNT;
1159 	ret = ccp_init_dm_workarea(&key, cmd_q,
1160 				   sb_count * CCP_SB_BYTES,
1161 				   DMA_TO_DEVICE);
1162 	if (ret)
1163 		return ret;
1164 
1165 	if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0)) {
1166 		/* All supported key sizes must be in little endian format.
1167 		 * Use the 256-bit byte swap passthru option to convert from
1168 		 * big endian to little endian.
1169 		 */
1170 		dm_offset = CCP_SB_BYTES - AES_KEYSIZE_128;
1171 		ret = ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len);
1172 		if (ret)
1173 			goto e_key;
1174 		ret = ccp_set_dm_area(&key, 0, xts->key, xts->key_len, xts->key_len);
1175 		if (ret)
1176 			goto e_key;
1177 	} else {
1178 		/* Version 5 CCPs use a 512-bit space for the key: each portion
1179 		 * occupies 256 bits, or one entire slot, and is zero-padded.
1180 		 */
1181 		unsigned int pad;
1182 
1183 		dm_offset = CCP_SB_BYTES;
1184 		pad = dm_offset - xts->key_len;
1185 		ret = ccp_set_dm_area(&key, pad, xts->key, 0, xts->key_len);
1186 		if (ret)
1187 			goto e_key;
1188 		ret = ccp_set_dm_area(&key, dm_offset + pad, xts->key,
1189 				      xts->key_len, xts->key_len);
1190 		if (ret)
1191 			goto e_key;
1192 	}
1193 	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
1194 			     CCP_PASSTHRU_BYTESWAP_256BIT);
1195 	if (ret) {
1196 		cmd->engine_error = cmd_q->cmd_error;
1197 		goto e_key;
1198 	}
1199 
1200 	/* The AES context fits in a single (32-byte) SB entry and
1201 	 * for XTS is already in little endian format so no byte swapping
1202 	 * is needed.
1203 	 */
1204 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
1205 				   CCP_XTS_AES_CTX_SB_COUNT * CCP_SB_BYTES,
1206 				   DMA_BIDIRECTIONAL);
1207 	if (ret)
1208 		goto e_key;
1209 
1210 	ret = ccp_set_dm_area(&ctx, 0, xts->iv, 0, xts->iv_len);
1211 	if (ret)
1212 		goto e_ctx;
1213 	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1214 			     CCP_PASSTHRU_BYTESWAP_NOOP);
1215 	if (ret) {
1216 		cmd->engine_error = cmd_q->cmd_error;
1217 		goto e_ctx;
1218 	}
1219 
1220 	/* Prepare the input and output data workareas. For in-place
1221 	 * operations we need to set the dma direction to BIDIRECTIONAL
1222 	 * and copy the src workarea to the dst workarea.
1223 	 */
1224 	if (sg_virt(xts->src) == sg_virt(xts->dst))
1225 		in_place = true;
1226 
1227 	ret = ccp_init_data(&src, cmd_q, xts->src, xts->src_len,
1228 			    unit_size,
1229 			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1230 	if (ret)
1231 		goto e_ctx;
1232 
1233 	if (in_place) {
1234 		dst = src;
1235 	} else {
1236 		ret = ccp_init_data(&dst, cmd_q, xts->dst, xts->src_len,
1237 				    unit_size, DMA_FROM_DEVICE);
1238 		if (ret)
1239 			goto e_src;
1240 	}
1241 
1242 	/* Send data to the CCP AES engine */
1243 	while (src.sg_wa.bytes_left) {
1244 		ccp_prepare_data(&src, &dst, &op, unit_size, true);
1245 		if (!src.sg_wa.bytes_left)
1246 			op.eom = 1;
1247 
1248 		ret = cmd_q->ccp->vdata->perform->xts_aes(&op);
1249 		if (ret) {
1250 			cmd->engine_error = cmd_q->cmd_error;
1251 			goto e_dst;
1252 		}
1253 
1254 		ccp_process_data(&src, &dst, &op);
1255 	}
1256 
1257 	/* Retrieve the AES context - convert from LE to BE using
1258 	 * 32-byte (256-bit) byteswapping
1259 	 */
1260 	ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1261 			       CCP_PASSTHRU_BYTESWAP_256BIT);
1262 	if (ret) {
1263 		cmd->engine_error = cmd_q->cmd_error;
1264 		goto e_dst;
1265 	}
1266 
1267 	/* ...but we only need AES_BLOCK_SIZE bytes */
1268 	dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
1269 	ccp_get_dm_area(&ctx, dm_offset, xts->iv, 0, xts->iv_len);
1270 
1271 e_dst:
1272 	if (!in_place)
1273 		ccp_free_data(&dst, cmd_q);
1274 
1275 e_src:
1276 	ccp_free_data(&src, cmd_q);
1277 
1278 e_ctx:
1279 	ccp_dm_free(&ctx);
1280 
1281 e_key:
1282 	ccp_dm_free(&key);
1283 
1284 	return ret;
1285 }
1286 
1287 static noinline_for_stack int
1288 ccp_run_des3_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1289 {
1290 	struct ccp_des3_engine *des3 = &cmd->u.des3;
1291 
1292 	struct ccp_dm_workarea key, ctx;
1293 	struct ccp_data src, dst;
1294 	struct ccp_op op;
1295 	unsigned int dm_offset;
1296 	unsigned int len_singlekey;
1297 	bool in_place = false;
1298 	int ret;
1299 
1300 	/* Error checks */
1301 	if (cmd_q->ccp->vdata->version < CCP_VERSION(5, 0))
1302 		return -EINVAL;
1303 
1304 	if (!cmd_q->ccp->vdata->perform->des3)
1305 		return -EINVAL;
1306 
1307 	if (des3->key_len != DES3_EDE_KEY_SIZE)
1308 		return -EINVAL;
1309 
1310 	if (((des3->mode == CCP_DES3_MODE_ECB) ||
1311 		(des3->mode == CCP_DES3_MODE_CBC)) &&
1312 		(des3->src_len & (DES3_EDE_BLOCK_SIZE - 1)))
1313 		return -EINVAL;
1314 
1315 	if (!des3->key || !des3->src || !des3->dst)
1316 		return -EINVAL;
1317 
1318 	if (des3->mode != CCP_DES3_MODE_ECB) {
1319 		if (des3->iv_len != DES3_EDE_BLOCK_SIZE)
1320 			return -EINVAL;
1321 
1322 		if (!des3->iv)
1323 			return -EINVAL;
1324 	}
1325 
1326 	/* Zero out all the fields of the command desc */
1327 	memset(&op, 0, sizeof(op));
1328 
1329 	/* Set up the Function field */
1330 	op.cmd_q = cmd_q;
1331 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
1332 	op.sb_key = cmd_q->sb_key;
1333 
1334 	op.init = (des3->mode == CCP_DES3_MODE_ECB) ? 0 : 1;
1335 	op.u.des3.type = des3->type;
1336 	op.u.des3.mode = des3->mode;
1337 	op.u.des3.action = des3->action;
1338 
1339 	/*
1340 	 * All supported key sizes fit in a single (32-byte) KSB entry and
1341 	 * (like AES) must be in little endian format. Use the 256-bit byte
1342 	 * swap passthru option to convert from big endian to little endian.
1343 	 */
1344 	ret = ccp_init_dm_workarea(&key, cmd_q,
1345 				   CCP_DES3_KEY_SB_COUNT * CCP_SB_BYTES,
1346 				   DMA_TO_DEVICE);
1347 	if (ret)
1348 		return ret;
1349 
1350 	/*
1351 	 * The contents of the key triplet are in the reverse order of what
1352 	 * is required by the engine. Copy the 3 pieces individually to put
1353 	 * them where they belong.
1354 	 */
1355 	dm_offset = CCP_SB_BYTES - des3->key_len; /* Basic offset */
1356 
1357 	len_singlekey = des3->key_len / 3;
1358 	ret = ccp_set_dm_area(&key, dm_offset + 2 * len_singlekey,
1359 			      des3->key, 0, len_singlekey);
1360 	if (ret)
1361 		goto e_key;
1362 	ret = ccp_set_dm_area(&key, dm_offset + len_singlekey,
1363 			      des3->key, len_singlekey, len_singlekey);
1364 	if (ret)
1365 		goto e_key;
1366 	ret = ccp_set_dm_area(&key, dm_offset,
1367 			      des3->key, 2 * len_singlekey, len_singlekey);
1368 	if (ret)
1369 		goto e_key;
1370 
1371 	/* Copy the key to the SB */
1372 	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
1373 			     CCP_PASSTHRU_BYTESWAP_256BIT);
1374 	if (ret) {
1375 		cmd->engine_error = cmd_q->cmd_error;
1376 		goto e_key;
1377 	}
1378 
1379 	/*
1380 	 * The DES3 context fits in a single (32-byte) KSB entry and
1381 	 * must be in little endian format. Use the 256-bit byte swap
1382 	 * passthru option to convert from big endian to little endian.
1383 	 */
1384 	if (des3->mode != CCP_DES3_MODE_ECB) {
1385 		op.sb_ctx = cmd_q->sb_ctx;
1386 
1387 		ret = ccp_init_dm_workarea(&ctx, cmd_q,
1388 					   CCP_DES3_CTX_SB_COUNT * CCP_SB_BYTES,
1389 					   DMA_BIDIRECTIONAL);
1390 		if (ret)
1391 			goto e_key;
1392 
1393 		/* Load the context into the LSB */
1394 		dm_offset = CCP_SB_BYTES - des3->iv_len;
1395 		ret = ccp_set_dm_area(&ctx, dm_offset, des3->iv, 0,
1396 				      des3->iv_len);
1397 		if (ret)
1398 			goto e_ctx;
1399 
1400 		ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1401 				     CCP_PASSTHRU_BYTESWAP_256BIT);
1402 		if (ret) {
1403 			cmd->engine_error = cmd_q->cmd_error;
1404 			goto e_ctx;
1405 		}
1406 	}
1407 
1408 	/*
1409 	 * Prepare the input and output data workareas. For in-place
1410 	 * operations we need to set the dma direction to BIDIRECTIONAL
1411 	 * and copy the src workarea to the dst workarea.
1412 	 */
1413 	if (sg_virt(des3->src) == sg_virt(des3->dst))
1414 		in_place = true;
1415 
1416 	ret = ccp_init_data(&src, cmd_q, des3->src, des3->src_len,
1417 			DES3_EDE_BLOCK_SIZE,
1418 			in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1419 	if (ret)
1420 		goto e_ctx;
1421 
1422 	if (in_place)
1423 		dst = src;
1424 	else {
1425 		ret = ccp_init_data(&dst, cmd_q, des3->dst, des3->src_len,
1426 				DES3_EDE_BLOCK_SIZE, DMA_FROM_DEVICE);
1427 		if (ret)
1428 			goto e_src;
1429 	}
1430 
1431 	/* Send data to the CCP DES3 engine */
1432 	while (src.sg_wa.bytes_left) {
1433 		ccp_prepare_data(&src, &dst, &op, DES3_EDE_BLOCK_SIZE, true);
1434 		if (!src.sg_wa.bytes_left) {
1435 			op.eom = 1;
1436 
1437 			/* Since we don't retrieve the context in ECB mode
1438 			 * we have to wait for the operation to complete
1439 			 * on the last piece of data
1440 			 */
1441 			op.soc = 0;
1442 		}
1443 
1444 		ret = cmd_q->ccp->vdata->perform->des3(&op);
1445 		if (ret) {
1446 			cmd->engine_error = cmd_q->cmd_error;
1447 			goto e_dst;
1448 		}
1449 
1450 		ccp_process_data(&src, &dst, &op);
1451 	}
1452 
1453 	if (des3->mode != CCP_DES3_MODE_ECB) {
1454 		/* Retrieve the context and make BE */
1455 		ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1456 				       CCP_PASSTHRU_BYTESWAP_256BIT);
1457 		if (ret) {
1458 			cmd->engine_error = cmd_q->cmd_error;
1459 			goto e_dst;
1460 		}
1461 
1462 		/* ...but we only need the last DES3_EDE_BLOCK_SIZE bytes */
1463 		ccp_get_dm_area(&ctx, dm_offset, des3->iv, 0,
1464 				DES3_EDE_BLOCK_SIZE);
1465 	}
1466 e_dst:
1467 	if (!in_place)
1468 		ccp_free_data(&dst, cmd_q);
1469 
1470 e_src:
1471 	ccp_free_data(&src, cmd_q);
1472 
1473 e_ctx:
1474 	if (des3->mode != CCP_DES3_MODE_ECB)
1475 		ccp_dm_free(&ctx);
1476 
1477 e_key:
1478 	ccp_dm_free(&key);
1479 
1480 	return ret;
1481 }
1482 
1483 static noinline_for_stack int
1484 ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1485 {
1486 	struct ccp_sha_engine *sha = &cmd->u.sha;
1487 	struct ccp_dm_workarea ctx;
1488 	struct ccp_data src;
1489 	struct ccp_op op;
1490 	unsigned int ioffset, ooffset;
1491 	unsigned int digest_size;
1492 	int sb_count;
1493 	const void *init;
1494 	u64 block_size;
1495 	int ctx_size;
1496 	int ret;
1497 
1498 	switch (sha->type) {
1499 	case CCP_SHA_TYPE_1:
1500 		if (sha->ctx_len < SHA1_DIGEST_SIZE)
1501 			return -EINVAL;
1502 		block_size = SHA1_BLOCK_SIZE;
1503 		break;
1504 	case CCP_SHA_TYPE_224:
1505 		if (sha->ctx_len < SHA224_DIGEST_SIZE)
1506 			return -EINVAL;
1507 		block_size = SHA224_BLOCK_SIZE;
1508 		break;
1509 	case CCP_SHA_TYPE_256:
1510 		if (sha->ctx_len < SHA256_DIGEST_SIZE)
1511 			return -EINVAL;
1512 		block_size = SHA256_BLOCK_SIZE;
1513 		break;
1514 	case CCP_SHA_TYPE_384:
1515 		if (cmd_q->ccp->vdata->version < CCP_VERSION(4, 0)
1516 		    || sha->ctx_len < SHA384_DIGEST_SIZE)
1517 			return -EINVAL;
1518 		block_size = SHA384_BLOCK_SIZE;
1519 		break;
1520 	case CCP_SHA_TYPE_512:
1521 		if (cmd_q->ccp->vdata->version < CCP_VERSION(4, 0)
1522 		    || sha->ctx_len < SHA512_DIGEST_SIZE)
1523 			return -EINVAL;
1524 		block_size = SHA512_BLOCK_SIZE;
1525 		break;
1526 	default:
1527 		return -EINVAL;
1528 	}
1529 
1530 	if (!sha->ctx)
1531 		return -EINVAL;
1532 
1533 	if (!sha->final && (sha->src_len & (block_size - 1)))
1534 		return -EINVAL;
1535 
1536 	/* The version 3 device can't handle zero-length input */
1537 	if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0)) {
1538 
1539 		if (!sha->src_len) {
1540 			unsigned int digest_len;
1541 			const u8 *sha_zero;
1542 
1543 			/* Not final, just return */
1544 			if (!sha->final)
1545 				return 0;
1546 
1547 			/* CCP can't do a zero length sha operation so the
1548 			 * caller must buffer the data.
1549 			 */
1550 			if (sha->msg_bits)
1551 				return -EINVAL;
1552 
1553 			/* The CCP cannot perform zero-length sha operations
1554 			 * so the caller is required to buffer data for the
1555 			 * final operation. However, a sha operation for a
1556 			 * message with a total length of zero is valid so
1557 			 * known values are required to supply the result.
1558 			 */
1559 			switch (sha->type) {
1560 			case CCP_SHA_TYPE_1:
1561 				sha_zero = sha1_zero_message_hash;
1562 				digest_len = SHA1_DIGEST_SIZE;
1563 				break;
1564 			case CCP_SHA_TYPE_224:
1565 				sha_zero = sha224_zero_message_hash;
1566 				digest_len = SHA224_DIGEST_SIZE;
1567 				break;
1568 			case CCP_SHA_TYPE_256:
1569 				sha_zero = sha256_zero_message_hash;
1570 				digest_len = SHA256_DIGEST_SIZE;
1571 				break;
1572 			default:
1573 				return -EINVAL;
1574 			}
1575 
1576 			scatterwalk_map_and_copy((void *)sha_zero, sha->ctx, 0,
1577 						 digest_len, 1);
1578 
1579 			return 0;
1580 		}
1581 	}
1582 
1583 	/* Set variables used throughout */
1584 	switch (sha->type) {
1585 	case CCP_SHA_TYPE_1:
1586 		digest_size = SHA1_DIGEST_SIZE;
1587 		init = (void *) ccp_sha1_init;
1588 		ctx_size = SHA1_DIGEST_SIZE;
1589 		sb_count = 1;
1590 		if (cmd_q->ccp->vdata->version != CCP_VERSION(3, 0))
1591 			ooffset = ioffset = CCP_SB_BYTES - SHA1_DIGEST_SIZE;
1592 		else
1593 			ooffset = ioffset = 0;
1594 		break;
1595 	case CCP_SHA_TYPE_224:
1596 		digest_size = SHA224_DIGEST_SIZE;
1597 		init = (void *) ccp_sha224_init;
1598 		ctx_size = SHA256_DIGEST_SIZE;
1599 		sb_count = 1;
1600 		ioffset = 0;
1601 		if (cmd_q->ccp->vdata->version != CCP_VERSION(3, 0))
1602 			ooffset = CCP_SB_BYTES - SHA224_DIGEST_SIZE;
1603 		else
1604 			ooffset = 0;
1605 		break;
1606 	case CCP_SHA_TYPE_256:
1607 		digest_size = SHA256_DIGEST_SIZE;
1608 		init = (void *) ccp_sha256_init;
1609 		ctx_size = SHA256_DIGEST_SIZE;
1610 		sb_count = 1;
1611 		ooffset = ioffset = 0;
1612 		break;
1613 	case CCP_SHA_TYPE_384:
1614 		digest_size = SHA384_DIGEST_SIZE;
1615 		init = (void *) ccp_sha384_init;
1616 		ctx_size = SHA512_DIGEST_SIZE;
1617 		sb_count = 2;
1618 		ioffset = 0;
1619 		ooffset = 2 * CCP_SB_BYTES - SHA384_DIGEST_SIZE;
1620 		break;
1621 	case CCP_SHA_TYPE_512:
1622 		digest_size = SHA512_DIGEST_SIZE;
1623 		init = (void *) ccp_sha512_init;
1624 		ctx_size = SHA512_DIGEST_SIZE;
1625 		sb_count = 2;
1626 		ooffset = ioffset = 0;
1627 		break;
1628 	default:
1629 		ret = -EINVAL;
1630 		goto e_data;
1631 	}
1632 
1633 	/* For zero-length plaintext the src pointer is ignored;
1634 	 * otherwise both parts must be valid
1635 	 */
1636 	if (sha->src_len && !sha->src)
1637 		return -EINVAL;
1638 
1639 	memset(&op, 0, sizeof(op));
1640 	op.cmd_q = cmd_q;
1641 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
1642 	op.sb_ctx = cmd_q->sb_ctx; /* Pre-allocated */
1643 	op.u.sha.type = sha->type;
1644 	op.u.sha.msg_bits = sha->msg_bits;
1645 
1646 	/* For SHA1/224/256 the context fits in a single (32-byte) SB entry;
1647 	 * SHA384/512 require 2 adjacent SB slots, with the right half in the
1648 	 * first slot, and the left half in the second. Each portion must then
1649 	 * be in little endian format: use the 256-bit byte swap option.
1650 	 */
1651 	ret = ccp_init_dm_workarea(&ctx, cmd_q, sb_count * CCP_SB_BYTES,
1652 				   DMA_BIDIRECTIONAL);
1653 	if (ret)
1654 		return ret;
1655 	if (sha->first) {
1656 		switch (sha->type) {
1657 		case CCP_SHA_TYPE_1:
1658 		case CCP_SHA_TYPE_224:
1659 		case CCP_SHA_TYPE_256:
1660 			memcpy(ctx.address + ioffset, init, ctx_size);
1661 			break;
1662 		case CCP_SHA_TYPE_384:
1663 		case CCP_SHA_TYPE_512:
1664 			memcpy(ctx.address + ctx_size / 2, init,
1665 			       ctx_size / 2);
1666 			memcpy(ctx.address, init + ctx_size / 2,
1667 			       ctx_size / 2);
1668 			break;
1669 		default:
1670 			ret = -EINVAL;
1671 			goto e_ctx;
1672 		}
1673 	} else {
1674 		/* Restore the context */
1675 		ret = ccp_set_dm_area(&ctx, 0, sha->ctx, 0,
1676 				      sb_count * CCP_SB_BYTES);
1677 		if (ret)
1678 			goto e_ctx;
1679 	}
1680 
1681 	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1682 			     CCP_PASSTHRU_BYTESWAP_256BIT);
1683 	if (ret) {
1684 		cmd->engine_error = cmd_q->cmd_error;
1685 		goto e_ctx;
1686 	}
1687 
1688 	if (sha->src) {
1689 		/* Send data to the CCP SHA engine; block_size is set above */
1690 		ret = ccp_init_data(&src, cmd_q, sha->src, sha->src_len,
1691 				    block_size, DMA_TO_DEVICE);
1692 		if (ret)
1693 			goto e_ctx;
1694 
1695 		while (src.sg_wa.bytes_left) {
1696 			ccp_prepare_data(&src, NULL, &op, block_size, false);
1697 			if (sha->final && !src.sg_wa.bytes_left)
1698 				op.eom = 1;
1699 
1700 			ret = cmd_q->ccp->vdata->perform->sha(&op);
1701 			if (ret) {
1702 				cmd->engine_error = cmd_q->cmd_error;
1703 				goto e_data;
1704 			}
1705 
1706 			ccp_process_data(&src, NULL, &op);
1707 		}
1708 	} else {
1709 		op.eom = 1;
1710 		ret = cmd_q->ccp->vdata->perform->sha(&op);
1711 		if (ret) {
1712 			cmd->engine_error = cmd_q->cmd_error;
1713 			goto e_data;
1714 		}
1715 	}
1716 
1717 	/* Retrieve the SHA context - convert from LE to BE using
1718 	 * 32-byte (256-bit) byteswapping to BE
1719 	 */
1720 	ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1721 			       CCP_PASSTHRU_BYTESWAP_256BIT);
1722 	if (ret) {
1723 		cmd->engine_error = cmd_q->cmd_error;
1724 		goto e_data;
1725 	}
1726 
1727 	if (sha->final) {
1728 		/* Finishing up, so get the digest */
1729 		switch (sha->type) {
1730 		case CCP_SHA_TYPE_1:
1731 		case CCP_SHA_TYPE_224:
1732 		case CCP_SHA_TYPE_256:
1733 			ccp_get_dm_area(&ctx, ooffset,
1734 					sha->ctx, 0,
1735 					digest_size);
1736 			break;
1737 		case CCP_SHA_TYPE_384:
1738 		case CCP_SHA_TYPE_512:
1739 			ccp_get_dm_area(&ctx, 0,
1740 					sha->ctx, LSB_ITEM_SIZE - ooffset,
1741 					LSB_ITEM_SIZE);
1742 			ccp_get_dm_area(&ctx, LSB_ITEM_SIZE + ooffset,
1743 					sha->ctx, 0,
1744 					LSB_ITEM_SIZE - ooffset);
1745 			break;
1746 		default:
1747 			ret = -EINVAL;
1748 			goto e_data;
1749 		}
1750 	} else {
1751 		/* Stash the context */
1752 		ccp_get_dm_area(&ctx, 0, sha->ctx, 0,
1753 				sb_count * CCP_SB_BYTES);
1754 	}
1755 
1756 	if (sha->final && sha->opad) {
1757 		/* HMAC operation, recursively perform final SHA */
1758 		struct ccp_cmd hmac_cmd;
1759 		struct scatterlist sg;
1760 		u8 *hmac_buf;
1761 
1762 		if (sha->opad_len != block_size) {
1763 			ret = -EINVAL;
1764 			goto e_data;
1765 		}
1766 
1767 		hmac_buf = kmalloc(block_size + digest_size, GFP_KERNEL);
1768 		if (!hmac_buf) {
1769 			ret = -ENOMEM;
1770 			goto e_data;
1771 		}
1772 		sg_init_one(&sg, hmac_buf, block_size + digest_size);
1773 
1774 		scatterwalk_map_and_copy(hmac_buf, sha->opad, 0, block_size, 0);
1775 		switch (sha->type) {
1776 		case CCP_SHA_TYPE_1:
1777 		case CCP_SHA_TYPE_224:
1778 		case CCP_SHA_TYPE_256:
1779 			memcpy(hmac_buf + block_size,
1780 			       ctx.address + ooffset,
1781 			       digest_size);
1782 			break;
1783 		case CCP_SHA_TYPE_384:
1784 		case CCP_SHA_TYPE_512:
1785 			memcpy(hmac_buf + block_size,
1786 			       ctx.address + LSB_ITEM_SIZE + ooffset,
1787 			       LSB_ITEM_SIZE);
1788 			memcpy(hmac_buf + block_size +
1789 			       (LSB_ITEM_SIZE - ooffset),
1790 			       ctx.address,
1791 			       LSB_ITEM_SIZE);
1792 			break;
1793 		default:
1794 			kfree(hmac_buf);
1795 			ret = -EINVAL;
1796 			goto e_data;
1797 		}
1798 
1799 		memset(&hmac_cmd, 0, sizeof(hmac_cmd));
1800 		hmac_cmd.engine = CCP_ENGINE_SHA;
1801 		hmac_cmd.u.sha.type = sha->type;
1802 		hmac_cmd.u.sha.ctx = sha->ctx;
1803 		hmac_cmd.u.sha.ctx_len = sha->ctx_len;
1804 		hmac_cmd.u.sha.src = &sg;
1805 		hmac_cmd.u.sha.src_len = block_size + digest_size;
1806 		hmac_cmd.u.sha.opad = NULL;
1807 		hmac_cmd.u.sha.opad_len = 0;
1808 		hmac_cmd.u.sha.first = 1;
1809 		hmac_cmd.u.sha.final = 1;
1810 		hmac_cmd.u.sha.msg_bits = (block_size + digest_size) << 3;
1811 
1812 		ret = ccp_run_sha_cmd(cmd_q, &hmac_cmd);
1813 		if (ret)
1814 			cmd->engine_error = hmac_cmd.engine_error;
1815 
1816 		kfree(hmac_buf);
1817 	}
1818 
1819 e_data:
1820 	if (sha->src)
1821 		ccp_free_data(&src, cmd_q);
1822 
1823 e_ctx:
1824 	ccp_dm_free(&ctx);
1825 
1826 	return ret;
1827 }
1828 
1829 static noinline_for_stack int
1830 ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1831 {
1832 	struct ccp_rsa_engine *rsa = &cmd->u.rsa;
1833 	struct ccp_dm_workarea exp, src, dst;
1834 	struct ccp_op op;
1835 	unsigned int sb_count, i_len, o_len;
1836 	int ret;
1837 
1838 	/* Check against the maximum allowable size, in bits */
1839 	if (rsa->key_size > cmd_q->ccp->vdata->rsamax)
1840 		return -EINVAL;
1841 
1842 	if (!rsa->exp || !rsa->mod || !rsa->src || !rsa->dst)
1843 		return -EINVAL;
1844 
1845 	memset(&op, 0, sizeof(op));
1846 	op.cmd_q = cmd_q;
1847 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
1848 
1849 	/* The RSA modulus must precede the message being acted upon, so
1850 	 * it must be copied to a DMA area where the message and the
1851 	 * modulus can be concatenated.  Therefore the input buffer
1852 	 * length required is twice the output buffer length (which
1853 	 * must be a multiple of 256-bits).  Compute o_len, i_len in bytes.
1854 	 * Buffer sizes must be a multiple of 32 bytes; rounding up may be
1855 	 * required.
1856 	 */
1857 	o_len = 32 * ((rsa->key_size + 255) / 256);
1858 	i_len = o_len * 2;
1859 
1860 	sb_count = 0;
1861 	if (cmd_q->ccp->vdata->version < CCP_VERSION(5, 0)) {
1862 		/* sb_count is the number of storage block slots required
1863 		 * for the modulus.
1864 		 */
1865 		sb_count = o_len / CCP_SB_BYTES;
1866 		op.sb_key = cmd_q->ccp->vdata->perform->sballoc(cmd_q,
1867 								sb_count);
1868 		if (!op.sb_key)
1869 			return -EIO;
1870 	} else {
1871 		/* A version 5 device allows a modulus size that will not fit
1872 		 * in the LSB, so the command will transfer it from memory.
1873 		 * Set the sb key to the default, even though it's not used.
1874 		 */
1875 		op.sb_key = cmd_q->sb_key;
1876 	}
1877 
1878 	/* The RSA exponent must be in little endian format. Reverse its
1879 	 * byte order.
1880 	 */
1881 	ret = ccp_init_dm_workarea(&exp, cmd_q, o_len, DMA_TO_DEVICE);
1882 	if (ret)
1883 		goto e_sb;
1884 
1885 	ret = ccp_reverse_set_dm_area(&exp, 0, rsa->exp, 0, rsa->exp_len);
1886 	if (ret)
1887 		goto e_exp;
1888 
1889 	if (cmd_q->ccp->vdata->version < CCP_VERSION(5, 0)) {
1890 		/* Copy the exponent to the local storage block, using
1891 		 * as many 32-byte blocks as were allocated above. It's
1892 		 * already little endian, so no further change is required.
1893 		 */
1894 		ret = ccp_copy_to_sb(cmd_q, &exp, op.jobid, op.sb_key,
1895 				     CCP_PASSTHRU_BYTESWAP_NOOP);
1896 		if (ret) {
1897 			cmd->engine_error = cmd_q->cmd_error;
1898 			goto e_exp;
1899 		}
1900 	} else {
1901 		/* The exponent can be retrieved from memory via DMA. */
1902 		op.exp.u.dma.address = exp.dma.address;
1903 		op.exp.u.dma.offset = 0;
1904 	}
1905 
1906 	/* Concatenate the modulus and the message. Both the modulus and
1907 	 * the operands must be in little endian format.  Since the input
1908 	 * is in big endian format it must be converted.
1909 	 */
1910 	ret = ccp_init_dm_workarea(&src, cmd_q, i_len, DMA_TO_DEVICE);
1911 	if (ret)
1912 		goto e_exp;
1913 
1914 	ret = ccp_reverse_set_dm_area(&src, 0, rsa->mod, 0, rsa->mod_len);
1915 	if (ret)
1916 		goto e_src;
1917 	ret = ccp_reverse_set_dm_area(&src, o_len, rsa->src, 0, rsa->src_len);
1918 	if (ret)
1919 		goto e_src;
1920 
1921 	/* Prepare the output area for the operation */
1922 	ret = ccp_init_dm_workarea(&dst, cmd_q, o_len, DMA_FROM_DEVICE);
1923 	if (ret)
1924 		goto e_src;
1925 
1926 	op.soc = 1;
1927 	op.src.u.dma.address = src.dma.address;
1928 	op.src.u.dma.offset = 0;
1929 	op.src.u.dma.length = i_len;
1930 	op.dst.u.dma.address = dst.dma.address;
1931 	op.dst.u.dma.offset = 0;
1932 	op.dst.u.dma.length = o_len;
1933 
1934 	op.u.rsa.mod_size = rsa->key_size;
1935 	op.u.rsa.input_len = i_len;
1936 
1937 	ret = cmd_q->ccp->vdata->perform->rsa(&op);
1938 	if (ret) {
1939 		cmd->engine_error = cmd_q->cmd_error;
1940 		goto e_dst;
1941 	}
1942 
1943 	ccp_reverse_get_dm_area(&dst, 0, rsa->dst, 0, rsa->mod_len);
1944 
1945 e_dst:
1946 	ccp_dm_free(&dst);
1947 
1948 e_src:
1949 	ccp_dm_free(&src);
1950 
1951 e_exp:
1952 	ccp_dm_free(&exp);
1953 
1954 e_sb:
1955 	if (sb_count)
1956 		cmd_q->ccp->vdata->perform->sbfree(cmd_q, op.sb_key, sb_count);
1957 
1958 	return ret;
1959 }
1960 
1961 static noinline_for_stack int
1962 ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1963 {
1964 	struct ccp_passthru_engine *pt = &cmd->u.passthru;
1965 	struct ccp_dm_workarea mask;
1966 	struct ccp_data src, dst;
1967 	struct ccp_op op;
1968 	bool in_place = false;
1969 	unsigned int i;
1970 	int ret = 0;
1971 
1972 	if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1)))
1973 		return -EINVAL;
1974 
1975 	if (!pt->src || !pt->dst)
1976 		return -EINVAL;
1977 
1978 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
1979 		if (pt->mask_len != CCP_PASSTHRU_MASKSIZE)
1980 			return -EINVAL;
1981 		if (!pt->mask)
1982 			return -EINVAL;
1983 	}
1984 
1985 	BUILD_BUG_ON(CCP_PASSTHRU_SB_COUNT != 1);
1986 
1987 	memset(&op, 0, sizeof(op));
1988 	op.cmd_q = cmd_q;
1989 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
1990 
1991 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
1992 		/* Load the mask */
1993 		op.sb_key = cmd_q->sb_key;
1994 
1995 		ret = ccp_init_dm_workarea(&mask, cmd_q,
1996 					   CCP_PASSTHRU_SB_COUNT *
1997 					   CCP_SB_BYTES,
1998 					   DMA_TO_DEVICE);
1999 		if (ret)
2000 			return ret;
2001 
2002 		ret = ccp_set_dm_area(&mask, 0, pt->mask, 0, pt->mask_len);
2003 		if (ret)
2004 			goto e_mask;
2005 		ret = ccp_copy_to_sb(cmd_q, &mask, op.jobid, op.sb_key,
2006 				     CCP_PASSTHRU_BYTESWAP_NOOP);
2007 		if (ret) {
2008 			cmd->engine_error = cmd_q->cmd_error;
2009 			goto e_mask;
2010 		}
2011 	}
2012 
2013 	/* Prepare the input and output data workareas. For in-place
2014 	 * operations we need to set the dma direction to BIDIRECTIONAL
2015 	 * and copy the src workarea to the dst workarea.
2016 	 */
2017 	if (sg_virt(pt->src) == sg_virt(pt->dst))
2018 		in_place = true;
2019 
2020 	ret = ccp_init_data(&src, cmd_q, pt->src, pt->src_len,
2021 			    CCP_PASSTHRU_MASKSIZE,
2022 			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
2023 	if (ret)
2024 		goto e_mask;
2025 
2026 	if (in_place) {
2027 		dst = src;
2028 	} else {
2029 		ret = ccp_init_data(&dst, cmd_q, pt->dst, pt->src_len,
2030 				    CCP_PASSTHRU_MASKSIZE, DMA_FROM_DEVICE);
2031 		if (ret)
2032 			goto e_src;
2033 	}
2034 
2035 	/* Send data to the CCP Passthru engine
2036 	 *   Because the CCP engine works on a single source and destination
2037 	 *   dma address at a time, each entry in the source scatterlist
2038 	 *   (after the dma_map_sg call) must be less than or equal to the
2039 	 *   (remaining) length in the destination scatterlist entry and the
2040 	 *   length must be a multiple of CCP_PASSTHRU_BLOCKSIZE
2041 	 */
2042 	dst.sg_wa.sg_used = 0;
2043 	for (i = 1; i <= src.sg_wa.dma_count; i++) {
2044 		if (!dst.sg_wa.sg ||
2045 		    (sg_dma_len(dst.sg_wa.sg) < sg_dma_len(src.sg_wa.sg))) {
2046 			ret = -EINVAL;
2047 			goto e_dst;
2048 		}
2049 
2050 		if (i == src.sg_wa.dma_count) {
2051 			op.eom = 1;
2052 			op.soc = 1;
2053 		}
2054 
2055 		op.src.type = CCP_MEMTYPE_SYSTEM;
2056 		op.src.u.dma.address = sg_dma_address(src.sg_wa.sg);
2057 		op.src.u.dma.offset = 0;
2058 		op.src.u.dma.length = sg_dma_len(src.sg_wa.sg);
2059 
2060 		op.dst.type = CCP_MEMTYPE_SYSTEM;
2061 		op.dst.u.dma.address = sg_dma_address(dst.sg_wa.sg);
2062 		op.dst.u.dma.offset = dst.sg_wa.sg_used;
2063 		op.dst.u.dma.length = op.src.u.dma.length;
2064 
2065 		ret = cmd_q->ccp->vdata->perform->passthru(&op);
2066 		if (ret) {
2067 			cmd->engine_error = cmd_q->cmd_error;
2068 			goto e_dst;
2069 		}
2070 
2071 		dst.sg_wa.sg_used += sg_dma_len(src.sg_wa.sg);
2072 		if (dst.sg_wa.sg_used == sg_dma_len(dst.sg_wa.sg)) {
2073 			dst.sg_wa.sg = sg_next(dst.sg_wa.sg);
2074 			dst.sg_wa.sg_used = 0;
2075 		}
2076 		src.sg_wa.sg = sg_next(src.sg_wa.sg);
2077 	}
2078 
2079 e_dst:
2080 	if (!in_place)
2081 		ccp_free_data(&dst, cmd_q);
2082 
2083 e_src:
2084 	ccp_free_data(&src, cmd_q);
2085 
2086 e_mask:
2087 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
2088 		ccp_dm_free(&mask);
2089 
2090 	return ret;
2091 }
2092 
2093 static noinline_for_stack int
2094 ccp_run_passthru_nomap_cmd(struct ccp_cmd_queue *cmd_q,
2095 				      struct ccp_cmd *cmd)
2096 {
2097 	struct ccp_passthru_nomap_engine *pt = &cmd->u.passthru_nomap;
2098 	struct ccp_dm_workarea mask;
2099 	struct ccp_op op;
2100 	int ret;
2101 
2102 	if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1)))
2103 		return -EINVAL;
2104 
2105 	if (!pt->src_dma || !pt->dst_dma)
2106 		return -EINVAL;
2107 
2108 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
2109 		if (pt->mask_len != CCP_PASSTHRU_MASKSIZE)
2110 			return -EINVAL;
2111 		if (!pt->mask)
2112 			return -EINVAL;
2113 	}
2114 
2115 	BUILD_BUG_ON(CCP_PASSTHRU_SB_COUNT != 1);
2116 
2117 	memset(&op, 0, sizeof(op));
2118 	op.cmd_q = cmd_q;
2119 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
2120 
2121 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
2122 		/* Load the mask */
2123 		op.sb_key = cmd_q->sb_key;
2124 
2125 		mask.length = pt->mask_len;
2126 		mask.dma.address = pt->mask;
2127 		mask.dma.length = pt->mask_len;
2128 
2129 		ret = ccp_copy_to_sb(cmd_q, &mask, op.jobid, op.sb_key,
2130 				     CCP_PASSTHRU_BYTESWAP_NOOP);
2131 		if (ret) {
2132 			cmd->engine_error = cmd_q->cmd_error;
2133 			return ret;
2134 		}
2135 	}
2136 
2137 	/* Send data to the CCP Passthru engine */
2138 	op.eom = 1;
2139 	op.soc = 1;
2140 
2141 	op.src.type = CCP_MEMTYPE_SYSTEM;
2142 	op.src.u.dma.address = pt->src_dma;
2143 	op.src.u.dma.offset = 0;
2144 	op.src.u.dma.length = pt->src_len;
2145 
2146 	op.dst.type = CCP_MEMTYPE_SYSTEM;
2147 	op.dst.u.dma.address = pt->dst_dma;
2148 	op.dst.u.dma.offset = 0;
2149 	op.dst.u.dma.length = pt->src_len;
2150 
2151 	ret = cmd_q->ccp->vdata->perform->passthru(&op);
2152 	if (ret)
2153 		cmd->engine_error = cmd_q->cmd_error;
2154 
2155 	return ret;
2156 }
2157 
2158 static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
2159 {
2160 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
2161 	struct ccp_dm_workarea src, dst;
2162 	struct ccp_op op;
2163 	int ret;
2164 	u8 *save;
2165 
2166 	if (!ecc->u.mm.operand_1 ||
2167 	    (ecc->u.mm.operand_1_len > CCP_ECC_MODULUS_BYTES))
2168 		return -EINVAL;
2169 
2170 	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT)
2171 		if (!ecc->u.mm.operand_2 ||
2172 		    (ecc->u.mm.operand_2_len > CCP_ECC_MODULUS_BYTES))
2173 			return -EINVAL;
2174 
2175 	if (!ecc->u.mm.result ||
2176 	    (ecc->u.mm.result_len < CCP_ECC_MODULUS_BYTES))
2177 		return -EINVAL;
2178 
2179 	memset(&op, 0, sizeof(op));
2180 	op.cmd_q = cmd_q;
2181 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
2182 
2183 	/* Concatenate the modulus and the operands. Both the modulus and
2184 	 * the operands must be in little endian format.  Since the input
2185 	 * is in big endian format it must be converted and placed in a
2186 	 * fixed length buffer.
2187 	 */
2188 	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
2189 				   DMA_TO_DEVICE);
2190 	if (ret)
2191 		return ret;
2192 
2193 	/* Save the workarea address since it is updated in order to perform
2194 	 * the concatenation
2195 	 */
2196 	save = src.address;
2197 
2198 	/* Copy the ECC modulus */
2199 	ret = ccp_reverse_set_dm_area(&src, 0, ecc->mod, 0, ecc->mod_len);
2200 	if (ret)
2201 		goto e_src;
2202 	src.address += CCP_ECC_OPERAND_SIZE;
2203 
2204 	/* Copy the first operand */
2205 	ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.mm.operand_1, 0,
2206 				      ecc->u.mm.operand_1_len);
2207 	if (ret)
2208 		goto e_src;
2209 	src.address += CCP_ECC_OPERAND_SIZE;
2210 
2211 	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT) {
2212 		/* Copy the second operand */
2213 		ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.mm.operand_2, 0,
2214 					      ecc->u.mm.operand_2_len);
2215 		if (ret)
2216 			goto e_src;
2217 		src.address += CCP_ECC_OPERAND_SIZE;
2218 	}
2219 
2220 	/* Restore the workarea address */
2221 	src.address = save;
2222 
2223 	/* Prepare the output area for the operation */
2224 	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
2225 				   DMA_FROM_DEVICE);
2226 	if (ret)
2227 		goto e_src;
2228 
2229 	op.soc = 1;
2230 	op.src.u.dma.address = src.dma.address;
2231 	op.src.u.dma.offset = 0;
2232 	op.src.u.dma.length = src.length;
2233 	op.dst.u.dma.address = dst.dma.address;
2234 	op.dst.u.dma.offset = 0;
2235 	op.dst.u.dma.length = dst.length;
2236 
2237 	op.u.ecc.function = cmd->u.ecc.function;
2238 
2239 	ret = cmd_q->ccp->vdata->perform->ecc(&op);
2240 	if (ret) {
2241 		cmd->engine_error = cmd_q->cmd_error;
2242 		goto e_dst;
2243 	}
2244 
2245 	ecc->ecc_result = le16_to_cpup(
2246 		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
2247 	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
2248 		ret = -EIO;
2249 		goto e_dst;
2250 	}
2251 
2252 	/* Save the ECC result */
2253 	ccp_reverse_get_dm_area(&dst, 0, ecc->u.mm.result, 0,
2254 				CCP_ECC_MODULUS_BYTES);
2255 
2256 e_dst:
2257 	ccp_dm_free(&dst);
2258 
2259 e_src:
2260 	ccp_dm_free(&src);
2261 
2262 	return ret;
2263 }
2264 
2265 static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
2266 {
2267 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
2268 	struct ccp_dm_workarea src, dst;
2269 	struct ccp_op op;
2270 	int ret;
2271 	u8 *save;
2272 
2273 	if (!ecc->u.pm.point_1.x ||
2274 	    (ecc->u.pm.point_1.x_len > CCP_ECC_MODULUS_BYTES) ||
2275 	    !ecc->u.pm.point_1.y ||
2276 	    (ecc->u.pm.point_1.y_len > CCP_ECC_MODULUS_BYTES))
2277 		return -EINVAL;
2278 
2279 	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
2280 		if (!ecc->u.pm.point_2.x ||
2281 		    (ecc->u.pm.point_2.x_len > CCP_ECC_MODULUS_BYTES) ||
2282 		    !ecc->u.pm.point_2.y ||
2283 		    (ecc->u.pm.point_2.y_len > CCP_ECC_MODULUS_BYTES))
2284 			return -EINVAL;
2285 	} else {
2286 		if (!ecc->u.pm.domain_a ||
2287 		    (ecc->u.pm.domain_a_len > CCP_ECC_MODULUS_BYTES))
2288 			return -EINVAL;
2289 
2290 		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT)
2291 			if (!ecc->u.pm.scalar ||
2292 			    (ecc->u.pm.scalar_len > CCP_ECC_MODULUS_BYTES))
2293 				return -EINVAL;
2294 	}
2295 
2296 	if (!ecc->u.pm.result.x ||
2297 	    (ecc->u.pm.result.x_len < CCP_ECC_MODULUS_BYTES) ||
2298 	    !ecc->u.pm.result.y ||
2299 	    (ecc->u.pm.result.y_len < CCP_ECC_MODULUS_BYTES))
2300 		return -EINVAL;
2301 
2302 	memset(&op, 0, sizeof(op));
2303 	op.cmd_q = cmd_q;
2304 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
2305 
2306 	/* Concatenate the modulus and the operands. Both the modulus and
2307 	 * the operands must be in little endian format.  Since the input
2308 	 * is in big endian format it must be converted and placed in a
2309 	 * fixed length buffer.
2310 	 */
2311 	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
2312 				   DMA_TO_DEVICE);
2313 	if (ret)
2314 		return ret;
2315 
2316 	/* Save the workarea address since it is updated in order to perform
2317 	 * the concatenation
2318 	 */
2319 	save = src.address;
2320 
2321 	/* Copy the ECC modulus */
2322 	ret = ccp_reverse_set_dm_area(&src, 0, ecc->mod, 0, ecc->mod_len);
2323 	if (ret)
2324 		goto e_src;
2325 	src.address += CCP_ECC_OPERAND_SIZE;
2326 
2327 	/* Copy the first point X and Y coordinate */
2328 	ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_1.x, 0,
2329 				      ecc->u.pm.point_1.x_len);
2330 	if (ret)
2331 		goto e_src;
2332 	src.address += CCP_ECC_OPERAND_SIZE;
2333 	ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_1.y, 0,
2334 				      ecc->u.pm.point_1.y_len);
2335 	if (ret)
2336 		goto e_src;
2337 	src.address += CCP_ECC_OPERAND_SIZE;
2338 
2339 	/* Set the first point Z coordinate to 1 */
2340 	*src.address = 0x01;
2341 	src.address += CCP_ECC_OPERAND_SIZE;
2342 
2343 	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
2344 		/* Copy the second point X and Y coordinate */
2345 		ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_2.x, 0,
2346 					      ecc->u.pm.point_2.x_len);
2347 		if (ret)
2348 			goto e_src;
2349 		src.address += CCP_ECC_OPERAND_SIZE;
2350 		ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_2.y, 0,
2351 					      ecc->u.pm.point_2.y_len);
2352 		if (ret)
2353 			goto e_src;
2354 		src.address += CCP_ECC_OPERAND_SIZE;
2355 
2356 		/* Set the second point Z coordinate to 1 */
2357 		*src.address = 0x01;
2358 		src.address += CCP_ECC_OPERAND_SIZE;
2359 	} else {
2360 		/* Copy the Domain "a" parameter */
2361 		ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.domain_a, 0,
2362 					      ecc->u.pm.domain_a_len);
2363 		if (ret)
2364 			goto e_src;
2365 		src.address += CCP_ECC_OPERAND_SIZE;
2366 
2367 		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT) {
2368 			/* Copy the scalar value */
2369 			ret = ccp_reverse_set_dm_area(&src, 0,
2370 						      ecc->u.pm.scalar, 0,
2371 						      ecc->u.pm.scalar_len);
2372 			if (ret)
2373 				goto e_src;
2374 			src.address += CCP_ECC_OPERAND_SIZE;
2375 		}
2376 	}
2377 
2378 	/* Restore the workarea address */
2379 	src.address = save;
2380 
2381 	/* Prepare the output area for the operation */
2382 	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
2383 				   DMA_FROM_DEVICE);
2384 	if (ret)
2385 		goto e_src;
2386 
2387 	op.soc = 1;
2388 	op.src.u.dma.address = src.dma.address;
2389 	op.src.u.dma.offset = 0;
2390 	op.src.u.dma.length = src.length;
2391 	op.dst.u.dma.address = dst.dma.address;
2392 	op.dst.u.dma.offset = 0;
2393 	op.dst.u.dma.length = dst.length;
2394 
2395 	op.u.ecc.function = cmd->u.ecc.function;
2396 
2397 	ret = cmd_q->ccp->vdata->perform->ecc(&op);
2398 	if (ret) {
2399 		cmd->engine_error = cmd_q->cmd_error;
2400 		goto e_dst;
2401 	}
2402 
2403 	ecc->ecc_result = le16_to_cpup(
2404 		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
2405 	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
2406 		ret = -EIO;
2407 		goto e_dst;
2408 	}
2409 
2410 	/* Save the workarea address since it is updated as we walk through
2411 	 * to copy the point math result
2412 	 */
2413 	save = dst.address;
2414 
2415 	/* Save the ECC result X and Y coordinates */
2416 	ccp_reverse_get_dm_area(&dst, 0, ecc->u.pm.result.x, 0,
2417 				CCP_ECC_MODULUS_BYTES);
2418 	dst.address += CCP_ECC_OUTPUT_SIZE;
2419 	ccp_reverse_get_dm_area(&dst, 0, ecc->u.pm.result.y, 0,
2420 				CCP_ECC_MODULUS_BYTES);
2421 
2422 	/* Restore the workarea address */
2423 	dst.address = save;
2424 
2425 e_dst:
2426 	ccp_dm_free(&dst);
2427 
2428 e_src:
2429 	ccp_dm_free(&src);
2430 
2431 	return ret;
2432 }
2433 
2434 static noinline_for_stack int
2435 ccp_run_ecc_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
2436 {
2437 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
2438 
2439 	ecc->ecc_result = 0;
2440 
2441 	if (!ecc->mod ||
2442 	    (ecc->mod_len > CCP_ECC_MODULUS_BYTES))
2443 		return -EINVAL;
2444 
2445 	switch (ecc->function) {
2446 	case CCP_ECC_FUNCTION_MMUL_384BIT:
2447 	case CCP_ECC_FUNCTION_MADD_384BIT:
2448 	case CCP_ECC_FUNCTION_MINV_384BIT:
2449 		return ccp_run_ecc_mm_cmd(cmd_q, cmd);
2450 
2451 	case CCP_ECC_FUNCTION_PADD_384BIT:
2452 	case CCP_ECC_FUNCTION_PMUL_384BIT:
2453 	case CCP_ECC_FUNCTION_PDBL_384BIT:
2454 		return ccp_run_ecc_pm_cmd(cmd_q, cmd);
2455 
2456 	default:
2457 		return -EINVAL;
2458 	}
2459 }
2460 
2461 int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
2462 {
2463 	int ret;
2464 
2465 	cmd->engine_error = 0;
2466 	cmd_q->cmd_error = 0;
2467 	cmd_q->int_rcvd = 0;
2468 	cmd_q->free_slots = cmd_q->ccp->vdata->perform->get_free_slots(cmd_q);
2469 
2470 	switch (cmd->engine) {
2471 	case CCP_ENGINE_AES:
2472 		switch (cmd->u.aes.mode) {
2473 		case CCP_AES_MODE_CMAC:
2474 			ret = ccp_run_aes_cmac_cmd(cmd_q, cmd);
2475 			break;
2476 		case CCP_AES_MODE_GCM:
2477 			ret = ccp_run_aes_gcm_cmd(cmd_q, cmd);
2478 			break;
2479 		default:
2480 			ret = ccp_run_aes_cmd(cmd_q, cmd);
2481 			break;
2482 		}
2483 		break;
2484 	case CCP_ENGINE_XTS_AES_128:
2485 		ret = ccp_run_xts_aes_cmd(cmd_q, cmd);
2486 		break;
2487 	case CCP_ENGINE_DES3:
2488 		ret = ccp_run_des3_cmd(cmd_q, cmd);
2489 		break;
2490 	case CCP_ENGINE_SHA:
2491 		ret = ccp_run_sha_cmd(cmd_q, cmd);
2492 		break;
2493 	case CCP_ENGINE_RSA:
2494 		ret = ccp_run_rsa_cmd(cmd_q, cmd);
2495 		break;
2496 	case CCP_ENGINE_PASSTHRU:
2497 		if (cmd->flags & CCP_CMD_PASSTHRU_NO_DMA_MAP)
2498 			ret = ccp_run_passthru_nomap_cmd(cmd_q, cmd);
2499 		else
2500 			ret = ccp_run_passthru_cmd(cmd_q, cmd);
2501 		break;
2502 	case CCP_ENGINE_ECC:
2503 		ret = ccp_run_ecc_cmd(cmd_q, cmd);
2504 		break;
2505 	default:
2506 		ret = -EINVAL;
2507 	}
2508 
2509 	return ret;
2510 }
2511