xref: /openbmc/linux/drivers/crypto/ccp/ccp-ops.c (revision 92a76f6d)
1 /*
2  * AMD Cryptographic Coprocessor (CCP) driver
3  *
4  * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
5  *
6  * Author: Tom Lendacky <thomas.lendacky@amd.com>
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 as
10  * published by the Free Software Foundation.
11  */
12 
13 #include <linux/module.h>
14 #include <linux/kernel.h>
15 #include <linux/pci.h>
16 #include <linux/interrupt.h>
17 #include <crypto/scatterwalk.h>
18 #include <linux/ccp.h>
19 
20 #include "ccp-dev.h"
21 
22 /* SHA initial context values */
23 static const __be32 ccp_sha1_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
24 	cpu_to_be32(SHA1_H0), cpu_to_be32(SHA1_H1),
25 	cpu_to_be32(SHA1_H2), cpu_to_be32(SHA1_H3),
26 	cpu_to_be32(SHA1_H4), 0, 0, 0,
27 };
28 
29 static const __be32 ccp_sha224_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
30 	cpu_to_be32(SHA224_H0), cpu_to_be32(SHA224_H1),
31 	cpu_to_be32(SHA224_H2), cpu_to_be32(SHA224_H3),
32 	cpu_to_be32(SHA224_H4), cpu_to_be32(SHA224_H5),
33 	cpu_to_be32(SHA224_H6), cpu_to_be32(SHA224_H7),
34 };
35 
36 static const __be32 ccp_sha256_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
37 	cpu_to_be32(SHA256_H0), cpu_to_be32(SHA256_H1),
38 	cpu_to_be32(SHA256_H2), cpu_to_be32(SHA256_H3),
39 	cpu_to_be32(SHA256_H4), cpu_to_be32(SHA256_H5),
40 	cpu_to_be32(SHA256_H6), cpu_to_be32(SHA256_H7),
41 };
42 
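/* Reserve a contiguous run of "count" KSB entries for a command.  If no
 * run is currently free, sleep until entries are released; returns 0 if
 * the wait is interrupted, otherwise the first reserved entry is returned,
 * offset by KSB_START.
 */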
43 static u32 ccp_alloc_ksb(struct ccp_device *ccp, unsigned int count)
44 {
45 	int start;
46 
47 	for (;;) {
48 		mutex_lock(&ccp->ksb_mutex);
49 
50 		start = (u32)bitmap_find_next_zero_area(ccp->ksb,
51 							ccp->ksb_count,
52 							ccp->ksb_start,
53 							count, 0);
54 		if (start <= ccp->ksb_count) {
55 			bitmap_set(ccp->ksb, start, count);
56 
57 			mutex_unlock(&ccp->ksb_mutex);
58 			break;
59 		}
60 
61 		ccp->ksb_avail = 0;
62 
63 		mutex_unlock(&ccp->ksb_mutex);
64 
65 		/* Wait for KSB entries to become available */
66 		if (wait_event_interruptible(ccp->ksb_queue, ccp->ksb_avail))
67 			return 0;
68 	}
69 
70 	return KSB_START + start;
71 }
72 
73 static void ccp_free_ksb(struct ccp_device *ccp, unsigned int start,
74 			 unsigned int count)
75 {
76 	if (!start)
77 		return;
78 
79 	mutex_lock(&ccp->ksb_mutex);
80 
81 	bitmap_clear(ccp->ksb, start - KSB_START, count);
82 
83 	ccp->ksb_avail = 1;
84 
85 	mutex_unlock(&ccp->ksb_mutex);
86 
87 	wake_up_interruptible_all(&ccp->ksb_queue);
88 }
89 
90 static u32 ccp_gen_jobid(struct ccp_device *ccp)
91 {
92 	return atomic_inc_return(&ccp->current_id) & CCP_JOBID_MASK;
93 }
94 
95 static void ccp_sg_free(struct ccp_sg_workarea *wa)
96 {
97 	if (wa->dma_count)
98 		dma_unmap_sg(wa->dma_dev, wa->dma_sg, wa->nents, wa->dma_dir);
99 
100 	wa->dma_count = 0;
101 }
102 
103 static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev,
104 				struct scatterlist *sg, u64 len,
105 				enum dma_data_direction dma_dir)
106 {
107 	memset(wa, 0, sizeof(*wa));
108 
109 	wa->sg = sg;
110 	if (!sg)
111 		return 0;
112 
113 	wa->nents = sg_nents_for_len(sg, len);
114 	if (wa->nents < 0)
115 		return wa->nents;
116 
117 	wa->bytes_left = len;
118 	wa->sg_used = 0;
119 
120 	if (len == 0)
121 		return 0;
122 
123 	if (dma_dir == DMA_NONE)
124 		return 0;
125 
126 	wa->dma_sg = sg;
127 	wa->dma_dev = dev;
128 	wa->dma_dir = dma_dir;
129 	wa->dma_count = dma_map_sg(dev, sg, wa->nents, dma_dir);
130 	if (!wa->dma_count)
131 		return -ENOMEM;
132 
133 	return 0;
134 }
135 
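/* Advance the sg workarea by up to "len" bytes, stepping to the next sg
 * entry once the current one has been fully consumed.
 */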
136 static void ccp_update_sg_workarea(struct ccp_sg_workarea *wa, unsigned int len)
137 {
138 	unsigned int nbytes = min_t(u64, len, wa->bytes_left);
139 
140 	if (!wa->sg)
141 		return;
142 
143 	wa->sg_used += nbytes;
144 	wa->bytes_left -= nbytes;
145 	if (wa->sg_used == wa->sg->length) {
146 		wa->sg = sg_next(wa->sg);
147 		wa->sg_used = 0;
148 	}
149 }
150 
151 static void ccp_dm_free(struct ccp_dm_workarea *wa)
152 {
153 	if (wa->length <= CCP_DMAPOOL_MAX_SIZE) {
154 		if (wa->address)
155 			dma_pool_free(wa->dma_pool, wa->address,
156 				      wa->dma.address);
157 	} else {
158 		if (wa->dma.address)
159 			dma_unmap_single(wa->dev, wa->dma.address, wa->length,
160 					 wa->dma.dir);
161 		kfree(wa->address);
162 	}
163 
164 	wa->address = NULL;
165 	wa->dma.address = 0;
166 }
167 
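/* Set up a DMA-safe bounce buffer for a command: small buffers come from
 * the queue's DMA pool, larger ones are kzalloc'd and streaming-mapped.
 */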
168 static int ccp_init_dm_workarea(struct ccp_dm_workarea *wa,
169 				struct ccp_cmd_queue *cmd_q,
170 				unsigned int len,
171 				enum dma_data_direction dir)
172 {
173 	memset(wa, 0, sizeof(*wa));
174 
175 	if (!len)
176 		return 0;
177 
178 	wa->dev = cmd_q->ccp->dev;
179 	wa->length = len;
180 
181 	if (len <= CCP_DMAPOOL_MAX_SIZE) {
182 		wa->dma_pool = cmd_q->dma_pool;
183 
184 		wa->address = dma_pool_alloc(wa->dma_pool, GFP_KERNEL,
185 					     &wa->dma.address);
186 		if (!wa->address)
187 			return -ENOMEM;
188 
189 		wa->dma.length = CCP_DMAPOOL_MAX_SIZE;
190 
191 		memset(wa->address, 0, CCP_DMAPOOL_MAX_SIZE);
192 	} else {
193 		wa->address = kzalloc(len, GFP_KERNEL);
194 		if (!wa->address)
195 			return -ENOMEM;
196 
197 		wa->dma.address = dma_map_single(wa->dev, wa->address, len,
198 						 dir);
199 		if (!wa->dma.address)
200 			return -ENOMEM;
201 
202 		wa->dma.length = len;
203 	}
204 	wa->dma.dir = dir;
205 
206 	return 0;
207 }
208 
209 static void ccp_set_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
210 			    struct scatterlist *sg, unsigned int sg_offset,
211 			    unsigned int len)
212 {
213 	WARN_ON(!wa->address);
214 
215 	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
216 				 0);
217 }
218 
219 static void ccp_get_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
220 			    struct scatterlist *sg, unsigned int sg_offset,
221 			    unsigned int len)
222 {
223 	WARN_ON(!wa->address);
224 
225 	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
226 				 1);
227 }
228 
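/* Copy from an sg list into the workarea with the byte order reversed, so
 * big-endian input ends up little-endian for the CCP.  The data is handled
 * "se_len" bytes at a time, optionally sign-extending a final short chunk.
 */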
229 static int ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa,
230 				   struct scatterlist *sg,
231 				   unsigned int len, unsigned int se_len,
232 				   bool sign_extend)
233 {
234 	unsigned int nbytes, sg_offset, dm_offset, ksb_len, i;
235 	u8 buffer[CCP_REVERSE_BUF_SIZE];
236 
237 	if (WARN_ON(se_len > sizeof(buffer)))
238 		return -EINVAL;
239 
240 	sg_offset = len;
241 	dm_offset = 0;
242 	nbytes = len;
243 	while (nbytes) {
244 		ksb_len = min_t(unsigned int, nbytes, se_len);
245 		sg_offset -= ksb_len;
246 
247 		scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 0);
248 		for (i = 0; i < ksb_len; i++)
249 			wa->address[dm_offset + i] = buffer[ksb_len - i - 1];
250 
251 		dm_offset += ksb_len;
252 		nbytes -= ksb_len;
253 
254 		if ((ksb_len != se_len) && sign_extend) {
255 			/* Sign-extend the final partial chunk out to se_len */
256 			if (wa->address[dm_offset - 1] & 0x80)
257 				memset(wa->address + dm_offset, 0xff,
258 				       se_len - ksb_len);
259 		}
260 	}
261 
262 	return 0;
263 }
264 
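/* The inverse of ccp_reverse_set_dm_area(): copy out of the workarea into
 * an sg list, restoring big-endian byte order.
 */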
265 static void ccp_reverse_get_dm_area(struct ccp_dm_workarea *wa,
266 				    struct scatterlist *sg,
267 				    unsigned int len)
268 {
269 	unsigned int nbytes, sg_offset, dm_offset, ksb_len, i;
270 	u8 buffer[CCP_REVERSE_BUF_SIZE];
271 
272 	sg_offset = 0;
273 	dm_offset = len;
274 	nbytes = len;
275 	while (nbytes) {
276 		ksb_len = min_t(unsigned int, nbytes, sizeof(buffer));
277 		dm_offset -= ksb_len;
278 
279 		for (i = 0; i < ksb_len; i++)
280 			buffer[ksb_len - i - 1] = wa->address[dm_offset + i];
281 		scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 1);
282 
283 		sg_offset += ksb_len;
284 		nbytes -= ksb_len;
285 	}
286 }
287 
288 static void ccp_free_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q)
289 {
290 	ccp_dm_free(&data->dm_wa);
291 	ccp_sg_free(&data->sg_wa);
292 }
293 
294 static int ccp_init_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q,
295 			 struct scatterlist *sg, u64 sg_len,
296 			 unsigned int dm_len,
297 			 enum dma_data_direction dir)
298 {
299 	int ret;
300 
301 	memset(data, 0, sizeof(*data));
302 
303 	ret = ccp_init_sg_workarea(&data->sg_wa, cmd_q->ccp->dev, sg, sg_len,
304 				   dir);
305 	if (ret)
306 		goto e_err;
307 
308 	ret = ccp_init_dm_workarea(&data->dm_wa, cmd_q, dm_len, dir);
309 	if (ret)
310 		goto e_err;
311 
312 	return 0;
313 
314 e_err:
315 	ccp_free_data(data, cmd_q);
316 
317 	return ret;
318 }
319 
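/* Stage data between the sg workarea and the bounce buffer: "from" == 0
 * fills the bounce buffer from the sg list, "from" == 1 empties the buffer
 * back out to the sg list.  Returns the number of bytes transferred.
 */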
320 static unsigned int ccp_queue_buf(struct ccp_data *data, unsigned int from)
321 {
322 	struct ccp_sg_workarea *sg_wa = &data->sg_wa;
323 	struct ccp_dm_workarea *dm_wa = &data->dm_wa;
324 	unsigned int buf_count, nbytes;
325 
326 	/* Clear the buffer if setting it */
327 	if (!from)
328 		memset(dm_wa->address, 0, dm_wa->length);
329 
330 	if (!sg_wa->sg)
331 		return 0;
332 
333 	/* Perform the copy operation
334 	 *   nbytes will always be <= UINT_MAX because dm_wa->length is
335 	 *   an unsigned int
336 	 */
337 	nbytes = min_t(u64, sg_wa->bytes_left, dm_wa->length);
338 	scatterwalk_map_and_copy(dm_wa->address, sg_wa->sg, sg_wa->sg_used,
339 				 nbytes, from);
340 
341 	/* Update the structures and generate the count */
342 	buf_count = 0;
343 	while (sg_wa->bytes_left && (buf_count < dm_wa->length)) {
344 		nbytes = min(sg_wa->sg->length - sg_wa->sg_used,
345 			     dm_wa->length - buf_count);
346 		nbytes = min_t(u64, sg_wa->bytes_left, nbytes);
347 
348 		buf_count += nbytes;
349 		ccp_update_sg_workarea(sg_wa, nbytes);
350 	}
351 
352 	return buf_count;
353 }
354 
355 static unsigned int ccp_fill_queue_buf(struct ccp_data *data)
356 {
357 	return ccp_queue_buf(data, 0);
358 }
359 
360 static unsigned int ccp_empty_queue_buf(struct ccp_data *data)
361 {
362 	return ccp_queue_buf(data, 1);
363 }
364 
365 static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst,
366 			     struct ccp_op *op, unsigned int block_size,
367 			     bool blocksize_op)
368 {
369 	unsigned int sg_src_len, sg_dst_len, op_len;
370 
371 	/* The CCP can only DMA from/to one address each per operation. This
372 	 * requires that we find the smallest DMA area between the source
373 	 * and destination. The resulting len values will always be <= UINT_MAX
374 	 * because the dma length is an unsigned int.
375 	 */
376 	sg_src_len = sg_dma_len(src->sg_wa.sg) - src->sg_wa.sg_used;
377 	sg_src_len = min_t(u64, src->sg_wa.bytes_left, sg_src_len);
378 
379 	if (dst) {
380 		sg_dst_len = sg_dma_len(dst->sg_wa.sg) - dst->sg_wa.sg_used;
381 		sg_dst_len = min_t(u64, src->sg_wa.bytes_left, sg_dst_len);
382 		op_len = min(sg_src_len, sg_dst_len);
383 	} else {
384 		op_len = sg_src_len;
385 	}
386 
387 	/* The data operation length will be at least block_size, or the
388 	 * smaller of the sg room remaining for the source and destination
389 	 * if that is larger
390 	 */
391 	op_len = max(op_len, block_size);
392 
393 	/* Unless we have to buffer data, there's no reason to wait */
394 	op->soc = 0;
395 
396 	if (sg_src_len < block_size) {
397 		/* Not enough data in the sg element, so it
398 		 * needs to be buffered into a blocksize chunk
399 		 */
400 		int cp_len = ccp_fill_queue_buf(src);
401 
402 		op->soc = 1;
403 		op->src.u.dma.address = src->dm_wa.dma.address;
404 		op->src.u.dma.offset = 0;
405 		op->src.u.dma.length = (blocksize_op) ? block_size : cp_len;
406 	} else {
407 		/* Enough data in the sg element, but we need to
408 		 * adjust for any previously copied data
409 		 */
410 		op->src.u.dma.address = sg_dma_address(src->sg_wa.sg);
411 		op->src.u.dma.offset = src->sg_wa.sg_used;
412 		op->src.u.dma.length = op_len & ~(block_size - 1);
413 
414 		ccp_update_sg_workarea(&src->sg_wa, op->src.u.dma.length);
415 	}
416 
417 	if (dst) {
418 		if (sg_dst_len < block_size) {
419 			/* Not enough room in the sg element or we're on the
420 			 * last piece of data (when using padding), so the
421 			 * output needs to be buffered into a blocksize chunk
422 			 */
423 			op->soc = 1;
424 			op->dst.u.dma.address = dst->dm_wa.dma.address;
425 			op->dst.u.dma.offset = 0;
426 			op->dst.u.dma.length = op->src.u.dma.length;
427 		} else {
428 			/* Enough room in the sg element, but we need to
429 			 * adjust for any previously used area
430 			 */
431 			op->dst.u.dma.address = sg_dma_address(dst->sg_wa.sg);
432 			op->dst.u.dma.offset = dst->sg_wa.sg_used;
433 			op->dst.u.dma.length = op->src.u.dma.length;
434 		}
435 	}
436 }
437 
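/* Account for the operation that was just submitted: clear the init flag
 * and either drain the destination bounce buffer back out to the sg list
 * or advance the destination sg workarea.
 */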
438 static void ccp_process_data(struct ccp_data *src, struct ccp_data *dst,
439 			     struct ccp_op *op)
440 {
441 	op->init = 0;
442 
443 	if (dst) {
444 		if (op->dst.u.dma.address == dst->dm_wa.dma.address)
445 			ccp_empty_queue_buf(dst);
446 		else
447 			ccp_update_sg_workarea(&dst->sg_wa,
448 					       op->dst.u.dma.length);
449 	}
450 }
451 
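/* Move the contents of a DM workarea into a KSB entry, or read a KSB entry
 * back out, using a passthru operation with the requested byte swap.
 */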
452 static int ccp_copy_to_from_ksb(struct ccp_cmd_queue *cmd_q,
453 				struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
454 				u32 byte_swap, bool from)
455 {
456 	struct ccp_op op;
457 
458 	memset(&op, 0, sizeof(op));
459 
460 	op.cmd_q = cmd_q;
461 	op.jobid = jobid;
462 	op.eom = 1;
463 
464 	if (from) {
465 		op.soc = 1;
466 		op.src.type = CCP_MEMTYPE_KSB;
467 		op.src.u.ksb = ksb;
468 		op.dst.type = CCP_MEMTYPE_SYSTEM;
469 		op.dst.u.dma.address = wa->dma.address;
470 		op.dst.u.dma.length = wa->length;
471 	} else {
472 		op.src.type = CCP_MEMTYPE_SYSTEM;
473 		op.src.u.dma.address = wa->dma.address;
474 		op.src.u.dma.length = wa->length;
475 		op.dst.type = CCP_MEMTYPE_KSB;
476 		op.dst.u.ksb = ksb;
477 	}
478 
479 	op.u.passthru.byte_swap = byte_swap;
480 
481 	return cmd_q->ccp->vdata->perform->perform_passthru(&op);
482 }
483 
484 static int ccp_copy_to_ksb(struct ccp_cmd_queue *cmd_q,
485 			   struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
486 			   u32 byte_swap)
487 {
488 	return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, false);
489 }
490 
491 static int ccp_copy_from_ksb(struct ccp_cmd_queue *cmd_q,
492 			     struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
493 			     u32 byte_swap)
494 {
495 	return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, true);
496 }
497 
498 static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q,
499 				struct ccp_cmd *cmd)
500 {
501 	struct ccp_aes_engine *aes = &cmd->u.aes;
502 	struct ccp_dm_workarea key, ctx;
503 	struct ccp_data src;
504 	struct ccp_op op;
505 	unsigned int dm_offset;
506 	int ret;
507 
508 	if (!((aes->key_len == AES_KEYSIZE_128) ||
509 	      (aes->key_len == AES_KEYSIZE_192) ||
510 	      (aes->key_len == AES_KEYSIZE_256)))
511 		return -EINVAL;
512 
513 	if (aes->src_len & (AES_BLOCK_SIZE - 1))
514 		return -EINVAL;
515 
516 	if (aes->iv_len != AES_BLOCK_SIZE)
517 		return -EINVAL;
518 
519 	if (!aes->key || !aes->iv || !aes->src)
520 		return -EINVAL;
521 
522 	if (aes->cmac_final) {
523 		if (aes->cmac_key_len != AES_BLOCK_SIZE)
524 			return -EINVAL;
525 
526 		if (!aes->cmac_key)
527 			return -EINVAL;
528 	}
529 
530 	BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1);
531 	BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1);
532 
533 	ret = -EIO;
534 	memset(&op, 0, sizeof(op));
535 	op.cmd_q = cmd_q;
536 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
537 	op.ksb_key = cmd_q->ksb_key;
538 	op.ksb_ctx = cmd_q->ksb_ctx;
539 	op.init = 1;
540 	op.u.aes.type = aes->type;
541 	op.u.aes.mode = aes->mode;
542 	op.u.aes.action = aes->action;
543 
544 	/* All supported key sizes fit in a single (32-byte) KSB entry
545 	 * and must be in little endian format. Use the 256-bit byte
546 	 * swap passthru option to convert from big endian to little
547 	 * endian.
548 	 */
549 	ret = ccp_init_dm_workarea(&key, cmd_q,
550 				   CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
551 				   DMA_TO_DEVICE);
552 	if (ret)
553 		return ret;
554 
555 	dm_offset = CCP_KSB_BYTES - aes->key_len;
556 	ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
557 	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
558 			      CCP_PASSTHRU_BYTESWAP_256BIT);
559 	if (ret) {
560 		cmd->engine_error = cmd_q->cmd_error;
561 		goto e_key;
562 	}
563 
564 	/* The AES context fits in a single (32-byte) KSB entry and
565 	 * must be in little endian format. Use the 256-bit byte swap
566 	 * passthru option to convert from big endian to little endian.
567 	 */
568 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
569 				   CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
570 				   DMA_BIDIRECTIONAL);
571 	if (ret)
572 		goto e_key;
573 
574 	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
575 	ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
576 	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
577 			      CCP_PASSTHRU_BYTESWAP_256BIT);
578 	if (ret) {
579 		cmd->engine_error = cmd_q->cmd_error;
580 		goto e_ctx;
581 	}
582 
583 	/* Send data to the CCP AES engine */
584 	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
585 			    AES_BLOCK_SIZE, DMA_TO_DEVICE);
586 	if (ret)
587 		goto e_ctx;
588 
589 	while (src.sg_wa.bytes_left) {
590 		ccp_prepare_data(&src, NULL, &op, AES_BLOCK_SIZE, true);
591 		if (aes->cmac_final && !src.sg_wa.bytes_left) {
592 			op.eom = 1;
593 
594 			/* Push the K1/K2 key to the CCP now */
595 			ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid,
596 						op.ksb_ctx,
597 						CCP_PASSTHRU_BYTESWAP_256BIT);
598 			if (ret) {
599 				cmd->engine_error = cmd_q->cmd_error;
600 				goto e_src;
601 			}
602 
603 			ccp_set_dm_area(&ctx, 0, aes->cmac_key, 0,
604 					aes->cmac_key_len);
605 			ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
606 					      CCP_PASSTHRU_BYTESWAP_256BIT);
607 			if (ret) {
608 				cmd->engine_error = cmd_q->cmd_error;
609 				goto e_src;
610 			}
611 		}
612 
613 		ret = cmd_q->ccp->vdata->perform->perform_aes(&op);
614 		if (ret) {
615 			cmd->engine_error = cmd_q->cmd_error;
616 			goto e_src;
617 		}
618 
619 		ccp_process_data(&src, NULL, &op);
620 	}
621 
622 	/* Retrieve the AES context - convert from LE to BE using
623 	 * 32-byte (256-bit) byteswapping
624 	 */
625 	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
626 				CCP_PASSTHRU_BYTESWAP_256BIT);
627 	if (ret) {
628 		cmd->engine_error = cmd_q->cmd_error;
629 		goto e_src;
630 	}
631 
632 	/* ...but we only need AES_BLOCK_SIZE bytes */
633 	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
634 	ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
635 
636 e_src:
637 	ccp_free_data(&src, cmd_q);
638 
639 e_ctx:
640 	ccp_dm_free(&ctx);
641 
642 e_key:
643 	ccp_dm_free(&key);
644 
645 	return ret;
646 }
647 
648 static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
649 {
650 	struct ccp_aes_engine *aes = &cmd->u.aes;
651 	struct ccp_dm_workarea key, ctx;
652 	struct ccp_data src, dst;
653 	struct ccp_op op;
654 	unsigned int dm_offset;
655 	bool in_place = false;
656 	int ret;
657 
658 	if (aes->mode == CCP_AES_MODE_CMAC)
659 		return ccp_run_aes_cmac_cmd(cmd_q, cmd);
660 
661 	if (!((aes->key_len == AES_KEYSIZE_128) ||
662 	      (aes->key_len == AES_KEYSIZE_192) ||
663 	      (aes->key_len == AES_KEYSIZE_256)))
664 		return -EINVAL;
665 
666 	if (((aes->mode == CCP_AES_MODE_ECB) ||
667 	     (aes->mode == CCP_AES_MODE_CBC) ||
668 	     (aes->mode == CCP_AES_MODE_CFB)) &&
669 	    (aes->src_len & (AES_BLOCK_SIZE - 1)))
670 		return -EINVAL;
671 
672 	if (!aes->key || !aes->src || !aes->dst)
673 		return -EINVAL;
674 
675 	if (aes->mode != CCP_AES_MODE_ECB) {
676 		if (aes->iv_len != AES_BLOCK_SIZE)
677 			return -EINVAL;
678 
679 		if (!aes->iv)
680 			return -EINVAL;
681 	}
682 
683 	BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1);
684 	BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1);
685 
686 	ret = -EIO;
687 	memset(&op, 0, sizeof(op));
688 	op.cmd_q = cmd_q;
689 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
690 	op.ksb_key = cmd_q->ksb_key;
691 	op.ksb_ctx = cmd_q->ksb_ctx;
692 	op.init = (aes->mode == CCP_AES_MODE_ECB) ? 0 : 1;
693 	op.u.aes.type = aes->type;
694 	op.u.aes.mode = aes->mode;
695 	op.u.aes.action = aes->action;
696 
697 	/* All supported key sizes fit in a single (32-byte) KSB entry
698 	 * and must be in little endian format. Use the 256-bit byte
699 	 * swap passthru option to convert from big endian to little
700 	 * endian.
701 	 */
702 	ret = ccp_init_dm_workarea(&key, cmd_q,
703 				   CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
704 				   DMA_TO_DEVICE);
705 	if (ret)
706 		return ret;
707 
708 	dm_offset = CCP_KSB_BYTES - aes->key_len;
709 	ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
710 	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
711 			      CCP_PASSTHRU_BYTESWAP_256BIT);
712 	if (ret) {
713 		cmd->engine_error = cmd_q->cmd_error;
714 		goto e_key;
715 	}
716 
717 	/* The AES context fits in a single (32-byte) KSB entry and
718 	 * must be in little endian format. Use the 256-bit byte swap
719 	 * passthru option to convert from big endian to little endian.
720 	 */
721 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
722 				   CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
723 				   DMA_BIDIRECTIONAL);
724 	if (ret)
725 		goto e_key;
726 
727 	if (aes->mode != CCP_AES_MODE_ECB) {
728 		/* Load the AES context - convert to LE */
729 		dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
730 		ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
731 		ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
732 				      CCP_PASSTHRU_BYTESWAP_256BIT);
733 		if (ret) {
734 			cmd->engine_error = cmd_q->cmd_error;
735 			goto e_ctx;
736 		}
737 	}
738 
739 	/* Prepare the input and output data workareas. For in-place
740 	 * operations we need to set the dma direction to BIDIRECTIONAL
741 	 * and copy the src workarea to the dst workarea.
742 	 */
743 	if (sg_virt(aes->src) == sg_virt(aes->dst))
744 		in_place = true;
745 
746 	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
747 			    AES_BLOCK_SIZE,
748 			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
749 	if (ret)
750 		goto e_ctx;
751 
752 	if (in_place) {
753 		dst = src;
754 	} else {
755 		ret = ccp_init_data(&dst, cmd_q, aes->dst, aes->src_len,
756 				    AES_BLOCK_SIZE, DMA_FROM_DEVICE);
757 		if (ret)
758 			goto e_src;
759 	}
760 
761 	/* Send data to the CCP AES engine */
762 	while (src.sg_wa.bytes_left) {
763 		ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true);
764 		if (!src.sg_wa.bytes_left) {
765 			op.eom = 1;
766 
767 			/* Since we don't retrieve the AES context in ECB
768 			 * mode we have to wait for the operation to complete
769 			 * on the last piece of data
770 			 */
771 			if (aes->mode == CCP_AES_MODE_ECB)
772 				op.soc = 1;
773 		}
774 
775 		ret = cmd_q->ccp->vdata->perform->perform_aes(&op);
776 		if (ret) {
777 			cmd->engine_error = cmd_q->cmd_error;
778 			goto e_dst;
779 		}
780 
781 		ccp_process_data(&src, &dst, &op);
782 	}
783 
784 	if (aes->mode != CCP_AES_MODE_ECB) {
785 		/* Retrieve the AES context - convert from LE to BE using
786 		 * 32-byte (256-bit) byteswapping
787 		 */
788 		ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
789 					CCP_PASSTHRU_BYTESWAP_256BIT);
790 		if (ret) {
791 			cmd->engine_error = cmd_q->cmd_error;
792 			goto e_dst;
793 		}
794 
795 		/* ...but we only need AES_BLOCK_SIZE bytes */
796 		dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
797 		ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
798 	}
799 
800 e_dst:
801 	if (!in_place)
802 		ccp_free_data(&dst, cmd_q);
803 
804 e_src:
805 	ccp_free_data(&src, cmd_q);
806 
807 e_ctx:
808 	ccp_dm_free(&ctx);
809 
810 e_key:
811 	ccp_dm_free(&key);
812 
813 	return ret;
814 }
815 
816 static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
817 			       struct ccp_cmd *cmd)
818 {
819 	struct ccp_xts_aes_engine *xts = &cmd->u.xts;
820 	struct ccp_dm_workarea key, ctx;
821 	struct ccp_data src, dst;
822 	struct ccp_op op;
823 	unsigned int unit_size, dm_offset;
824 	bool in_place = false;
825 	int ret;
826 
827 	switch (xts->unit_size) {
828 	case CCP_XTS_AES_UNIT_SIZE_16:
829 		unit_size = 16;
830 		break;
831 	case CCP_XTS_AES_UNIT_SIZE_512:
832 		unit_size = 512;
833 		break;
834 	case CCP_XTS_AES_UNIT_SIZE_1024:
835 		unit_size = 1024;
836 		break;
837 	case CCP_XTS_AES_UNIT_SIZE_2048:
838 		unit_size = 2048;
839 		break;
840 	case CCP_XTS_AES_UNIT_SIZE_4096:
841 		unit_size = 4096;
842 		break;
843 
844 	default:
845 		return -EINVAL;
846 	}
847 
848 	if (xts->key_len != AES_KEYSIZE_128)
849 		return -EINVAL;
850 
851 	if (!xts->final && (xts->src_len & (AES_BLOCK_SIZE - 1)))
852 		return -EINVAL;
853 
854 	if (xts->iv_len != AES_BLOCK_SIZE)
855 		return -EINVAL;
856 
857 	if (!xts->key || !xts->iv || !xts->src || !xts->dst)
858 		return -EINVAL;
859 
860 	BUILD_BUG_ON(CCP_XTS_AES_KEY_KSB_COUNT != 1);
861 	BUILD_BUG_ON(CCP_XTS_AES_CTX_KSB_COUNT != 1);
862 
863 	ret = -EIO;
864 	memset(&op, 0, sizeof(op));
865 	op.cmd_q = cmd_q;
866 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
867 	op.ksb_key = cmd_q->ksb_key;
868 	op.ksb_ctx = cmd_q->ksb_ctx;
869 	op.init = 1;
870 	op.u.xts.action = xts->action;
871 	op.u.xts.unit_size = xts->unit_size;
872 
873 	/* All supported key sizes fit in a single (32-byte) KSB entry
874 	 * and must be in little endian format. Use the 256-bit byte
875 	 * swap passthru option to convert from big endian to little
876 	 * endian.
877 	 */
878 	ret = ccp_init_dm_workarea(&key, cmd_q,
879 				   CCP_XTS_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
880 				   DMA_TO_DEVICE);
881 	if (ret)
882 		return ret;
883 
884 	dm_offset = CCP_KSB_BYTES - AES_KEYSIZE_128;
885 	ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len);
886 	ccp_set_dm_area(&key, 0, xts->key, dm_offset, xts->key_len);
887 	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
888 			      CCP_PASSTHRU_BYTESWAP_256BIT);
889 	if (ret) {
890 		cmd->engine_error = cmd_q->cmd_error;
891 		goto e_key;
892 	}
893 
894 	/* The AES context fits in a single (32-byte) KSB entry and
895 	 * for XTS is already in little endian format so no byte swapping
896 	 * is needed.
897 	 */
898 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
899 				   CCP_XTS_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
900 				   DMA_BIDIRECTIONAL);
901 	if (ret)
902 		goto e_key;
903 
904 	ccp_set_dm_area(&ctx, 0, xts->iv, 0, xts->iv_len);
905 	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
906 			      CCP_PASSTHRU_BYTESWAP_NOOP);
907 	if (ret) {
908 		cmd->engine_error = cmd_q->cmd_error;
909 		goto e_ctx;
910 	}
911 
912 	/* Prepare the input and output data workareas. For in-place
913 	 * operations we need to set the dma direction to BIDIRECTIONAL
914 	 * and copy the src workarea to the dst workarea.
915 	 */
916 	if (sg_virt(xts->src) == sg_virt(xts->dst))
917 		in_place = true;
918 
919 	ret = ccp_init_data(&src, cmd_q, xts->src, xts->src_len,
920 			    unit_size,
921 			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
922 	if (ret)
923 		goto e_ctx;
924 
925 	if (in_place) {
926 		dst = src;
927 	} else {
928 		ret = ccp_init_data(&dst, cmd_q, xts->dst, xts->src_len,
929 				    unit_size, DMA_FROM_DEVICE);
930 		if (ret)
931 			goto e_src;
932 	}
933 
934 	/* Send data to the CCP AES engine */
935 	while (src.sg_wa.bytes_left) {
936 		ccp_prepare_data(&src, &dst, &op, unit_size, true);
937 		if (!src.sg_wa.bytes_left)
938 			op.eom = 1;
939 
940 		ret = cmd_q->ccp->vdata->perform->perform_xts_aes(&op);
941 		if (ret) {
942 			cmd->engine_error = cmd_q->cmd_error;
943 			goto e_dst;
944 		}
945 
946 		ccp_process_data(&src, &dst, &op);
947 	}
948 
949 	/* Retrieve the AES context - convert from LE to BE using
950 	 * 32-byte (256-bit) byteswapping
951 	 */
952 	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
953 				CCP_PASSTHRU_BYTESWAP_256BIT);
954 	if (ret) {
955 		cmd->engine_error = cmd_q->cmd_error;
956 		goto e_dst;
957 	}
958 
959 	/* ...but we only need AES_BLOCK_SIZE bytes */
960 	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
961 	ccp_get_dm_area(&ctx, dm_offset, xts->iv, 0, xts->iv_len);
962 
963 e_dst:
964 	if (!in_place)
965 		ccp_free_data(&dst, cmd_q);
966 
967 e_src:
968 	ccp_free_data(&src, cmd_q);
969 
970 e_ctx:
971 	ccp_dm_free(&ctx);
972 
973 e_key:
974 	ccp_dm_free(&key);
975 
976 	return ret;
977 }
978 
979 static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
980 {
981 	struct ccp_sha_engine *sha = &cmd->u.sha;
982 	struct ccp_dm_workarea ctx;
983 	struct ccp_data src;
984 	struct ccp_op op;
985 	int ret;
986 
987 	if (sha->ctx_len != CCP_SHA_CTXSIZE)
988 		return -EINVAL;
989 
990 	if (!sha->ctx)
991 		return -EINVAL;
992 
993 	if (!sha->final && (sha->src_len & (CCP_SHA_BLOCKSIZE - 1)))
994 		return -EINVAL;
995 
996 	if (!sha->src_len) {
997 		const u8 *sha_zero;
998 
999 		/* Not final, just return */
1000 		if (!sha->final)
1001 			return 0;
1002 
1003 		/* CCP can't do a zero length sha operation so the caller
1004 		 * must buffer the data.
1005 		 */
1006 		if (sha->msg_bits)
1007 			return -EINVAL;
1008 
1009 		/* The CCP cannot perform zero-length sha operations so the
1010 		 * caller is required to buffer data for the final operation.
1011 		 * However, a sha operation for a message with a total length
1012 		 * of zero is valid so known values are required to supply
1013 		 * the result.
1014 		 */
1015 		switch (sha->type) {
1016 		case CCP_SHA_TYPE_1:
1017 			sha_zero = sha1_zero_message_hash;
1018 			break;
1019 		case CCP_SHA_TYPE_224:
1020 			sha_zero = sha224_zero_message_hash;
1021 			break;
1022 		case CCP_SHA_TYPE_256:
1023 			sha_zero = sha256_zero_message_hash;
1024 			break;
1025 		default:
1026 			return -EINVAL;
1027 		}
1028 
1029 		scatterwalk_map_and_copy((void *)sha_zero, sha->ctx, 0,
1030 					 sha->ctx_len, 1);
1031 
1032 		return 0;
1033 	}
1034 
1035 	if (!sha->src)
1036 		return -EINVAL;
1037 
1038 	BUILD_BUG_ON(CCP_SHA_KSB_COUNT != 1);
1039 
1040 	memset(&op, 0, sizeof(op));
1041 	op.cmd_q = cmd_q;
1042 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1043 	op.ksb_ctx = cmd_q->ksb_ctx;
1044 	op.u.sha.type = sha->type;
1045 	op.u.sha.msg_bits = sha->msg_bits;
1046 
1047 	/* The SHA context fits in a single (32-byte) KSB entry and
1048 	 * must be in little endian format. Use the 256-bit byte swap
1049 	 * passthru option to convert from big endian to little endian.
1050 	 */
1051 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
1052 				   CCP_SHA_KSB_COUNT * CCP_KSB_BYTES,
1053 				   DMA_BIDIRECTIONAL);
1054 	if (ret)
1055 		return ret;
1056 
1057 	if (sha->first) {
1058 		const __be32 *init;
1059 
1060 		switch (sha->type) {
1061 		case CCP_SHA_TYPE_1:
1062 			init = ccp_sha1_init;
1063 			break;
1064 		case CCP_SHA_TYPE_224:
1065 			init = ccp_sha224_init;
1066 			break;
1067 		case CCP_SHA_TYPE_256:
1068 			init = ccp_sha256_init;
1069 			break;
1070 		default:
1071 			ret = -EINVAL;
1072 			goto e_ctx;
1073 		}
1074 		memcpy(ctx.address, init, CCP_SHA_CTXSIZE);
1075 	} else {
1076 		ccp_set_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
1077 	}
1078 
1079 	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1080 			      CCP_PASSTHRU_BYTESWAP_256BIT);
1081 	if (ret) {
1082 		cmd->engine_error = cmd_q->cmd_error;
1083 		goto e_ctx;
1084 	}
1085 
1086 	/* Send data to the CCP SHA engine */
1087 	ret = ccp_init_data(&src, cmd_q, sha->src, sha->src_len,
1088 			    CCP_SHA_BLOCKSIZE, DMA_TO_DEVICE);
1089 	if (ret)
1090 		goto e_ctx;
1091 
1092 	while (src.sg_wa.bytes_left) {
1093 		ccp_prepare_data(&src, NULL, &op, CCP_SHA_BLOCKSIZE, false);
1094 		if (sha->final && !src.sg_wa.bytes_left)
1095 			op.eom = 1;
1096 
1097 		ret = cmd_q->ccp->vdata->perform->perform_sha(&op);
1098 		if (ret) {
1099 			cmd->engine_error = cmd_q->cmd_error;
1100 			goto e_data;
1101 		}
1102 
1103 		ccp_process_data(&src, NULL, &op);
1104 	}
1105 
1106 	/* Retrieve the SHA context - convert from LE to BE using
1107 	 * 32-byte (256-bit) byteswapping
1108 	 */
1109 	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1110 				CCP_PASSTHRU_BYTESWAP_256BIT);
1111 	if (ret) {
1112 		cmd->engine_error = cmd_q->cmd_error;
1113 		goto e_data;
1114 	}
1115 
1116 	ccp_get_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
1117 
1118 	if (sha->final && sha->opad) {
1119 		/* HMAC operation, recursively perform final SHA */
1120 		struct ccp_cmd hmac_cmd;
1121 		struct scatterlist sg;
1122 		u64 block_size, digest_size;
1123 		u8 *hmac_buf;
1124 
1125 		switch (sha->type) {
1126 		case CCP_SHA_TYPE_1:
1127 			block_size = SHA1_BLOCK_SIZE;
1128 			digest_size = SHA1_DIGEST_SIZE;
1129 			break;
1130 		case CCP_SHA_TYPE_224:
1131 			block_size = SHA224_BLOCK_SIZE;
1132 			digest_size = SHA224_DIGEST_SIZE;
1133 			break;
1134 		case CCP_SHA_TYPE_256:
1135 			block_size = SHA256_BLOCK_SIZE;
1136 			digest_size = SHA256_DIGEST_SIZE;
1137 			break;
1138 		default:
1139 			ret = -EINVAL;
1140 			goto e_data;
1141 		}
1142 
1143 		if (sha->opad_len != block_size) {
1144 			ret = -EINVAL;
1145 			goto e_data;
1146 		}
1147 
1148 		hmac_buf = kmalloc(block_size + digest_size, GFP_KERNEL);
1149 		if (!hmac_buf) {
1150 			ret = -ENOMEM;
1151 			goto e_data;
1152 		}
1153 		sg_init_one(&sg, hmac_buf, block_size + digest_size);
1154 
1155 		scatterwalk_map_and_copy(hmac_buf, sha->opad, 0, block_size, 0);
1156 		memcpy(hmac_buf + block_size, ctx.address, digest_size);
1157 
1158 		memset(&hmac_cmd, 0, sizeof(hmac_cmd));
1159 		hmac_cmd.engine = CCP_ENGINE_SHA;
1160 		hmac_cmd.u.sha.type = sha->type;
1161 		hmac_cmd.u.sha.ctx = sha->ctx;
1162 		hmac_cmd.u.sha.ctx_len = sha->ctx_len;
1163 		hmac_cmd.u.sha.src = &sg;
1164 		hmac_cmd.u.sha.src_len = block_size + digest_size;
1165 		hmac_cmd.u.sha.opad = NULL;
1166 		hmac_cmd.u.sha.opad_len = 0;
1167 		hmac_cmd.u.sha.first = 1;
1168 		hmac_cmd.u.sha.final = 1;
1169 		hmac_cmd.u.sha.msg_bits = (block_size + digest_size) << 3;
1170 
1171 		ret = ccp_run_sha_cmd(cmd_q, &hmac_cmd);
1172 		if (ret)
1173 			cmd->engine_error = hmac_cmd.engine_error;
1174 
1175 		kfree(hmac_buf);
1176 	}
1177 
1178 e_data:
1179 	ccp_free_data(&src, cmd_q);
1180 
1181 e_ctx:
1182 	ccp_dm_free(&ctx);
1183 
1184 	return ret;
1185 }
1186 
1187 static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1188 {
1189 	struct ccp_rsa_engine *rsa = &cmd->u.rsa;
1190 	struct ccp_dm_workarea exp, src;
1191 	struct ccp_data dst;
1192 	struct ccp_op op;
1193 	unsigned int ksb_count, i_len, o_len;
1194 	int ret;
1195 
1196 	if (rsa->key_size > CCP_RSA_MAX_WIDTH)
1197 		return -EINVAL;
1198 
1199 	if (!rsa->exp || !rsa->mod || !rsa->src || !rsa->dst)
1200 		return -EINVAL;
1201 
1202 	/* The RSA modulus must precede the message being acted upon, so
1203 	 * it must be copied to a DMA area where the message and the
1204 	 * modulus can be concatenated.  Therefore the input buffer
1205 	 * length required is twice the output buffer length (which
1206 	 * must be a multiple of 256-bits).
1207 	 */
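	/* rsa->key_size is in bits: round it up to a multiple of 256 bits
	 * and express it in bytes (e.g. a 2048-bit key gives o_len = 256
	 * and i_len = 512).
	 */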
1208 	o_len = ((rsa->key_size + 255) / 256) * 32;
1209 	i_len = o_len * 2;
1210 
1211 	ksb_count = o_len / CCP_KSB_BYTES;
1212 
1213 	memset(&op, 0, sizeof(op));
1214 	op.cmd_q = cmd_q;
1215 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1216 	op.ksb_key = ccp_alloc_ksb(cmd_q->ccp, ksb_count);
1217 	if (!op.ksb_key)
1218 		return -EIO;
1219 
1220 	/* The RSA exponent may span multiple (32-byte) KSB entries and must
1221 	 * be in little endian format. Reverse copy each 32-byte chunk
1222 	 * of the exponent (En chunk to E0 chunk, E(n-1) chunk to E1 chunk)
1223 	 * and each byte within that chunk and do not perform any byte swap
1224 	 * operations on the passthru operation.
1225 	 */
1226 	ret = ccp_init_dm_workarea(&exp, cmd_q, o_len, DMA_TO_DEVICE);
1227 	if (ret)
1228 		goto e_ksb;
1229 
1230 	ret = ccp_reverse_set_dm_area(&exp, rsa->exp, rsa->exp_len,
1231 				      CCP_KSB_BYTES, false);
1232 	if (ret)
1233 		goto e_exp;
1234 	ret = ccp_copy_to_ksb(cmd_q, &exp, op.jobid, op.ksb_key,
1235 			      CCP_PASSTHRU_BYTESWAP_NOOP);
1236 	if (ret) {
1237 		cmd->engine_error = cmd_q->cmd_error;
1238 		goto e_exp;
1239 	}
1240 
1241 	/* Concatenate the modulus and the message. Both the modulus and
1242 	 * the operands must be in little endian format.  Since the input
1243 	 * is in big endian format it must be converted.
1244 	 */
1245 	ret = ccp_init_dm_workarea(&src, cmd_q, i_len, DMA_TO_DEVICE);
1246 	if (ret)
1247 		goto e_exp;
1248 
1249 	ret = ccp_reverse_set_dm_area(&src, rsa->mod, rsa->mod_len,
1250 				      CCP_KSB_BYTES, false);
1251 	if (ret)
1252 		goto e_src;
1253 	src.address += o_len;	/* Adjust the address for the copy operation */
1254 	ret = ccp_reverse_set_dm_area(&src, rsa->src, rsa->src_len,
1255 				      CCP_KSB_BYTES, false);
1256 	if (ret)
1257 		goto e_src;
1258 	src.address -= o_len;	/* Reset the address to original value */
1259 
1260 	/* Prepare the output area for the operation */
1261 	ret = ccp_init_data(&dst, cmd_q, rsa->dst, rsa->mod_len,
1262 			    o_len, DMA_FROM_DEVICE);
1263 	if (ret)
1264 		goto e_src;
1265 
1266 	op.soc = 1;
1267 	op.src.u.dma.address = src.dma.address;
1268 	op.src.u.dma.offset = 0;
1269 	op.src.u.dma.length = i_len;
1270 	op.dst.u.dma.address = dst.dm_wa.dma.address;
1271 	op.dst.u.dma.offset = 0;
1272 	op.dst.u.dma.length = o_len;
1273 
1274 	op.u.rsa.mod_size = rsa->key_size;
1275 	op.u.rsa.input_len = i_len;
1276 
1277 	ret = cmd_q->ccp->vdata->perform->perform_rsa(&op);
1278 	if (ret) {
1279 		cmd->engine_error = cmd_q->cmd_error;
1280 		goto e_dst;
1281 	}
1282 
1283 	ccp_reverse_get_dm_area(&dst.dm_wa, rsa->dst, rsa->mod_len);
1284 
1285 e_dst:
1286 	ccp_free_data(&dst, cmd_q);
1287 
1288 e_src:
1289 	ccp_dm_free(&src);
1290 
1291 e_exp:
1292 	ccp_dm_free(&exp);
1293 
1294 e_ksb:
1295 	ccp_free_ksb(cmd_q->ccp, op.ksb_key, ksb_count);
1296 
1297 	return ret;
1298 }
1299 
1300 static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q,
1301 				struct ccp_cmd *cmd)
1302 {
1303 	struct ccp_passthru_engine *pt = &cmd->u.passthru;
1304 	struct ccp_dm_workarea mask;
1305 	struct ccp_data src, dst;
1306 	struct ccp_op op;
1307 	bool in_place = false;
1308 	unsigned int i;
1309 	int ret;
1310 
1311 	if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1)))
1312 		return -EINVAL;
1313 
1314 	if (!pt->src || !pt->dst)
1315 		return -EINVAL;
1316 
1317 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
1318 		if (pt->mask_len != CCP_PASSTHRU_MASKSIZE)
1319 			return -EINVAL;
1320 		if (!pt->mask)
1321 			return -EINVAL;
1322 	}
1323 
1324 	BUILD_BUG_ON(CCP_PASSTHRU_KSB_COUNT != 1);
1325 
1326 	memset(&op, 0, sizeof(op));
1327 	op.cmd_q = cmd_q;
1328 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1329 
1330 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
1331 		/* Load the mask */
1332 		op.ksb_key = cmd_q->ksb_key;
1333 
1334 		ret = ccp_init_dm_workarea(&mask, cmd_q,
1335 					   CCP_PASSTHRU_KSB_COUNT *
1336 					   CCP_KSB_BYTES,
1337 					   DMA_TO_DEVICE);
1338 		if (ret)
1339 			return ret;
1340 
1341 		ccp_set_dm_area(&mask, 0, pt->mask, 0, pt->mask_len);
1342 		ret = ccp_copy_to_ksb(cmd_q, &mask, op.jobid, op.ksb_key,
1343 				      CCP_PASSTHRU_BYTESWAP_NOOP);
1344 		if (ret) {
1345 			cmd->engine_error = cmd_q->cmd_error;
1346 			goto e_mask;
1347 		}
1348 	}
1349 
1350 	/* Prepare the input and output data workareas. For in-place
1351 	 * operations we need to set the dma direction to BIDIRECTIONAL
1352 	 * and copy the src workarea to the dst workarea.
1353 	 */
1354 	if (sg_virt(pt->src) == sg_virt(pt->dst))
1355 		in_place = true;
1356 
1357 	ret = ccp_init_data(&src, cmd_q, pt->src, pt->src_len,
1358 			    CCP_PASSTHRU_MASKSIZE,
1359 			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1360 	if (ret)
1361 		goto e_mask;
1362 
1363 	if (in_place) {
1364 		dst = src;
1365 	} else {
1366 		ret = ccp_init_data(&dst, cmd_q, pt->dst, pt->src_len,
1367 				    CCP_PASSTHRU_MASKSIZE, DMA_FROM_DEVICE);
1368 		if (ret)
1369 			goto e_src;
1370 	}
1371 
1372 	/* Send data to the CCP Passthru engine
1373 	 *   Because the CCP engine works on a single source and destination
1374 	 *   dma address at a time, each entry in the source scatterlist
1375 	 *   (after the dma_map_sg call) must be less than or equal to the
1376 	 *   (remaining) length in the destination scatterlist entry and the
1377 	 *   length must be a multiple of CCP_PASSTHRU_BLOCKSIZE
1378 	 */
1379 	dst.sg_wa.sg_used = 0;
1380 	for (i = 1; i <= src.sg_wa.dma_count; i++) {
1381 		if (!dst.sg_wa.sg ||
1382 		    (dst.sg_wa.sg->length < src.sg_wa.sg->length)) {
1383 			ret = -EINVAL;
1384 			goto e_dst;
1385 		}
1386 
1387 		if (i == src.sg_wa.dma_count) {
1388 			op.eom = 1;
1389 			op.soc = 1;
1390 		}
1391 
1392 		op.src.type = CCP_MEMTYPE_SYSTEM;
1393 		op.src.u.dma.address = sg_dma_address(src.sg_wa.sg);
1394 		op.src.u.dma.offset = 0;
1395 		op.src.u.dma.length = sg_dma_len(src.sg_wa.sg);
1396 
1397 		op.dst.type = CCP_MEMTYPE_SYSTEM;
1398 		op.dst.u.dma.address = sg_dma_address(dst.sg_wa.sg);
1399 		op.dst.u.dma.offset = dst.sg_wa.sg_used;
1400 		op.dst.u.dma.length = op.src.u.dma.length;
1401 
1402 		ret = cmd_q->ccp->vdata->perform->perform_passthru(&op);
1403 		if (ret) {
1404 			cmd->engine_error = cmd_q->cmd_error;
1405 			goto e_dst;
1406 		}
1407 
1408 		dst.sg_wa.sg_used += src.sg_wa.sg->length;
1409 		if (dst.sg_wa.sg_used == dst.sg_wa.sg->length) {
1410 			dst.sg_wa.sg = sg_next(dst.sg_wa.sg);
1411 			dst.sg_wa.sg_used = 0;
1412 		}
1413 		src.sg_wa.sg = sg_next(src.sg_wa.sg);
1414 	}
1415 
1416 e_dst:
1417 	if (!in_place)
1418 		ccp_free_data(&dst, cmd_q);
1419 
1420 e_src:
1421 	ccp_free_data(&src, cmd_q);
1422 
1423 e_mask:
1424 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
1425 		ccp_dm_free(&mask);
1426 
1427 	return ret;
1428 }
1429 
1430 static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1431 {
1432 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
1433 	struct ccp_dm_workarea src, dst;
1434 	struct ccp_op op;
1435 	int ret;
1436 	u8 *save;
1437 
1438 	if (!ecc->u.mm.operand_1 ||
1439 	    (ecc->u.mm.operand_1_len > CCP_ECC_MODULUS_BYTES))
1440 		return -EINVAL;
1441 
1442 	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT)
1443 		if (!ecc->u.mm.operand_2 ||
1444 		    (ecc->u.mm.operand_2_len > CCP_ECC_MODULUS_BYTES))
1445 			return -EINVAL;
1446 
1447 	if (!ecc->u.mm.result ||
1448 	    (ecc->u.mm.result_len < CCP_ECC_MODULUS_BYTES))
1449 		return -EINVAL;
1450 
1451 	memset(&op, 0, sizeof(op));
1452 	op.cmd_q = cmd_q;
1453 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1454 
1455 	/* Concatenate the modulus and the operands. Both the modulus and
1456 	 * the operands must be in little endian format.  Since the input
1457 	 * is in big endian format it must be converted and placed in a
1458 	 * fixed length buffer.
1459 	 */
1460 	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
1461 				   DMA_TO_DEVICE);
1462 	if (ret)
1463 		return ret;
1464 
1465 	/* Save the workarea address since it is updated in order to perform
1466 	 * the concatenation
1467 	 */
1468 	save = src.address;
1469 
1470 	/* Copy the ECC modulus */
1471 	ret = ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
1472 				      CCP_ECC_OPERAND_SIZE, false);
1473 	if (ret)
1474 		goto e_src;
1475 	src.address += CCP_ECC_OPERAND_SIZE;
1476 
1477 	/* Copy the first operand */
1478 	ret = ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_1,
1479 				      ecc->u.mm.operand_1_len,
1480 				      CCP_ECC_OPERAND_SIZE, false);
1481 	if (ret)
1482 		goto e_src;
1483 	src.address += CCP_ECC_OPERAND_SIZE;
1484 
1485 	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT) {
1486 		/* Copy the second operand */
1487 		ret = ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_2,
1488 					      ecc->u.mm.operand_2_len,
1489 					      CCP_ECC_OPERAND_SIZE, false);
1490 		if (ret)
1491 			goto e_src;
1492 		src.address += CCP_ECC_OPERAND_SIZE;
1493 	}
1494 
1495 	/* Restore the workarea address */
1496 	src.address = save;
1497 
1498 	/* Prepare the output area for the operation */
1499 	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
1500 				   DMA_FROM_DEVICE);
1501 	if (ret)
1502 		goto e_src;
1503 
1504 	op.soc = 1;
1505 	op.src.u.dma.address = src.dma.address;
1506 	op.src.u.dma.offset = 0;
1507 	op.src.u.dma.length = src.length;
1508 	op.dst.u.dma.address = dst.dma.address;
1509 	op.dst.u.dma.offset = 0;
1510 	op.dst.u.dma.length = dst.length;
1511 
1512 	op.u.ecc.function = cmd->u.ecc.function;
1513 
1514 	ret = cmd_q->ccp->vdata->perform->perform_ecc(&op);
1515 	if (ret) {
1516 		cmd->engine_error = cmd_q->cmd_error;
1517 		goto e_dst;
1518 	}
1519 
1520 	ecc->ecc_result = le16_to_cpup(
1521 		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
1522 	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
1523 		ret = -EIO;
1524 		goto e_dst;
1525 	}
1526 
1527 	/* Save the ECC result */
1528 	ccp_reverse_get_dm_area(&dst, ecc->u.mm.result, CCP_ECC_MODULUS_BYTES);
1529 
1530 e_dst:
1531 	ccp_dm_free(&dst);
1532 
1533 e_src:
1534 	ccp_dm_free(&src);
1535 
1536 	return ret;
1537 }
1538 
1539 static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1540 {
1541 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
1542 	struct ccp_dm_workarea src, dst;
1543 	struct ccp_op op;
1544 	int ret;
1545 	u8 *save;
1546 
1547 	if (!ecc->u.pm.point_1.x ||
1548 	    (ecc->u.pm.point_1.x_len > CCP_ECC_MODULUS_BYTES) ||
1549 	    !ecc->u.pm.point_1.y ||
1550 	    (ecc->u.pm.point_1.y_len > CCP_ECC_MODULUS_BYTES))
1551 		return -EINVAL;
1552 
1553 	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
1554 		if (!ecc->u.pm.point_2.x ||
1555 		    (ecc->u.pm.point_2.x_len > CCP_ECC_MODULUS_BYTES) ||
1556 		    !ecc->u.pm.point_2.y ||
1557 		    (ecc->u.pm.point_2.y_len > CCP_ECC_MODULUS_BYTES))
1558 			return -EINVAL;
1559 	} else {
1560 		if (!ecc->u.pm.domain_a ||
1561 		    (ecc->u.pm.domain_a_len > CCP_ECC_MODULUS_BYTES))
1562 			return -EINVAL;
1563 
1564 		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT)
1565 			if (!ecc->u.pm.scalar ||
1566 			    (ecc->u.pm.scalar_len > CCP_ECC_MODULUS_BYTES))
1567 				return -EINVAL;
1568 	}
1569 
1570 	if (!ecc->u.pm.result.x ||
1571 	    (ecc->u.pm.result.x_len < CCP_ECC_MODULUS_BYTES) ||
1572 	    !ecc->u.pm.result.y ||
1573 	    (ecc->u.pm.result.y_len < CCP_ECC_MODULUS_BYTES))
1574 		return -EINVAL;
1575 
1576 	memset(&op, 0, sizeof(op));
1577 	op.cmd_q = cmd_q;
1578 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1579 
1580 	/* Concatenate the modulus and the operands. Both the modulus and
1581 	 * the operands must be in little endian format.  Since the input
1582 	 * is in big endian format it must be converted and placed in a
1583 	 * fixed length buffer.
1584 	 */
1585 	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
1586 				   DMA_TO_DEVICE);
1587 	if (ret)
1588 		return ret;
1589 
1590 	/* Save the workarea address since it is updated in order to perform
1591 	 * the concatenation
1592 	 */
1593 	save = src.address;
1594 
1595 	/* Copy the ECC modulus */
1596 	ret = ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
1597 				      CCP_ECC_OPERAND_SIZE, false);
1598 	if (ret)
1599 		goto e_src;
1600 	src.address += CCP_ECC_OPERAND_SIZE;
1601 
1602 	/* Copy the first point X and Y coordinate */
1603 	ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.x,
1604 				      ecc->u.pm.point_1.x_len,
1605 				      CCP_ECC_OPERAND_SIZE, false);
1606 	if (ret)
1607 		goto e_src;
1608 	src.address += CCP_ECC_OPERAND_SIZE;
1609 	ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.y,
1610 				      ecc->u.pm.point_1.y_len,
1611 				      CCP_ECC_OPERAND_SIZE, false);
1612 	if (ret)
1613 		goto e_src;
1614 	src.address += CCP_ECC_OPERAND_SIZE;
1615 
1616 	/* Set the first point Z coordinate to 1 */
1617 	*src.address = 0x01;
1618 	src.address += CCP_ECC_OPERAND_SIZE;
1619 
1620 	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
1621 		/* Copy the second point X and Y coordinate */
1622 		ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.x,
1623 					      ecc->u.pm.point_2.x_len,
1624 					      CCP_ECC_OPERAND_SIZE, false);
1625 		if (ret)
1626 			goto e_src;
1627 		src.address += CCP_ECC_OPERAND_SIZE;
1628 		ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.y,
1629 					      ecc->u.pm.point_2.y_len,
1630 					      CCP_ECC_OPERAND_SIZE, false);
1631 		if (ret)
1632 			goto e_src;
1633 		src.address += CCP_ECC_OPERAND_SIZE;
1634 
1635 		/* Set the second point Z coordinate to 1 */
1636 		*src.address = 0x01;
1637 		src.address += CCP_ECC_OPERAND_SIZE;
1638 	} else {
1639 		/* Copy the Domain "a" parameter */
1640 		ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.domain_a,
1641 					      ecc->u.pm.domain_a_len,
1642 					      CCP_ECC_OPERAND_SIZE, false);
1643 		if (ret)
1644 			goto e_src;
1645 		src.address += CCP_ECC_OPERAND_SIZE;
1646 
1647 		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT) {
1648 			/* Copy the scalar value */
1649 			ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.scalar,
1650 						      ecc->u.pm.scalar_len,
1651 						      CCP_ECC_OPERAND_SIZE,
1652 						      false);
1653 			if (ret)
1654 				goto e_src;
1655 			src.address += CCP_ECC_OPERAND_SIZE;
1656 		}
1657 	}
1658 
1659 	/* Restore the workarea address */
1660 	src.address = save;
1661 
1662 	/* Prepare the output area for the operation */
1663 	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
1664 				   DMA_FROM_DEVICE);
1665 	if (ret)
1666 		goto e_src;
1667 
1668 	op.soc = 1;
1669 	op.src.u.dma.address = src.dma.address;
1670 	op.src.u.dma.offset = 0;
1671 	op.src.u.dma.length = src.length;
1672 	op.dst.u.dma.address = dst.dma.address;
1673 	op.dst.u.dma.offset = 0;
1674 	op.dst.u.dma.length = dst.length;
1675 
1676 	op.u.ecc.function = cmd->u.ecc.function;
1677 
1678 	ret = cmd_q->ccp->vdata->perform->perform_ecc(&op);
1679 	if (ret) {
1680 		cmd->engine_error = cmd_q->cmd_error;
1681 		goto e_dst;
1682 	}
1683 
1684 	ecc->ecc_result = le16_to_cpup(
1685 		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
1686 	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
1687 		ret = -EIO;
1688 		goto e_dst;
1689 	}
1690 
1691 	/* Save the workarea address since it is updated as we walk through
1692 	 * to copy the point math result
1693 	 */
1694 	save = dst.address;
1695 
1696 	/* Save the ECC result X and Y coordinates */
1697 	ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.x,
1698 				CCP_ECC_MODULUS_BYTES);
1699 	dst.address += CCP_ECC_OUTPUT_SIZE;
1700 	ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.y,
1701 				CCP_ECC_MODULUS_BYTES);
1702 	dst.address += CCP_ECC_OUTPUT_SIZE;
1703 
1704 	/* Restore the workarea address */
1705 	dst.address = save;
1706 
1707 e_dst:
1708 	ccp_dm_free(&dst);
1709 
1710 e_src:
1711 	ccp_dm_free(&src);
1712 
1713 	return ret;
1714 }
1715 
1716 static int ccp_run_ecc_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1717 {
1718 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
1719 
1720 	ecc->ecc_result = 0;
1721 
1722 	if (!ecc->mod ||
1723 	    (ecc->mod_len > CCP_ECC_MODULUS_BYTES))
1724 		return -EINVAL;
1725 
1726 	switch (ecc->function) {
1727 	case CCP_ECC_FUNCTION_MMUL_384BIT:
1728 	case CCP_ECC_FUNCTION_MADD_384BIT:
1729 	case CCP_ECC_FUNCTION_MINV_384BIT:
1730 		return ccp_run_ecc_mm_cmd(cmd_q, cmd);
1731 
1732 	case CCP_ECC_FUNCTION_PADD_384BIT:
1733 	case CCP_ECC_FUNCTION_PMUL_384BIT:
1734 	case CCP_ECC_FUNCTION_PDBL_384BIT:
1735 		return ccp_run_ecc_pm_cmd(cmd_q, cmd);
1736 
1737 	default:
1738 		return -EINVAL;
1739 	}
1740 }
1741 
1742 int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1743 {
1744 	int ret;
1745 
1746 	cmd->engine_error = 0;
1747 	cmd_q->cmd_error = 0;
1748 	cmd_q->int_rcvd = 0;
1749 	cmd_q->free_slots = CMD_Q_DEPTH(ioread32(cmd_q->reg_status));
1750 
1751 	switch (cmd->engine) {
1752 	case CCP_ENGINE_AES:
1753 		ret = ccp_run_aes_cmd(cmd_q, cmd);
1754 		break;
1755 	case CCP_ENGINE_XTS_AES_128:
1756 		ret = ccp_run_xts_aes_cmd(cmd_q, cmd);
1757 		break;
1758 	case CCP_ENGINE_SHA:
1759 		ret = ccp_run_sha_cmd(cmd_q, cmd);
1760 		break;
1761 	case CCP_ENGINE_RSA:
1762 		ret = ccp_run_rsa_cmd(cmd_q, cmd);
1763 		break;
1764 	case CCP_ENGINE_PASSTHRU:
1765 		ret = ccp_run_passthru_cmd(cmd_q, cmd);
1766 		break;
1767 	case CCP_ENGINE_ECC:
1768 		ret = ccp_run_ecc_cmd(cmd_q, cmd);
1769 		break;
1770 	default:
1771 		ret = -EINVAL;
1772 	}
1773 
1774 	return ret;
1775 }
1776
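/* Illustrative only (not part of this file): a minimal sketch of how a
 * caller such as the ccp-crypto layer might fill in a ccp_cmd for an
 * AES-CBC encryption and hand it to the driver.  Submission normally goes
 * through ccp_enqueue_cmd() (declared in <linux/ccp.h>), which queues the
 * command and eventually reaches ccp_run_cmd() above; callback handling,
 * backlog flags and scatterlist setup are simplified here, and the
 * key_sg/iv_sg/src_sg/dst_sg variables are assumed to be scatterlists
 * prepared by the caller.
 *
 *	struct ccp_cmd cmd;
 *	int ret;
 *
 *	memset(&cmd, 0, sizeof(cmd));
 *	cmd.engine = CCP_ENGINE_AES;
 *	cmd.u.aes.type = CCP_AES_TYPE_128;
 *	cmd.u.aes.mode = CCP_AES_MODE_CBC;
 *	cmd.u.aes.action = CCP_AES_ACTION_ENCRYPT;
 *	cmd.u.aes.key = key_sg;			// scatterlist holding the key
 *	cmd.u.aes.key_len = AES_KEYSIZE_128;
 *	cmd.u.aes.iv = iv_sg;			// scatterlist holding the IV
 *	cmd.u.aes.iv_len = AES_BLOCK_SIZE;
 *	cmd.u.aes.src = src_sg;
 *	cmd.u.aes.src_len = src_len;		// multiple of AES_BLOCK_SIZE for CBC
 *	cmd.u.aes.dst = dst_sg;
 *	cmd.callback = my_callback;		// void (*)(void *data, int err)
 *	cmd.data = my_ctx;
 *
 *	ret = ccp_enqueue_cmd(&cmd);		// typically -EINPROGRESS when queued
 */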