xref: /openbmc/linux/drivers/crypto/ccp/ccp-ops.c (revision a06c488d)
1 /*
2  * AMD Cryptographic Coprocessor (CCP) driver
3  *
4  * Copyright (C) 2013 Advanced Micro Devices, Inc.
5  *
6  * Author: Tom Lendacky <thomas.lendacky@amd.com>
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 as
10  * published by the Free Software Foundation.
11  */
12 
13 #include <linux/module.h>
14 #include <linux/kernel.h>
15 #include <linux/pci.h>
16 #include <linux/pci_ids.h>
17 #include <linux/kthread.h>
18 #include <linux/sched.h>
19 #include <linux/interrupt.h>
20 #include <linux/spinlock.h>
21 #include <linux/mutex.h>
22 #include <linux/delay.h>
23 #include <linux/ccp.h>
24 #include <linux/scatterlist.h>
25 #include <crypto/scatterwalk.h>
26 #include <crypto/sha.h>
27 
28 #include "ccp-dev.h"
29 
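/*
 * Editorial note: the types below are local bookkeeping for building CCP
 * operations.  A ccp_memtype records whether an operation's source or
 * destination lives in system memory (addressed by DMA) or in the key
 * storage block (KSB); the workarea structures track the scatterlist
 * being walked and the bounce buffer used when data must be staged
 * before DMA.
 */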
30 enum ccp_memtype {
31 	CCP_MEMTYPE_SYSTEM = 0,
32 	CCP_MEMTYPE_KSB,
33 	CCP_MEMTYPE_LOCAL,
34 	CCP_MEMTYPE__LAST,
35 };
36 
37 struct ccp_dma_info {
38 	dma_addr_t address;
39 	unsigned int offset;
40 	unsigned int length;
41 	enum dma_data_direction dir;
42 };
43 
44 struct ccp_dm_workarea {
45 	struct device *dev;
46 	struct dma_pool *dma_pool;
47 	unsigned int length;
48 
49 	u8 *address;
50 	struct ccp_dma_info dma;
51 };
52 
53 struct ccp_sg_workarea {
54 	struct scatterlist *sg;
55 	int nents;
56 
57 	struct scatterlist *dma_sg;
58 	struct device *dma_dev;
59 	unsigned int dma_count;
60 	enum dma_data_direction dma_dir;
61 
62 	unsigned int sg_used;
63 
64 	u64 bytes_left;
65 };
66 
67 struct ccp_data {
68 	struct ccp_sg_workarea sg_wa;
69 	struct ccp_dm_workarea dm_wa;
70 };
71 
72 struct ccp_mem {
73 	enum ccp_memtype type;
74 	union {
75 		struct ccp_dma_info dma;
76 		u32 ksb;
77 	} u;
78 };
79 
80 struct ccp_aes_op {
81 	enum ccp_aes_type type;
82 	enum ccp_aes_mode mode;
83 	enum ccp_aes_action action;
84 };
85 
86 struct ccp_xts_aes_op {
87 	enum ccp_aes_action action;
88 	enum ccp_xts_aes_unit_size unit_size;
89 };
90 
91 struct ccp_sha_op {
92 	enum ccp_sha_type type;
93 	u64 msg_bits;
94 };
95 
96 struct ccp_rsa_op {
97 	u32 mod_size;
98 	u32 input_len;
99 };
100 
101 struct ccp_passthru_op {
102 	enum ccp_passthru_bitwise bit_mod;
103 	enum ccp_passthru_byteswap byte_swap;
104 };
105 
106 struct ccp_ecc_op {
107 	enum ccp_ecc_function function;
108 };
109 
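/*
 * Editorial note: a ccp_op describes one hardware operation on a command
 * queue - the KSB entries holding the key and context, the source and
 * destination memory, and the engine-specific parameters.  The
 * ccp_perform_*() helpers below translate a ccp_op into the six
 * REQ1-REQ6 register values that ccp_do_cmd() writes to the device.
 */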
110 struct ccp_op {
111 	struct ccp_cmd_queue *cmd_q;
112 
113 	u32 jobid;
114 	u32 ioc;
115 	u32 soc;
116 	u32 ksb_key;
117 	u32 ksb_ctx;
118 	u32 init;
119 	u32 eom;
120 
121 	struct ccp_mem src;
122 	struct ccp_mem dst;
123 
124 	union {
125 		struct ccp_aes_op aes;
126 		struct ccp_xts_aes_op xts;
127 		struct ccp_sha_op sha;
128 		struct ccp_rsa_op rsa;
129 		struct ccp_passthru_op passthru;
130 		struct ccp_ecc_op ecc;
131 	} u;
132 };
133 
134 /* SHA initial context values */
135 static const __be32 ccp_sha1_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
136 	cpu_to_be32(SHA1_H0), cpu_to_be32(SHA1_H1),
137 	cpu_to_be32(SHA1_H2), cpu_to_be32(SHA1_H3),
138 	cpu_to_be32(SHA1_H4), 0, 0, 0,
139 };
140 
141 static const __be32 ccp_sha224_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
142 	cpu_to_be32(SHA224_H0), cpu_to_be32(SHA224_H1),
143 	cpu_to_be32(SHA224_H2), cpu_to_be32(SHA224_H3),
144 	cpu_to_be32(SHA224_H4), cpu_to_be32(SHA224_H5),
145 	cpu_to_be32(SHA224_H6), cpu_to_be32(SHA224_H7),
146 };
147 
148 static const __be32 ccp_sha256_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
149 	cpu_to_be32(SHA256_H0), cpu_to_be32(SHA256_H1),
150 	cpu_to_be32(SHA256_H2), cpu_to_be32(SHA256_H3),
151 	cpu_to_be32(SHA256_H4), cpu_to_be32(SHA256_H5),
152 	cpu_to_be32(SHA256_H6), cpu_to_be32(SHA256_H7),
153 };
154 
155 static u32 ccp_addr_lo(struct ccp_dma_info *info)
156 {
157 	return lower_32_bits(info->address + info->offset);
158 }
159 
160 static u32 ccp_addr_hi(struct ccp_dma_info *info)
161 {
162 	return upper_32_bits(info->address + info->offset) & 0x0000ffff;
163 }
164 
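/*
 * Editorial note: ccp_do_cmd() submits one operation to the hardware.
 * The REQ1-REQx words are written under req_mutex, then writing REQ0
 * starts the job.  If an interrupt on completion was requested the call
 * sleeps until the queue signals int_rcvd; on error all related jobs are
 * deleted from the queue via the DEL_CMD_Q_JOB register, and on
 * stop-on-complete only the head job is deleted.
 */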
165 static int ccp_do_cmd(struct ccp_op *op, u32 *cr, unsigned int cr_count)
166 {
167 	struct ccp_cmd_queue *cmd_q = op->cmd_q;
168 	struct ccp_device *ccp = cmd_q->ccp;
169 	void __iomem *cr_addr;
170 	u32 cr0, cmd;
171 	unsigned int i;
172 	int ret = 0;
173 
174 	/* We could read a status register to see how many free slots
175 	 * are actually available, but reading that register resets it
176 	 * and you could lose some error information.
177 	 */
178 	cmd_q->free_slots--;
179 
180 	cr0 = (cmd_q->id << REQ0_CMD_Q_SHIFT)
181 	      | (op->jobid << REQ0_JOBID_SHIFT)
182 	      | REQ0_WAIT_FOR_WRITE;
183 
184 	if (op->soc)
185 		cr0 |= REQ0_STOP_ON_COMPLETE
186 		       | REQ0_INT_ON_COMPLETE;
187 
188 	if (op->ioc || !cmd_q->free_slots)
189 		cr0 |= REQ0_INT_ON_COMPLETE;
190 
191 	/* Start at CMD_REQ1 */
192 	cr_addr = ccp->io_regs + CMD_REQ0 + CMD_REQ_INCR;
193 
194 	mutex_lock(&ccp->req_mutex);
195 
196 	/* Write CMD_REQ1 through CMD_REQx first */
197 	for (i = 0; i < cr_count; i++, cr_addr += CMD_REQ_INCR)
198 		iowrite32(*(cr + i), cr_addr);
199 
200 	/* Tell the CCP to start */
201 	wmb();
202 	iowrite32(cr0, ccp->io_regs + CMD_REQ0);
203 
204 	mutex_unlock(&ccp->req_mutex);
205 
206 	if (cr0 & REQ0_INT_ON_COMPLETE) {
207 		/* Wait for the job to complete */
208 		ret = wait_event_interruptible(cmd_q->int_queue,
209 					       cmd_q->int_rcvd);
210 		if (ret || cmd_q->cmd_error) {
211 			/* On error delete all related jobs from the queue */
212 			cmd = (cmd_q->id << DEL_Q_ID_SHIFT)
213 			      | op->jobid;
214 
215 			iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);
216 
217 			if (!ret)
218 				ret = -EIO;
219 		} else if (op->soc) {
220 			/* Delete just head job from the queue on SoC */
221 			cmd = DEL_Q_ACTIVE
222 			      | (cmd_q->id << DEL_Q_ID_SHIFT)
223 			      | op->jobid;
224 
225 			iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);
226 		}
227 
228 		cmd_q->free_slots = CMD_Q_DEPTH(cmd_q->q_status);
229 
230 		cmd_q->int_rcvd = 0;
231 	}
232 
233 	return ret;
234 }
235 
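/*
 * Editorial note: each ccp_perform_*() helper below fills in the
 * REQ1-REQ6 register contents for one engine (AES, XTS-AES, SHA, RSA,
 * passthru, ECC) and hands them to ccp_do_cmd().  Source and destination
 * addresses are split into lo/hi words; the hi word also carries the
 * memory type and, where used, the KSB context index.
 */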
236 static int ccp_perform_aes(struct ccp_op *op)
237 {
238 	u32 cr[6];
239 
240 	/* Fill out the register contents for REQ1 through REQ6 */
241 	cr[0] = (CCP_ENGINE_AES << REQ1_ENGINE_SHIFT)
242 		| (op->u.aes.type << REQ1_AES_TYPE_SHIFT)
243 		| (op->u.aes.mode << REQ1_AES_MODE_SHIFT)
244 		| (op->u.aes.action << REQ1_AES_ACTION_SHIFT)
245 		| (op->ksb_key << REQ1_KEY_KSB_SHIFT);
246 	cr[1] = op->src.u.dma.length - 1;
247 	cr[2] = ccp_addr_lo(&op->src.u.dma);
248 	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
249 		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
250 		| ccp_addr_hi(&op->src.u.dma);
251 	cr[4] = ccp_addr_lo(&op->dst.u.dma);
252 	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
253 		| ccp_addr_hi(&op->dst.u.dma);
254 
255 	if (op->u.aes.mode == CCP_AES_MODE_CFB)
256 		cr[0] |= ((0x7f) << REQ1_AES_CFB_SIZE_SHIFT);
257 
258 	if (op->eom)
259 		cr[0] |= REQ1_EOM;
260 
261 	if (op->init)
262 		cr[0] |= REQ1_INIT;
263 
264 	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
265 }
266 
267 static int ccp_perform_xts_aes(struct ccp_op *op)
268 {
269 	u32 cr[6];
270 
271 	/* Fill out the register contents for REQ1 through REQ6 */
272 	cr[0] = (CCP_ENGINE_XTS_AES_128 << REQ1_ENGINE_SHIFT)
273 		| (op->u.xts.action << REQ1_AES_ACTION_SHIFT)
274 		| (op->u.xts.unit_size << REQ1_XTS_AES_SIZE_SHIFT)
275 		| (op->ksb_key << REQ1_KEY_KSB_SHIFT);
276 	cr[1] = op->src.u.dma.length - 1;
277 	cr[2] = ccp_addr_lo(&op->src.u.dma);
278 	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
279 		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
280 		| ccp_addr_hi(&op->src.u.dma);
281 	cr[4] = ccp_addr_lo(&op->dst.u.dma);
282 	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
283 		| ccp_addr_hi(&op->dst.u.dma);
284 
285 	if (op->eom)
286 		cr[0] |= REQ1_EOM;
287 
288 	if (op->init)
289 		cr[0] |= REQ1_INIT;
290 
291 	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
292 }
293 
294 static int ccp_perform_sha(struct ccp_op *op)
295 {
296 	u32 cr[6];
297 
298 	/* Fill out the register contents for REQ1 through REQ6 */
299 	cr[0] = (CCP_ENGINE_SHA << REQ1_ENGINE_SHIFT)
300 		| (op->u.sha.type << REQ1_SHA_TYPE_SHIFT)
301 		| REQ1_INIT;
302 	cr[1] = op->src.u.dma.length - 1;
303 	cr[2] = ccp_addr_lo(&op->src.u.dma);
304 	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
305 		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
306 		| ccp_addr_hi(&op->src.u.dma);
307 
308 	if (op->eom) {
309 		cr[0] |= REQ1_EOM;
310 		cr[4] = lower_32_bits(op->u.sha.msg_bits);
311 		cr[5] = upper_32_bits(op->u.sha.msg_bits);
312 	} else {
313 		cr[4] = 0;
314 		cr[5] = 0;
315 	}
316 
317 	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
318 }
319 
320 static int ccp_perform_rsa(struct ccp_op *op)
321 {
322 	u32 cr[6];
323 
324 	/* Fill out the register contents for REQ1 through REQ6 */
325 	cr[0] = (CCP_ENGINE_RSA << REQ1_ENGINE_SHIFT)
326 		| (op->u.rsa.mod_size << REQ1_RSA_MOD_SIZE_SHIFT)
327 		| (op->ksb_key << REQ1_KEY_KSB_SHIFT)
328 		| REQ1_EOM;
329 	cr[1] = op->u.rsa.input_len - 1;
330 	cr[2] = ccp_addr_lo(&op->src.u.dma);
331 	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
332 		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
333 		| ccp_addr_hi(&op->src.u.dma);
334 	cr[4] = ccp_addr_lo(&op->dst.u.dma);
335 	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
336 		| ccp_addr_hi(&op->dst.u.dma);
337 
338 	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
339 }
340 
341 static int ccp_perform_passthru(struct ccp_op *op)
342 {
343 	u32 cr[6];
344 
345 	/* Fill out the register contents for REQ1 through REQ6 */
346 	cr[0] = (CCP_ENGINE_PASSTHRU << REQ1_ENGINE_SHIFT)
347 		| (op->u.passthru.bit_mod << REQ1_PT_BW_SHIFT)
348 		| (op->u.passthru.byte_swap << REQ1_PT_BS_SHIFT);
349 
350 	if (op->src.type == CCP_MEMTYPE_SYSTEM)
351 		cr[1] = op->src.u.dma.length - 1;
352 	else
353 		cr[1] = op->dst.u.dma.length - 1;
354 
355 	if (op->src.type == CCP_MEMTYPE_SYSTEM) {
356 		cr[2] = ccp_addr_lo(&op->src.u.dma);
357 		cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
358 			| ccp_addr_hi(&op->src.u.dma);
359 
360 		if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
361 			cr[3] |= (op->ksb_key << REQ4_KSB_SHIFT);
362 	} else {
363 		cr[2] = op->src.u.ksb * CCP_KSB_BYTES;
364 		cr[3] = (CCP_MEMTYPE_KSB << REQ4_MEMTYPE_SHIFT);
365 	}
366 
367 	if (op->dst.type == CCP_MEMTYPE_SYSTEM) {
368 		cr[4] = ccp_addr_lo(&op->dst.u.dma);
369 		cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
370 			| ccp_addr_hi(&op->dst.u.dma);
371 	} else {
372 		cr[4] = op->dst.u.ksb * CCP_KSB_BYTES;
373 		cr[5] = (CCP_MEMTYPE_KSB << REQ6_MEMTYPE_SHIFT);
374 	}
375 
376 	if (op->eom)
377 		cr[0] |= REQ1_EOM;
378 
379 	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
380 }
381 
382 static int ccp_perform_ecc(struct ccp_op *op)
383 {
384 	u32 cr[6];
385 
386 	/* Fill out the register contents for REQ1 through REQ6 */
387 	cr[0] = REQ1_ECC_AFFINE_CONVERT
388 		| (CCP_ENGINE_ECC << REQ1_ENGINE_SHIFT)
389 		| (op->u.ecc.function << REQ1_ECC_FUNCTION_SHIFT)
390 		| REQ1_EOM;
391 	cr[1] = op->src.u.dma.length - 1;
392 	cr[2] = ccp_addr_lo(&op->src.u.dma);
393 	cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
394 		| ccp_addr_hi(&op->src.u.dma);
395 	cr[4] = ccp_addr_lo(&op->dst.u.dma);
396 	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
397 		| ccp_addr_hi(&op->dst.u.dma);
398 
399 	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
400 }
401 
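/*
 * Editorial note: KSB (key storage block) entries are allocated from a
 * bitmap shared by the whole device.  ccp_alloc_ksb() sleeps until
 * 'count' contiguous entries are free and returns the first entry index
 * (offset by KSB_START), or 0 if the wait was interrupted;
 * ccp_free_ksb() clears the entries and wakes any waiters.
 */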
402 static u32 ccp_alloc_ksb(struct ccp_device *ccp, unsigned int count)
403 {
404 	int start;
405 
406 	for (;;) {
407 		mutex_lock(&ccp->ksb_mutex);
408 
409 		start = (u32)bitmap_find_next_zero_area(ccp->ksb,
410 							ccp->ksb_count,
411 							ccp->ksb_start,
412 							count, 0);
413 		if (start <= ccp->ksb_count) {
414 			bitmap_set(ccp->ksb, start, count);
415 
416 			mutex_unlock(&ccp->ksb_mutex);
417 			break;
418 		}
419 
420 		ccp->ksb_avail = 0;
421 
422 		mutex_unlock(&ccp->ksb_mutex);
423 
424 		/* Wait for KSB entries to become available */
425 		if (wait_event_interruptible(ccp->ksb_queue, ccp->ksb_avail))
426 			return 0;
427 	}
428 
429 	return KSB_START + start;
430 }
431 
432 static void ccp_free_ksb(struct ccp_device *ccp, unsigned int start,
433 			 unsigned int count)
434 {
435 	if (!start)
436 		return;
437 
438 	mutex_lock(&ccp->ksb_mutex);
439 
440 	bitmap_clear(ccp->ksb, start - KSB_START, count);
441 
442 	ccp->ksb_avail = 1;
443 
444 	mutex_unlock(&ccp->ksb_mutex);
445 
446 	wake_up_interruptible_all(&ccp->ksb_queue);
447 }
448 
449 static u32 ccp_gen_jobid(struct ccp_device *ccp)
450 {
451 	return atomic_inc_return(&ccp->current_id) & CCP_JOBID_MASK;
452 }
453 
454 static void ccp_sg_free(struct ccp_sg_workarea *wa)
455 {
456 	if (wa->dma_count)
457 		dma_unmap_sg(wa->dma_dev, wa->dma_sg, wa->nents, wa->dma_dir);
458 
459 	wa->dma_count = 0;
460 }
461 
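/*
 * Editorial note: set up a scatterlist workarea - count the entries
 * needed to cover 'len' bytes and, unless the direction is DMA_NONE, map
 * them for DMA.  The workarea tracks how far into the current sg entry
 * the operation has progressed (sg_used) and how many bytes remain
 * overall (bytes_left).
 */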
462 static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev,
463 				struct scatterlist *sg, u64 len,
464 				enum dma_data_direction dma_dir)
465 {
466 	memset(wa, 0, sizeof(*wa));
467 
468 	wa->sg = sg;
469 	if (!sg)
470 		return 0;
471 
472 	wa->nents = sg_nents_for_len(sg, len);
473 	if (wa->nents < 0)
474 		return wa->nents;
475 
476 	wa->bytes_left = len;
477 	wa->sg_used = 0;
478 
479 	if (len == 0)
480 		return 0;
481 
482 	if (dma_dir == DMA_NONE)
483 		return 0;
484 
485 	wa->dma_sg = sg;
486 	wa->dma_dev = dev;
487 	wa->dma_dir = dma_dir;
488 	wa->dma_count = dma_map_sg(dev, sg, wa->nents, dma_dir);
489 	if (!wa->dma_count)
490 		return -ENOMEM;
491 
492 	return 0;
493 }
494 
495 static void ccp_update_sg_workarea(struct ccp_sg_workarea *wa, unsigned int len)
496 {
497 	unsigned int nbytes = min_t(u64, len, wa->bytes_left);
498 
499 	if (!wa->sg)
500 		return;
501 
502 	wa->sg_used += nbytes;
503 	wa->bytes_left -= nbytes;
504 	if (wa->sg_used == wa->sg->length) {
505 		wa->sg = sg_next(wa->sg);
506 		wa->sg_used = 0;
507 	}
508 }
509 
510 static void ccp_dm_free(struct ccp_dm_workarea *wa)
511 {
512 	if (wa->length <= CCP_DMAPOOL_MAX_SIZE) {
513 		if (wa->address)
514 			dma_pool_free(wa->dma_pool, wa->address,
515 				      wa->dma.address);
516 	} else {
517 		if (wa->dma.address)
518 			dma_unmap_single(wa->dev, wa->dma.address, wa->length,
519 					 wa->dma.dir);
520 		kfree(wa->address);
521 	}
522 
523 	wa->address = NULL;
524 	wa->dma.address = 0;
525 }
526 
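/*
 * Editorial note: set up a DMA-able bounce buffer ("data management"
 * workarea).  Buffers of up to CCP_DMAPOOL_MAX_SIZE bytes come from the
 * command queue's DMA pool; anything larger is allocated with kzalloc()
 * and mapped with dma_map_single().  ccp_dm_free() undoes whichever path
 * was taken.
 */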
527 static int ccp_init_dm_workarea(struct ccp_dm_workarea *wa,
528 				struct ccp_cmd_queue *cmd_q,
529 				unsigned int len,
530 				enum dma_data_direction dir)
531 {
532 	memset(wa, 0, sizeof(*wa));
533 
534 	if (!len)
535 		return 0;
536 
537 	wa->dev = cmd_q->ccp->dev;
538 	wa->length = len;
539 
540 	if (len <= CCP_DMAPOOL_MAX_SIZE) {
541 		wa->dma_pool = cmd_q->dma_pool;
542 
543 		wa->address = dma_pool_alloc(wa->dma_pool, GFP_KERNEL,
544 					     &wa->dma.address);
545 		if (!wa->address)
546 			return -ENOMEM;
547 
548 		wa->dma.length = CCP_DMAPOOL_MAX_SIZE;
549 
550 		memset(wa->address, 0, CCP_DMAPOOL_MAX_SIZE);
551 	} else {
552 		wa->address = kzalloc(len, GFP_KERNEL);
553 		if (!wa->address)
554 			return -ENOMEM;
555 
556 		wa->dma.address = dma_map_single(wa->dev, wa->address, len,
557 						 dir);
558 		if (!wa->dma.address)
559 			return -ENOMEM;
560 
561 		wa->dma.length = len;
562 	}
563 	wa->dma.dir = dir;
564 
565 	return 0;
566 }
567 
568 static void ccp_set_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
569 			    struct scatterlist *sg, unsigned int sg_offset,
570 			    unsigned int len)
571 {
572 	WARN_ON(!wa->address);
573 
574 	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
575 				 0);
576 }
577 
578 static void ccp_get_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
579 			    struct scatterlist *sg, unsigned int sg_offset,
580 			    unsigned int len)
581 {
582 	WARN_ON(!wa->address);
583 
584 	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
585 				 1);
586 }
587 
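/*
 * Editorial note: copy big-endian data from a scatterlist into the
 * workarea with the byte order reversed, working backwards through the
 * source in se_len-sized chunks so the result is little endian, as the
 * RSA and ECC operands require.  Illustrative example only: an 8-byte
 * value with bytes 01 02 03 04 05 06 07 08 copied with se_len >= 8 ends
 * up in the workarea as 08 07 06 05 04 03 02 01.  When sign_extend is
 * set, a short final chunk whose most significant byte has the top bit
 * set is padded out with 0xff bytes.
 */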
588 static int ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa,
589 				   struct scatterlist *sg,
590 				   unsigned int len, unsigned int se_len,
591 				   bool sign_extend)
592 {
593 	unsigned int nbytes, sg_offset, dm_offset, ksb_len, i;
594 	u8 buffer[CCP_REVERSE_BUF_SIZE];
595 
596 	if (WARN_ON(se_len > sizeof(buffer)))
597 		return -EINVAL;
598 
599 	sg_offset = len;
600 	dm_offset = 0;
601 	nbytes = len;
602 	while (nbytes) {
603 		ksb_len = min_t(unsigned int, nbytes, se_len);
604 		sg_offset -= ksb_len;
605 
606 		scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 0);
607 		for (i = 0; i < ksb_len; i++)
608 			wa->address[dm_offset + i] = buffer[ksb_len - i - 1];
609 
610 		dm_offset += ksb_len;
611 		nbytes -= ksb_len;
612 
613 		if ((ksb_len != se_len) && sign_extend) {
614 			/* Must sign-extend to nearest sign-extend length */
615 			if (wa->address[dm_offset - 1] & 0x80)
616 				memset(wa->address + dm_offset, 0xff,
617 				       se_len - ksb_len);
618 		}
619 	}
620 
621 	return 0;
622 }
623 
624 static void ccp_reverse_get_dm_area(struct ccp_dm_workarea *wa,
625 				    struct scatterlist *sg,
626 				    unsigned int len)
627 {
628 	unsigned int nbytes, sg_offset, dm_offset, ksb_len, i;
629 	u8 buffer[CCP_REVERSE_BUF_SIZE];
630 
631 	sg_offset = 0;
632 	dm_offset = len;
633 	nbytes = len;
634 	while (nbytes) {
635 		ksb_len = min_t(unsigned int, nbytes, sizeof(buffer));
636 		dm_offset -= ksb_len;
637 
638 		for (i = 0; i < ksb_len; i++)
639 			buffer[ksb_len - i - 1] = wa->address[dm_offset + i];
640 		scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 1);
641 
642 		sg_offset += ksb_len;
643 		nbytes -= ksb_len;
644 	}
645 }
646 
647 static void ccp_free_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q)
648 {
649 	ccp_dm_free(&data->dm_wa);
650 	ccp_sg_free(&data->sg_wa);
651 }
652 
653 static int ccp_init_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q,
654 			 struct scatterlist *sg, u64 sg_len,
655 			 unsigned int dm_len,
656 			 enum dma_data_direction dir)
657 {
658 	int ret;
659 
660 	memset(data, 0, sizeof(*data));
661 
662 	ret = ccp_init_sg_workarea(&data->sg_wa, cmd_q->ccp->dev, sg, sg_len,
663 				   dir);
664 	if (ret)
665 		goto e_err;
666 
667 	ret = ccp_init_dm_workarea(&data->dm_wa, cmd_q, dm_len, dir);
668 	if (ret)
669 		goto e_err;
670 
671 	return 0;
672 
673 e_err:
674 	ccp_free_data(data, cmd_q);
675 
676 	return ret;
677 }
678 
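/*
 * Editorial note: move data between the scatterlist workarea and the
 * bounce buffer.  'from' == 0 fills the (zeroed) buffer from the
 * scatterlist, 'from' != 0 empties the buffer back out to it.  Returns
 * the number of bytes the scatterlist workarea advanced, which the
 * fill/empty wrappers below report.
 */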
679 static unsigned int ccp_queue_buf(struct ccp_data *data, unsigned int from)
680 {
681 	struct ccp_sg_workarea *sg_wa = &data->sg_wa;
682 	struct ccp_dm_workarea *dm_wa = &data->dm_wa;
683 	unsigned int buf_count, nbytes;
684 
685 	/* Clear the buffer if setting it */
686 	if (!from)
687 		memset(dm_wa->address, 0, dm_wa->length);
688 
689 	if (!sg_wa->sg)
690 		return 0;
691 
692 	/* Perform the copy operation
693 	 *   nbytes will always be <= UINT_MAX because dm_wa->length is
694 	 *   an unsigned int
695 	 */
696 	nbytes = min_t(u64, sg_wa->bytes_left, dm_wa->length);
697 	scatterwalk_map_and_copy(dm_wa->address, sg_wa->sg, sg_wa->sg_used,
698 				 nbytes, from);
699 
700 	/* Update the structures and generate the count */
701 	buf_count = 0;
702 	while (sg_wa->bytes_left && (buf_count < dm_wa->length)) {
703 		nbytes = min(sg_wa->sg->length - sg_wa->sg_used,
704 			     dm_wa->length - buf_count);
705 		nbytes = min_t(u64, sg_wa->bytes_left, nbytes);
706 
707 		buf_count += nbytes;
708 		ccp_update_sg_workarea(sg_wa, nbytes);
709 	}
710 
711 	return buf_count;
712 }
713 
714 static unsigned int ccp_fill_queue_buf(struct ccp_data *data)
715 {
716 	return ccp_queue_buf(data, 0);
717 }
718 
719 static unsigned int ccp_empty_queue_buf(struct ccp_data *data)
720 {
721 	return ccp_queue_buf(data, 1);
722 }
723 
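/*
 * Editorial note: choose the source and destination DMA windows for the
 * next operation.  When the current source sg entry holds at least
 * block_size bytes the hardware reads it directly (length rounded down
 * to a multiple of block_size); otherwise the data is staged through the
 * bounce buffer and op->soc is set so the command completes before the
 * shared bounce buffer is reused.  The destination is handled the same
 * way, writing into its bounce buffer when the sg entry has less than
 * block_size of room.
 */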
724 static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst,
725 			     struct ccp_op *op, unsigned int block_size,
726 			     bool blocksize_op)
727 {
728 	unsigned int sg_src_len, sg_dst_len, op_len;
729 
730 	/* The CCP can only DMA from/to one source address and one destination
731 	 * address per operation, so the operation length is limited by whichever
732 	 * of the two has less contiguous room. The resulting len values will
733 	 * always be <= UINT_MAX because the dma length is an unsigned int.
734 	 */
735 	sg_src_len = sg_dma_len(src->sg_wa.sg) - src->sg_wa.sg_used;
736 	sg_src_len = min_t(u64, src->sg_wa.bytes_left, sg_src_len);
737 
738 	if (dst) {
739 		sg_dst_len = sg_dma_len(dst->sg_wa.sg) - dst->sg_wa.sg_used;
740 		sg_dst_len = min_t(u64, src->sg_wa.bytes_left, sg_dst_len);
741 		op_len = min(sg_src_len, sg_dst_len);
742 	} else {
743 		op_len = sg_src_len;
744 	}
745 
746 	/* The data operation length is the smaller of the sg room remaining
747 	 * for the source or the destination, but never less than
748 	 * block_size
749 	 */
750 	op_len = max(op_len, block_size);
751 
752 	/* Unless we have to buffer data, there's no reason to wait */
753 	op->soc = 0;
754 
755 	if (sg_src_len < block_size) {
756 		/* Not enough data in the sg element, so it
757 		 * needs to be buffered into a blocksize chunk
758 		 */
759 		int cp_len = ccp_fill_queue_buf(src);
760 
761 		op->soc = 1;
762 		op->src.u.dma.address = src->dm_wa.dma.address;
763 		op->src.u.dma.offset = 0;
764 		op->src.u.dma.length = (blocksize_op) ? block_size : cp_len;
765 	} else {
766 		/* Enough data in the sg element, but we need to
767 		 * adjust for any previously copied data
768 		 */
769 		op->src.u.dma.address = sg_dma_address(src->sg_wa.sg);
770 		op->src.u.dma.offset = src->sg_wa.sg_used;
771 		op->src.u.dma.length = op_len & ~(block_size - 1);
772 
773 		ccp_update_sg_workarea(&src->sg_wa, op->src.u.dma.length);
774 	}
775 
776 	if (dst) {
777 		if (sg_dst_len < block_size) {
778 			/* Not enough room in the sg element or we're on the
779 			 * last piece of data (when using padding), so the
780 			 * output needs to be buffered into a blocksize chunk
781 			 */
782 			op->soc = 1;
783 			op->dst.u.dma.address = dst->dm_wa.dma.address;
784 			op->dst.u.dma.offset = 0;
785 			op->dst.u.dma.length = op->src.u.dma.length;
786 		} else {
787 			/* Enough room in the sg element, but we need to
788 			 * adjust for any previously used area
789 			 */
790 			op->dst.u.dma.address = sg_dma_address(dst->sg_wa.sg);
791 			op->dst.u.dma.offset = dst->sg_wa.sg_used;
792 			op->dst.u.dma.length = op->src.u.dma.length;
793 		}
794 	}
795 }
796 
797 static void ccp_process_data(struct ccp_data *src, struct ccp_data *dst,
798 			     struct ccp_op *op)
799 {
800 	op->init = 0;
801 
802 	if (dst) {
803 		if (op->dst.u.dma.address == dst->dm_wa.dma.address)
804 			ccp_empty_queue_buf(dst);
805 		else
806 			ccp_update_sg_workarea(&dst->sg_wa,
807 					       op->dst.u.dma.length);
808 	}
809 }
810 
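/*
 * Editorial note: use a passthru operation to copy a workarea into a KSB
 * entry (or back out of one), optionally byte-swapping on the way.  This
 * is how keys, IVs and hash contexts are loaded before an operation and
 * retrieved afterwards.
 */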
811 static int ccp_copy_to_from_ksb(struct ccp_cmd_queue *cmd_q,
812 				struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
813 				u32 byte_swap, bool from)
814 {
815 	struct ccp_op op;
816 
817 	memset(&op, 0, sizeof(op));
818 
819 	op.cmd_q = cmd_q;
820 	op.jobid = jobid;
821 	op.eom = 1;
822 
823 	if (from) {
824 		op.soc = 1;
825 		op.src.type = CCP_MEMTYPE_KSB;
826 		op.src.u.ksb = ksb;
827 		op.dst.type = CCP_MEMTYPE_SYSTEM;
828 		op.dst.u.dma.address = wa->dma.address;
829 		op.dst.u.dma.length = wa->length;
830 	} else {
831 		op.src.type = CCP_MEMTYPE_SYSTEM;
832 		op.src.u.dma.address = wa->dma.address;
833 		op.src.u.dma.length = wa->length;
834 		op.dst.type = CCP_MEMTYPE_KSB;
835 		op.dst.u.ksb = ksb;
836 	}
837 
838 	op.u.passthru.byte_swap = byte_swap;
839 
840 	return ccp_perform_passthru(&op);
841 }
842 
843 static int ccp_copy_to_ksb(struct ccp_cmd_queue *cmd_q,
844 			   struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
845 			   u32 byte_swap)
846 {
847 	return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, false);
848 }
849 
850 static int ccp_copy_from_ksb(struct ccp_cmd_queue *cmd_q,
851 			     struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
852 			     u32 byte_swap)
853 {
854 	return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, true);
855 }
856 
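/*
 * Editorial note: AES-CMAC flow - the key and the running context passed
 * in via aes->iv are loaded into the queue's KSB entries, the message is
 * streamed through the AES engine one AES_BLOCK_SIZE chunk at a time,
 * and on the final chunk the caller-supplied K1/K2 subkey is written
 * into the context KSB entry before the last operation.  The resulting
 * MAC is read back out of the context entry into aes->iv.
 */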
857 static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q,
858 				struct ccp_cmd *cmd)
859 {
860 	struct ccp_aes_engine *aes = &cmd->u.aes;
861 	struct ccp_dm_workarea key, ctx;
862 	struct ccp_data src;
863 	struct ccp_op op;
864 	unsigned int dm_offset;
865 	int ret;
866 
867 	if (!((aes->key_len == AES_KEYSIZE_128) ||
868 	      (aes->key_len == AES_KEYSIZE_192) ||
869 	      (aes->key_len == AES_KEYSIZE_256)))
870 		return -EINVAL;
871 
872 	if (aes->src_len & (AES_BLOCK_SIZE - 1))
873 		return -EINVAL;
874 
875 	if (aes->iv_len != AES_BLOCK_SIZE)
876 		return -EINVAL;
877 
878 	if (!aes->key || !aes->iv || !aes->src)
879 		return -EINVAL;
880 
881 	if (aes->cmac_final) {
882 		if (aes->cmac_key_len != AES_BLOCK_SIZE)
883 			return -EINVAL;
884 
885 		if (!aes->cmac_key)
886 			return -EINVAL;
887 	}
888 
889 	BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1);
890 	BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1);
891 
892 	ret = -EIO;
893 	memset(&op, 0, sizeof(op));
894 	op.cmd_q = cmd_q;
895 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
896 	op.ksb_key = cmd_q->ksb_key;
897 	op.ksb_ctx = cmd_q->ksb_ctx;
898 	op.init = 1;
899 	op.u.aes.type = aes->type;
900 	op.u.aes.mode = aes->mode;
901 	op.u.aes.action = aes->action;
902 
903 	/* All supported key sizes fit in a single (32-byte) KSB entry
904 	 * and must be in little endian format. Use the 256-bit byte
905 	 * swap passthru option to convert from big endian to little
906 	 * endian.
907 	 */
908 	ret = ccp_init_dm_workarea(&key, cmd_q,
909 				   CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
910 				   DMA_TO_DEVICE);
911 	if (ret)
912 		return ret;
913 
914 	dm_offset = CCP_KSB_BYTES - aes->key_len;
915 	ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
916 	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
917 			      CCP_PASSTHRU_BYTESWAP_256BIT);
918 	if (ret) {
919 		cmd->engine_error = cmd_q->cmd_error;
920 		goto e_key;
921 	}
922 
923 	/* The AES context fits in a single (32-byte) KSB entry and
924 	 * must be in little endian format. Use the 256-bit byte swap
925 	 * passthru option to convert from big endian to little endian.
926 	 */
927 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
928 				   CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
929 				   DMA_BIDIRECTIONAL);
930 	if (ret)
931 		goto e_key;
932 
933 	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
934 	ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
935 	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
936 			      CCP_PASSTHRU_BYTESWAP_256BIT);
937 	if (ret) {
938 		cmd->engine_error = cmd_q->cmd_error;
939 		goto e_ctx;
940 	}
941 
942 	/* Send data to the CCP AES engine */
943 	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
944 			    AES_BLOCK_SIZE, DMA_TO_DEVICE);
945 	if (ret)
946 		goto e_ctx;
947 
948 	while (src.sg_wa.bytes_left) {
949 		ccp_prepare_data(&src, NULL, &op, AES_BLOCK_SIZE, true);
950 		if (aes->cmac_final && !src.sg_wa.bytes_left) {
951 			op.eom = 1;
952 
953 			/* Push the K1/K2 key to the CCP now */
954 			ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid,
955 						op.ksb_ctx,
956 						CCP_PASSTHRU_BYTESWAP_256BIT);
957 			if (ret) {
958 				cmd->engine_error = cmd_q->cmd_error;
959 				goto e_src;
960 			}
961 
962 			ccp_set_dm_area(&ctx, 0, aes->cmac_key, 0,
963 					aes->cmac_key_len);
964 			ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
965 					      CCP_PASSTHRU_BYTESWAP_256BIT);
966 			if (ret) {
967 				cmd->engine_error = cmd_q->cmd_error;
968 				goto e_src;
969 			}
970 		}
971 
972 		ret = ccp_perform_aes(&op);
973 		if (ret) {
974 			cmd->engine_error = cmd_q->cmd_error;
975 			goto e_src;
976 		}
977 
978 		ccp_process_data(&src, NULL, &op);
979 	}
980 
981 	/* Retrieve the AES context - convert from LE to BE using
982 	 * 32-byte (256-bit) byteswapping
983 	 */
984 	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
985 				CCP_PASSTHRU_BYTESWAP_256BIT);
986 	if (ret) {
987 		cmd->engine_error = cmd_q->cmd_error;
988 		goto e_src;
989 	}
990 
991 	/* ...but we only need AES_BLOCK_SIZE bytes */
992 	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
993 	ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
994 
995 e_src:
996 	ccp_free_data(&src, cmd_q);
997 
998 e_ctx:
999 	ccp_dm_free(&ctx);
1000 
1001 e_key:
1002 	ccp_dm_free(&key);
1003 
1004 	return ret;
1005 }
1006 
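/*
 * Editorial note: AES modes other than CMAC - load the key and, for any
 * mode other than ECB, the IV into the KSB, then walk the source
 * scatterlist one AES_BLOCK_SIZE chunk at a time, sharing the source
 * workarea with the destination for in-place requests.  After the last
 * chunk the updated IV/context is read back for the caller (again,
 * except in ECB mode).
 */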
1007 static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1008 {
1009 	struct ccp_aes_engine *aes = &cmd->u.aes;
1010 	struct ccp_dm_workarea key, ctx;
1011 	struct ccp_data src, dst;
1012 	struct ccp_op op;
1013 	unsigned int dm_offset;
1014 	bool in_place = false;
1015 	int ret;
1016 
1017 	if (aes->mode == CCP_AES_MODE_CMAC)
1018 		return ccp_run_aes_cmac_cmd(cmd_q, cmd);
1019 
1020 	if (!((aes->key_len == AES_KEYSIZE_128) ||
1021 	      (aes->key_len == AES_KEYSIZE_192) ||
1022 	      (aes->key_len == AES_KEYSIZE_256)))
1023 		return -EINVAL;
1024 
1025 	if (((aes->mode == CCP_AES_MODE_ECB) ||
1026 	     (aes->mode == CCP_AES_MODE_CBC) ||
1027 	     (aes->mode == CCP_AES_MODE_CFB)) &&
1028 	    (aes->src_len & (AES_BLOCK_SIZE - 1)))
1029 		return -EINVAL;
1030 
1031 	if (!aes->key || !aes->src || !aes->dst)
1032 		return -EINVAL;
1033 
1034 	if (aes->mode != CCP_AES_MODE_ECB) {
1035 		if (aes->iv_len != AES_BLOCK_SIZE)
1036 			return -EINVAL;
1037 
1038 		if (!aes->iv)
1039 			return -EINVAL;
1040 	}
1041 
1042 	BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1);
1043 	BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1);
1044 
1045 	ret = -EIO;
1046 	memset(&op, 0, sizeof(op));
1047 	op.cmd_q = cmd_q;
1048 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1049 	op.ksb_key = cmd_q->ksb_key;
1050 	op.ksb_ctx = cmd_q->ksb_ctx;
1051 	op.init = (aes->mode == CCP_AES_MODE_ECB) ? 0 : 1;
1052 	op.u.aes.type = aes->type;
1053 	op.u.aes.mode = aes->mode;
1054 	op.u.aes.action = aes->action;
1055 
1056 	/* All supported key sizes fit in a single (32-byte) KSB entry
1057 	 * and must be in little endian format. Use the 256-bit byte
1058 	 * swap passthru option to convert from big endian to little
1059 	 * endian.
1060 	 */
1061 	ret = ccp_init_dm_workarea(&key, cmd_q,
1062 				   CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
1063 				   DMA_TO_DEVICE);
1064 	if (ret)
1065 		return ret;
1066 
1067 	dm_offset = CCP_KSB_BYTES - aes->key_len;
1068 	ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
1069 	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
1070 			      CCP_PASSTHRU_BYTESWAP_256BIT);
1071 	if (ret) {
1072 		cmd->engine_error = cmd_q->cmd_error;
1073 		goto e_key;
1074 	}
1075 
1076 	/* The AES context fits in a single (32-byte) KSB entry and
1077 	 * must be in little endian format. Use the 256-bit byte swap
1078 	 * passthru option to convert from big endian to little endian.
1079 	 */
1080 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
1081 				   CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
1082 				   DMA_BIDIRECTIONAL);
1083 	if (ret)
1084 		goto e_key;
1085 
1086 	if (aes->mode != CCP_AES_MODE_ECB) {
1087 		/* Load the AES context - convert to LE */
1088 		dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
1089 		ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
1090 		ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1091 				      CCP_PASSTHRU_BYTESWAP_256BIT);
1092 		if (ret) {
1093 			cmd->engine_error = cmd_q->cmd_error;
1094 			goto e_ctx;
1095 		}
1096 	}
1097 
1098 	/* Prepare the input and output data workareas. For in-place
1099 	 * operations we need to set the dma direction to BIDIRECTIONAL
1100 	 * and copy the src workarea to the dst workarea.
1101 	 */
1102 	if (sg_virt(aes->src) == sg_virt(aes->dst))
1103 		in_place = true;
1104 
1105 	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
1106 			    AES_BLOCK_SIZE,
1107 			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1108 	if (ret)
1109 		goto e_ctx;
1110 
1111 	if (in_place) {
1112 		dst = src;
1113 	} else {
1114 		ret = ccp_init_data(&dst, cmd_q, aes->dst, aes->src_len,
1115 				    AES_BLOCK_SIZE, DMA_FROM_DEVICE);
1116 		if (ret)
1117 			goto e_src;
1118 	}
1119 
1120 	/* Send data to the CCP AES engine */
1121 	while (src.sg_wa.bytes_left) {
1122 		ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true);
1123 		if (!src.sg_wa.bytes_left) {
1124 			op.eom = 1;
1125 
1126 			/* Since we don't retrieve the AES context in ECB
1127 			 * mode we have to wait for the operation to complete
1128 			 * on the last piece of data
1129 			 */
1130 			if (aes->mode == CCP_AES_MODE_ECB)
1131 				op.soc = 1;
1132 		}
1133 
1134 		ret = ccp_perform_aes(&op);
1135 		if (ret) {
1136 			cmd->engine_error = cmd_q->cmd_error;
1137 			goto e_dst;
1138 		}
1139 
1140 		ccp_process_data(&src, &dst, &op);
1141 	}
1142 
1143 	if (aes->mode != CCP_AES_MODE_ECB) {
1144 		/* Retrieve the AES context - convert from LE to BE using
1145 		 * 32-byte (256-bit) byteswapping
1146 		 */
1147 		ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1148 					CCP_PASSTHRU_BYTESWAP_256BIT);
1149 		if (ret) {
1150 			cmd->engine_error = cmd_q->cmd_error;
1151 			goto e_dst;
1152 		}
1153 
1154 		/* ...but we only need AES_BLOCK_SIZE bytes */
1155 		dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
1156 		ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
1157 	}
1158 
1159 e_dst:
1160 	if (!in_place)
1161 		ccp_free_data(&dst, cmd_q);
1162 
1163 e_src:
1164 	ccp_free_data(&src, cmd_q);
1165 
1166 e_ctx:
1167 	ccp_dm_free(&ctx);
1168 
1169 e_key:
1170 	ccp_dm_free(&key);
1171 
1172 	return ret;
1173 }
1174 
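/*
 * Editorial note: XTS-AES (128-bit keys only) - the two 16-byte halves
 * of the key material in xts->key are copied into the key KSB entry in
 * swapped order, the tweak in xts->iv is loaded without byte swapping,
 * and the data is processed in units of the requested unit_size (16 up
 * to 4096 bytes).  The updated context is read back into xts->iv when
 * the command completes.
 */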
1175 static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
1176 			       struct ccp_cmd *cmd)
1177 {
1178 	struct ccp_xts_aes_engine *xts = &cmd->u.xts;
1179 	struct ccp_dm_workarea key, ctx;
1180 	struct ccp_data src, dst;
1181 	struct ccp_op op;
1182 	unsigned int unit_size, dm_offset;
1183 	bool in_place = false;
1184 	int ret;
1185 
1186 	switch (xts->unit_size) {
1187 	case CCP_XTS_AES_UNIT_SIZE_16:
1188 		unit_size = 16;
1189 		break;
1190 	case CCP_XTS_AES_UNIT_SIZE_512:
1191 		unit_size = 512;
1192 		break;
1193 	case CCP_XTS_AES_UNIT_SIZE_1024:
1194 		unit_size = 1024;
1195 		break;
1196 	case CCP_XTS_AES_UNIT_SIZE_2048:
1197 		unit_size = 2048;
1198 		break;
1199 	case CCP_XTS_AES_UNIT_SIZE_4096:
1200 		unit_size = 4096;
1201 		break;
1202 
1203 	default:
1204 		return -EINVAL;
1205 	}
1206 
1207 	if (xts->key_len != AES_KEYSIZE_128)
1208 		return -EINVAL;
1209 
1210 	if (!xts->final && (xts->src_len & (AES_BLOCK_SIZE - 1)))
1211 		return -EINVAL;
1212 
1213 	if (xts->iv_len != AES_BLOCK_SIZE)
1214 		return -EINVAL;
1215 
1216 	if (!xts->key || !xts->iv || !xts->src || !xts->dst)
1217 		return -EINVAL;
1218 
1219 	BUILD_BUG_ON(CCP_XTS_AES_KEY_KSB_COUNT != 1);
1220 	BUILD_BUG_ON(CCP_XTS_AES_CTX_KSB_COUNT != 1);
1221 
1222 	ret = -EIO;
1223 	memset(&op, 0, sizeof(op));
1224 	op.cmd_q = cmd_q;
1225 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1226 	op.ksb_key = cmd_q->ksb_key;
1227 	op.ksb_ctx = cmd_q->ksb_ctx;
1228 	op.init = 1;
1229 	op.u.xts.action = xts->action;
1230 	op.u.xts.unit_size = xts->unit_size;
1231 
1232 	/* All supported key sizes fit in a single (32-byte) KSB entry
1233 	 * and must be in little endian format. Use the 256-bit byte
1234 	 * swap passthru option to convert from big endian to little
1235 	 * endian.
1236 	 */
1237 	ret = ccp_init_dm_workarea(&key, cmd_q,
1238 				   CCP_XTS_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
1239 				   DMA_TO_DEVICE);
1240 	if (ret)
1241 		return ret;
1242 
1243 	dm_offset = CCP_KSB_BYTES - AES_KEYSIZE_128;
1244 	ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len);
1245 	ccp_set_dm_area(&key, 0, xts->key, dm_offset, xts->key_len);
1246 	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
1247 			      CCP_PASSTHRU_BYTESWAP_256BIT);
1248 	if (ret) {
1249 		cmd->engine_error = cmd_q->cmd_error;
1250 		goto e_key;
1251 	}
1252 
1253 	/* The AES context fits in a single (32-byte) KSB entry and
1254 	 * for XTS is already in little endian format so no byte swapping
1255 	 * is needed.
1256 	 */
1257 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
1258 				   CCP_XTS_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
1259 				   DMA_BIDIRECTIONAL);
1260 	if (ret)
1261 		goto e_key;
1262 
1263 	ccp_set_dm_area(&ctx, 0, xts->iv, 0, xts->iv_len);
1264 	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1265 			      CCP_PASSTHRU_BYTESWAP_NOOP);
1266 	if (ret) {
1267 		cmd->engine_error = cmd_q->cmd_error;
1268 		goto e_ctx;
1269 	}
1270 
1271 	/* Prepare the input and output data workareas. For in-place
1272 	 * operations we need to set the dma direction to BIDIRECTIONAL
1273 	 * and copy the src workarea to the dst workarea.
1274 	 */
1275 	if (sg_virt(xts->src) == sg_virt(xts->dst))
1276 		in_place = true;
1277 
1278 	ret = ccp_init_data(&src, cmd_q, xts->src, xts->src_len,
1279 			    unit_size,
1280 			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1281 	if (ret)
1282 		goto e_ctx;
1283 
1284 	if (in_place) {
1285 		dst = src;
1286 	} else {
1287 		ret = ccp_init_data(&dst, cmd_q, xts->dst, xts->src_len,
1288 				    unit_size, DMA_FROM_DEVICE);
1289 		if (ret)
1290 			goto e_src;
1291 	}
1292 
1293 	/* Send data to the CCP AES engine */
1294 	while (src.sg_wa.bytes_left) {
1295 		ccp_prepare_data(&src, &dst, &op, unit_size, true);
1296 		if (!src.sg_wa.bytes_left)
1297 			op.eom = 1;
1298 
1299 		ret = ccp_perform_xts_aes(&op);
1300 		if (ret) {
1301 			cmd->engine_error = cmd_q->cmd_error;
1302 			goto e_dst;
1303 		}
1304 
1305 		ccp_process_data(&src, &dst, &op);
1306 	}
1307 
1308 	/* Retrieve the AES context - convert from LE to BE using
1309 	 * 32-byte (256-bit) byteswapping
1310 	 */
1311 	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1312 				CCP_PASSTHRU_BYTESWAP_256BIT);
1313 	if (ret) {
1314 		cmd->engine_error = cmd_q->cmd_error;
1315 		goto e_dst;
1316 	}
1317 
1318 	/* ...but we only need AES_BLOCK_SIZE bytes */
1319 	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
1320 	ccp_get_dm_area(&ctx, dm_offset, xts->iv, 0, xts->iv_len);
1321 
1322 e_dst:
1323 	if (!in_place)
1324 		ccp_free_data(&dst, cmd_q);
1325 
1326 e_src:
1327 	ccp_free_data(&src, cmd_q);
1328 
1329 e_ctx:
1330 	ccp_dm_free(&ctx);
1331 
1332 e_key:
1333 	ccp_dm_free(&key);
1334 
1335 	return ret;
1336 }
1337 
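/*
 * Editorial note: SHA-1/224/256 - the initial hash values (for a first
 * block) or the caller's saved context are loaded into the KSB, the
 * message is processed in CCP_SHA_BLOCKSIZE chunks with the total
 * message length supplied on the final chunk, and the resulting context
 * is copied back into sha->ctx.  A zero-length message is special-cased
 * with the well-known empty-message digests, and when an opad is
 * supplied the function recurses once to finish an HMAC computation.
 */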
1338 static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1339 {
1340 	struct ccp_sha_engine *sha = &cmd->u.sha;
1341 	struct ccp_dm_workarea ctx;
1342 	struct ccp_data src;
1343 	struct ccp_op op;
1344 	int ret;
1345 
1346 	if (sha->ctx_len != CCP_SHA_CTXSIZE)
1347 		return -EINVAL;
1348 
1349 	if (!sha->ctx)
1350 		return -EINVAL;
1351 
1352 	if (!sha->final && (sha->src_len & (CCP_SHA_BLOCKSIZE - 1)))
1353 		return -EINVAL;
1354 
1355 	if (!sha->src_len) {
1356 		const u8 *sha_zero;
1357 
1358 		/* Not final, just return */
1359 		if (!sha->final)
1360 			return 0;
1361 
1362 		/* CCP can't do a zero length sha operation so the caller
1363 		 * must buffer the data.
1364 		 */
1365 		if (sha->msg_bits)
1366 			return -EINVAL;
1367 
1368 		/* The CCP cannot perform zero-length sha operations so the
1369 		 * caller is required to buffer data for the final operation.
1370 		 * However, a sha operation for a message with a total length
1371 		 * of zero is valid so known values are required to supply
1372 		 * the result.
1373 		 */
1374 		switch (sha->type) {
1375 		case CCP_SHA_TYPE_1:
1376 			sha_zero = sha1_zero_message_hash;
1377 			break;
1378 		case CCP_SHA_TYPE_224:
1379 			sha_zero = sha224_zero_message_hash;
1380 			break;
1381 		case CCP_SHA_TYPE_256:
1382 			sha_zero = sha256_zero_message_hash;
1383 			break;
1384 		default:
1385 			return -EINVAL;
1386 		}
1387 
1388 		scatterwalk_map_and_copy((void *)sha_zero, sha->ctx, 0,
1389 					 sha->ctx_len, 1);
1390 
1391 		return 0;
1392 	}
1393 
1394 	if (!sha->src)
1395 		return -EINVAL;
1396 
1397 	BUILD_BUG_ON(CCP_SHA_KSB_COUNT != 1);
1398 
1399 	memset(&op, 0, sizeof(op));
1400 	op.cmd_q = cmd_q;
1401 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1402 	op.ksb_ctx = cmd_q->ksb_ctx;
1403 	op.u.sha.type = sha->type;
1404 	op.u.sha.msg_bits = sha->msg_bits;
1405 
1406 	/* The SHA context fits in a single (32-byte) KSB entry and
1407 	 * must be in little endian format. Use the 256-bit byte swap
1408 	 * passthru option to convert from big endian to little endian.
1409 	 */
1410 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
1411 				   CCP_SHA_KSB_COUNT * CCP_KSB_BYTES,
1412 				   DMA_BIDIRECTIONAL);
1413 	if (ret)
1414 		return ret;
1415 
1416 	if (sha->first) {
1417 		const __be32 *init;
1418 
1419 		switch (sha->type) {
1420 		case CCP_SHA_TYPE_1:
1421 			init = ccp_sha1_init;
1422 			break;
1423 		case CCP_SHA_TYPE_224:
1424 			init = ccp_sha224_init;
1425 			break;
1426 		case CCP_SHA_TYPE_256:
1427 			init = ccp_sha256_init;
1428 			break;
1429 		default:
1430 			ret = -EINVAL;
1431 			goto e_ctx;
1432 		}
1433 		memcpy(ctx.address, init, CCP_SHA_CTXSIZE);
1434 	} else {
1435 		ccp_set_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
1436 	}
1437 
1438 	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1439 			      CCP_PASSTHRU_BYTESWAP_256BIT);
1440 	if (ret) {
1441 		cmd->engine_error = cmd_q->cmd_error;
1442 		goto e_ctx;
1443 	}
1444 
1445 	/* Send data to the CCP SHA engine */
1446 	ret = ccp_init_data(&src, cmd_q, sha->src, sha->src_len,
1447 			    CCP_SHA_BLOCKSIZE, DMA_TO_DEVICE);
1448 	if (ret)
1449 		goto e_ctx;
1450 
1451 	while (src.sg_wa.bytes_left) {
1452 		ccp_prepare_data(&src, NULL, &op, CCP_SHA_BLOCKSIZE, false);
1453 		if (sha->final && !src.sg_wa.bytes_left)
1454 			op.eom = 1;
1455 
1456 		ret = ccp_perform_sha(&op);
1457 		if (ret) {
1458 			cmd->engine_error = cmd_q->cmd_error;
1459 			goto e_data;
1460 		}
1461 
1462 		ccp_process_data(&src, NULL, &op);
1463 	}
1464 
1465 	/* Retrieve the SHA context - convert from LE to BE using
1466 	 * 32-byte (256-bit) byteswapping
1467 	 */
1468 	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1469 				CCP_PASSTHRU_BYTESWAP_256BIT);
1470 	if (ret) {
1471 		cmd->engine_error = cmd_q->cmd_error;
1472 		goto e_data;
1473 	}
1474 
1475 	ccp_get_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
1476 
1477 	if (sha->final && sha->opad) {
1478 		/* HMAC operation, recursively perform final SHA */
1479 		struct ccp_cmd hmac_cmd;
1480 		struct scatterlist sg;
1481 		u64 block_size, digest_size;
1482 		u8 *hmac_buf;
1483 
1484 		switch (sha->type) {
1485 		case CCP_SHA_TYPE_1:
1486 			block_size = SHA1_BLOCK_SIZE;
1487 			digest_size = SHA1_DIGEST_SIZE;
1488 			break;
1489 		case CCP_SHA_TYPE_224:
1490 			block_size = SHA224_BLOCK_SIZE;
1491 			digest_size = SHA224_DIGEST_SIZE;
1492 			break;
1493 		case CCP_SHA_TYPE_256:
1494 			block_size = SHA256_BLOCK_SIZE;
1495 			digest_size = SHA256_DIGEST_SIZE;
1496 			break;
1497 		default:
1498 			ret = -EINVAL;
1499 			goto e_data;
1500 		}
1501 
1502 		if (sha->opad_len != block_size) {
1503 			ret = -EINVAL;
1504 			goto e_data;
1505 		}
1506 
1507 		hmac_buf = kmalloc(block_size + digest_size, GFP_KERNEL);
1508 		if (!hmac_buf) {
1509 			ret = -ENOMEM;
1510 			goto e_data;
1511 		}
1512 		sg_init_one(&sg, hmac_buf, block_size + digest_size);
1513 
1514 		scatterwalk_map_and_copy(hmac_buf, sha->opad, 0, block_size, 0);
1515 		memcpy(hmac_buf + block_size, ctx.address, digest_size);
1516 
1517 		memset(&hmac_cmd, 0, sizeof(hmac_cmd));
1518 		hmac_cmd.engine = CCP_ENGINE_SHA;
1519 		hmac_cmd.u.sha.type = sha->type;
1520 		hmac_cmd.u.sha.ctx = sha->ctx;
1521 		hmac_cmd.u.sha.ctx_len = sha->ctx_len;
1522 		hmac_cmd.u.sha.src = &sg;
1523 		hmac_cmd.u.sha.src_len = block_size + digest_size;
1524 		hmac_cmd.u.sha.opad = NULL;
1525 		hmac_cmd.u.sha.opad_len = 0;
1526 		hmac_cmd.u.sha.first = 1;
1527 		hmac_cmd.u.sha.final = 1;
1528 		hmac_cmd.u.sha.msg_bits = (block_size + digest_size) << 3;
1529 
1530 		ret = ccp_run_sha_cmd(cmd_q, &hmac_cmd);
1531 		if (ret)
1532 			cmd->engine_error = hmac_cmd.engine_error;
1533 
1534 		kfree(hmac_buf);
1535 	}
1536 
1537 e_data:
1538 	ccp_free_data(&src, cmd_q);
1539 
1540 e_ctx:
1541 	ccp_dm_free(&ctx);
1542 
1543 	return ret;
1544 }
1545 
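/*
 * Editorial note: RSA (modular exponentiation) - the exponent is
 * reversed into little endian form and parked in freshly allocated KSB
 * entries, while the modulus and the input message are reversed and
 * concatenated (modulus first) into a single source buffer.  The
 * operation runs as one hardware command sized by the 256-bit-aligned
 * key width, and the result is reversed back out into rsa->dst.
 */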
1546 static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1547 {
1548 	struct ccp_rsa_engine *rsa = &cmd->u.rsa;
1549 	struct ccp_dm_workarea exp, src;
1550 	struct ccp_data dst;
1551 	struct ccp_op op;
1552 	unsigned int ksb_count, i_len, o_len;
1553 	int ret;
1554 
1555 	if (rsa->key_size > CCP_RSA_MAX_WIDTH)
1556 		return -EINVAL;
1557 
1558 	if (!rsa->exp || !rsa->mod || !rsa->src || !rsa->dst)
1559 		return -EINVAL;
1560 
1561 	/* The RSA modulus must precede the message being acted upon, so
1562 	 * it must be copied to a DMA area where the message and the
1563 	 * modulus can be concatenated.  Therefore the input buffer
1564 	 * length required is twice the output buffer length (which
1565 	 * must be a multiple of 256-bits).
1566 	 */
1567 	o_len = ((rsa->key_size + 255) / 256) * 32;
1568 	i_len = o_len * 2;
1569 
1570 	ksb_count = o_len / CCP_KSB_BYTES;
1571 
1572 	memset(&op, 0, sizeof(op));
1573 	op.cmd_q = cmd_q;
1574 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1575 	op.ksb_key = ccp_alloc_ksb(cmd_q->ccp, ksb_count);
1576 	if (!op.ksb_key)
1577 		return -EIO;
1578 
1579 	/* The RSA exponent may span multiple (32-byte) KSB entries and must
1580 	 * be in little endian format. Reverse copy each 32-byte chunk
1581 	 * of the exponent (En chunk to E0 chunk, E(n-1) chunk to E1 chunk)
1582 	 * and each byte within that chunk and do not perform any byte swap
1583 	 * and each byte within that chunk, and do not perform any byte swap
1584 	 */
1585 	ret = ccp_init_dm_workarea(&exp, cmd_q, o_len, DMA_TO_DEVICE);
1586 	if (ret)
1587 		goto e_ksb;
1588 
1589 	ret = ccp_reverse_set_dm_area(&exp, rsa->exp, rsa->exp_len,
1590 				      CCP_KSB_BYTES, false);
1591 	if (ret)
1592 		goto e_exp;
1593 	ret = ccp_copy_to_ksb(cmd_q, &exp, op.jobid, op.ksb_key,
1594 			      CCP_PASSTHRU_BYTESWAP_NOOP);
1595 	if (ret) {
1596 		cmd->engine_error = cmd_q->cmd_error;
1597 		goto e_exp;
1598 	}
1599 
1600 	/* Concatenate the modulus and the message. Both the modulus and
1601 	 * the operands must be in little endian format.  Since the input
1602 	 * is in big endian format it must be converted.
1603 	 */
1604 	ret = ccp_init_dm_workarea(&src, cmd_q, i_len, DMA_TO_DEVICE);
1605 	if (ret)
1606 		goto e_exp;
1607 
1608 	ret = ccp_reverse_set_dm_area(&src, rsa->mod, rsa->mod_len,
1609 				      CCP_KSB_BYTES, false);
1610 	if (ret)
1611 		goto e_src;
1612 	src.address += o_len;	/* Adjust the address for the copy operation */
1613 	ret = ccp_reverse_set_dm_area(&src, rsa->src, rsa->src_len,
1614 				      CCP_KSB_BYTES, false);
1615 	if (ret)
1616 		goto e_src;
1617 	src.address -= o_len;	/* Reset the address to original value */
1618 
1619 	/* Prepare the output area for the operation */
1620 	ret = ccp_init_data(&dst, cmd_q, rsa->dst, rsa->mod_len,
1621 			    o_len, DMA_FROM_DEVICE);
1622 	if (ret)
1623 		goto e_src;
1624 
1625 	op.soc = 1;
1626 	op.src.u.dma.address = src.dma.address;
1627 	op.src.u.dma.offset = 0;
1628 	op.src.u.dma.length = i_len;
1629 	op.dst.u.dma.address = dst.dm_wa.dma.address;
1630 	op.dst.u.dma.offset = 0;
1631 	op.dst.u.dma.length = o_len;
1632 
1633 	op.u.rsa.mod_size = rsa->key_size;
1634 	op.u.rsa.input_len = i_len;
1635 
1636 	ret = ccp_perform_rsa(&op);
1637 	if (ret) {
1638 		cmd->engine_error = cmd_q->cmd_error;
1639 		goto e_dst;
1640 	}
1641 
1642 	ccp_reverse_get_dm_area(&dst.dm_wa, rsa->dst, rsa->mod_len);
1643 
1644 e_dst:
1645 	ccp_free_data(&dst, cmd_q);
1646 
1647 e_src:
1648 	ccp_dm_free(&src);
1649 
1650 e_exp:
1651 	ccp_dm_free(&exp);
1652 
1653 e_ksb:
1654 	ccp_free_ksb(cmd_q->ccp, op.ksb_key, ksb_count);
1655 
1656 	return ret;
1657 }
1658 
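/*
 * Editorial note: passthru (memory copy with optional bitwise masking
 * and byte swapping) - if a bit operation is requested the mask is
 * loaded into the KSB first, then each source scatterlist entry is
 * copied with its own hardware operation.  Each source entry must fit in
 * the room remaining in the current destination entry, so this behaves
 * as a copy engine rather than a general scatter-gather rearranger.
 */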
1659 static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q,
1660 				struct ccp_cmd *cmd)
1661 {
1662 	struct ccp_passthru_engine *pt = &cmd->u.passthru;
1663 	struct ccp_dm_workarea mask;
1664 	struct ccp_data src, dst;
1665 	struct ccp_op op;
1666 	bool in_place = false;
1667 	unsigned int i;
1668 	int ret;
1669 
1670 	if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1)))
1671 		return -EINVAL;
1672 
1673 	if (!pt->src || !pt->dst)
1674 		return -EINVAL;
1675 
1676 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
1677 		if (pt->mask_len != CCP_PASSTHRU_MASKSIZE)
1678 			return -EINVAL;
1679 		if (!pt->mask)
1680 			return -EINVAL;
1681 	}
1682 
1683 	BUILD_BUG_ON(CCP_PASSTHRU_KSB_COUNT != 1);
1684 
1685 	memset(&op, 0, sizeof(op));
1686 	op.cmd_q = cmd_q;
1687 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1688 
1689 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
1690 		/* Load the mask */
1691 		op.ksb_key = cmd_q->ksb_key;
1692 
1693 		ret = ccp_init_dm_workarea(&mask, cmd_q,
1694 					   CCP_PASSTHRU_KSB_COUNT *
1695 					   CCP_KSB_BYTES,
1696 					   DMA_TO_DEVICE);
1697 		if (ret)
1698 			return ret;
1699 
1700 		ccp_set_dm_area(&mask, 0, pt->mask, 0, pt->mask_len);
1701 		ret = ccp_copy_to_ksb(cmd_q, &mask, op.jobid, op.ksb_key,
1702 				      CCP_PASSTHRU_BYTESWAP_NOOP);
1703 		if (ret) {
1704 			cmd->engine_error = cmd_q->cmd_error;
1705 			goto e_mask;
1706 		}
1707 	}
1708 
1709 	/* Prepare the input and output data workareas. For in-place
1710 	 * operations we need to set the dma direction to BIDIRECTIONAL
1711 	 * and copy the src workarea to the dst workarea.
1712 	 */
1713 	if (sg_virt(pt->src) == sg_virt(pt->dst))
1714 		in_place = true;
1715 
1716 	ret = ccp_init_data(&src, cmd_q, pt->src, pt->src_len,
1717 			    CCP_PASSTHRU_MASKSIZE,
1718 			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1719 	if (ret)
1720 		goto e_mask;
1721 
1722 	if (in_place) {
1723 		dst = src;
1724 	} else {
1725 		ret = ccp_init_data(&dst, cmd_q, pt->dst, pt->src_len,
1726 				    CCP_PASSTHRU_MASKSIZE, DMA_FROM_DEVICE);
1727 		if (ret)
1728 			goto e_src;
1729 	}
1730 
1731 	/* Send data to the CCP Passthru engine
1732 	 *   Because the CCP engine works on a single source and destination
1733 	 *   dma address at a time, each entry in the source scatterlist
1734 	 *   (after the dma_map_sg call) must be less than or equal to the
1735 	 *   (remaining) length in the destination scatterlist entry and the
1736 	 *   length must be a multiple of CCP_PASSTHRU_BLOCKSIZE
1737 	 */
1738 	dst.sg_wa.sg_used = 0;
1739 	for (i = 1; i <= src.sg_wa.dma_count; i++) {
1740 		if (!dst.sg_wa.sg ||
1741 		    (dst.sg_wa.sg->length < src.sg_wa.sg->length)) {
1742 			ret = -EINVAL;
1743 			goto e_dst;
1744 		}
1745 
1746 		if (i == src.sg_wa.dma_count) {
1747 			op.eom = 1;
1748 			op.soc = 1;
1749 		}
1750 
1751 		op.src.type = CCP_MEMTYPE_SYSTEM;
1752 		op.src.u.dma.address = sg_dma_address(src.sg_wa.sg);
1753 		op.src.u.dma.offset = 0;
1754 		op.src.u.dma.length = sg_dma_len(src.sg_wa.sg);
1755 
1756 		op.dst.type = CCP_MEMTYPE_SYSTEM;
1757 		op.dst.u.dma.address = sg_dma_address(dst.sg_wa.sg);
1758 		op.dst.u.dma.offset = dst.sg_wa.sg_used;
1759 		op.dst.u.dma.length = op.src.u.dma.length;
1760 
1761 		ret = ccp_perform_passthru(&op);
1762 		if (ret) {
1763 			cmd->engine_error = cmd_q->cmd_error;
1764 			goto e_dst;
1765 		}
1766 
1767 		dst.sg_wa.sg_used += src.sg_wa.sg->length;
1768 		if (dst.sg_wa.sg_used == dst.sg_wa.sg->length) {
1769 			dst.sg_wa.sg = sg_next(dst.sg_wa.sg);
1770 			dst.sg_wa.sg_used = 0;
1771 		}
1772 		src.sg_wa.sg = sg_next(src.sg_wa.sg);
1773 	}
1774 
1775 e_dst:
1776 	if (!in_place)
1777 		ccp_free_data(&dst, cmd_q);
1778 
1779 e_src:
1780 	ccp_free_data(&src, cmd_q);
1781 
1782 e_mask:
1783 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
1784 		ccp_dm_free(&mask);
1785 
1786 	return ret;
1787 }
1788 
1789 static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
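/*
 * Editorial note: ECC modular math (multiply, add, invert) - the modulus
 * and one or two operands are reversed into little endian form and
 * packed into fixed CCP_ECC_OPERAND_SIZE slots of a single source
 * buffer.  The hardware reports success via a result word checked
 * against CCP_ECC_RESULT_SUCCESS before the value is reversed back out
 * to the caller.
 */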
1790 {
1791 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
1792 	struct ccp_dm_workarea src, dst;
1793 	struct ccp_op op;
1794 	int ret;
1795 	u8 *save;
1796 
1797 	if (!ecc->u.mm.operand_1 ||
1798 	    (ecc->u.mm.operand_1_len > CCP_ECC_MODULUS_BYTES))
1799 		return -EINVAL;
1800 
1801 	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT)
1802 		if (!ecc->u.mm.operand_2 ||
1803 		    (ecc->u.mm.operand_2_len > CCP_ECC_MODULUS_BYTES))
1804 			return -EINVAL;
1805 
1806 	if (!ecc->u.mm.result ||
1807 	    (ecc->u.mm.result_len < CCP_ECC_MODULUS_BYTES))
1808 		return -EINVAL;
1809 
1810 	memset(&op, 0, sizeof(op));
1811 	op.cmd_q = cmd_q;
1812 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1813 
1814 	/* Concatenate the modulus and the operands. Both the modulus and
1815 	 * the operands must be in little endian format.  Since the input
1816 	 * is in big endian format it must be converted and placed in a
1817 	 * fixed length buffer.
1818 	 */
1819 	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
1820 				   DMA_TO_DEVICE);
1821 	if (ret)
1822 		return ret;
1823 
1824 	/* Save the workarea address since it is updated in order to perform
1825 	 * the concatenation
1826 	 */
1827 	save = src.address;
1828 
1829 	/* Copy the ECC modulus */
1830 	ret = ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
1831 				      CCP_ECC_OPERAND_SIZE, false);
1832 	if (ret)
1833 		goto e_src;
1834 	src.address += CCP_ECC_OPERAND_SIZE;
1835 
1836 	/* Copy the first operand */
1837 	ret = ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_1,
1838 				      ecc->u.mm.operand_1_len,
1839 				      CCP_ECC_OPERAND_SIZE, false);
1840 	if (ret)
1841 		goto e_src;
1842 	src.address += CCP_ECC_OPERAND_SIZE;
1843 
1844 	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT) {
1845 		/* Copy the second operand */
1846 		ret = ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_2,
1847 					      ecc->u.mm.operand_2_len,
1848 					      CCP_ECC_OPERAND_SIZE, false);
1849 		if (ret)
1850 			goto e_src;
1851 		src.address += CCP_ECC_OPERAND_SIZE;
1852 	}
1853 
1854 	/* Restore the workarea address */
1855 	src.address = save;
1856 
1857 	/* Prepare the output area for the operation */
1858 	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
1859 				   DMA_FROM_DEVICE);
1860 	if (ret)
1861 		goto e_src;
1862 
1863 	op.soc = 1;
1864 	op.src.u.dma.address = src.dma.address;
1865 	op.src.u.dma.offset = 0;
1866 	op.src.u.dma.length = src.length;
1867 	op.dst.u.dma.address = dst.dma.address;
1868 	op.dst.u.dma.offset = 0;
1869 	op.dst.u.dma.length = dst.length;
1870 
1871 	op.u.ecc.function = cmd->u.ecc.function;
1872 
1873 	ret = ccp_perform_ecc(&op);
1874 	if (ret) {
1875 		cmd->engine_error = cmd_q->cmd_error;
1876 		goto e_dst;
1877 	}
1878 
1879 	ecc->ecc_result = le16_to_cpup(
1880 		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
1881 	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
1882 		ret = -EIO;
1883 		goto e_dst;
1884 	}
1885 
1886 	/* Save the ECC result */
1887 	ccp_reverse_get_dm_area(&dst, ecc->u.mm.result, CCP_ECC_MODULUS_BYTES);
1888 
1889 e_dst:
1890 	ccp_dm_free(&dst);
1891 
1892 e_src:
1893 	ccp_dm_free(&src);
1894 
1895 	return ret;
1896 }
1897 
1898 static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1899 {
1900 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
1901 	struct ccp_dm_workarea src, dst;
1902 	struct ccp_op op;
1903 	int ret;
1904 	u8 *save;
1905 
1906 	if (!ecc->u.pm.point_1.x ||
1907 	    (ecc->u.pm.point_1.x_len > CCP_ECC_MODULUS_BYTES) ||
1908 	    !ecc->u.pm.point_1.y ||
1909 	    (ecc->u.pm.point_1.y_len > CCP_ECC_MODULUS_BYTES))
1910 		return -EINVAL;
1911 
1912 	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
1913 		if (!ecc->u.pm.point_2.x ||
1914 		    (ecc->u.pm.point_2.x_len > CCP_ECC_MODULUS_BYTES) ||
1915 		    !ecc->u.pm.point_2.y ||
1916 		    (ecc->u.pm.point_2.y_len > CCP_ECC_MODULUS_BYTES))
1917 			return -EINVAL;
1918 	} else {
1919 		if (!ecc->u.pm.domain_a ||
1920 		    (ecc->u.pm.domain_a_len > CCP_ECC_MODULUS_BYTES))
1921 			return -EINVAL;
1922 
1923 		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT)
1924 			if (!ecc->u.pm.scalar ||
1925 			    (ecc->u.pm.scalar_len > CCP_ECC_MODULUS_BYTES))
1926 				return -EINVAL;
1927 	}
1928 
1929 	if (!ecc->u.pm.result.x ||
1930 	    (ecc->u.pm.result.x_len < CCP_ECC_MODULUS_BYTES) ||
1931 	    !ecc->u.pm.result.y ||
1932 	    (ecc->u.pm.result.y_len < CCP_ECC_MODULUS_BYTES))
1933 		return -EINVAL;
1934 
1935 	memset(&op, 0, sizeof(op));
1936 	op.cmd_q = cmd_q;
1937 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1938 
1939 	/* Concatenate the modulus and the operands. Both the modulus and
1940 	 * the operands must be in little endian format.  Since the input
1941 	 * is in big endian format it must be converted and placed in a
1942 	 * fixed length buffer.
1943 	 */
1944 	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
1945 				   DMA_TO_DEVICE);
1946 	if (ret)
1947 		return ret;
1948 
1949 	/* Save the workarea address since it is updated in order to perform
1950 	 * the concatenation
1951 	 */
1952 	save = src.address;
1953 
1954 	/* Copy the ECC modulus */
1955 	ret = ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
1956 				      CCP_ECC_OPERAND_SIZE, false);
1957 	if (ret)
1958 		goto e_src;
1959 	src.address += CCP_ECC_OPERAND_SIZE;
1960 
1961 	/* Copy the first point X and Y coordinate */
1962 	ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.x,
1963 				      ecc->u.pm.point_1.x_len,
1964 				      CCP_ECC_OPERAND_SIZE, false);
1965 	if (ret)
1966 		goto e_src;
1967 	src.address += CCP_ECC_OPERAND_SIZE;
1968 	ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.y,
1969 				      ecc->u.pm.point_1.y_len,
1970 				      CCP_ECC_OPERAND_SIZE, false);
1971 	if (ret)
1972 		goto e_src;
1973 	src.address += CCP_ECC_OPERAND_SIZE;
1974 
1975 	/* Set the first point Z coordinate to 1 */
1976 	*src.address = 0x01;
1977 	src.address += CCP_ECC_OPERAND_SIZE;
1978 
1979 	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
1980 		/* Copy the second point X and Y coordinate */
1981 		ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.x,
1982 					      ecc->u.pm.point_2.x_len,
1983 					      CCP_ECC_OPERAND_SIZE, false);
1984 		if (ret)
1985 			goto e_src;
1986 		src.address += CCP_ECC_OPERAND_SIZE;
1987 		ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.y,
1988 					      ecc->u.pm.point_2.y_len,
1989 					      CCP_ECC_OPERAND_SIZE, false);
1990 		if (ret)
1991 			goto e_src;
1992 		src.address += CCP_ECC_OPERAND_SIZE;
1993 
1994 		/* Set the second point Z coordinate to 1 */
1995 		*src.address = 0x01;
1996 		src.address += CCP_ECC_OPERAND_SIZE;
1997 	} else {
1998 		/* Copy the Domain "a" parameter */
1999 		ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.domain_a,
2000 					      ecc->u.pm.domain_a_len,
2001 					      CCP_ECC_OPERAND_SIZE, false);
2002 		if (ret)
2003 			goto e_src;
2004 		src.address += CCP_ECC_OPERAND_SIZE;
2005 
2006 		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT) {
2007 			/* Copy the scalar value */
2008 			ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.scalar,
2009 						      ecc->u.pm.scalar_len,
2010 						      CCP_ECC_OPERAND_SIZE,
2011 						      false);
2012 			if (ret)
2013 				goto e_src;
2014 			src.address += CCP_ECC_OPERAND_SIZE;
2015 		}
2016 	}
2017 
2018 	/* Restore the workarea address */
2019 	src.address = save;
2020 
2021 	/* Prepare the output area for the operation */
2022 	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
2023 				   DMA_FROM_DEVICE);
2024 	if (ret)
2025 		goto e_src;
2026 
2027 	op.soc = 1;
2028 	op.src.u.dma.address = src.dma.address;
2029 	op.src.u.dma.offset = 0;
2030 	op.src.u.dma.length = src.length;
2031 	op.dst.u.dma.address = dst.dma.address;
2032 	op.dst.u.dma.offset = 0;
2033 	op.dst.u.dma.length = dst.length;
2034 
2035 	op.u.ecc.function = cmd->u.ecc.function;
2036 
2037 	ret = ccp_perform_ecc(&op);
2038 	if (ret) {
2039 		cmd->engine_error = cmd_q->cmd_error;
2040 		goto e_dst;
2041 	}
2042 
2043 	ecc->ecc_result = le16_to_cpup(
2044 		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
2045 	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
2046 		ret = -EIO;
2047 		goto e_dst;
2048 	}
2049 
2050 	/* Save the workarea address since it is updated as we walk through
2051 	 * to copy the point math result
2052 	 */
2053 	save = dst.address;
2054 
2055 	/* Save the ECC result X and Y coordinates */
2056 	ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.x,
2057 				CCP_ECC_MODULUS_BYTES);
2058 	dst.address += CCP_ECC_OUTPUT_SIZE;
2059 	ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.y,
2060 				CCP_ECC_MODULUS_BYTES);
2061 	dst.address += CCP_ECC_OUTPUT_SIZE;
2062 
2063 	/* Restore the workarea address */
2064 	dst.address = save;
2065 
2066 e_dst:
2067 	ccp_dm_free(&dst);
2068 
2069 e_src:
2070 	ccp_dm_free(&src);
2071 
2072 	return ret;
2073 }
2074 
2075 static int ccp_run_ecc_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
2076 {
2077 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
2078 
2079 	ecc->ecc_result = 0;
2080 
2081 	if (!ecc->mod ||
2082 	    (ecc->mod_len > CCP_ECC_MODULUS_BYTES))
2083 		return -EINVAL;
2084 
2085 	switch (ecc->function) {
2086 	case CCP_ECC_FUNCTION_MMUL_384BIT:
2087 	case CCP_ECC_FUNCTION_MADD_384BIT:
2088 	case CCP_ECC_FUNCTION_MINV_384BIT:
2089 		return ccp_run_ecc_mm_cmd(cmd_q, cmd);
2090 
2091 	case CCP_ECC_FUNCTION_PADD_384BIT:
2092 	case CCP_ECC_FUNCTION_PMUL_384BIT:
2093 	case CCP_ECC_FUNCTION_PDBL_384BIT:
2094 		return ccp_run_ecc_pm_cmd(cmd_q, cmd);
2095 
2096 	default:
2097 		return -EINVAL;
2098 	}
2099 }
2100 
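/*
 * Editorial note: ccp_run_cmd() is the entry point used by the command
 * queue processing code - it resets the per-command error state,
 * refreshes the free-slot count from the queue status register and
 * dispatches on cmd->engine.
 *
 * Illustrative sketch only (editorial, not taken from this driver): a
 * caller filling in a struct ccp_cmd for an AES-CBC encrypt might set,
 * using the fields this file consumes,
 *
 *	memset(&cmd, 0, sizeof(cmd));
 *	cmd.engine = CCP_ENGINE_AES;
 *	cmd.u.aes.type = CCP_AES_TYPE_128;
 *	cmd.u.aes.mode = CCP_AES_MODE_CBC;
 *	cmd.u.aes.action = CCP_AES_ACTION_ENCRYPT;
 *	cmd.u.aes.key = key_sg;
 *	cmd.u.aes.key_len = AES_KEYSIZE_128;
 *	cmd.u.aes.iv = iv_sg;
 *	cmd.u.aes.iv_len = AES_BLOCK_SIZE;
 *	cmd.u.aes.src = src_sg;
 *	cmd.u.aes.src_len = data_len;
 *	cmd.u.aes.dst = dst_sg;
 *
 * key_sg, iv_sg, src_sg, dst_sg and data_len are placeholders; the
 * type/action enum spellings follow linux/ccp.h and, since they are not
 * defined in this file, should be treated as an assumption.
 */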
2101 int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
2102 {
2103 	int ret;
2104 
2105 	cmd->engine_error = 0;
2106 	cmd_q->cmd_error = 0;
2107 	cmd_q->int_rcvd = 0;
2108 	cmd_q->free_slots = CMD_Q_DEPTH(ioread32(cmd_q->reg_status));
2109 
2110 	switch (cmd->engine) {
2111 	case CCP_ENGINE_AES:
2112 		ret = ccp_run_aes_cmd(cmd_q, cmd);
2113 		break;
2114 	case CCP_ENGINE_XTS_AES_128:
2115 		ret = ccp_run_xts_aes_cmd(cmd_q, cmd);
2116 		break;
2117 	case CCP_ENGINE_SHA:
2118 		ret = ccp_run_sha_cmd(cmd_q, cmd);
2119 		break;
2120 	case CCP_ENGINE_RSA:
2121 		ret = ccp_run_rsa_cmd(cmd_q, cmd);
2122 		break;
2123 	case CCP_ENGINE_PASSTHRU:
2124 		ret = ccp_run_passthru_cmd(cmd_q, cmd);
2125 		break;
2126 	case CCP_ENGINE_ECC:
2127 		ret = ccp_run_ecc_cmd(cmd_q, cmd);
2128 		break;
2129 	default:
2130 		ret = -EINVAL;
2131 	}
2132 
2133 	return ret;
2134 }
2135