xref: /openbmc/linux/drivers/crypto/ccp/ccp-ops.c (revision 355eba5d)
1 /*
2  * AMD Cryptographic Coprocessor (CCP) driver
3  *
4  * Copyright (C) 2013 Advanced Micro Devices, Inc.
5  *
6  * Author: Tom Lendacky <thomas.lendacky@amd.com>
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 as
10  * published by the Free Software Foundation.
11  */
12 
13 #include <linux/module.h>
14 #include <linux/kernel.h>
15 #include <linux/pci.h>
16 #include <linux/pci_ids.h>
17 #include <linux/kthread.h>
18 #include <linux/sched.h>
19 #include <linux/interrupt.h>
20 #include <linux/spinlock.h>
21 #include <linux/mutex.h>
22 #include <linux/delay.h>
23 #include <linux/ccp.h>
24 #include <linux/scatterlist.h>
25 #include <crypto/scatterwalk.h>
26 #include <crypto/sha.h>
27 
28 #include "ccp-dev.h"
29 
30 enum ccp_memtype {
31 	CCP_MEMTYPE_SYSTEM = 0,
32 	CCP_MEMTYPE_KSB,
33 	CCP_MEMTYPE_LOCAL,
34 	CCP_MEMTYPE__LAST,
35 };
36 
37 struct ccp_dma_info {
38 	dma_addr_t address;
39 	unsigned int offset;
40 	unsigned int length;
41 	enum dma_data_direction dir;
42 };
43 
44 struct ccp_dm_workarea {
45 	struct device *dev;
46 	struct dma_pool *dma_pool;
47 	unsigned int length;
48 
49 	u8 *address;
50 	struct ccp_dma_info dma;
51 };
52 
53 struct ccp_sg_workarea {
54 	struct scatterlist *sg;
55 	int nents;
56 
57 	struct scatterlist *dma_sg;
58 	struct device *dma_dev;
59 	unsigned int dma_count;
60 	enum dma_data_direction dma_dir;
61 
62 	unsigned int sg_used;
63 
64 	u64 bytes_left;
65 };
66 
67 struct ccp_data {
68 	struct ccp_sg_workarea sg_wa;
69 	struct ccp_dm_workarea dm_wa;
70 };
71 
72 struct ccp_mem {
73 	enum ccp_memtype type;
74 	union {
75 		struct ccp_dma_info dma;
76 		u32 ksb;
77 	} u;
78 };
79 
80 struct ccp_aes_op {
81 	enum ccp_aes_type type;
82 	enum ccp_aes_mode mode;
83 	enum ccp_aes_action action;
84 };
85 
86 struct ccp_xts_aes_op {
87 	enum ccp_aes_action action;
88 	enum ccp_xts_aes_unit_size unit_size;
89 };
90 
91 struct ccp_sha_op {
92 	enum ccp_sha_type type;
93 	u64 msg_bits;
94 };
95 
96 struct ccp_rsa_op {
97 	u32 mod_size;
98 	u32 input_len;
99 };
100 
101 struct ccp_passthru_op {
102 	enum ccp_passthru_bitwise bit_mod;
103 	enum ccp_passthru_byteswap byte_swap;
104 };
105 
106 struct ccp_ecc_op {
107 	enum ccp_ecc_function function;
108 };
109 
110 struct ccp_op {
111 	struct ccp_cmd_queue *cmd_q;
112 
113 	u32 jobid;
114 	u32 ioc;
115 	u32 soc;
116 	u32 ksb_key;
117 	u32 ksb_ctx;
118 	u32 init;
119 	u32 eom;
120 
121 	struct ccp_mem src;
122 	struct ccp_mem dst;
123 
124 	union {
125 		struct ccp_aes_op aes;
126 		struct ccp_xts_aes_op xts;
127 		struct ccp_sha_op sha;
128 		struct ccp_rsa_op rsa;
129 		struct ccp_passthru_op passthru;
130 		struct ccp_ecc_op ecc;
131 	} u;
132 };
133 
134 /* SHA initial context values */
135 static const __be32 ccp_sha1_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
136 	cpu_to_be32(SHA1_H0), cpu_to_be32(SHA1_H1),
137 	cpu_to_be32(SHA1_H2), cpu_to_be32(SHA1_H3),
138 	cpu_to_be32(SHA1_H4), 0, 0, 0,
139 };
140 
141 static const __be32 ccp_sha224_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
142 	cpu_to_be32(SHA224_H0), cpu_to_be32(SHA224_H1),
143 	cpu_to_be32(SHA224_H2), cpu_to_be32(SHA224_H3),
144 	cpu_to_be32(SHA224_H4), cpu_to_be32(SHA224_H5),
145 	cpu_to_be32(SHA224_H6), cpu_to_be32(SHA224_H7),
146 };
147 
148 static const __be32 ccp_sha256_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
149 	cpu_to_be32(SHA256_H0), cpu_to_be32(SHA256_H1),
150 	cpu_to_be32(SHA256_H2), cpu_to_be32(SHA256_H3),
151 	cpu_to_be32(SHA256_H4), cpu_to_be32(SHA256_H5),
152 	cpu_to_be32(SHA256_H6), cpu_to_be32(SHA256_H7),
153 };
154 
155 /* The CCP cannot perform zero-length sha operations so the caller
156  * is required to buffer data for the final operation.  However, a
157  * sha operation for a message with a total length of zero is valid
158  * so known values are required to supply the result.
159  */
160 static const u8 ccp_sha1_zero[CCP_SHA_CTXSIZE] = {
161 	0xda, 0x39, 0xa3, 0xee, 0x5e, 0x6b, 0x4b, 0x0d,
162 	0x32, 0x55, 0xbf, 0xef, 0x95, 0x60, 0x18, 0x90,
163 	0xaf, 0xd8, 0x07, 0x09, 0x00, 0x00, 0x00, 0x00,
164 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
165 };
166 
167 static const u8 ccp_sha224_zero[CCP_SHA_CTXSIZE] = {
168 	0xd1, 0x4a, 0x02, 0x8c, 0x2a, 0x3a, 0x2b, 0xc9,
169 	0x47, 0x61, 0x02, 0xbb, 0x28, 0x82, 0x34, 0xc4,
170 	0x15, 0xa2, 0xb0, 0x1f, 0x82, 0x8e, 0xa6, 0x2a,
171 	0xc5, 0xb3, 0xe4, 0x2f, 0x00, 0x00, 0x00, 0x00,
172 };
173 
174 static const u8 ccp_sha256_zero[CCP_SHA_CTXSIZE] = {
175 	0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14,
176 	0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24,
177 	0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c,
178 	0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55,
179 };
180 
181 static u32 ccp_addr_lo(struct ccp_dma_info *info)
182 {
183 	return lower_32_bits(info->address + info->offset);
184 }
185 
186 static u32 ccp_addr_hi(struct ccp_dma_info *info)
187 {
188 	return upper_32_bits(info->address + info->offset) & 0x0000ffff;
189 }
190 
191 static int ccp_do_cmd(struct ccp_op *op, u32 *cr, unsigned int cr_count)
192 {
193 	struct ccp_cmd_queue *cmd_q = op->cmd_q;
194 	struct ccp_device *ccp = cmd_q->ccp;
195 	void __iomem *cr_addr;
196 	u32 cr0, cmd;
197 	unsigned int i;
198 	int ret = 0;
199 
200 	/* We could read a status register to see how many free slots
201 	 * are actually available, but reading that register resets it
202 	 * and you could lose some error information.
203 	 */
204 	cmd_q->free_slots--;
205 
206 	cr0 = (cmd_q->id << REQ0_CMD_Q_SHIFT)
207 	      | (op->jobid << REQ0_JOBID_SHIFT)
208 	      | REQ0_WAIT_FOR_WRITE;
209 
210 	if (op->soc)
211 		cr0 |= REQ0_STOP_ON_COMPLETE
212 		       | REQ0_INT_ON_COMPLETE;
213 
214 	if (op->ioc || !cmd_q->free_slots)
215 		cr0 |= REQ0_INT_ON_COMPLETE;
216 
217 	/* Start at CMD_REQ1 */
218 	cr_addr = ccp->io_regs + CMD_REQ0 + CMD_REQ_INCR;
219 
220 	mutex_lock(&ccp->req_mutex);
221 
222 	/* Write CMD_REQ1 through CMD_REQx first */
223 	for (i = 0; i < cr_count; i++, cr_addr += CMD_REQ_INCR)
224 		iowrite32(*(cr + i), cr_addr);
225 
226 	/* Tell the CCP to start */
227 	wmb();
228 	iowrite32(cr0, ccp->io_regs + CMD_REQ0);
229 
230 	mutex_unlock(&ccp->req_mutex);
231 
232 	if (cr0 & REQ0_INT_ON_COMPLETE) {
233 		/* Wait for the job to complete */
234 		ret = wait_event_interruptible(cmd_q->int_queue,
235 					       cmd_q->int_rcvd);
236 		if (ret || cmd_q->cmd_error) {
237 			/* On error delete all related jobs from the queue */
238 			cmd = (cmd_q->id << DEL_Q_ID_SHIFT)
239 			      | op->jobid;
240 
241 			iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);
242 
243 			if (!ret)
244 				ret = -EIO;
245 		} else if (op->soc) {
246 			/* Delete just head job from the queue on SoC */
247 			cmd = DEL_Q_ACTIVE
248 			      | (cmd_q->id << DEL_Q_ID_SHIFT)
249 			      | op->jobid;
250 
251 			iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);
252 		}
253 
254 		cmd_q->free_slots = CMD_Q_DEPTH(cmd_q->q_status);
255 
256 		cmd_q->int_rcvd = 0;
257 	}
258 
259 	return ret;
260 }
261 
262 static int ccp_perform_aes(struct ccp_op *op)
263 {
264 	u32 cr[6];
265 
266 	/* Fill out the register contents for REQ1 through REQ6 */
267 	cr[0] = (CCP_ENGINE_AES << REQ1_ENGINE_SHIFT)
268 		| (op->u.aes.type << REQ1_AES_TYPE_SHIFT)
269 		| (op->u.aes.mode << REQ1_AES_MODE_SHIFT)
270 		| (op->u.aes.action << REQ1_AES_ACTION_SHIFT)
271 		| (op->ksb_key << REQ1_KEY_KSB_SHIFT);
272 	cr[1] = op->src.u.dma.length - 1;
273 	cr[2] = ccp_addr_lo(&op->src.u.dma);
274 	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
275 		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
276 		| ccp_addr_hi(&op->src.u.dma);
277 	cr[4] = ccp_addr_lo(&op->dst.u.dma);
278 	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
279 		| ccp_addr_hi(&op->dst.u.dma);
280 
281 	if (op->u.aes.mode == CCP_AES_MODE_CFB)
282 		cr[0] |= ((0x7f) << REQ1_AES_CFB_SIZE_SHIFT);
283 
284 	if (op->eom)
285 		cr[0] |= REQ1_EOM;
286 
287 	if (op->init)
288 		cr[0] |= REQ1_INIT;
289 
290 	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
291 }
292 
293 static int ccp_perform_xts_aes(struct ccp_op *op)
294 {
295 	u32 cr[6];
296 
297 	/* Fill out the register contents for REQ1 through REQ6 */
298 	cr[0] = (CCP_ENGINE_XTS_AES_128 << REQ1_ENGINE_SHIFT)
299 		| (op->u.xts.action << REQ1_AES_ACTION_SHIFT)
300 		| (op->u.xts.unit_size << REQ1_XTS_AES_SIZE_SHIFT)
301 		| (op->ksb_key << REQ1_KEY_KSB_SHIFT);
302 	cr[1] = op->src.u.dma.length - 1;
303 	cr[2] = ccp_addr_lo(&op->src.u.dma);
304 	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
305 		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
306 		| ccp_addr_hi(&op->src.u.dma);
307 	cr[4] = ccp_addr_lo(&op->dst.u.dma);
308 	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
309 		| ccp_addr_hi(&op->dst.u.dma);
310 
311 	if (op->eom)
312 		cr[0] |= REQ1_EOM;
313 
314 	if (op->init)
315 		cr[0] |= REQ1_INIT;
316 
317 	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
318 }
319 
320 static int ccp_perform_sha(struct ccp_op *op)
321 {
322 	u32 cr[6];
323 
324 	/* Fill out the register contents for REQ1 through REQ6 */
325 	cr[0] = (CCP_ENGINE_SHA << REQ1_ENGINE_SHIFT)
326 		| (op->u.sha.type << REQ1_SHA_TYPE_SHIFT)
327 		| REQ1_INIT;
328 	cr[1] = op->src.u.dma.length - 1;
329 	cr[2] = ccp_addr_lo(&op->src.u.dma);
330 	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
331 		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
332 		| ccp_addr_hi(&op->src.u.dma);
333 
334 	if (op->eom) {
335 		cr[0] |= REQ1_EOM;
336 		cr[4] = lower_32_bits(op->u.sha.msg_bits);
337 		cr[5] = upper_32_bits(op->u.sha.msg_bits);
338 	} else {
339 		cr[4] = 0;
340 		cr[5] = 0;
341 	}
342 
343 	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
344 }
345 
346 static int ccp_perform_rsa(struct ccp_op *op)
347 {
348 	u32 cr[6];
349 
350 	/* Fill out the register contents for REQ1 through REQ6 */
351 	cr[0] = (CCP_ENGINE_RSA << REQ1_ENGINE_SHIFT)
352 		| (op->u.rsa.mod_size << REQ1_RSA_MOD_SIZE_SHIFT)
353 		| (op->ksb_key << REQ1_KEY_KSB_SHIFT)
354 		| REQ1_EOM;
355 	cr[1] = op->u.rsa.input_len - 1;
356 	cr[2] = ccp_addr_lo(&op->src.u.dma);
357 	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
358 		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
359 		| ccp_addr_hi(&op->src.u.dma);
360 	cr[4] = ccp_addr_lo(&op->dst.u.dma);
361 	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
362 		| ccp_addr_hi(&op->dst.u.dma);
363 
364 	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
365 }
366 
367 static int ccp_perform_passthru(struct ccp_op *op)
368 {
369 	u32 cr[6];
370 
371 	/* Fill out the register contents for REQ1 through REQ6 */
372 	cr[0] = (CCP_ENGINE_PASSTHRU << REQ1_ENGINE_SHIFT)
373 		| (op->u.passthru.bit_mod << REQ1_PT_BW_SHIFT)
374 		| (op->u.passthru.byte_swap << REQ1_PT_BS_SHIFT);
375 
376 	if (op->src.type == CCP_MEMTYPE_SYSTEM)
377 		cr[1] = op->src.u.dma.length - 1;
378 	else
379 		cr[1] = op->dst.u.dma.length - 1;
380 
381 	if (op->src.type == CCP_MEMTYPE_SYSTEM) {
382 		cr[2] = ccp_addr_lo(&op->src.u.dma);
383 		cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
384 			| ccp_addr_hi(&op->src.u.dma);
385 
386 		if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
387 			cr[3] |= (op->ksb_key << REQ4_KSB_SHIFT);
388 	} else {
389 		cr[2] = op->src.u.ksb * CCP_KSB_BYTES;
390 		cr[3] = (CCP_MEMTYPE_KSB << REQ4_MEMTYPE_SHIFT);
391 	}
392 
393 	if (op->dst.type == CCP_MEMTYPE_SYSTEM) {
394 		cr[4] = ccp_addr_lo(&op->dst.u.dma);
395 		cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
396 			| ccp_addr_hi(&op->dst.u.dma);
397 	} else {
398 		cr[4] = op->dst.u.ksb * CCP_KSB_BYTES;
399 		cr[5] = (CCP_MEMTYPE_KSB << REQ6_MEMTYPE_SHIFT);
400 	}
401 
402 	if (op->eom)
403 		cr[0] |= REQ1_EOM;
404 
405 	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
406 }
407 
408 static int ccp_perform_ecc(struct ccp_op *op)
409 {
410 	u32 cr[6];
411 
412 	/* Fill out the register contents for REQ1 through REQ6 */
413 	cr[0] = REQ1_ECC_AFFINE_CONVERT
414 		| (CCP_ENGINE_ECC << REQ1_ENGINE_SHIFT)
415 		| (op->u.ecc.function << REQ1_ECC_FUNCTION_SHIFT)
416 		| REQ1_EOM;
417 	cr[1] = op->src.u.dma.length - 1;
418 	cr[2] = ccp_addr_lo(&op->src.u.dma);
419 	cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
420 		| ccp_addr_hi(&op->src.u.dma);
421 	cr[4] = ccp_addr_lo(&op->dst.u.dma);
422 	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
423 		| ccp_addr_hi(&op->dst.u.dma);
424 
425 	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
426 }
427 
428 static u32 ccp_alloc_ksb(struct ccp_device *ccp, unsigned int count)
429 {
430 	int start;
431 
432 	for (;;) {
433 		mutex_lock(&ccp->ksb_mutex);
434 
435 		start = (u32)bitmap_find_next_zero_area(ccp->ksb,
436 							ccp->ksb_count,
437 							ccp->ksb_start,
438 							count, 0);
439 		if (start <= ccp->ksb_count) {
440 			bitmap_set(ccp->ksb, start, count);
441 
442 			mutex_unlock(&ccp->ksb_mutex);
443 			break;
444 		}
445 
446 		ccp->ksb_avail = 0;
447 
448 		mutex_unlock(&ccp->ksb_mutex);
449 
450 		/* Wait for KSB entries to become available */
451 		if (wait_event_interruptible(ccp->ksb_queue, ccp->ksb_avail))
452 			return 0;
453 	}
454 
455 	return KSB_START + start;
456 }
457 
458 static void ccp_free_ksb(struct ccp_device *ccp, unsigned int start,
459 			 unsigned int count)
460 {
461 	if (!start)
462 		return;
463 
464 	mutex_lock(&ccp->ksb_mutex);
465 
466 	bitmap_clear(ccp->ksb, start - KSB_START, count);
467 
468 	ccp->ksb_avail = 1;
469 
470 	mutex_unlock(&ccp->ksb_mutex);
471 
472 	wake_up_interruptible_all(&ccp->ksb_queue);
473 }
474 
475 static u32 ccp_gen_jobid(struct ccp_device *ccp)
476 {
477 	return atomic_inc_return(&ccp->current_id) & CCP_JOBID_MASK;
478 }
479 
480 static void ccp_sg_free(struct ccp_sg_workarea *wa)
481 {
482 	if (wa->dma_count)
483 		dma_unmap_sg(wa->dma_dev, wa->dma_sg, wa->nents, wa->dma_dir);
484 
485 	wa->dma_count = 0;
486 }
487 
488 static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev,
489 				struct scatterlist *sg, u64 len,
490 				enum dma_data_direction dma_dir)
491 {
492 	memset(wa, 0, sizeof(*wa));
493 
494 	wa->sg = sg;
495 	if (!sg)
496 		return 0;
497 
498 	wa->nents = sg_nents_for_len(sg, len);
499 	if (wa->nents < 0)
500 		return wa->nents;
501 
502 	wa->bytes_left = len;
503 	wa->sg_used = 0;
504 
505 	if (len == 0)
506 		return 0;
507 
508 	if (dma_dir == DMA_NONE)
509 		return 0;
510 
511 	wa->dma_sg = sg;
512 	wa->dma_dev = dev;
513 	wa->dma_dir = dma_dir;
514 	wa->dma_count = dma_map_sg(dev, sg, wa->nents, dma_dir);
515 	if (!wa->dma_count)
516 		return -ENOMEM;
517 
518 	return 0;
519 }
520 
521 static void ccp_update_sg_workarea(struct ccp_sg_workarea *wa, unsigned int len)
522 {
523 	unsigned int nbytes = min_t(u64, len, wa->bytes_left);
524 
525 	if (!wa->sg)
526 		return;
527 
528 	wa->sg_used += nbytes;
529 	wa->bytes_left -= nbytes;
530 	if (wa->sg_used == wa->sg->length) {
531 		wa->sg = sg_next(wa->sg);
532 		wa->sg_used = 0;
533 	}
534 }
535 
536 static void ccp_dm_free(struct ccp_dm_workarea *wa)
537 {
538 	if (wa->length <= CCP_DMAPOOL_MAX_SIZE) {
539 		if (wa->address)
540 			dma_pool_free(wa->dma_pool, wa->address,
541 				      wa->dma.address);
542 	} else {
543 		if (wa->dma.address)
544 			dma_unmap_single(wa->dev, wa->dma.address, wa->length,
545 					 wa->dma.dir);
546 		kfree(wa->address);
547 	}
548 
549 	wa->address = NULL;
550 	wa->dma.address = 0;
551 }
552 
553 static int ccp_init_dm_workarea(struct ccp_dm_workarea *wa,
554 				struct ccp_cmd_queue *cmd_q,
555 				unsigned int len,
556 				enum dma_data_direction dir)
557 {
558 	memset(wa, 0, sizeof(*wa));
559 
560 	if (!len)
561 		return 0;
562 
563 	wa->dev = cmd_q->ccp->dev;
564 	wa->length = len;
565 
566 	if (len <= CCP_DMAPOOL_MAX_SIZE) {
567 		wa->dma_pool = cmd_q->dma_pool;
568 
569 		wa->address = dma_pool_alloc(wa->dma_pool, GFP_KERNEL,
570 					     &wa->dma.address);
571 		if (!wa->address)
572 			return -ENOMEM;
573 
574 		wa->dma.length = CCP_DMAPOOL_MAX_SIZE;
575 
576 		memset(wa->address, 0, CCP_DMAPOOL_MAX_SIZE);
577 	} else {
578 		wa->address = kzalloc(len, GFP_KERNEL);
579 		if (!wa->address)
580 			return -ENOMEM;
581 
582 		wa->dma.address = dma_map_single(wa->dev, wa->address, len,
583 						 dir);
584 		if (!wa->dma.address)
585 			return -ENOMEM;
586 
587 		wa->dma.length = len;
588 	}
589 	wa->dma.dir = dir;
590 
591 	return 0;
592 }
593 
594 static void ccp_set_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
595 			    struct scatterlist *sg, unsigned int sg_offset,
596 			    unsigned int len)
597 {
598 	WARN_ON(!wa->address);
599 
600 	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
601 				 0);
602 }
603 
604 static void ccp_get_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
605 			    struct scatterlist *sg, unsigned int sg_offset,
606 			    unsigned int len)
607 {
608 	WARN_ON(!wa->address);
609 
610 	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
611 				 1);
612 }
613 
614 static int ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa,
615 				   struct scatterlist *sg,
616 				   unsigned int len, unsigned int se_len,
617 				   bool sign_extend)
618 {
619 	unsigned int nbytes, sg_offset, dm_offset, ksb_len, i;
620 	u8 buffer[CCP_REVERSE_BUF_SIZE];
621 
622 	if (WARN_ON(se_len > sizeof(buffer)))
623 		return -EINVAL;
624 
625 	sg_offset = len;
626 	dm_offset = 0;
627 	nbytes = len;
628 	while (nbytes) {
629 		ksb_len = min_t(unsigned int, nbytes, se_len);
630 		sg_offset -= ksb_len;
631 
632 		scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 0);
633 		for (i = 0; i < ksb_len; i++)
634 			wa->address[dm_offset + i] = buffer[ksb_len - i - 1];
635 
636 		dm_offset += ksb_len;
637 		nbytes -= ksb_len;
638 
639 		if ((ksb_len != se_len) && sign_extend) {
640 			/* Must sign-extend to nearest sign-extend length */
641 			if (wa->address[dm_offset - 1] & 0x80)
642 				memset(wa->address + dm_offset, 0xff,
643 				       se_len - ksb_len);
644 		}
645 	}
646 
647 	return 0;
648 }
649 
650 static void ccp_reverse_get_dm_area(struct ccp_dm_workarea *wa,
651 				    struct scatterlist *sg,
652 				    unsigned int len)
653 {
654 	unsigned int nbytes, sg_offset, dm_offset, ksb_len, i;
655 	u8 buffer[CCP_REVERSE_BUF_SIZE];
656 
657 	sg_offset = 0;
658 	dm_offset = len;
659 	nbytes = len;
660 	while (nbytes) {
661 		ksb_len = min_t(unsigned int, nbytes, sizeof(buffer));
662 		dm_offset -= ksb_len;
663 
664 		for (i = 0; i < ksb_len; i++)
665 			buffer[ksb_len - i - 1] = wa->address[dm_offset + i];
666 		scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 1);
667 
668 		sg_offset += ksb_len;
669 		nbytes -= ksb_len;
670 	}
671 }
672 
673 static void ccp_free_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q)
674 {
675 	ccp_dm_free(&data->dm_wa);
676 	ccp_sg_free(&data->sg_wa);
677 }
678 
679 static int ccp_init_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q,
680 			 struct scatterlist *sg, u64 sg_len,
681 			 unsigned int dm_len,
682 			 enum dma_data_direction dir)
683 {
684 	int ret;
685 
686 	memset(data, 0, sizeof(*data));
687 
688 	ret = ccp_init_sg_workarea(&data->sg_wa, cmd_q->ccp->dev, sg, sg_len,
689 				   dir);
690 	if (ret)
691 		goto e_err;
692 
693 	ret = ccp_init_dm_workarea(&data->dm_wa, cmd_q, dm_len, dir);
694 	if (ret)
695 		goto e_err;
696 
697 	return 0;
698 
699 e_err:
700 	ccp_free_data(data, cmd_q);
701 
702 	return ret;
703 }
704 
705 static unsigned int ccp_queue_buf(struct ccp_data *data, unsigned int from)
706 {
707 	struct ccp_sg_workarea *sg_wa = &data->sg_wa;
708 	struct ccp_dm_workarea *dm_wa = &data->dm_wa;
709 	unsigned int buf_count, nbytes;
710 
711 	/* Clear the buffer if setting it */
712 	if (!from)
713 		memset(dm_wa->address, 0, dm_wa->length);
714 
715 	if (!sg_wa->sg)
716 		return 0;
717 
718 	/* Perform the copy operation
719 	 *   nbytes will always be <= UINT_MAX because dm_wa->length is
720 	 *   an unsigned int
721 	 */
722 	nbytes = min_t(u64, sg_wa->bytes_left, dm_wa->length);
723 	scatterwalk_map_and_copy(dm_wa->address, sg_wa->sg, sg_wa->sg_used,
724 				 nbytes, from);
725 
726 	/* Update the structures and generate the count */
727 	buf_count = 0;
728 	while (sg_wa->bytes_left && (buf_count < dm_wa->length)) {
729 		nbytes = min(sg_wa->sg->length - sg_wa->sg_used,
730 			     dm_wa->length - buf_count);
731 		nbytes = min_t(u64, sg_wa->bytes_left, nbytes);
732 
733 		buf_count += nbytes;
734 		ccp_update_sg_workarea(sg_wa, nbytes);
735 	}
736 
737 	return buf_count;
738 }
739 
740 static unsigned int ccp_fill_queue_buf(struct ccp_data *data)
741 {
742 	return ccp_queue_buf(data, 0);
743 }
744 
745 static unsigned int ccp_empty_queue_buf(struct ccp_data *data)
746 {
747 	return ccp_queue_buf(data, 1);
748 }
749 
750 static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst,
751 			     struct ccp_op *op, unsigned int block_size,
752 			     bool blocksize_op)
753 {
754 	unsigned int sg_src_len, sg_dst_len, op_len;
755 
756 	/* The CCP can only DMA from/to one address each per operation. This
757 	 * requires that we find the smallest DMA area between the source
758 	 * and destination. The resulting len values will always be <= UINT_MAX
759 	 * because the dma length is an unsigned int.
760 	 */
761 	sg_src_len = sg_dma_len(src->sg_wa.sg) - src->sg_wa.sg_used;
762 	sg_src_len = min_t(u64, src->sg_wa.bytes_left, sg_src_len);
763 
764 	if (dst) {
765 		sg_dst_len = sg_dma_len(dst->sg_wa.sg) - dst->sg_wa.sg_used;
766 		sg_dst_len = min_t(u64, src->sg_wa.bytes_left, sg_dst_len);
767 		op_len = min(sg_src_len, sg_dst_len);
768 	} else {
769 		op_len = sg_src_len;
770 	}
771 
772 	/* The data operation length will be at least block_size in length
773 	 * or the smaller of available sg room remaining for the source or
774 	 * the destination
775 	 */
776 	op_len = max(op_len, block_size);
777 
778 	/* Unless we have to buffer data, there's no reason to wait */
779 	op->soc = 0;
780 
781 	if (sg_src_len < block_size) {
782 		/* Not enough data in the sg element, so it
783 		 * needs to be buffered into a blocksize chunk
784 		 */
785 		int cp_len = ccp_fill_queue_buf(src);
786 
787 		op->soc = 1;
788 		op->src.u.dma.address = src->dm_wa.dma.address;
789 		op->src.u.dma.offset = 0;
790 		op->src.u.dma.length = (blocksize_op) ? block_size : cp_len;
791 	} else {
792 		/* Enough data in the sg element, but we need to
793 		 * adjust for any previously copied data
794 		 */
795 		op->src.u.dma.address = sg_dma_address(src->sg_wa.sg);
796 		op->src.u.dma.offset = src->sg_wa.sg_used;
797 		op->src.u.dma.length = op_len & ~(block_size - 1);
798 
799 		ccp_update_sg_workarea(&src->sg_wa, op->src.u.dma.length);
800 	}
801 
802 	if (dst) {
803 		if (sg_dst_len < block_size) {
804 			/* Not enough room in the sg element or we're on the
805 			 * last piece of data (when using padding), so the
806 			 * output needs to be buffered into a blocksize chunk
807 			 */
808 			op->soc = 1;
809 			op->dst.u.dma.address = dst->dm_wa.dma.address;
810 			op->dst.u.dma.offset = 0;
811 			op->dst.u.dma.length = op->src.u.dma.length;
812 		} else {
813 			/* Enough room in the sg element, but we need to
814 			 * adjust for any previously used area
815 			 */
816 			op->dst.u.dma.address = sg_dma_address(dst->sg_wa.sg);
817 			op->dst.u.dma.offset = dst->sg_wa.sg_used;
818 			op->dst.u.dma.length = op->src.u.dma.length;
819 		}
820 	}
821 }
822 
823 static void ccp_process_data(struct ccp_data *src, struct ccp_data *dst,
824 			     struct ccp_op *op)
825 {
826 	op->init = 0;
827 
828 	if (dst) {
829 		if (op->dst.u.dma.address == dst->dm_wa.dma.address)
830 			ccp_empty_queue_buf(dst);
831 		else
832 			ccp_update_sg_workarea(&dst->sg_wa,
833 					       op->dst.u.dma.length);
834 	}
835 }
836 
837 static int ccp_copy_to_from_ksb(struct ccp_cmd_queue *cmd_q,
838 				struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
839 				u32 byte_swap, bool from)
840 {
841 	struct ccp_op op;
842 
843 	memset(&op, 0, sizeof(op));
844 
845 	op.cmd_q = cmd_q;
846 	op.jobid = jobid;
847 	op.eom = 1;
848 
849 	if (from) {
850 		op.soc = 1;
851 		op.src.type = CCP_MEMTYPE_KSB;
852 		op.src.u.ksb = ksb;
853 		op.dst.type = CCP_MEMTYPE_SYSTEM;
854 		op.dst.u.dma.address = wa->dma.address;
855 		op.dst.u.dma.length = wa->length;
856 	} else {
857 		op.src.type = CCP_MEMTYPE_SYSTEM;
858 		op.src.u.dma.address = wa->dma.address;
859 		op.src.u.dma.length = wa->length;
860 		op.dst.type = CCP_MEMTYPE_KSB;
861 		op.dst.u.ksb = ksb;
862 	}
863 
864 	op.u.passthru.byte_swap = byte_swap;
865 
866 	return ccp_perform_passthru(&op);
867 }
868 
869 static int ccp_copy_to_ksb(struct ccp_cmd_queue *cmd_q,
870 			   struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
871 			   u32 byte_swap)
872 {
873 	return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, false);
874 }
875 
876 static int ccp_copy_from_ksb(struct ccp_cmd_queue *cmd_q,
877 			     struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
878 			     u32 byte_swap)
879 {
880 	return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, true);
881 }
882 
883 static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q,
884 				struct ccp_cmd *cmd)
885 {
886 	struct ccp_aes_engine *aes = &cmd->u.aes;
887 	struct ccp_dm_workarea key, ctx;
888 	struct ccp_data src;
889 	struct ccp_op op;
890 	unsigned int dm_offset;
891 	int ret;
892 
893 	if (!((aes->key_len == AES_KEYSIZE_128) ||
894 	      (aes->key_len == AES_KEYSIZE_192) ||
895 	      (aes->key_len == AES_KEYSIZE_256)))
896 		return -EINVAL;
897 
898 	if (aes->src_len & (AES_BLOCK_SIZE - 1))
899 		return -EINVAL;
900 
901 	if (aes->iv_len != AES_BLOCK_SIZE)
902 		return -EINVAL;
903 
904 	if (!aes->key || !aes->iv || !aes->src)
905 		return -EINVAL;
906 
907 	if (aes->cmac_final) {
908 		if (aes->cmac_key_len != AES_BLOCK_SIZE)
909 			return -EINVAL;
910 
911 		if (!aes->cmac_key)
912 			return -EINVAL;
913 	}
914 
915 	BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1);
916 	BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1);
917 
918 	ret = -EIO;
919 	memset(&op, 0, sizeof(op));
920 	op.cmd_q = cmd_q;
921 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
922 	op.ksb_key = cmd_q->ksb_key;
923 	op.ksb_ctx = cmd_q->ksb_ctx;
924 	op.init = 1;
925 	op.u.aes.type = aes->type;
926 	op.u.aes.mode = aes->mode;
927 	op.u.aes.action = aes->action;
928 
929 	/* All supported key sizes fit in a single (32-byte) KSB entry
930 	 * and must be in little endian format. Use the 256-bit byte
931 	 * swap passthru option to convert from big endian to little
932 	 * endian.
933 	 */
934 	ret = ccp_init_dm_workarea(&key, cmd_q,
935 				   CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
936 				   DMA_TO_DEVICE);
937 	if (ret)
938 		return ret;
939 
940 	dm_offset = CCP_KSB_BYTES - aes->key_len;
941 	ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
942 	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
943 			      CCP_PASSTHRU_BYTESWAP_256BIT);
944 	if (ret) {
945 		cmd->engine_error = cmd_q->cmd_error;
946 		goto e_key;
947 	}
948 
949 	/* The AES context fits in a single (32-byte) KSB entry and
950 	 * must be in little endian format. Use the 256-bit byte swap
951 	 * passthru option to convert from big endian to little endian.
952 	 */
953 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
954 				   CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
955 				   DMA_BIDIRECTIONAL);
956 	if (ret)
957 		goto e_key;
958 
959 	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
960 	ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
961 	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
962 			      CCP_PASSTHRU_BYTESWAP_256BIT);
963 	if (ret) {
964 		cmd->engine_error = cmd_q->cmd_error;
965 		goto e_ctx;
966 	}
967 
968 	/* Send data to the CCP AES engine */
969 	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
970 			    AES_BLOCK_SIZE, DMA_TO_DEVICE);
971 	if (ret)
972 		goto e_ctx;
973 
974 	while (src.sg_wa.bytes_left) {
975 		ccp_prepare_data(&src, NULL, &op, AES_BLOCK_SIZE, true);
976 		if (aes->cmac_final && !src.sg_wa.bytes_left) {
977 			op.eom = 1;
978 
979 			/* Push the K1/K2 key to the CCP now */
980 			ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid,
981 						op.ksb_ctx,
982 						CCP_PASSTHRU_BYTESWAP_256BIT);
983 			if (ret) {
984 				cmd->engine_error = cmd_q->cmd_error;
985 				goto e_src;
986 			}
987 
988 			ccp_set_dm_area(&ctx, 0, aes->cmac_key, 0,
989 					aes->cmac_key_len);
990 			ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
991 					      CCP_PASSTHRU_BYTESWAP_256BIT);
992 			if (ret) {
993 				cmd->engine_error = cmd_q->cmd_error;
994 				goto e_src;
995 			}
996 		}
997 
998 		ret = ccp_perform_aes(&op);
999 		if (ret) {
1000 			cmd->engine_error = cmd_q->cmd_error;
1001 			goto e_src;
1002 		}
1003 
1004 		ccp_process_data(&src, NULL, &op);
1005 	}
1006 
1007 	/* Retrieve the AES context - convert from LE to BE using
1008 	 * 32-byte (256-bit) byteswapping
1009 	 */
1010 	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1011 				CCP_PASSTHRU_BYTESWAP_256BIT);
1012 	if (ret) {
1013 		cmd->engine_error = cmd_q->cmd_error;
1014 		goto e_src;
1015 	}
1016 
1017 	/* ...but we only need AES_BLOCK_SIZE bytes */
1018 	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
1019 	ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
1020 
1021 e_src:
1022 	ccp_free_data(&src, cmd_q);
1023 
1024 e_ctx:
1025 	ccp_dm_free(&ctx);
1026 
1027 e_key:
1028 	ccp_dm_free(&key);
1029 
1030 	return ret;
1031 }
1032 
1033 static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1034 {
1035 	struct ccp_aes_engine *aes = &cmd->u.aes;
1036 	struct ccp_dm_workarea key, ctx;
1037 	struct ccp_data src, dst;
1038 	struct ccp_op op;
1039 	unsigned int dm_offset;
1040 	bool in_place = false;
1041 	int ret;
1042 
1043 	if (aes->mode == CCP_AES_MODE_CMAC)
1044 		return ccp_run_aes_cmac_cmd(cmd_q, cmd);
1045 
1046 	if (!((aes->key_len == AES_KEYSIZE_128) ||
1047 	      (aes->key_len == AES_KEYSIZE_192) ||
1048 	      (aes->key_len == AES_KEYSIZE_256)))
1049 		return -EINVAL;
1050 
1051 	if (((aes->mode == CCP_AES_MODE_ECB) ||
1052 	     (aes->mode == CCP_AES_MODE_CBC) ||
1053 	     (aes->mode == CCP_AES_MODE_CFB)) &&
1054 	    (aes->src_len & (AES_BLOCK_SIZE - 1)))
1055 		return -EINVAL;
1056 
1057 	if (!aes->key || !aes->src || !aes->dst)
1058 		return -EINVAL;
1059 
1060 	if (aes->mode != CCP_AES_MODE_ECB) {
1061 		if (aes->iv_len != AES_BLOCK_SIZE)
1062 			return -EINVAL;
1063 
1064 		if (!aes->iv)
1065 			return -EINVAL;
1066 	}
1067 
1068 	BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1);
1069 	BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1);
1070 
1071 	ret = -EIO;
1072 	memset(&op, 0, sizeof(op));
1073 	op.cmd_q = cmd_q;
1074 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1075 	op.ksb_key = cmd_q->ksb_key;
1076 	op.ksb_ctx = cmd_q->ksb_ctx;
1077 	op.init = (aes->mode == CCP_AES_MODE_ECB) ? 0 : 1;
1078 	op.u.aes.type = aes->type;
1079 	op.u.aes.mode = aes->mode;
1080 	op.u.aes.action = aes->action;
1081 
1082 	/* All supported key sizes fit in a single (32-byte) KSB entry
1083 	 * and must be in little endian format. Use the 256-bit byte
1084 	 * swap passthru option to convert from big endian to little
1085 	 * endian.
1086 	 */
1087 	ret = ccp_init_dm_workarea(&key, cmd_q,
1088 				   CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
1089 				   DMA_TO_DEVICE);
1090 	if (ret)
1091 		return ret;
1092 
1093 	dm_offset = CCP_KSB_BYTES - aes->key_len;
1094 	ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
1095 	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
1096 			      CCP_PASSTHRU_BYTESWAP_256BIT);
1097 	if (ret) {
1098 		cmd->engine_error = cmd_q->cmd_error;
1099 		goto e_key;
1100 	}
1101 
1102 	/* The AES context fits in a single (32-byte) KSB entry and
1103 	 * must be in little endian format. Use the 256-bit byte swap
1104 	 * passthru option to convert from big endian to little endian.
1105 	 */
1106 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
1107 				   CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
1108 				   DMA_BIDIRECTIONAL);
1109 	if (ret)
1110 		goto e_key;
1111 
1112 	if (aes->mode != CCP_AES_MODE_ECB) {
1113 		/* Load the AES context - conver to LE */
1114 		dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
1115 		ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
1116 		ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1117 				      CCP_PASSTHRU_BYTESWAP_256BIT);
1118 		if (ret) {
1119 			cmd->engine_error = cmd_q->cmd_error;
1120 			goto e_ctx;
1121 		}
1122 	}
1123 
1124 	/* Prepare the input and output data workareas. For in-place
1125 	 * operations we need to set the dma direction to BIDIRECTIONAL
1126 	 * and copy the src workarea to the dst workarea.
1127 	 */
1128 	if (sg_virt(aes->src) == sg_virt(aes->dst))
1129 		in_place = true;
1130 
1131 	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
1132 			    AES_BLOCK_SIZE,
1133 			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1134 	if (ret)
1135 		goto e_ctx;
1136 
1137 	if (in_place) {
1138 		dst = src;
1139 	} else {
1140 		ret = ccp_init_data(&dst, cmd_q, aes->dst, aes->src_len,
1141 				    AES_BLOCK_SIZE, DMA_FROM_DEVICE);
1142 		if (ret)
1143 			goto e_src;
1144 	}
1145 
1146 	/* Send data to the CCP AES engine */
1147 	while (src.sg_wa.bytes_left) {
1148 		ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true);
1149 		if (!src.sg_wa.bytes_left) {
1150 			op.eom = 1;
1151 
1152 			/* Since we don't retrieve the AES context in ECB
1153 			 * mode we have to wait for the operation to complete
1154 			 * on the last piece of data
1155 			 */
1156 			if (aes->mode == CCP_AES_MODE_ECB)
1157 				op.soc = 1;
1158 		}
1159 
1160 		ret = ccp_perform_aes(&op);
1161 		if (ret) {
1162 			cmd->engine_error = cmd_q->cmd_error;
1163 			goto e_dst;
1164 		}
1165 
1166 		ccp_process_data(&src, &dst, &op);
1167 	}
1168 
1169 	if (aes->mode != CCP_AES_MODE_ECB) {
1170 		/* Retrieve the AES context - convert from LE to BE using
1171 		 * 32-byte (256-bit) byteswapping
1172 		 */
1173 		ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1174 					CCP_PASSTHRU_BYTESWAP_256BIT);
1175 		if (ret) {
1176 			cmd->engine_error = cmd_q->cmd_error;
1177 			goto e_dst;
1178 		}
1179 
1180 		/* ...but we only need AES_BLOCK_SIZE bytes */
1181 		dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
1182 		ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
1183 	}
1184 
1185 e_dst:
1186 	if (!in_place)
1187 		ccp_free_data(&dst, cmd_q);
1188 
1189 e_src:
1190 	ccp_free_data(&src, cmd_q);
1191 
1192 e_ctx:
1193 	ccp_dm_free(&ctx);
1194 
1195 e_key:
1196 	ccp_dm_free(&key);
1197 
1198 	return ret;
1199 }
1200 
1201 static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
1202 			       struct ccp_cmd *cmd)
1203 {
1204 	struct ccp_xts_aes_engine *xts = &cmd->u.xts;
1205 	struct ccp_dm_workarea key, ctx;
1206 	struct ccp_data src, dst;
1207 	struct ccp_op op;
1208 	unsigned int unit_size, dm_offset;
1209 	bool in_place = false;
1210 	int ret;
1211 
1212 	switch (xts->unit_size) {
1213 	case CCP_XTS_AES_UNIT_SIZE_16:
1214 		unit_size = 16;
1215 		break;
1216 	case CCP_XTS_AES_UNIT_SIZE_512:
1217 		unit_size = 512;
1218 		break;
1219 	case CCP_XTS_AES_UNIT_SIZE_1024:
1220 		unit_size = 1024;
1221 		break;
1222 	case CCP_XTS_AES_UNIT_SIZE_2048:
1223 		unit_size = 2048;
1224 		break;
1225 	case CCP_XTS_AES_UNIT_SIZE_4096:
1226 		unit_size = 4096;
1227 		break;
1228 
1229 	default:
1230 		return -EINVAL;
1231 	}
1232 
1233 	if (xts->key_len != AES_KEYSIZE_128)
1234 		return -EINVAL;
1235 
1236 	if (!xts->final && (xts->src_len & (AES_BLOCK_SIZE - 1)))
1237 		return -EINVAL;
1238 
1239 	if (xts->iv_len != AES_BLOCK_SIZE)
1240 		return -EINVAL;
1241 
1242 	if (!xts->key || !xts->iv || !xts->src || !xts->dst)
1243 		return -EINVAL;
1244 
1245 	BUILD_BUG_ON(CCP_XTS_AES_KEY_KSB_COUNT != 1);
1246 	BUILD_BUG_ON(CCP_XTS_AES_CTX_KSB_COUNT != 1);
1247 
1248 	ret = -EIO;
1249 	memset(&op, 0, sizeof(op));
1250 	op.cmd_q = cmd_q;
1251 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1252 	op.ksb_key = cmd_q->ksb_key;
1253 	op.ksb_ctx = cmd_q->ksb_ctx;
1254 	op.init = 1;
1255 	op.u.xts.action = xts->action;
1256 	op.u.xts.unit_size = xts->unit_size;
1257 
1258 	/* All supported key sizes fit in a single (32-byte) KSB entry
1259 	 * and must be in little endian format. Use the 256-bit byte
1260 	 * swap passthru option to convert from big endian to little
1261 	 * endian.
1262 	 */
1263 	ret = ccp_init_dm_workarea(&key, cmd_q,
1264 				   CCP_XTS_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
1265 				   DMA_TO_DEVICE);
1266 	if (ret)
1267 		return ret;
1268 
1269 	dm_offset = CCP_KSB_BYTES - AES_KEYSIZE_128;
1270 	ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len);
1271 	ccp_set_dm_area(&key, 0, xts->key, dm_offset, xts->key_len);
1272 	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
1273 			      CCP_PASSTHRU_BYTESWAP_256BIT);
1274 	if (ret) {
1275 		cmd->engine_error = cmd_q->cmd_error;
1276 		goto e_key;
1277 	}
1278 
1279 	/* The AES context fits in a single (32-byte) KSB entry and
1280 	 * for XTS is already in little endian format so no byte swapping
1281 	 * is needed.
1282 	 */
1283 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
1284 				   CCP_XTS_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
1285 				   DMA_BIDIRECTIONAL);
1286 	if (ret)
1287 		goto e_key;
1288 
1289 	ccp_set_dm_area(&ctx, 0, xts->iv, 0, xts->iv_len);
1290 	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1291 			      CCP_PASSTHRU_BYTESWAP_NOOP);
1292 	if (ret) {
1293 		cmd->engine_error = cmd_q->cmd_error;
1294 		goto e_ctx;
1295 	}
1296 
1297 	/* Prepare the input and output data workareas. For in-place
1298 	 * operations we need to set the dma direction to BIDIRECTIONAL
1299 	 * and copy the src workarea to the dst workarea.
1300 	 */
1301 	if (sg_virt(xts->src) == sg_virt(xts->dst))
1302 		in_place = true;
1303 
1304 	ret = ccp_init_data(&src, cmd_q, xts->src, xts->src_len,
1305 			    unit_size,
1306 			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1307 	if (ret)
1308 		goto e_ctx;
1309 
1310 	if (in_place) {
1311 		dst = src;
1312 	} else {
1313 		ret = ccp_init_data(&dst, cmd_q, xts->dst, xts->src_len,
1314 				    unit_size, DMA_FROM_DEVICE);
1315 		if (ret)
1316 			goto e_src;
1317 	}
1318 
1319 	/* Send data to the CCP AES engine */
1320 	while (src.sg_wa.bytes_left) {
1321 		ccp_prepare_data(&src, &dst, &op, unit_size, true);
1322 		if (!src.sg_wa.bytes_left)
1323 			op.eom = 1;
1324 
1325 		ret = ccp_perform_xts_aes(&op);
1326 		if (ret) {
1327 			cmd->engine_error = cmd_q->cmd_error;
1328 			goto e_dst;
1329 		}
1330 
1331 		ccp_process_data(&src, &dst, &op);
1332 	}
1333 
1334 	/* Retrieve the AES context - convert from LE to BE using
1335 	 * 32-byte (256-bit) byteswapping
1336 	 */
1337 	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1338 				CCP_PASSTHRU_BYTESWAP_256BIT);
1339 	if (ret) {
1340 		cmd->engine_error = cmd_q->cmd_error;
1341 		goto e_dst;
1342 	}
1343 
1344 	/* ...but we only need AES_BLOCK_SIZE bytes */
1345 	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
1346 	ccp_get_dm_area(&ctx, dm_offset, xts->iv, 0, xts->iv_len);
1347 
1348 e_dst:
1349 	if (!in_place)
1350 		ccp_free_data(&dst, cmd_q);
1351 
1352 e_src:
1353 	ccp_free_data(&src, cmd_q);
1354 
1355 e_ctx:
1356 	ccp_dm_free(&ctx);
1357 
1358 e_key:
1359 	ccp_dm_free(&key);
1360 
1361 	return ret;
1362 }
1363 
1364 static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1365 {
1366 	struct ccp_sha_engine *sha = &cmd->u.sha;
1367 	struct ccp_dm_workarea ctx;
1368 	struct ccp_data src;
1369 	struct ccp_op op;
1370 	int ret;
1371 
1372 	if (sha->ctx_len != CCP_SHA_CTXSIZE)
1373 		return -EINVAL;
1374 
1375 	if (!sha->ctx)
1376 		return -EINVAL;
1377 
1378 	if (!sha->final && (sha->src_len & (CCP_SHA_BLOCKSIZE - 1)))
1379 		return -EINVAL;
1380 
1381 	if (!sha->src_len) {
1382 		const u8 *sha_zero;
1383 
1384 		/* Not final, just return */
1385 		if (!sha->final)
1386 			return 0;
1387 
1388 		/* CCP can't do a zero length sha operation so the caller
1389 		 * must buffer the data.
1390 		 */
1391 		if (sha->msg_bits)
1392 			return -EINVAL;
1393 
1394 		/* A sha operation for a message with a total length of zero,
1395 		 * return known result.
1396 		 */
1397 		switch (sha->type) {
1398 		case CCP_SHA_TYPE_1:
1399 			sha_zero = ccp_sha1_zero;
1400 			break;
1401 		case CCP_SHA_TYPE_224:
1402 			sha_zero = ccp_sha224_zero;
1403 			break;
1404 		case CCP_SHA_TYPE_256:
1405 			sha_zero = ccp_sha256_zero;
1406 			break;
1407 		default:
1408 			return -EINVAL;
1409 		}
1410 
1411 		scatterwalk_map_and_copy((void *)sha_zero, sha->ctx, 0,
1412 					 sha->ctx_len, 1);
1413 
1414 		return 0;
1415 	}
1416 
1417 	if (!sha->src)
1418 		return -EINVAL;
1419 
1420 	BUILD_BUG_ON(CCP_SHA_KSB_COUNT != 1);
1421 
1422 	memset(&op, 0, sizeof(op));
1423 	op.cmd_q = cmd_q;
1424 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1425 	op.ksb_ctx = cmd_q->ksb_ctx;
1426 	op.u.sha.type = sha->type;
1427 	op.u.sha.msg_bits = sha->msg_bits;
1428 
1429 	/* The SHA context fits in a single (32-byte) KSB entry and
1430 	 * must be in little endian format. Use the 256-bit byte swap
1431 	 * passthru option to convert from big endian to little endian.
1432 	 */
1433 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
1434 				   CCP_SHA_KSB_COUNT * CCP_KSB_BYTES,
1435 				   DMA_BIDIRECTIONAL);
1436 	if (ret)
1437 		return ret;
1438 
1439 	if (sha->first) {
1440 		const __be32 *init;
1441 
1442 		switch (sha->type) {
1443 		case CCP_SHA_TYPE_1:
1444 			init = ccp_sha1_init;
1445 			break;
1446 		case CCP_SHA_TYPE_224:
1447 			init = ccp_sha224_init;
1448 			break;
1449 		case CCP_SHA_TYPE_256:
1450 			init = ccp_sha256_init;
1451 			break;
1452 		default:
1453 			ret = -EINVAL;
1454 			goto e_ctx;
1455 		}
1456 		memcpy(ctx.address, init, CCP_SHA_CTXSIZE);
1457 	} else {
1458 		ccp_set_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
1459 	}
1460 
1461 	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1462 			      CCP_PASSTHRU_BYTESWAP_256BIT);
1463 	if (ret) {
1464 		cmd->engine_error = cmd_q->cmd_error;
1465 		goto e_ctx;
1466 	}
1467 
1468 	/* Send data to the CCP SHA engine */
1469 	ret = ccp_init_data(&src, cmd_q, sha->src, sha->src_len,
1470 			    CCP_SHA_BLOCKSIZE, DMA_TO_DEVICE);
1471 	if (ret)
1472 		goto e_ctx;
1473 
1474 	while (src.sg_wa.bytes_left) {
1475 		ccp_prepare_data(&src, NULL, &op, CCP_SHA_BLOCKSIZE, false);
1476 		if (sha->final && !src.sg_wa.bytes_left)
1477 			op.eom = 1;
1478 
1479 		ret = ccp_perform_sha(&op);
1480 		if (ret) {
1481 			cmd->engine_error = cmd_q->cmd_error;
1482 			goto e_data;
1483 		}
1484 
1485 		ccp_process_data(&src, NULL, &op);
1486 	}
1487 
1488 	/* Retrieve the SHA context - convert from LE to BE using
1489 	 * 32-byte (256-bit) byteswapping to BE
1490 	 */
1491 	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1492 				CCP_PASSTHRU_BYTESWAP_256BIT);
1493 	if (ret) {
1494 		cmd->engine_error = cmd_q->cmd_error;
1495 		goto e_data;
1496 	}
1497 
1498 	ccp_get_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
1499 
1500 	if (sha->final && sha->opad) {
1501 		/* HMAC operation, recursively perform final SHA */
1502 		struct ccp_cmd hmac_cmd;
1503 		struct scatterlist sg;
1504 		u64 block_size, digest_size;
1505 		u8 *hmac_buf;
1506 
1507 		switch (sha->type) {
1508 		case CCP_SHA_TYPE_1:
1509 			block_size = SHA1_BLOCK_SIZE;
1510 			digest_size = SHA1_DIGEST_SIZE;
1511 			break;
1512 		case CCP_SHA_TYPE_224:
1513 			block_size = SHA224_BLOCK_SIZE;
1514 			digest_size = SHA224_DIGEST_SIZE;
1515 			break;
1516 		case CCP_SHA_TYPE_256:
1517 			block_size = SHA256_BLOCK_SIZE;
1518 			digest_size = SHA256_DIGEST_SIZE;
1519 			break;
1520 		default:
1521 			ret = -EINVAL;
1522 			goto e_data;
1523 		}
1524 
1525 		if (sha->opad_len != block_size) {
1526 			ret = -EINVAL;
1527 			goto e_data;
1528 		}
1529 
1530 		hmac_buf = kmalloc(block_size + digest_size, GFP_KERNEL);
1531 		if (!hmac_buf) {
1532 			ret = -ENOMEM;
1533 			goto e_data;
1534 		}
1535 		sg_init_one(&sg, hmac_buf, block_size + digest_size);
1536 
1537 		scatterwalk_map_and_copy(hmac_buf, sha->opad, 0, block_size, 0);
1538 		memcpy(hmac_buf + block_size, ctx.address, digest_size);
1539 
1540 		memset(&hmac_cmd, 0, sizeof(hmac_cmd));
1541 		hmac_cmd.engine = CCP_ENGINE_SHA;
1542 		hmac_cmd.u.sha.type = sha->type;
1543 		hmac_cmd.u.sha.ctx = sha->ctx;
1544 		hmac_cmd.u.sha.ctx_len = sha->ctx_len;
1545 		hmac_cmd.u.sha.src = &sg;
1546 		hmac_cmd.u.sha.src_len = block_size + digest_size;
1547 		hmac_cmd.u.sha.opad = NULL;
1548 		hmac_cmd.u.sha.opad_len = 0;
1549 		hmac_cmd.u.sha.first = 1;
1550 		hmac_cmd.u.sha.final = 1;
1551 		hmac_cmd.u.sha.msg_bits = (block_size + digest_size) << 3;
1552 
1553 		ret = ccp_run_sha_cmd(cmd_q, &hmac_cmd);
1554 		if (ret)
1555 			cmd->engine_error = hmac_cmd.engine_error;
1556 
1557 		kfree(hmac_buf);
1558 	}
1559 
1560 e_data:
1561 	ccp_free_data(&src, cmd_q);
1562 
1563 e_ctx:
1564 	ccp_dm_free(&ctx);
1565 
1566 	return ret;
1567 }
1568 
1569 static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1570 {
1571 	struct ccp_rsa_engine *rsa = &cmd->u.rsa;
1572 	struct ccp_dm_workarea exp, src;
1573 	struct ccp_data dst;
1574 	struct ccp_op op;
1575 	unsigned int ksb_count, i_len, o_len;
1576 	int ret;
1577 
1578 	if (rsa->key_size > CCP_RSA_MAX_WIDTH)
1579 		return -EINVAL;
1580 
1581 	if (!rsa->exp || !rsa->mod || !rsa->src || !rsa->dst)
1582 		return -EINVAL;
1583 
1584 	/* The RSA modulus must precede the message being acted upon, so
1585 	 * it must be copied to a DMA area where the message and the
1586 	 * modulus can be concatenated.  Therefore the input buffer
1587 	 * length required is twice the output buffer length (which
1588 	 * must be a multiple of 256-bits).
1589 	 */
1590 	o_len = ((rsa->key_size + 255) / 256) * 32;
1591 	i_len = o_len * 2;
1592 
1593 	ksb_count = o_len / CCP_KSB_BYTES;
1594 
1595 	memset(&op, 0, sizeof(op));
1596 	op.cmd_q = cmd_q;
1597 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1598 	op.ksb_key = ccp_alloc_ksb(cmd_q->ccp, ksb_count);
1599 	if (!op.ksb_key)
1600 		return -EIO;
1601 
1602 	/* The RSA exponent may span multiple (32-byte) KSB entries and must
1603 	 * be in little endian format. Reverse copy each 32-byte chunk
1604 	 * of the exponent (En chunk to E0 chunk, E(n-1) chunk to E1 chunk)
1605 	 * and each byte within that chunk and do not perform any byte swap
1606 	 * operations on the passthru operation.
1607 	 */
1608 	ret = ccp_init_dm_workarea(&exp, cmd_q, o_len, DMA_TO_DEVICE);
1609 	if (ret)
1610 		goto e_ksb;
1611 
1612 	ret = ccp_reverse_set_dm_area(&exp, rsa->exp, rsa->exp_len,
1613 				      CCP_KSB_BYTES, false);
1614 	if (ret)
1615 		goto e_exp;
1616 	ret = ccp_copy_to_ksb(cmd_q, &exp, op.jobid, op.ksb_key,
1617 			      CCP_PASSTHRU_BYTESWAP_NOOP);
1618 	if (ret) {
1619 		cmd->engine_error = cmd_q->cmd_error;
1620 		goto e_exp;
1621 	}
1622 
1623 	/* Concatenate the modulus and the message. Both the modulus and
1624 	 * the operands must be in little endian format.  Since the input
1625 	 * is in big endian format it must be converted.
1626 	 */
1627 	ret = ccp_init_dm_workarea(&src, cmd_q, i_len, DMA_TO_DEVICE);
1628 	if (ret)
1629 		goto e_exp;
1630 
1631 	ret = ccp_reverse_set_dm_area(&src, rsa->mod, rsa->mod_len,
1632 				      CCP_KSB_BYTES, false);
1633 	if (ret)
1634 		goto e_src;
1635 	src.address += o_len;	/* Adjust the address for the copy operation */
1636 	ret = ccp_reverse_set_dm_area(&src, rsa->src, rsa->src_len,
1637 				      CCP_KSB_BYTES, false);
1638 	if (ret)
1639 		goto e_src;
1640 	src.address -= o_len;	/* Reset the address to original value */
1641 
1642 	/* Prepare the output area for the operation */
1643 	ret = ccp_init_data(&dst, cmd_q, rsa->dst, rsa->mod_len,
1644 			    o_len, DMA_FROM_DEVICE);
1645 	if (ret)
1646 		goto e_src;
1647 
1648 	op.soc = 1;
1649 	op.src.u.dma.address = src.dma.address;
1650 	op.src.u.dma.offset = 0;
1651 	op.src.u.dma.length = i_len;
1652 	op.dst.u.dma.address = dst.dm_wa.dma.address;
1653 	op.dst.u.dma.offset = 0;
1654 	op.dst.u.dma.length = o_len;
1655 
1656 	op.u.rsa.mod_size = rsa->key_size;
1657 	op.u.rsa.input_len = i_len;
1658 
1659 	ret = ccp_perform_rsa(&op);
1660 	if (ret) {
1661 		cmd->engine_error = cmd_q->cmd_error;
1662 		goto e_dst;
1663 	}
1664 
1665 	ccp_reverse_get_dm_area(&dst.dm_wa, rsa->dst, rsa->mod_len);
1666 
1667 e_dst:
1668 	ccp_free_data(&dst, cmd_q);
1669 
1670 e_src:
1671 	ccp_dm_free(&src);
1672 
1673 e_exp:
1674 	ccp_dm_free(&exp);
1675 
1676 e_ksb:
1677 	ccp_free_ksb(cmd_q->ccp, op.ksb_key, ksb_count);
1678 
1679 	return ret;
1680 }
1681 
1682 static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q,
1683 				struct ccp_cmd *cmd)
1684 {
1685 	struct ccp_passthru_engine *pt = &cmd->u.passthru;
1686 	struct ccp_dm_workarea mask;
1687 	struct ccp_data src, dst;
1688 	struct ccp_op op;
1689 	bool in_place = false;
1690 	unsigned int i;
1691 	int ret;
1692 
1693 	if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1)))
1694 		return -EINVAL;
1695 
1696 	if (!pt->src || !pt->dst)
1697 		return -EINVAL;
1698 
1699 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
1700 		if (pt->mask_len != CCP_PASSTHRU_MASKSIZE)
1701 			return -EINVAL;
1702 		if (!pt->mask)
1703 			return -EINVAL;
1704 	}
1705 
1706 	BUILD_BUG_ON(CCP_PASSTHRU_KSB_COUNT != 1);
1707 
1708 	memset(&op, 0, sizeof(op));
1709 	op.cmd_q = cmd_q;
1710 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1711 
1712 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
1713 		/* Load the mask */
1714 		op.ksb_key = cmd_q->ksb_key;
1715 
1716 		ret = ccp_init_dm_workarea(&mask, cmd_q,
1717 					   CCP_PASSTHRU_KSB_COUNT *
1718 					   CCP_KSB_BYTES,
1719 					   DMA_TO_DEVICE);
1720 		if (ret)
1721 			return ret;
1722 
1723 		ccp_set_dm_area(&mask, 0, pt->mask, 0, pt->mask_len);
1724 		ret = ccp_copy_to_ksb(cmd_q, &mask, op.jobid, op.ksb_key,
1725 				      CCP_PASSTHRU_BYTESWAP_NOOP);
1726 		if (ret) {
1727 			cmd->engine_error = cmd_q->cmd_error;
1728 			goto e_mask;
1729 		}
1730 	}
1731 
1732 	/* Prepare the input and output data workareas. For in-place
1733 	 * operations we need to set the dma direction to BIDIRECTIONAL
1734 	 * and copy the src workarea to the dst workarea.
1735 	 */
1736 	if (sg_virt(pt->src) == sg_virt(pt->dst))
1737 		in_place = true;
1738 
1739 	ret = ccp_init_data(&src, cmd_q, pt->src, pt->src_len,
1740 			    CCP_PASSTHRU_MASKSIZE,
1741 			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1742 	if (ret)
1743 		goto e_mask;
1744 
1745 	if (in_place) {
1746 		dst = src;
1747 	} else {
1748 		ret = ccp_init_data(&dst, cmd_q, pt->dst, pt->src_len,
1749 				    CCP_PASSTHRU_MASKSIZE, DMA_FROM_DEVICE);
1750 		if (ret)
1751 			goto e_src;
1752 	}
1753 
1754 	/* Send data to the CCP Passthru engine
1755 	 *   Because the CCP engine works on a single source and destination
1756 	 *   dma address at a time, each entry in the source scatterlist
1757 	 *   (after the dma_map_sg call) must be less than or equal to the
1758 	 *   (remaining) length in the destination scatterlist entry and the
1759 	 *   length must be a multiple of CCP_PASSTHRU_BLOCKSIZE
1760 	 */
1761 	dst.sg_wa.sg_used = 0;
1762 	for (i = 1; i <= src.sg_wa.dma_count; i++) {
1763 		if (!dst.sg_wa.sg ||
1764 		    (dst.sg_wa.sg->length < src.sg_wa.sg->length)) {
1765 			ret = -EINVAL;
1766 			goto e_dst;
1767 		}
1768 
1769 		if (i == src.sg_wa.dma_count) {
1770 			op.eom = 1;
1771 			op.soc = 1;
1772 		}
1773 
1774 		op.src.type = CCP_MEMTYPE_SYSTEM;
1775 		op.src.u.dma.address = sg_dma_address(src.sg_wa.sg);
1776 		op.src.u.dma.offset = 0;
1777 		op.src.u.dma.length = sg_dma_len(src.sg_wa.sg);
1778 
1779 		op.dst.type = CCP_MEMTYPE_SYSTEM;
1780 		op.dst.u.dma.address = sg_dma_address(dst.sg_wa.sg);
1781 		op.dst.u.dma.offset = dst.sg_wa.sg_used;
1782 		op.dst.u.dma.length = op.src.u.dma.length;
1783 
1784 		ret = ccp_perform_passthru(&op);
1785 		if (ret) {
1786 			cmd->engine_error = cmd_q->cmd_error;
1787 			goto e_dst;
1788 		}
1789 
1790 		dst.sg_wa.sg_used += src.sg_wa.sg->length;
1791 		if (dst.sg_wa.sg_used == dst.sg_wa.sg->length) {
1792 			dst.sg_wa.sg = sg_next(dst.sg_wa.sg);
1793 			dst.sg_wa.sg_used = 0;
1794 		}
1795 		src.sg_wa.sg = sg_next(src.sg_wa.sg);
1796 	}
1797 
1798 e_dst:
1799 	if (!in_place)
1800 		ccp_free_data(&dst, cmd_q);
1801 
1802 e_src:
1803 	ccp_free_data(&src, cmd_q);
1804 
1805 e_mask:
1806 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
1807 		ccp_dm_free(&mask);
1808 
1809 	return ret;
1810 }
1811 
1812 static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1813 {
1814 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
1815 	struct ccp_dm_workarea src, dst;
1816 	struct ccp_op op;
1817 	int ret;
1818 	u8 *save;
1819 
1820 	if (!ecc->u.mm.operand_1 ||
1821 	    (ecc->u.mm.operand_1_len > CCP_ECC_MODULUS_BYTES))
1822 		return -EINVAL;
1823 
1824 	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT)
1825 		if (!ecc->u.mm.operand_2 ||
1826 		    (ecc->u.mm.operand_2_len > CCP_ECC_MODULUS_BYTES))
1827 			return -EINVAL;
1828 
1829 	if (!ecc->u.mm.result ||
1830 	    (ecc->u.mm.result_len < CCP_ECC_MODULUS_BYTES))
1831 		return -EINVAL;
1832 
1833 	memset(&op, 0, sizeof(op));
1834 	op.cmd_q = cmd_q;
1835 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1836 
1837 	/* Concatenate the modulus and the operands. Both the modulus and
1838 	 * the operands must be in little endian format.  Since the input
1839 	 * is in big endian format it must be converted and placed in a
1840 	 * fixed length buffer.
1841 	 */
1842 	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
1843 				   DMA_TO_DEVICE);
1844 	if (ret)
1845 		return ret;
1846 
1847 	/* Save the workarea address since it is updated in order to perform
1848 	 * the concatenation
1849 	 */
1850 	save = src.address;
1851 
1852 	/* Copy the ECC modulus */
1853 	ret = ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
1854 				      CCP_ECC_OPERAND_SIZE, false);
1855 	if (ret)
1856 		goto e_src;
1857 	src.address += CCP_ECC_OPERAND_SIZE;
1858 
1859 	/* Copy the first operand */
1860 	ret = ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_1,
1861 				      ecc->u.mm.operand_1_len,
1862 				      CCP_ECC_OPERAND_SIZE, false);
1863 	if (ret)
1864 		goto e_src;
1865 	src.address += CCP_ECC_OPERAND_SIZE;
1866 
1867 	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT) {
1868 		/* Copy the second operand */
1869 		ret = ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_2,
1870 					      ecc->u.mm.operand_2_len,
1871 					      CCP_ECC_OPERAND_SIZE, false);
1872 		if (ret)
1873 			goto e_src;
1874 		src.address += CCP_ECC_OPERAND_SIZE;
1875 	}
1876 
1877 	/* Restore the workarea address */
1878 	src.address = save;
1879 
1880 	/* Prepare the output area for the operation */
1881 	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
1882 				   DMA_FROM_DEVICE);
1883 	if (ret)
1884 		goto e_src;
1885 
1886 	op.soc = 1;
1887 	op.src.u.dma.address = src.dma.address;
1888 	op.src.u.dma.offset = 0;
1889 	op.src.u.dma.length = src.length;
1890 	op.dst.u.dma.address = dst.dma.address;
1891 	op.dst.u.dma.offset = 0;
1892 	op.dst.u.dma.length = dst.length;
1893 
1894 	op.u.ecc.function = cmd->u.ecc.function;
1895 
1896 	ret = ccp_perform_ecc(&op);
1897 	if (ret) {
1898 		cmd->engine_error = cmd_q->cmd_error;
1899 		goto e_dst;
1900 	}
1901 
1902 	ecc->ecc_result = le16_to_cpup(
1903 		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
1904 	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
1905 		ret = -EIO;
1906 		goto e_dst;
1907 	}
1908 
1909 	/* Save the ECC result */
1910 	ccp_reverse_get_dm_area(&dst, ecc->u.mm.result, CCP_ECC_MODULUS_BYTES);
1911 
1912 e_dst:
1913 	ccp_dm_free(&dst);
1914 
1915 e_src:
1916 	ccp_dm_free(&src);
1917 
1918 	return ret;
1919 }
1920 
1921 static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1922 {
1923 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
1924 	struct ccp_dm_workarea src, dst;
1925 	struct ccp_op op;
1926 	int ret;
1927 	u8 *save;
1928 
1929 	if (!ecc->u.pm.point_1.x ||
1930 	    (ecc->u.pm.point_1.x_len > CCP_ECC_MODULUS_BYTES) ||
1931 	    !ecc->u.pm.point_1.y ||
1932 	    (ecc->u.pm.point_1.y_len > CCP_ECC_MODULUS_BYTES))
1933 		return -EINVAL;
1934 
1935 	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
1936 		if (!ecc->u.pm.point_2.x ||
1937 		    (ecc->u.pm.point_2.x_len > CCP_ECC_MODULUS_BYTES) ||
1938 		    !ecc->u.pm.point_2.y ||
1939 		    (ecc->u.pm.point_2.y_len > CCP_ECC_MODULUS_BYTES))
1940 			return -EINVAL;
1941 	} else {
1942 		if (!ecc->u.pm.domain_a ||
1943 		    (ecc->u.pm.domain_a_len > CCP_ECC_MODULUS_BYTES))
1944 			return -EINVAL;
1945 
1946 		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT)
1947 			if (!ecc->u.pm.scalar ||
1948 			    (ecc->u.pm.scalar_len > CCP_ECC_MODULUS_BYTES))
1949 				return -EINVAL;
1950 	}
1951 
1952 	if (!ecc->u.pm.result.x ||
1953 	    (ecc->u.pm.result.x_len < CCP_ECC_MODULUS_BYTES) ||
1954 	    !ecc->u.pm.result.y ||
1955 	    (ecc->u.pm.result.y_len < CCP_ECC_MODULUS_BYTES))
1956 		return -EINVAL;
1957 
1958 	memset(&op, 0, sizeof(op));
1959 	op.cmd_q = cmd_q;
1960 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1961 
1962 	/* Concatenate the modulus and the operands. Both the modulus and
1963 	 * the operands must be in little endian format.  Since the input
1964 	 * is in big endian format it must be converted and placed in a
1965 	 * fixed length buffer.
1966 	 */
1967 	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
1968 				   DMA_TO_DEVICE);
1969 	if (ret)
1970 		return ret;
1971 
1972 	/* Save the workarea address since it is updated in order to perform
1973 	 * the concatenation
1974 	 */
1975 	save = src.address;
1976 
1977 	/* Copy the ECC modulus */
1978 	ret = ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
1979 				      CCP_ECC_OPERAND_SIZE, false);
1980 	if (ret)
1981 		goto e_src;
1982 	src.address += CCP_ECC_OPERAND_SIZE;
1983 
1984 	/* Copy the first point X and Y coordinate */
1985 	ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.x,
1986 				      ecc->u.pm.point_1.x_len,
1987 				      CCP_ECC_OPERAND_SIZE, false);
1988 	if (ret)
1989 		goto e_src;
1990 	src.address += CCP_ECC_OPERAND_SIZE;
1991 	ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.y,
1992 				      ecc->u.pm.point_1.y_len,
1993 				      CCP_ECC_OPERAND_SIZE, false);
1994 	if (ret)
1995 		goto e_src;
1996 	src.address += CCP_ECC_OPERAND_SIZE;
1997 
1998 	/* Set the first point Z coordianate to 1 */
1999 	*src.address = 0x01;
2000 	src.address += CCP_ECC_OPERAND_SIZE;
2001 
2002 	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
2003 		/* Copy the second point X and Y coordinate */
2004 		ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.x,
2005 					      ecc->u.pm.point_2.x_len,
2006 					      CCP_ECC_OPERAND_SIZE, false);
2007 		if (ret)
2008 			goto e_src;
2009 		src.address += CCP_ECC_OPERAND_SIZE;
2010 		ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.y,
2011 					      ecc->u.pm.point_2.y_len,
2012 					      CCP_ECC_OPERAND_SIZE, false);
2013 		if (ret)
2014 			goto e_src;
2015 		src.address += CCP_ECC_OPERAND_SIZE;
2016 
2017 		/* Set the second point Z coordianate to 1 */
2018 		*src.address = 0x01;
2019 		src.address += CCP_ECC_OPERAND_SIZE;
2020 	} else {
2021 		/* Copy the Domain "a" parameter */
2022 		ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.domain_a,
2023 					      ecc->u.pm.domain_a_len,
2024 					      CCP_ECC_OPERAND_SIZE, false);
2025 		if (ret)
2026 			goto e_src;
2027 		src.address += CCP_ECC_OPERAND_SIZE;
2028 
2029 		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT) {
2030 			/* Copy the scalar value */
2031 			ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.scalar,
2032 						      ecc->u.pm.scalar_len,
2033 						      CCP_ECC_OPERAND_SIZE,
2034 						      false);
2035 			if (ret)
2036 				goto e_src;
2037 			src.address += CCP_ECC_OPERAND_SIZE;
2038 		}
2039 	}
2040 
2041 	/* Restore the workarea address */
2042 	src.address = save;
2043 
2044 	/* Prepare the output area for the operation */
2045 	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
2046 				   DMA_FROM_DEVICE);
2047 	if (ret)
2048 		goto e_src;
2049 
2050 	op.soc = 1;
2051 	op.src.u.dma.address = src.dma.address;
2052 	op.src.u.dma.offset = 0;
2053 	op.src.u.dma.length = src.length;
2054 	op.dst.u.dma.address = dst.dma.address;
2055 	op.dst.u.dma.offset = 0;
2056 	op.dst.u.dma.length = dst.length;
2057 
2058 	op.u.ecc.function = cmd->u.ecc.function;
2059 
2060 	ret = ccp_perform_ecc(&op);
2061 	if (ret) {
2062 		cmd->engine_error = cmd_q->cmd_error;
2063 		goto e_dst;
2064 	}
2065 
2066 	ecc->ecc_result = le16_to_cpup(
2067 		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
2068 	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
2069 		ret = -EIO;
2070 		goto e_dst;
2071 	}
2072 
2073 	/* Save the workarea address since it is updated as we walk through
2074 	 * to copy the point math result
2075 	 */
2076 	save = dst.address;
2077 
2078 	/* Save the ECC result X and Y coordinates */
2079 	ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.x,
2080 				CCP_ECC_MODULUS_BYTES);
2081 	dst.address += CCP_ECC_OUTPUT_SIZE;
2082 	ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.y,
2083 				CCP_ECC_MODULUS_BYTES);
2084 	dst.address += CCP_ECC_OUTPUT_SIZE;
2085 
2086 	/* Restore the workarea address */
2087 	dst.address = save;
2088 
2089 e_dst:
2090 	ccp_dm_free(&dst);
2091 
2092 e_src:
2093 	ccp_dm_free(&src);
2094 
2095 	return ret;
2096 }
2097 
2098 static int ccp_run_ecc_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
2099 {
2100 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
2101 
2102 	ecc->ecc_result = 0;
2103 
2104 	if (!ecc->mod ||
2105 	    (ecc->mod_len > CCP_ECC_MODULUS_BYTES))
2106 		return -EINVAL;
2107 
2108 	switch (ecc->function) {
2109 	case CCP_ECC_FUNCTION_MMUL_384BIT:
2110 	case CCP_ECC_FUNCTION_MADD_384BIT:
2111 	case CCP_ECC_FUNCTION_MINV_384BIT:
2112 		return ccp_run_ecc_mm_cmd(cmd_q, cmd);
2113 
2114 	case CCP_ECC_FUNCTION_PADD_384BIT:
2115 	case CCP_ECC_FUNCTION_PMUL_384BIT:
2116 	case CCP_ECC_FUNCTION_PDBL_384BIT:
2117 		return ccp_run_ecc_pm_cmd(cmd_q, cmd);
2118 
2119 	default:
2120 		return -EINVAL;
2121 	}
2122 }
2123 
2124 int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
2125 {
2126 	int ret;
2127 
2128 	cmd->engine_error = 0;
2129 	cmd_q->cmd_error = 0;
2130 	cmd_q->int_rcvd = 0;
2131 	cmd_q->free_slots = CMD_Q_DEPTH(ioread32(cmd_q->reg_status));
2132 
2133 	switch (cmd->engine) {
2134 	case CCP_ENGINE_AES:
2135 		ret = ccp_run_aes_cmd(cmd_q, cmd);
2136 		break;
2137 	case CCP_ENGINE_XTS_AES_128:
2138 		ret = ccp_run_xts_aes_cmd(cmd_q, cmd);
2139 		break;
2140 	case CCP_ENGINE_SHA:
2141 		ret = ccp_run_sha_cmd(cmd_q, cmd);
2142 		break;
2143 	case CCP_ENGINE_RSA:
2144 		ret = ccp_run_rsa_cmd(cmd_q, cmd);
2145 		break;
2146 	case CCP_ENGINE_PASSTHRU:
2147 		ret = ccp_run_passthru_cmd(cmd_q, cmd);
2148 		break;
2149 	case CCP_ENGINE_ECC:
2150 		ret = ccp_run_ecc_cmd(cmd_q, cmd);
2151 		break;
2152 	default:
2153 		ret = -EINVAL;
2154 	}
2155 
2156 	return ret;
2157 }
2158