xref: /openbmc/linux/drivers/crypto/ccp/ccp-ops.c (revision 6774def6)
1 /*
2  * AMD Cryptographic Coprocessor (CCP) driver
3  *
4  * Copyright (C) 2013 Advanced Micro Devices, Inc.
5  *
6  * Author: Tom Lendacky <thomas.lendacky@amd.com>
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 as
10  * published by the Free Software Foundation.
11  */
12 
13 #include <linux/module.h>
14 #include <linux/kernel.h>
15 #include <linux/pci.h>
16 #include <linux/pci_ids.h>
17 #include <linux/kthread.h>
18 #include <linux/sched.h>
19 #include <linux/interrupt.h>
20 #include <linux/spinlock.h>
21 #include <linux/mutex.h>
22 #include <linux/delay.h>
23 #include <linux/ccp.h>
24 #include <linux/scatterlist.h>
25 #include <crypto/scatterwalk.h>
26 #include <crypto/sha.h>
27 
28 #include "ccp-dev.h"
29 
30 
/* Memory spaces that a request's source or destination can refer to.
 * The values are shifted directly into the REQ4/REQ6 memtype fields.
 */
enum ccp_memtype {
	CCP_MEMTYPE_SYSTEM	= 0,
	CCP_MEMTYPE_KSB		= 1,
	CCP_MEMTYPE_LOCAL	= 2,
	CCP_MEMTYPE__LAST	= 3,
};
37 
38 struct ccp_dma_info {
39 	dma_addr_t address;
40 	unsigned int offset;
41 	unsigned int length;
42 	enum dma_data_direction dir;
43 };
44 
45 struct ccp_dm_workarea {
46 	struct device *dev;
47 	struct dma_pool *dma_pool;
48 	unsigned int length;
49 
50 	u8 *address;
51 	struct ccp_dma_info dma;
52 };
53 
54 struct ccp_sg_workarea {
55 	struct scatterlist *sg;
56 	unsigned int nents;
57 	unsigned int length;
58 
59 	struct scatterlist *dma_sg;
60 	struct device *dma_dev;
61 	unsigned int dma_count;
62 	enum dma_data_direction dma_dir;
63 
64 	unsigned int sg_used;
65 
66 	u64 bytes_left;
67 };
68 
69 struct ccp_data {
70 	struct ccp_sg_workarea sg_wa;
71 	struct ccp_dm_workarea dm_wa;
72 };
73 
74 struct ccp_mem {
75 	enum ccp_memtype type;
76 	union {
77 		struct ccp_dma_info dma;
78 		u32 ksb;
79 	} u;
80 };
81 
82 struct ccp_aes_op {
83 	enum ccp_aes_type type;
84 	enum ccp_aes_mode mode;
85 	enum ccp_aes_action action;
86 };
87 
88 struct ccp_xts_aes_op {
89 	enum ccp_aes_action action;
90 	enum ccp_xts_aes_unit_size unit_size;
91 };
92 
93 struct ccp_sha_op {
94 	enum ccp_sha_type type;
95 	u64 msg_bits;
96 };
97 
98 struct ccp_rsa_op {
99 	u32 mod_size;
100 	u32 input_len;
101 };
102 
103 struct ccp_passthru_op {
104 	enum ccp_passthru_bitwise bit_mod;
105 	enum ccp_passthru_byteswap byte_swap;
106 };
107 
108 struct ccp_ecc_op {
109 	enum ccp_ecc_function function;
110 };
111 
112 struct ccp_op {
113 	struct ccp_cmd_queue *cmd_q;
114 
115 	u32 jobid;
116 	u32 ioc;
117 	u32 soc;
118 	u32 ksb_key;
119 	u32 ksb_ctx;
120 	u32 init;
121 	u32 eom;
122 
123 	struct ccp_mem src;
124 	struct ccp_mem dst;
125 
126 	union {
127 		struct ccp_aes_op aes;
128 		struct ccp_xts_aes_op xts;
129 		struct ccp_sha_op sha;
130 		struct ccp_rsa_op rsa;
131 		struct ccp_passthru_op passthru;
132 		struct ccp_ecc_op ecc;
133 	} u;
134 };
135 
136 /* SHA initial context values */
137 static const __be32 ccp_sha1_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
138 	cpu_to_be32(SHA1_H0), cpu_to_be32(SHA1_H1),
139 	cpu_to_be32(SHA1_H2), cpu_to_be32(SHA1_H3),
140 	cpu_to_be32(SHA1_H4), 0, 0, 0,
141 };
142 
143 static const __be32 ccp_sha224_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
144 	cpu_to_be32(SHA224_H0), cpu_to_be32(SHA224_H1),
145 	cpu_to_be32(SHA224_H2), cpu_to_be32(SHA224_H3),
146 	cpu_to_be32(SHA224_H4), cpu_to_be32(SHA224_H5),
147 	cpu_to_be32(SHA224_H6), cpu_to_be32(SHA224_H7),
148 };
149 
150 static const __be32 ccp_sha256_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
151 	cpu_to_be32(SHA256_H0), cpu_to_be32(SHA256_H1),
152 	cpu_to_be32(SHA256_H2), cpu_to_be32(SHA256_H3),
153 	cpu_to_be32(SHA256_H4), cpu_to_be32(SHA256_H5),
154 	cpu_to_be32(SHA256_H6), cpu_to_be32(SHA256_H7),
155 };
156 
157 /* The CCP cannot perform zero-length sha operations so the caller
158  * is required to buffer data for the final operation.  However, a
159  * sha operation for a message with a total length of zero is valid
160  * so known values are required to supply the result.
161  */
162 static const u8 ccp_sha1_zero[CCP_SHA_CTXSIZE] = {
163 	0xda, 0x39, 0xa3, 0xee, 0x5e, 0x6b, 0x4b, 0x0d,
164 	0x32, 0x55, 0xbf, 0xef, 0x95, 0x60, 0x18, 0x90,
165 	0xaf, 0xd8, 0x07, 0x09, 0x00, 0x00, 0x00, 0x00,
166 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
167 };
168 
169 static const u8 ccp_sha224_zero[CCP_SHA_CTXSIZE] = {
170 	0xd1, 0x4a, 0x02, 0x8c, 0x2a, 0x3a, 0x2b, 0xc9,
171 	0x47, 0x61, 0x02, 0xbb, 0x28, 0x82, 0x34, 0xc4,
172 	0x15, 0xa2, 0xb0, 0x1f, 0x82, 0x8e, 0xa6, 0x2a,
173 	0xc5, 0xb3, 0xe4, 0x2f, 0x00, 0x00, 0x00, 0x00,
174 };
175 
176 static const u8 ccp_sha256_zero[CCP_SHA_CTXSIZE] = {
177 	0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14,
178 	0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24,
179 	0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c,
180 	0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55,
181 };
182 
183 static u32 ccp_addr_lo(struct ccp_dma_info *info)
184 {
185 	return lower_32_bits(info->address + info->offset);
186 }
187 
188 static u32 ccp_addr_hi(struct ccp_dma_info *info)
189 {
190 	return upper_32_bits(info->address + info->offset) & 0x0000ffff;
191 }
192 
193 static int ccp_do_cmd(struct ccp_op *op, u32 *cr, unsigned int cr_count)
194 {
195 	struct ccp_cmd_queue *cmd_q = op->cmd_q;
196 	struct ccp_device *ccp = cmd_q->ccp;
197 	void __iomem *cr_addr;
198 	u32 cr0, cmd;
199 	unsigned int i;
200 	int ret = 0;
201 
202 	/* We could read a status register to see how many free slots
203 	 * are actually available, but reading that register resets it
204 	 * and you could lose some error information.
205 	 */
206 	cmd_q->free_slots--;
207 
208 	cr0 = (cmd_q->id << REQ0_CMD_Q_SHIFT)
209 	      | (op->jobid << REQ0_JOBID_SHIFT)
210 	      | REQ0_WAIT_FOR_WRITE;
211 
212 	if (op->soc)
213 		cr0 |= REQ0_STOP_ON_COMPLETE
214 		       | REQ0_INT_ON_COMPLETE;
215 
216 	if (op->ioc || !cmd_q->free_slots)
217 		cr0 |= REQ0_INT_ON_COMPLETE;
218 
219 	/* Start at CMD_REQ1 */
220 	cr_addr = ccp->io_regs + CMD_REQ0 + CMD_REQ_INCR;
221 
222 	mutex_lock(&ccp->req_mutex);
223 
224 	/* Write CMD_REQ1 through CMD_REQx first */
225 	for (i = 0; i < cr_count; i++, cr_addr += CMD_REQ_INCR)
226 		iowrite32(*(cr + i), cr_addr);
227 
228 	/* Tell the CCP to start */
229 	wmb();
230 	iowrite32(cr0, ccp->io_regs + CMD_REQ0);
231 
232 	mutex_unlock(&ccp->req_mutex);
233 
234 	if (cr0 & REQ0_INT_ON_COMPLETE) {
235 		/* Wait for the job to complete */
236 		ret = wait_event_interruptible(cmd_q->int_queue,
237 					       cmd_q->int_rcvd);
238 		if (ret || cmd_q->cmd_error) {
239 			/* On error delete all related jobs from the queue */
240 			cmd = (cmd_q->id << DEL_Q_ID_SHIFT)
241 			      | op->jobid;
242 
243 			iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);
244 
245 			if (!ret)
246 				ret = -EIO;
247 		} else if (op->soc) {
248 			/* Delete just head job from the queue on SoC */
249 			cmd = DEL_Q_ACTIVE
250 			      | (cmd_q->id << DEL_Q_ID_SHIFT)
251 			      | op->jobid;
252 
253 			iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);
254 		}
255 
256 		cmd_q->free_slots = CMD_Q_DEPTH(cmd_q->q_status);
257 
258 		cmd_q->int_rcvd = 0;
259 	}
260 
261 	return ret;
262 }
263 
264 static int ccp_perform_aes(struct ccp_op *op)
265 {
266 	u32 cr[6];
267 
268 	/* Fill out the register contents for REQ1 through REQ6 */
269 	cr[0] = (CCP_ENGINE_AES << REQ1_ENGINE_SHIFT)
270 		| (op->u.aes.type << REQ1_AES_TYPE_SHIFT)
271 		| (op->u.aes.mode << REQ1_AES_MODE_SHIFT)
272 		| (op->u.aes.action << REQ1_AES_ACTION_SHIFT)
273 		| (op->ksb_key << REQ1_KEY_KSB_SHIFT);
274 	cr[1] = op->src.u.dma.length - 1;
275 	cr[2] = ccp_addr_lo(&op->src.u.dma);
276 	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
277 		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
278 		| ccp_addr_hi(&op->src.u.dma);
279 	cr[4] = ccp_addr_lo(&op->dst.u.dma);
280 	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
281 		| ccp_addr_hi(&op->dst.u.dma);
282 
283 	if (op->u.aes.mode == CCP_AES_MODE_CFB)
284 		cr[0] |= ((0x7f) << REQ1_AES_CFB_SIZE_SHIFT);
285 
286 	if (op->eom)
287 		cr[0] |= REQ1_EOM;
288 
289 	if (op->init)
290 		cr[0] |= REQ1_INIT;
291 
292 	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
293 }
294 
295 static int ccp_perform_xts_aes(struct ccp_op *op)
296 {
297 	u32 cr[6];
298 
299 	/* Fill out the register contents for REQ1 through REQ6 */
300 	cr[0] = (CCP_ENGINE_XTS_AES_128 << REQ1_ENGINE_SHIFT)
301 		| (op->u.xts.action << REQ1_AES_ACTION_SHIFT)
302 		| (op->u.xts.unit_size << REQ1_XTS_AES_SIZE_SHIFT)
303 		| (op->ksb_key << REQ1_KEY_KSB_SHIFT);
304 	cr[1] = op->src.u.dma.length - 1;
305 	cr[2] = ccp_addr_lo(&op->src.u.dma);
306 	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
307 		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
308 		| ccp_addr_hi(&op->src.u.dma);
309 	cr[4] = ccp_addr_lo(&op->dst.u.dma);
310 	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
311 		| ccp_addr_hi(&op->dst.u.dma);
312 
313 	if (op->eom)
314 		cr[0] |= REQ1_EOM;
315 
316 	if (op->init)
317 		cr[0] |= REQ1_INIT;
318 
319 	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
320 }
321 
322 static int ccp_perform_sha(struct ccp_op *op)
323 {
324 	u32 cr[6];
325 
326 	/* Fill out the register contents for REQ1 through REQ6 */
327 	cr[0] = (CCP_ENGINE_SHA << REQ1_ENGINE_SHIFT)
328 		| (op->u.sha.type << REQ1_SHA_TYPE_SHIFT)
329 		| REQ1_INIT;
330 	cr[1] = op->src.u.dma.length - 1;
331 	cr[2] = ccp_addr_lo(&op->src.u.dma);
332 	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
333 		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
334 		| ccp_addr_hi(&op->src.u.dma);
335 
336 	if (op->eom) {
337 		cr[0] |= REQ1_EOM;
338 		cr[4] = lower_32_bits(op->u.sha.msg_bits);
339 		cr[5] = upper_32_bits(op->u.sha.msg_bits);
340 	} else {
341 		cr[4] = 0;
342 		cr[5] = 0;
343 	}
344 
345 	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
346 }
347 
348 static int ccp_perform_rsa(struct ccp_op *op)
349 {
350 	u32 cr[6];
351 
352 	/* Fill out the register contents for REQ1 through REQ6 */
353 	cr[0] = (CCP_ENGINE_RSA << REQ1_ENGINE_SHIFT)
354 		| (op->u.rsa.mod_size << REQ1_RSA_MOD_SIZE_SHIFT)
355 		| (op->ksb_key << REQ1_KEY_KSB_SHIFT)
356 		| REQ1_EOM;
357 	cr[1] = op->u.rsa.input_len - 1;
358 	cr[2] = ccp_addr_lo(&op->src.u.dma);
359 	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
360 		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
361 		| ccp_addr_hi(&op->src.u.dma);
362 	cr[4] = ccp_addr_lo(&op->dst.u.dma);
363 	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
364 		| ccp_addr_hi(&op->dst.u.dma);
365 
366 	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
367 }
368 
369 static int ccp_perform_passthru(struct ccp_op *op)
370 {
371 	u32 cr[6];
372 
373 	/* Fill out the register contents for REQ1 through REQ6 */
374 	cr[0] = (CCP_ENGINE_PASSTHRU << REQ1_ENGINE_SHIFT)
375 		| (op->u.passthru.bit_mod << REQ1_PT_BW_SHIFT)
376 		| (op->u.passthru.byte_swap << REQ1_PT_BS_SHIFT);
377 
378 	if (op->src.type == CCP_MEMTYPE_SYSTEM)
379 		cr[1] = op->src.u.dma.length - 1;
380 	else
381 		cr[1] = op->dst.u.dma.length - 1;
382 
383 	if (op->src.type == CCP_MEMTYPE_SYSTEM) {
384 		cr[2] = ccp_addr_lo(&op->src.u.dma);
385 		cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
386 			| ccp_addr_hi(&op->src.u.dma);
387 
388 		if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
389 			cr[3] |= (op->ksb_key << REQ4_KSB_SHIFT);
390 	} else {
391 		cr[2] = op->src.u.ksb * CCP_KSB_BYTES;
392 		cr[3] = (CCP_MEMTYPE_KSB << REQ4_MEMTYPE_SHIFT);
393 	}
394 
395 	if (op->dst.type == CCP_MEMTYPE_SYSTEM) {
396 		cr[4] = ccp_addr_lo(&op->dst.u.dma);
397 		cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
398 			| ccp_addr_hi(&op->dst.u.dma);
399 	} else {
400 		cr[4] = op->dst.u.ksb * CCP_KSB_BYTES;
401 		cr[5] = (CCP_MEMTYPE_KSB << REQ6_MEMTYPE_SHIFT);
402 	}
403 
404 	if (op->eom)
405 		cr[0] |= REQ1_EOM;
406 
407 	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
408 }
409 
410 static int ccp_perform_ecc(struct ccp_op *op)
411 {
412 	u32 cr[6];
413 
414 	/* Fill out the register contents for REQ1 through REQ6 */
415 	cr[0] = REQ1_ECC_AFFINE_CONVERT
416 		| (CCP_ENGINE_ECC << REQ1_ENGINE_SHIFT)
417 		| (op->u.ecc.function << REQ1_ECC_FUNCTION_SHIFT)
418 		| REQ1_EOM;
419 	cr[1] = op->src.u.dma.length - 1;
420 	cr[2] = ccp_addr_lo(&op->src.u.dma);
421 	cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
422 		| ccp_addr_hi(&op->src.u.dma);
423 	cr[4] = ccp_addr_lo(&op->dst.u.dma);
424 	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
425 		| ccp_addr_hi(&op->dst.u.dma);
426 
427 	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
428 }
429 
430 static u32 ccp_alloc_ksb(struct ccp_device *ccp, unsigned int count)
431 {
432 	int start;
433 
434 	for (;;) {
435 		mutex_lock(&ccp->ksb_mutex);
436 
437 		start = (u32)bitmap_find_next_zero_area(ccp->ksb,
438 							ccp->ksb_count,
439 							ccp->ksb_start,
440 							count, 0);
441 		if (start <= ccp->ksb_count) {
442 			bitmap_set(ccp->ksb, start, count);
443 
444 			mutex_unlock(&ccp->ksb_mutex);
445 			break;
446 		}
447 
448 		ccp->ksb_avail = 0;
449 
450 		mutex_unlock(&ccp->ksb_mutex);
451 
452 		/* Wait for KSB entries to become available */
453 		if (wait_event_interruptible(ccp->ksb_queue, ccp->ksb_avail))
454 			return 0;
455 	}
456 
457 	return KSB_START + start;
458 }
459 
460 static void ccp_free_ksb(struct ccp_device *ccp, unsigned int start,
461 			 unsigned int count)
462 {
463 	if (!start)
464 		return;
465 
466 	mutex_lock(&ccp->ksb_mutex);
467 
468 	bitmap_clear(ccp->ksb, start - KSB_START, count);
469 
470 	ccp->ksb_avail = 1;
471 
472 	mutex_unlock(&ccp->ksb_mutex);
473 
474 	wake_up_interruptible_all(&ccp->ksb_queue);
475 }
476 
477 static u32 ccp_gen_jobid(struct ccp_device *ccp)
478 {
479 	return atomic_inc_return(&ccp->current_id) & CCP_JOBID_MASK;
480 }
481 
482 static void ccp_sg_free(struct ccp_sg_workarea *wa)
483 {
484 	if (wa->dma_count)
485 		dma_unmap_sg(wa->dma_dev, wa->dma_sg, wa->nents, wa->dma_dir);
486 
487 	wa->dma_count = 0;
488 }
489 
490 static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev,
491 				struct scatterlist *sg, u64 len,
492 				enum dma_data_direction dma_dir)
493 {
494 	memset(wa, 0, sizeof(*wa));
495 
496 	wa->sg = sg;
497 	if (!sg)
498 		return 0;
499 
500 	wa->nents = sg_nents(sg);
501 	wa->length = sg->length;
502 	wa->bytes_left = len;
503 	wa->sg_used = 0;
504 
505 	if (len == 0)
506 		return 0;
507 
508 	if (dma_dir == DMA_NONE)
509 		return 0;
510 
511 	wa->dma_sg = sg;
512 	wa->dma_dev = dev;
513 	wa->dma_dir = dma_dir;
514 	wa->dma_count = dma_map_sg(dev, sg, wa->nents, dma_dir);
515 	if (!wa->dma_count)
516 		return -ENOMEM;
517 
518 
519 	return 0;
520 }
521 
522 static void ccp_update_sg_workarea(struct ccp_sg_workarea *wa, unsigned int len)
523 {
524 	unsigned int nbytes = min_t(u64, len, wa->bytes_left);
525 
526 	if (!wa->sg)
527 		return;
528 
529 	wa->sg_used += nbytes;
530 	wa->bytes_left -= nbytes;
531 	if (wa->sg_used == wa->sg->length) {
532 		wa->sg = sg_next(wa->sg);
533 		wa->sg_used = 0;
534 	}
535 }
536 
537 static void ccp_dm_free(struct ccp_dm_workarea *wa)
538 {
539 	if (wa->length <= CCP_DMAPOOL_MAX_SIZE) {
540 		if (wa->address)
541 			dma_pool_free(wa->dma_pool, wa->address,
542 				      wa->dma.address);
543 	} else {
544 		if (wa->dma.address)
545 			dma_unmap_single(wa->dev, wa->dma.address, wa->length,
546 					 wa->dma.dir);
547 		kfree(wa->address);
548 	}
549 
550 	wa->address = NULL;
551 	wa->dma.address = 0;
552 }
553 
554 static int ccp_init_dm_workarea(struct ccp_dm_workarea *wa,
555 				struct ccp_cmd_queue *cmd_q,
556 				unsigned int len,
557 				enum dma_data_direction dir)
558 {
559 	memset(wa, 0, sizeof(*wa));
560 
561 	if (!len)
562 		return 0;
563 
564 	wa->dev = cmd_q->ccp->dev;
565 	wa->length = len;
566 
567 	if (len <= CCP_DMAPOOL_MAX_SIZE) {
568 		wa->dma_pool = cmd_q->dma_pool;
569 
570 		wa->address = dma_pool_alloc(wa->dma_pool, GFP_KERNEL,
571 					     &wa->dma.address);
572 		if (!wa->address)
573 			return -ENOMEM;
574 
575 		wa->dma.length = CCP_DMAPOOL_MAX_SIZE;
576 
577 		memset(wa->address, 0, CCP_DMAPOOL_MAX_SIZE);
578 	} else {
579 		wa->address = kzalloc(len, GFP_KERNEL);
580 		if (!wa->address)
581 			return -ENOMEM;
582 
583 		wa->dma.address = dma_map_single(wa->dev, wa->address, len,
584 						 dir);
585 		if (!wa->dma.address)
586 			return -ENOMEM;
587 
588 		wa->dma.length = len;
589 	}
590 	wa->dma.dir = dir;
591 
592 	return 0;
593 }
594 
595 static void ccp_set_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
596 			    struct scatterlist *sg, unsigned int sg_offset,
597 			    unsigned int len)
598 {
599 	WARN_ON(!wa->address);
600 
601 	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
602 				 0);
603 }
604 
605 static void ccp_get_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
606 			    struct scatterlist *sg, unsigned int sg_offset,
607 			    unsigned int len)
608 {
609 	WARN_ON(!wa->address);
610 
611 	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
612 				 1);
613 }
614 
615 static void ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa,
616 				    struct scatterlist *sg,
617 				    unsigned int len, unsigned int se_len,
618 				    bool sign_extend)
619 {
620 	unsigned int nbytes, sg_offset, dm_offset, ksb_len, i;
621 	u8 buffer[CCP_REVERSE_BUF_SIZE];
622 
623 	BUG_ON(se_len > sizeof(buffer));
624 
625 	sg_offset = len;
626 	dm_offset = 0;
627 	nbytes = len;
628 	while (nbytes) {
629 		ksb_len = min_t(unsigned int, nbytes, se_len);
630 		sg_offset -= ksb_len;
631 
632 		scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 0);
633 		for (i = 0; i < ksb_len; i++)
634 			wa->address[dm_offset + i] = buffer[ksb_len - i - 1];
635 
636 		dm_offset += ksb_len;
637 		nbytes -= ksb_len;
638 
639 		if ((ksb_len != se_len) && sign_extend) {
640 			/* Must sign-extend to nearest sign-extend length */
641 			if (wa->address[dm_offset - 1] & 0x80)
642 				memset(wa->address + dm_offset, 0xff,
643 				       se_len - ksb_len);
644 		}
645 	}
646 }
647 
648 static void ccp_reverse_get_dm_area(struct ccp_dm_workarea *wa,
649 				    struct scatterlist *sg,
650 				    unsigned int len)
651 {
652 	unsigned int nbytes, sg_offset, dm_offset, ksb_len, i;
653 	u8 buffer[CCP_REVERSE_BUF_SIZE];
654 
655 	sg_offset = 0;
656 	dm_offset = len;
657 	nbytes = len;
658 	while (nbytes) {
659 		ksb_len = min_t(unsigned int, nbytes, sizeof(buffer));
660 		dm_offset -= ksb_len;
661 
662 		for (i = 0; i < ksb_len; i++)
663 			buffer[ksb_len - i - 1] = wa->address[dm_offset + i];
664 		scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 1);
665 
666 		sg_offset += ksb_len;
667 		nbytes -= ksb_len;
668 	}
669 }
670 
671 static void ccp_free_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q)
672 {
673 	ccp_dm_free(&data->dm_wa);
674 	ccp_sg_free(&data->sg_wa);
675 }
676 
677 static int ccp_init_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q,
678 			 struct scatterlist *sg, u64 sg_len,
679 			 unsigned int dm_len,
680 			 enum dma_data_direction dir)
681 {
682 	int ret;
683 
684 	memset(data, 0, sizeof(*data));
685 
686 	ret = ccp_init_sg_workarea(&data->sg_wa, cmd_q->ccp->dev, sg, sg_len,
687 				   dir);
688 	if (ret)
689 		goto e_err;
690 
691 	ret = ccp_init_dm_workarea(&data->dm_wa, cmd_q, dm_len, dir);
692 	if (ret)
693 		goto e_err;
694 
695 	return 0;
696 
697 e_err:
698 	ccp_free_data(data, cmd_q);
699 
700 	return ret;
701 }
702 
703 static unsigned int ccp_queue_buf(struct ccp_data *data, unsigned int from)
704 {
705 	struct ccp_sg_workarea *sg_wa = &data->sg_wa;
706 	struct ccp_dm_workarea *dm_wa = &data->dm_wa;
707 	unsigned int buf_count, nbytes;
708 
709 	/* Clear the buffer if setting it */
710 	if (!from)
711 		memset(dm_wa->address, 0, dm_wa->length);
712 
713 	if (!sg_wa->sg)
714 		return 0;
715 
716 	/* Perform the copy operation
717 	 *   nbytes will always be <= UINT_MAX because dm_wa->length is
718 	 *   an unsigned int
719 	 */
720 	nbytes = min_t(u64, sg_wa->bytes_left, dm_wa->length);
721 	scatterwalk_map_and_copy(dm_wa->address, sg_wa->sg, sg_wa->sg_used,
722 				 nbytes, from);
723 
724 	/* Update the structures and generate the count */
725 	buf_count = 0;
726 	while (sg_wa->bytes_left && (buf_count < dm_wa->length)) {
727 		nbytes = min(sg_wa->sg->length - sg_wa->sg_used,
728 			     dm_wa->length - buf_count);
729 		nbytes = min_t(u64, sg_wa->bytes_left, nbytes);
730 
731 		buf_count += nbytes;
732 		ccp_update_sg_workarea(sg_wa, nbytes);
733 	}
734 
735 	return buf_count;
736 }
737 
/* Fill the bounce buffer from the scatterlist; returns the byte count */
static unsigned int ccp_fill_queue_buf(struct ccp_data *data)
{
	unsigned int copied = ccp_queue_buf(data, 0);

	return copied;
}
742 
/* Write the bounce buffer out to the scatterlist; returns the byte count */
static unsigned int ccp_empty_queue_buf(struct ccp_data *data)
{
	unsigned int copied = ccp_queue_buf(data, 1);

	return copied;
}
747 
748 static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst,
749 			     struct ccp_op *op, unsigned int block_size,
750 			     bool blocksize_op)
751 {
752 	unsigned int sg_src_len, sg_dst_len, op_len;
753 
754 	/* The CCP can only DMA from/to one address each per operation. This
755 	 * requires that we find the smallest DMA area between the source
756 	 * and destination. The resulting len values will always be <= UINT_MAX
757 	 * because the dma length is an unsigned int.
758 	 */
759 	sg_src_len = sg_dma_len(src->sg_wa.sg) - src->sg_wa.sg_used;
760 	sg_src_len = min_t(u64, src->sg_wa.bytes_left, sg_src_len);
761 
762 	if (dst) {
763 		sg_dst_len = sg_dma_len(dst->sg_wa.sg) - dst->sg_wa.sg_used;
764 		sg_dst_len = min_t(u64, src->sg_wa.bytes_left, sg_dst_len);
765 		op_len = min(sg_src_len, sg_dst_len);
766 	} else
767 		op_len = sg_src_len;
768 
769 	/* The data operation length will be at least block_size in length
770 	 * or the smaller of available sg room remaining for the source or
771 	 * the destination
772 	 */
773 	op_len = max(op_len, block_size);
774 
775 	/* Unless we have to buffer data, there's no reason to wait */
776 	op->soc = 0;
777 
778 	if (sg_src_len < block_size) {
779 		/* Not enough data in the sg element, so it
780 		 * needs to be buffered into a blocksize chunk
781 		 */
782 		int cp_len = ccp_fill_queue_buf(src);
783 
784 		op->soc = 1;
785 		op->src.u.dma.address = src->dm_wa.dma.address;
786 		op->src.u.dma.offset = 0;
787 		op->src.u.dma.length = (blocksize_op) ? block_size : cp_len;
788 	} else {
789 		/* Enough data in the sg element, but we need to
790 		 * adjust for any previously copied data
791 		 */
792 		op->src.u.dma.address = sg_dma_address(src->sg_wa.sg);
793 		op->src.u.dma.offset = src->sg_wa.sg_used;
794 		op->src.u.dma.length = op_len & ~(block_size - 1);
795 
796 		ccp_update_sg_workarea(&src->sg_wa, op->src.u.dma.length);
797 	}
798 
799 	if (dst) {
800 		if (sg_dst_len < block_size) {
801 			/* Not enough room in the sg element or we're on the
802 			 * last piece of data (when using padding), so the
803 			 * output needs to be buffered into a blocksize chunk
804 			 */
805 			op->soc = 1;
806 			op->dst.u.dma.address = dst->dm_wa.dma.address;
807 			op->dst.u.dma.offset = 0;
808 			op->dst.u.dma.length = op->src.u.dma.length;
809 		} else {
810 			/* Enough room in the sg element, but we need to
811 			 * adjust for any previously used area
812 			 */
813 			op->dst.u.dma.address = sg_dma_address(dst->sg_wa.sg);
814 			op->dst.u.dma.offset = dst->sg_wa.sg_used;
815 			op->dst.u.dma.length = op->src.u.dma.length;
816 		}
817 	}
818 }
819 
820 static void ccp_process_data(struct ccp_data *src, struct ccp_data *dst,
821 			     struct ccp_op *op)
822 {
823 	op->init = 0;
824 
825 	if (dst) {
826 		if (op->dst.u.dma.address == dst->dm_wa.dma.address)
827 			ccp_empty_queue_buf(dst);
828 		else
829 			ccp_update_sg_workarea(&dst->sg_wa,
830 					       op->dst.u.dma.length);
831 	}
832 }
833 
834 static int ccp_copy_to_from_ksb(struct ccp_cmd_queue *cmd_q,
835 				struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
836 				u32 byte_swap, bool from)
837 {
838 	struct ccp_op op;
839 
840 	memset(&op, 0, sizeof(op));
841 
842 	op.cmd_q = cmd_q;
843 	op.jobid = jobid;
844 	op.eom = 1;
845 
846 	if (from) {
847 		op.soc = 1;
848 		op.src.type = CCP_MEMTYPE_KSB;
849 		op.src.u.ksb = ksb;
850 		op.dst.type = CCP_MEMTYPE_SYSTEM;
851 		op.dst.u.dma.address = wa->dma.address;
852 		op.dst.u.dma.length = wa->length;
853 	} else {
854 		op.src.type = CCP_MEMTYPE_SYSTEM;
855 		op.src.u.dma.address = wa->dma.address;
856 		op.src.u.dma.length = wa->length;
857 		op.dst.type = CCP_MEMTYPE_KSB;
858 		op.dst.u.ksb = ksb;
859 	}
860 
861 	op.u.passthru.byte_swap = byte_swap;
862 
863 	return ccp_perform_passthru(&op);
864 }
865 
866 static int ccp_copy_to_ksb(struct ccp_cmd_queue *cmd_q,
867 			   struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
868 			   u32 byte_swap)
869 {
870 	return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, false);
871 }
872 
873 static int ccp_copy_from_ksb(struct ccp_cmd_queue *cmd_q,
874 			     struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
875 			     u32 byte_swap)
876 {
877 	return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, true);
878 }
879 
880 static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q,
881 				struct ccp_cmd *cmd)
882 {
883 	struct ccp_aes_engine *aes = &cmd->u.aes;
884 	struct ccp_dm_workarea key, ctx;
885 	struct ccp_data src;
886 	struct ccp_op op;
887 	unsigned int dm_offset;
888 	int ret;
889 
890 	if (!((aes->key_len == AES_KEYSIZE_128) ||
891 	      (aes->key_len == AES_KEYSIZE_192) ||
892 	      (aes->key_len == AES_KEYSIZE_256)))
893 		return -EINVAL;
894 
895 	if (aes->src_len & (AES_BLOCK_SIZE - 1))
896 		return -EINVAL;
897 
898 	if (aes->iv_len != AES_BLOCK_SIZE)
899 		return -EINVAL;
900 
901 	if (!aes->key || !aes->iv || !aes->src)
902 		return -EINVAL;
903 
904 	if (aes->cmac_final) {
905 		if (aes->cmac_key_len != AES_BLOCK_SIZE)
906 			return -EINVAL;
907 
908 		if (!aes->cmac_key)
909 			return -EINVAL;
910 	}
911 
912 	BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1);
913 	BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1);
914 
915 	ret = -EIO;
916 	memset(&op, 0, sizeof(op));
917 	op.cmd_q = cmd_q;
918 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
919 	op.ksb_key = cmd_q->ksb_key;
920 	op.ksb_ctx = cmd_q->ksb_ctx;
921 	op.init = 1;
922 	op.u.aes.type = aes->type;
923 	op.u.aes.mode = aes->mode;
924 	op.u.aes.action = aes->action;
925 
926 	/* All supported key sizes fit in a single (32-byte) KSB entry
927 	 * and must be in little endian format. Use the 256-bit byte
928 	 * swap passthru option to convert from big endian to little
929 	 * endian.
930 	 */
931 	ret = ccp_init_dm_workarea(&key, cmd_q,
932 				   CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
933 				   DMA_TO_DEVICE);
934 	if (ret)
935 		return ret;
936 
937 	dm_offset = CCP_KSB_BYTES - aes->key_len;
938 	ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
939 	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
940 			      CCP_PASSTHRU_BYTESWAP_256BIT);
941 	if (ret) {
942 		cmd->engine_error = cmd_q->cmd_error;
943 		goto e_key;
944 	}
945 
946 	/* The AES context fits in a single (32-byte) KSB entry and
947 	 * must be in little endian format. Use the 256-bit byte swap
948 	 * passthru option to convert from big endian to little endian.
949 	 */
950 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
951 				   CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
952 				   DMA_BIDIRECTIONAL);
953 	if (ret)
954 		goto e_key;
955 
956 	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
957 	ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
958 	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
959 			      CCP_PASSTHRU_BYTESWAP_256BIT);
960 	if (ret) {
961 		cmd->engine_error = cmd_q->cmd_error;
962 		goto e_ctx;
963 	}
964 
965 	/* Send data to the CCP AES engine */
966 	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
967 			    AES_BLOCK_SIZE, DMA_TO_DEVICE);
968 	if (ret)
969 		goto e_ctx;
970 
971 	while (src.sg_wa.bytes_left) {
972 		ccp_prepare_data(&src, NULL, &op, AES_BLOCK_SIZE, true);
973 		if (aes->cmac_final && !src.sg_wa.bytes_left) {
974 			op.eom = 1;
975 
976 			/* Push the K1/K2 key to the CCP now */
977 			ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid,
978 						op.ksb_ctx,
979 						CCP_PASSTHRU_BYTESWAP_256BIT);
980 			if (ret) {
981 				cmd->engine_error = cmd_q->cmd_error;
982 				goto e_src;
983 			}
984 
985 			ccp_set_dm_area(&ctx, 0, aes->cmac_key, 0,
986 					aes->cmac_key_len);
987 			ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
988 					      CCP_PASSTHRU_BYTESWAP_256BIT);
989 			if (ret) {
990 				cmd->engine_error = cmd_q->cmd_error;
991 				goto e_src;
992 			}
993 		}
994 
995 		ret = ccp_perform_aes(&op);
996 		if (ret) {
997 			cmd->engine_error = cmd_q->cmd_error;
998 			goto e_src;
999 		}
1000 
1001 		ccp_process_data(&src, NULL, &op);
1002 	}
1003 
1004 	/* Retrieve the AES context - convert from LE to BE using
1005 	 * 32-byte (256-bit) byteswapping
1006 	 */
1007 	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1008 				CCP_PASSTHRU_BYTESWAP_256BIT);
1009 	if (ret) {
1010 		cmd->engine_error = cmd_q->cmd_error;
1011 		goto e_src;
1012 	}
1013 
1014 	/* ...but we only need AES_BLOCK_SIZE bytes */
1015 	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
1016 	ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
1017 
1018 e_src:
1019 	ccp_free_data(&src, cmd_q);
1020 
1021 e_ctx:
1022 	ccp_dm_free(&ctx);
1023 
1024 e_key:
1025 	ccp_dm_free(&key);
1026 
1027 	return ret;
1028 }
1029 
1030 static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1031 {
1032 	struct ccp_aes_engine *aes = &cmd->u.aes;
1033 	struct ccp_dm_workarea key, ctx;
1034 	struct ccp_data src, dst;
1035 	struct ccp_op op;
1036 	unsigned int dm_offset;
1037 	bool in_place = false;
1038 	int ret;
1039 
1040 	if (aes->mode == CCP_AES_MODE_CMAC)
1041 		return ccp_run_aes_cmac_cmd(cmd_q, cmd);
1042 
1043 	if (!((aes->key_len == AES_KEYSIZE_128) ||
1044 	      (aes->key_len == AES_KEYSIZE_192) ||
1045 	      (aes->key_len == AES_KEYSIZE_256)))
1046 		return -EINVAL;
1047 
1048 	if (((aes->mode == CCP_AES_MODE_ECB) ||
1049 	     (aes->mode == CCP_AES_MODE_CBC) ||
1050 	     (aes->mode == CCP_AES_MODE_CFB)) &&
1051 	    (aes->src_len & (AES_BLOCK_SIZE - 1)))
1052 		return -EINVAL;
1053 
1054 	if (!aes->key || !aes->src || !aes->dst)
1055 		return -EINVAL;
1056 
1057 	if (aes->mode != CCP_AES_MODE_ECB) {
1058 		if (aes->iv_len != AES_BLOCK_SIZE)
1059 			return -EINVAL;
1060 
1061 		if (!aes->iv)
1062 			return -EINVAL;
1063 	}
1064 
1065 	BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1);
1066 	BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1);
1067 
1068 	ret = -EIO;
1069 	memset(&op, 0, sizeof(op));
1070 	op.cmd_q = cmd_q;
1071 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1072 	op.ksb_key = cmd_q->ksb_key;
1073 	op.ksb_ctx = cmd_q->ksb_ctx;
1074 	op.init = (aes->mode == CCP_AES_MODE_ECB) ? 0 : 1;
1075 	op.u.aes.type = aes->type;
1076 	op.u.aes.mode = aes->mode;
1077 	op.u.aes.action = aes->action;
1078 
1079 	/* All supported key sizes fit in a single (32-byte) KSB entry
1080 	 * and must be in little endian format. Use the 256-bit byte
1081 	 * swap passthru option to convert from big endian to little
1082 	 * endian.
1083 	 */
1084 	ret = ccp_init_dm_workarea(&key, cmd_q,
1085 				   CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
1086 				   DMA_TO_DEVICE);
1087 	if (ret)
1088 		return ret;
1089 
1090 	dm_offset = CCP_KSB_BYTES - aes->key_len;
1091 	ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
1092 	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
1093 			      CCP_PASSTHRU_BYTESWAP_256BIT);
1094 	if (ret) {
1095 		cmd->engine_error = cmd_q->cmd_error;
1096 		goto e_key;
1097 	}
1098 
1099 	/* The AES context fits in a single (32-byte) KSB entry and
1100 	 * must be in little endian format. Use the 256-bit byte swap
1101 	 * passthru option to convert from big endian to little endian.
1102 	 */
1103 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
1104 				   CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
1105 				   DMA_BIDIRECTIONAL);
1106 	if (ret)
1107 		goto e_key;
1108 
1109 	if (aes->mode != CCP_AES_MODE_ECB) {
1110 		/* Load the AES context - conver to LE */
1111 		dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
1112 		ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
1113 		ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1114 				      CCP_PASSTHRU_BYTESWAP_256BIT);
1115 		if (ret) {
1116 			cmd->engine_error = cmd_q->cmd_error;
1117 			goto e_ctx;
1118 		}
1119 	}
1120 
1121 	/* Prepare the input and output data workareas. For in-place
1122 	 * operations we need to set the dma direction to BIDIRECTIONAL
1123 	 * and copy the src workarea to the dst workarea.
1124 	 */
1125 	if (sg_virt(aes->src) == sg_virt(aes->dst))
1126 		in_place = true;
1127 
1128 	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
1129 			    AES_BLOCK_SIZE,
1130 			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1131 	if (ret)
1132 		goto e_ctx;
1133 
1134 	if (in_place)
1135 		dst = src;
1136 	else {
1137 		ret = ccp_init_data(&dst, cmd_q, aes->dst, aes->src_len,
1138 				    AES_BLOCK_SIZE, DMA_FROM_DEVICE);
1139 		if (ret)
1140 			goto e_src;
1141 	}
1142 
1143 	/* Send data to the CCP AES engine */
1144 	while (src.sg_wa.bytes_left) {
1145 		ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true);
1146 		if (!src.sg_wa.bytes_left) {
1147 			op.eom = 1;
1148 
1149 			/* Since we don't retrieve the AES context in ECB
1150 			 * mode we have to wait for the operation to complete
1151 			 * on the last piece of data
1152 			 */
1153 			if (aes->mode == CCP_AES_MODE_ECB)
1154 				op.soc = 1;
1155 		}
1156 
1157 		ret = ccp_perform_aes(&op);
1158 		if (ret) {
1159 			cmd->engine_error = cmd_q->cmd_error;
1160 			goto e_dst;
1161 		}
1162 
1163 		ccp_process_data(&src, &dst, &op);
1164 	}
1165 
1166 	if (aes->mode != CCP_AES_MODE_ECB) {
1167 		/* Retrieve the AES context - convert from LE to BE using
1168 		 * 32-byte (256-bit) byteswapping
1169 		 */
1170 		ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1171 					CCP_PASSTHRU_BYTESWAP_256BIT);
1172 		if (ret) {
1173 			cmd->engine_error = cmd_q->cmd_error;
1174 			goto e_dst;
1175 		}
1176 
1177 		/* ...but we only need AES_BLOCK_SIZE bytes */
1178 		dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
1179 		ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
1180 	}
1181 
1182 e_dst:
1183 	if (!in_place)
1184 		ccp_free_data(&dst, cmd_q);
1185 
1186 e_src:
1187 	ccp_free_data(&src, cmd_q);
1188 
1189 e_ctx:
1190 	ccp_dm_free(&ctx);
1191 
1192 e_key:
1193 	ccp_dm_free(&key);
1194 
1195 	return ret;
1196 }
1197 
1198 static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
1199 			       struct ccp_cmd *cmd)
1200 {
1201 	struct ccp_xts_aes_engine *xts = &cmd->u.xts;
1202 	struct ccp_dm_workarea key, ctx;
1203 	struct ccp_data src, dst;
1204 	struct ccp_op op;
1205 	unsigned int unit_size, dm_offset;
1206 	bool in_place = false;
1207 	int ret;
1208 
1209 	switch (xts->unit_size) {
1210 	case CCP_XTS_AES_UNIT_SIZE_16:
1211 		unit_size = 16;
1212 		break;
1213 	case CCP_XTS_AES_UNIT_SIZE_512:
1214 		unit_size = 512;
1215 		break;
1216 	case CCP_XTS_AES_UNIT_SIZE_1024:
1217 		unit_size = 1024;
1218 		break;
1219 	case CCP_XTS_AES_UNIT_SIZE_2048:
1220 		unit_size = 2048;
1221 		break;
1222 	case CCP_XTS_AES_UNIT_SIZE_4096:
1223 		unit_size = 4096;
1224 		break;
1225 
1226 	default:
1227 		return -EINVAL;
1228 	}
1229 
1230 	if (xts->key_len != AES_KEYSIZE_128)
1231 		return -EINVAL;
1232 
1233 	if (!xts->final && (xts->src_len & (AES_BLOCK_SIZE - 1)))
1234 		return -EINVAL;
1235 
1236 	if (xts->iv_len != AES_BLOCK_SIZE)
1237 		return -EINVAL;
1238 
1239 	if (!xts->key || !xts->iv || !xts->src || !xts->dst)
1240 		return -EINVAL;
1241 
1242 	BUILD_BUG_ON(CCP_XTS_AES_KEY_KSB_COUNT != 1);
1243 	BUILD_BUG_ON(CCP_XTS_AES_CTX_KSB_COUNT != 1);
1244 
1245 	ret = -EIO;
1246 	memset(&op, 0, sizeof(op));
1247 	op.cmd_q = cmd_q;
1248 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1249 	op.ksb_key = cmd_q->ksb_key;
1250 	op.ksb_ctx = cmd_q->ksb_ctx;
1251 	op.init = 1;
1252 	op.u.xts.action = xts->action;
1253 	op.u.xts.unit_size = xts->unit_size;
1254 
1255 	/* All supported key sizes fit in a single (32-byte) KSB entry
1256 	 * and must be in little endian format. Use the 256-bit byte
1257 	 * swap passthru option to convert from big endian to little
1258 	 * endian.
1259 	 */
1260 	ret = ccp_init_dm_workarea(&key, cmd_q,
1261 				   CCP_XTS_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
1262 				   DMA_TO_DEVICE);
1263 	if (ret)
1264 		return ret;
1265 
1266 	dm_offset = CCP_KSB_BYTES - AES_KEYSIZE_128;
1267 	ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len);
1268 	ccp_set_dm_area(&key, 0, xts->key, dm_offset, xts->key_len);
1269 	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
1270 			      CCP_PASSTHRU_BYTESWAP_256BIT);
1271 	if (ret) {
1272 		cmd->engine_error = cmd_q->cmd_error;
1273 		goto e_key;
1274 	}
1275 
1276 	/* The AES context fits in a single (32-byte) KSB entry and
1277 	 * for XTS is already in little endian format so no byte swapping
1278 	 * is needed.
1279 	 */
1280 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
1281 				   CCP_XTS_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
1282 				   DMA_BIDIRECTIONAL);
1283 	if (ret)
1284 		goto e_key;
1285 
1286 	ccp_set_dm_area(&ctx, 0, xts->iv, 0, xts->iv_len);
1287 	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1288 			      CCP_PASSTHRU_BYTESWAP_NOOP);
1289 	if (ret) {
1290 		cmd->engine_error = cmd_q->cmd_error;
1291 		goto e_ctx;
1292 	}
1293 
1294 	/* Prepare the input and output data workareas. For in-place
1295 	 * operations we need to set the dma direction to BIDIRECTIONAL
1296 	 * and copy the src workarea to the dst workarea.
1297 	 */
1298 	if (sg_virt(xts->src) == sg_virt(xts->dst))
1299 		in_place = true;
1300 
1301 	ret = ccp_init_data(&src, cmd_q, xts->src, xts->src_len,
1302 			    unit_size,
1303 			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1304 	if (ret)
1305 		goto e_ctx;
1306 
1307 	if (in_place)
1308 		dst = src;
1309 	else {
1310 		ret = ccp_init_data(&dst, cmd_q, xts->dst, xts->src_len,
1311 				    unit_size, DMA_FROM_DEVICE);
1312 		if (ret)
1313 			goto e_src;
1314 	}
1315 
1316 	/* Send data to the CCP AES engine */
1317 	while (src.sg_wa.bytes_left) {
1318 		ccp_prepare_data(&src, &dst, &op, unit_size, true);
1319 		if (!src.sg_wa.bytes_left)
1320 			op.eom = 1;
1321 
1322 		ret = ccp_perform_xts_aes(&op);
1323 		if (ret) {
1324 			cmd->engine_error = cmd_q->cmd_error;
1325 			goto e_dst;
1326 		}
1327 
1328 		ccp_process_data(&src, &dst, &op);
1329 	}
1330 
1331 	/* Retrieve the AES context - convert from LE to BE using
1332 	 * 32-byte (256-bit) byteswapping
1333 	 */
1334 	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1335 				CCP_PASSTHRU_BYTESWAP_256BIT);
1336 	if (ret) {
1337 		cmd->engine_error = cmd_q->cmd_error;
1338 		goto e_dst;
1339 	}
1340 
1341 	/* ...but we only need AES_BLOCK_SIZE bytes */
1342 	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
1343 	ccp_get_dm_area(&ctx, dm_offset, xts->iv, 0, xts->iv_len);
1344 
1345 e_dst:
1346 	if (!in_place)
1347 		ccp_free_data(&dst, cmd_q);
1348 
1349 e_src:
1350 	ccp_free_data(&src, cmd_q);
1351 
1352 e_ctx:
1353 	ccp_dm_free(&ctx);
1354 
1355 e_key:
1356 	ccp_dm_free(&key);
1357 
1358 	return ret;
1359 }
1360 
1361 static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1362 {
1363 	struct ccp_sha_engine *sha = &cmd->u.sha;
1364 	struct ccp_dm_workarea ctx;
1365 	struct ccp_data src;
1366 	struct ccp_op op;
1367 	int ret;
1368 
1369 	if (sha->ctx_len != CCP_SHA_CTXSIZE)
1370 		return -EINVAL;
1371 
1372 	if (!sha->ctx)
1373 		return -EINVAL;
1374 
1375 	if (!sha->final && (sha->src_len & (CCP_SHA_BLOCKSIZE - 1)))
1376 		return -EINVAL;
1377 
1378 	if (!sha->src_len) {
1379 		const u8 *sha_zero;
1380 
1381 		/* Not final, just return */
1382 		if (!sha->final)
1383 			return 0;
1384 
1385 		/* CCP can't do a zero length sha operation so the caller
1386 		 * must buffer the data.
1387 		 */
1388 		if (sha->msg_bits)
1389 			return -EINVAL;
1390 
1391 		/* A sha operation for a message with a total length of zero,
1392 		 * return known result.
1393 		 */
1394 		switch (sha->type) {
1395 		case CCP_SHA_TYPE_1:
1396 			sha_zero = ccp_sha1_zero;
1397 			break;
1398 		case CCP_SHA_TYPE_224:
1399 			sha_zero = ccp_sha224_zero;
1400 			break;
1401 		case CCP_SHA_TYPE_256:
1402 			sha_zero = ccp_sha256_zero;
1403 			break;
1404 		default:
1405 			return -EINVAL;
1406 		}
1407 
1408 		scatterwalk_map_and_copy((void *)sha_zero, sha->ctx, 0,
1409 					 sha->ctx_len, 1);
1410 
1411 		return 0;
1412 	}
1413 
1414 	if (!sha->src)
1415 		return -EINVAL;
1416 
1417 	BUILD_BUG_ON(CCP_SHA_KSB_COUNT != 1);
1418 
1419 	memset(&op, 0, sizeof(op));
1420 	op.cmd_q = cmd_q;
1421 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1422 	op.ksb_ctx = cmd_q->ksb_ctx;
1423 	op.u.sha.type = sha->type;
1424 	op.u.sha.msg_bits = sha->msg_bits;
1425 
1426 	/* The SHA context fits in a single (32-byte) KSB entry and
1427 	 * must be in little endian format. Use the 256-bit byte swap
1428 	 * passthru option to convert from big endian to little endian.
1429 	 */
1430 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
1431 				   CCP_SHA_KSB_COUNT * CCP_KSB_BYTES,
1432 				   DMA_BIDIRECTIONAL);
1433 	if (ret)
1434 		return ret;
1435 
1436 	if (sha->first) {
1437 		const __be32 *init;
1438 
1439 		switch (sha->type) {
1440 		case CCP_SHA_TYPE_1:
1441 			init = ccp_sha1_init;
1442 			break;
1443 		case CCP_SHA_TYPE_224:
1444 			init = ccp_sha224_init;
1445 			break;
1446 		case CCP_SHA_TYPE_256:
1447 			init = ccp_sha256_init;
1448 			break;
1449 		default:
1450 			ret = -EINVAL;
1451 			goto e_ctx;
1452 		}
1453 		memcpy(ctx.address, init, CCP_SHA_CTXSIZE);
1454 	} else
1455 		ccp_set_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
1456 
1457 	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1458 			      CCP_PASSTHRU_BYTESWAP_256BIT);
1459 	if (ret) {
1460 		cmd->engine_error = cmd_q->cmd_error;
1461 		goto e_ctx;
1462 	}
1463 
1464 	/* Send data to the CCP SHA engine */
1465 	ret = ccp_init_data(&src, cmd_q, sha->src, sha->src_len,
1466 			    CCP_SHA_BLOCKSIZE, DMA_TO_DEVICE);
1467 	if (ret)
1468 		goto e_ctx;
1469 
1470 	while (src.sg_wa.bytes_left) {
1471 		ccp_prepare_data(&src, NULL, &op, CCP_SHA_BLOCKSIZE, false);
1472 		if (sha->final && !src.sg_wa.bytes_left)
1473 			op.eom = 1;
1474 
1475 		ret = ccp_perform_sha(&op);
1476 		if (ret) {
1477 			cmd->engine_error = cmd_q->cmd_error;
1478 			goto e_data;
1479 		}
1480 
1481 		ccp_process_data(&src, NULL, &op);
1482 	}
1483 
1484 	/* Retrieve the SHA context - convert from LE to BE using
1485 	 * 32-byte (256-bit) byteswapping to BE
1486 	 */
1487 	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1488 				CCP_PASSTHRU_BYTESWAP_256BIT);
1489 	if (ret) {
1490 		cmd->engine_error = cmd_q->cmd_error;
1491 		goto e_data;
1492 	}
1493 
1494 	ccp_get_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
1495 
1496 	if (sha->final && sha->opad) {
1497 		/* HMAC operation, recursively perform final SHA */
1498 		struct ccp_cmd hmac_cmd;
1499 		struct scatterlist sg;
1500 		u64 block_size, digest_size;
1501 		u8 *hmac_buf;
1502 
1503 		switch (sha->type) {
1504 		case CCP_SHA_TYPE_1:
1505 			block_size = SHA1_BLOCK_SIZE;
1506 			digest_size = SHA1_DIGEST_SIZE;
1507 			break;
1508 		case CCP_SHA_TYPE_224:
1509 			block_size = SHA224_BLOCK_SIZE;
1510 			digest_size = SHA224_DIGEST_SIZE;
1511 			break;
1512 		case CCP_SHA_TYPE_256:
1513 			block_size = SHA256_BLOCK_SIZE;
1514 			digest_size = SHA256_DIGEST_SIZE;
1515 			break;
1516 		default:
1517 			ret = -EINVAL;
1518 			goto e_data;
1519 		}
1520 
1521 		if (sha->opad_len != block_size) {
1522 			ret = -EINVAL;
1523 			goto e_data;
1524 		}
1525 
1526 		hmac_buf = kmalloc(block_size + digest_size, GFP_KERNEL);
1527 		if (!hmac_buf) {
1528 			ret = -ENOMEM;
1529 			goto e_data;
1530 		}
1531 		sg_init_one(&sg, hmac_buf, block_size + digest_size);
1532 
1533 		scatterwalk_map_and_copy(hmac_buf, sha->opad, 0, block_size, 0);
1534 		memcpy(hmac_buf + block_size, ctx.address, digest_size);
1535 
1536 		memset(&hmac_cmd, 0, sizeof(hmac_cmd));
1537 		hmac_cmd.engine = CCP_ENGINE_SHA;
1538 		hmac_cmd.u.sha.type = sha->type;
1539 		hmac_cmd.u.sha.ctx = sha->ctx;
1540 		hmac_cmd.u.sha.ctx_len = sha->ctx_len;
1541 		hmac_cmd.u.sha.src = &sg;
1542 		hmac_cmd.u.sha.src_len = block_size + digest_size;
1543 		hmac_cmd.u.sha.opad = NULL;
1544 		hmac_cmd.u.sha.opad_len = 0;
1545 		hmac_cmd.u.sha.first = 1;
1546 		hmac_cmd.u.sha.final = 1;
1547 		hmac_cmd.u.sha.msg_bits = (block_size + digest_size) << 3;
1548 
1549 		ret = ccp_run_sha_cmd(cmd_q, &hmac_cmd);
1550 		if (ret)
1551 			cmd->engine_error = hmac_cmd.engine_error;
1552 
1553 		kfree(hmac_buf);
1554 	}
1555 
1556 e_data:
1557 	ccp_free_data(&src, cmd_q);
1558 
1559 e_ctx:
1560 	ccp_dm_free(&ctx);
1561 
1562 	return ret;
1563 }
1564 
1565 static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1566 {
1567 	struct ccp_rsa_engine *rsa = &cmd->u.rsa;
1568 	struct ccp_dm_workarea exp, src;
1569 	struct ccp_data dst;
1570 	struct ccp_op op;
1571 	unsigned int ksb_count, i_len, o_len;
1572 	int ret;
1573 
1574 	if (rsa->key_size > CCP_RSA_MAX_WIDTH)
1575 		return -EINVAL;
1576 
1577 	if (!rsa->exp || !rsa->mod || !rsa->src || !rsa->dst)
1578 		return -EINVAL;
1579 
1580 	/* The RSA modulus must precede the message being acted upon, so
1581 	 * it must be copied to a DMA area where the message and the
1582 	 * modulus can be concatenated.  Therefore the input buffer
1583 	 * length required is twice the output buffer length (which
1584 	 * must be a multiple of 256-bits).
1585 	 */
1586 	o_len = ((rsa->key_size + 255) / 256) * 32;
1587 	i_len = o_len * 2;
1588 
1589 	ksb_count = o_len / CCP_KSB_BYTES;
1590 
1591 	memset(&op, 0, sizeof(op));
1592 	op.cmd_q = cmd_q;
1593 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1594 	op.ksb_key = ccp_alloc_ksb(cmd_q->ccp, ksb_count);
1595 	if (!op.ksb_key)
1596 		return -EIO;
1597 
1598 	/* The RSA exponent may span multiple (32-byte) KSB entries and must
1599 	 * be in little endian format. Reverse copy each 32-byte chunk
1600 	 * of the exponent (En chunk to E0 chunk, E(n-1) chunk to E1 chunk)
1601 	 * and each byte within that chunk and do not perform any byte swap
1602 	 * operations on the passthru operation.
1603 	 */
1604 	ret = ccp_init_dm_workarea(&exp, cmd_q, o_len, DMA_TO_DEVICE);
1605 	if (ret)
1606 		goto e_ksb;
1607 
1608 	ccp_reverse_set_dm_area(&exp, rsa->exp, rsa->exp_len, CCP_KSB_BYTES,
1609 				false);
1610 	ret = ccp_copy_to_ksb(cmd_q, &exp, op.jobid, op.ksb_key,
1611 			      CCP_PASSTHRU_BYTESWAP_NOOP);
1612 	if (ret) {
1613 		cmd->engine_error = cmd_q->cmd_error;
1614 		goto e_exp;
1615 	}
1616 
1617 	/* Concatenate the modulus and the message. Both the modulus and
1618 	 * the operands must be in little endian format.  Since the input
1619 	 * is in big endian format it must be converted.
1620 	 */
1621 	ret = ccp_init_dm_workarea(&src, cmd_q, i_len, DMA_TO_DEVICE);
1622 	if (ret)
1623 		goto e_exp;
1624 
1625 	ccp_reverse_set_dm_area(&src, rsa->mod, rsa->mod_len, CCP_KSB_BYTES,
1626 				false);
1627 	src.address += o_len;	/* Adjust the address for the copy operation */
1628 	ccp_reverse_set_dm_area(&src, rsa->src, rsa->src_len, CCP_KSB_BYTES,
1629 				false);
1630 	src.address -= o_len;	/* Reset the address to original value */
1631 
1632 	/* Prepare the output area for the operation */
1633 	ret = ccp_init_data(&dst, cmd_q, rsa->dst, rsa->mod_len,
1634 			    o_len, DMA_FROM_DEVICE);
1635 	if (ret)
1636 		goto e_src;
1637 
1638 	op.soc = 1;
1639 	op.src.u.dma.address = src.dma.address;
1640 	op.src.u.dma.offset = 0;
1641 	op.src.u.dma.length = i_len;
1642 	op.dst.u.dma.address = dst.dm_wa.dma.address;
1643 	op.dst.u.dma.offset = 0;
1644 	op.dst.u.dma.length = o_len;
1645 
1646 	op.u.rsa.mod_size = rsa->key_size;
1647 	op.u.rsa.input_len = i_len;
1648 
1649 	ret = ccp_perform_rsa(&op);
1650 	if (ret) {
1651 		cmd->engine_error = cmd_q->cmd_error;
1652 		goto e_dst;
1653 	}
1654 
1655 	ccp_reverse_get_dm_area(&dst.dm_wa, rsa->dst, rsa->mod_len);
1656 
1657 e_dst:
1658 	ccp_free_data(&dst, cmd_q);
1659 
1660 e_src:
1661 	ccp_dm_free(&src);
1662 
1663 e_exp:
1664 	ccp_dm_free(&exp);
1665 
1666 e_ksb:
1667 	ccp_free_ksb(cmd_q->ccp, op.ksb_key, ksb_count);
1668 
1669 	return ret;
1670 }
1671 
1672 static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q,
1673 				struct ccp_cmd *cmd)
1674 {
1675 	struct ccp_passthru_engine *pt = &cmd->u.passthru;
1676 	struct ccp_dm_workarea mask;
1677 	struct ccp_data src, dst;
1678 	struct ccp_op op;
1679 	bool in_place = false;
1680 	unsigned int i;
1681 	int ret;
1682 
1683 	if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1)))
1684 		return -EINVAL;
1685 
1686 	if (!pt->src || !pt->dst)
1687 		return -EINVAL;
1688 
1689 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
1690 		if (pt->mask_len != CCP_PASSTHRU_MASKSIZE)
1691 			return -EINVAL;
1692 		if (!pt->mask)
1693 			return -EINVAL;
1694 	}
1695 
1696 	BUILD_BUG_ON(CCP_PASSTHRU_KSB_COUNT != 1);
1697 
1698 	memset(&op, 0, sizeof(op));
1699 	op.cmd_q = cmd_q;
1700 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1701 
1702 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
1703 		/* Load the mask */
1704 		op.ksb_key = cmd_q->ksb_key;
1705 
1706 		ret = ccp_init_dm_workarea(&mask, cmd_q,
1707 					   CCP_PASSTHRU_KSB_COUNT *
1708 					   CCP_KSB_BYTES,
1709 					   DMA_TO_DEVICE);
1710 		if (ret)
1711 			return ret;
1712 
1713 		ccp_set_dm_area(&mask, 0, pt->mask, 0, pt->mask_len);
1714 		ret = ccp_copy_to_ksb(cmd_q, &mask, op.jobid, op.ksb_key,
1715 				      CCP_PASSTHRU_BYTESWAP_NOOP);
1716 		if (ret) {
1717 			cmd->engine_error = cmd_q->cmd_error;
1718 			goto e_mask;
1719 		}
1720 	}
1721 
1722 	/* Prepare the input and output data workareas. For in-place
1723 	 * operations we need to set the dma direction to BIDIRECTIONAL
1724 	 * and copy the src workarea to the dst workarea.
1725 	 */
1726 	if (sg_virt(pt->src) == sg_virt(pt->dst))
1727 		in_place = true;
1728 
1729 	ret = ccp_init_data(&src, cmd_q, pt->src, pt->src_len,
1730 			    CCP_PASSTHRU_MASKSIZE,
1731 			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1732 	if (ret)
1733 		goto e_mask;
1734 
1735 	if (in_place)
1736 		dst = src;
1737 	else {
1738 		ret = ccp_init_data(&dst, cmd_q, pt->dst, pt->src_len,
1739 				    CCP_PASSTHRU_MASKSIZE, DMA_FROM_DEVICE);
1740 		if (ret)
1741 			goto e_src;
1742 	}
1743 
1744 	/* Send data to the CCP Passthru engine
1745 	 *   Because the CCP engine works on a single source and destination
1746 	 *   dma address at a time, each entry in the source scatterlist
1747 	 *   (after the dma_map_sg call) must be less than or equal to the
1748 	 *   (remaining) length in the destination scatterlist entry and the
1749 	 *   length must be a multiple of CCP_PASSTHRU_BLOCKSIZE
1750 	 */
1751 	dst.sg_wa.sg_used = 0;
1752 	for (i = 1; i <= src.sg_wa.dma_count; i++) {
1753 		if (!dst.sg_wa.sg ||
1754 		    (dst.sg_wa.sg->length < src.sg_wa.sg->length)) {
1755 			ret = -EINVAL;
1756 			goto e_dst;
1757 		}
1758 
1759 		if (i == src.sg_wa.dma_count) {
1760 			op.eom = 1;
1761 			op.soc = 1;
1762 		}
1763 
1764 		op.src.type = CCP_MEMTYPE_SYSTEM;
1765 		op.src.u.dma.address = sg_dma_address(src.sg_wa.sg);
1766 		op.src.u.dma.offset = 0;
1767 		op.src.u.dma.length = sg_dma_len(src.sg_wa.sg);
1768 
1769 		op.dst.type = CCP_MEMTYPE_SYSTEM;
1770 		op.dst.u.dma.address = sg_dma_address(dst.sg_wa.sg);
1771 		op.dst.u.dma.offset = dst.sg_wa.sg_used;
1772 		op.dst.u.dma.length = op.src.u.dma.length;
1773 
1774 		ret = ccp_perform_passthru(&op);
1775 		if (ret) {
1776 			cmd->engine_error = cmd_q->cmd_error;
1777 			goto e_dst;
1778 		}
1779 
1780 		dst.sg_wa.sg_used += src.sg_wa.sg->length;
1781 		if (dst.sg_wa.sg_used == dst.sg_wa.sg->length) {
1782 			dst.sg_wa.sg = sg_next(dst.sg_wa.sg);
1783 			dst.sg_wa.sg_used = 0;
1784 		}
1785 		src.sg_wa.sg = sg_next(src.sg_wa.sg);
1786 	}
1787 
1788 e_dst:
1789 	if (!in_place)
1790 		ccp_free_data(&dst, cmd_q);
1791 
1792 e_src:
1793 	ccp_free_data(&src, cmd_q);
1794 
1795 e_mask:
1796 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
1797 		ccp_dm_free(&mask);
1798 
1799 	return ret;
1800 }
1801 
1802 static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1803 {
1804 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
1805 	struct ccp_dm_workarea src, dst;
1806 	struct ccp_op op;
1807 	int ret;
1808 	u8 *save;
1809 
1810 	if (!ecc->u.mm.operand_1 ||
1811 	    (ecc->u.mm.operand_1_len > CCP_ECC_MODULUS_BYTES))
1812 		return -EINVAL;
1813 
1814 	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT)
1815 		if (!ecc->u.mm.operand_2 ||
1816 		    (ecc->u.mm.operand_2_len > CCP_ECC_MODULUS_BYTES))
1817 			return -EINVAL;
1818 
1819 	if (!ecc->u.mm.result ||
1820 	    (ecc->u.mm.result_len < CCP_ECC_MODULUS_BYTES))
1821 		return -EINVAL;
1822 
1823 	memset(&op, 0, sizeof(op));
1824 	op.cmd_q = cmd_q;
1825 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1826 
1827 	/* Concatenate the modulus and the operands. Both the modulus and
1828 	 * the operands must be in little endian format.  Since the input
1829 	 * is in big endian format it must be converted and placed in a
1830 	 * fixed length buffer.
1831 	 */
1832 	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
1833 				   DMA_TO_DEVICE);
1834 	if (ret)
1835 		return ret;
1836 
1837 	/* Save the workarea address since it is updated in order to perform
1838 	 * the concatenation
1839 	 */
1840 	save = src.address;
1841 
1842 	/* Copy the ECC modulus */
1843 	ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
1844 				CCP_ECC_OPERAND_SIZE, false);
1845 	src.address += CCP_ECC_OPERAND_SIZE;
1846 
1847 	/* Copy the first operand */
1848 	ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_1,
1849 				ecc->u.mm.operand_1_len,
1850 				CCP_ECC_OPERAND_SIZE, false);
1851 	src.address += CCP_ECC_OPERAND_SIZE;
1852 
1853 	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT) {
1854 		/* Copy the second operand */
1855 		ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_2,
1856 					ecc->u.mm.operand_2_len,
1857 					CCP_ECC_OPERAND_SIZE, false);
1858 		src.address += CCP_ECC_OPERAND_SIZE;
1859 	}
1860 
1861 	/* Restore the workarea address */
1862 	src.address = save;
1863 
1864 	/* Prepare the output area for the operation */
1865 	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
1866 				   DMA_FROM_DEVICE);
1867 	if (ret)
1868 		goto e_src;
1869 
1870 	op.soc = 1;
1871 	op.src.u.dma.address = src.dma.address;
1872 	op.src.u.dma.offset = 0;
1873 	op.src.u.dma.length = src.length;
1874 	op.dst.u.dma.address = dst.dma.address;
1875 	op.dst.u.dma.offset = 0;
1876 	op.dst.u.dma.length = dst.length;
1877 
1878 	op.u.ecc.function = cmd->u.ecc.function;
1879 
1880 	ret = ccp_perform_ecc(&op);
1881 	if (ret) {
1882 		cmd->engine_error = cmd_q->cmd_error;
1883 		goto e_dst;
1884 	}
1885 
1886 	ecc->ecc_result = le16_to_cpup(
1887 		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
1888 	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
1889 		ret = -EIO;
1890 		goto e_dst;
1891 	}
1892 
1893 	/* Save the ECC result */
1894 	ccp_reverse_get_dm_area(&dst, ecc->u.mm.result, CCP_ECC_MODULUS_BYTES);
1895 
1896 e_dst:
1897 	ccp_dm_free(&dst);
1898 
1899 e_src:
1900 	ccp_dm_free(&src);
1901 
1902 	return ret;
1903 }
1904 
1905 static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1906 {
1907 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
1908 	struct ccp_dm_workarea src, dst;
1909 	struct ccp_op op;
1910 	int ret;
1911 	u8 *save;
1912 
1913 	if (!ecc->u.pm.point_1.x ||
1914 	    (ecc->u.pm.point_1.x_len > CCP_ECC_MODULUS_BYTES) ||
1915 	    !ecc->u.pm.point_1.y ||
1916 	    (ecc->u.pm.point_1.y_len > CCP_ECC_MODULUS_BYTES))
1917 		return -EINVAL;
1918 
1919 	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
1920 		if (!ecc->u.pm.point_2.x ||
1921 		    (ecc->u.pm.point_2.x_len > CCP_ECC_MODULUS_BYTES) ||
1922 		    !ecc->u.pm.point_2.y ||
1923 		    (ecc->u.pm.point_2.y_len > CCP_ECC_MODULUS_BYTES))
1924 			return -EINVAL;
1925 	} else {
1926 		if (!ecc->u.pm.domain_a ||
1927 		    (ecc->u.pm.domain_a_len > CCP_ECC_MODULUS_BYTES))
1928 			return -EINVAL;
1929 
1930 		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT)
1931 			if (!ecc->u.pm.scalar ||
1932 			    (ecc->u.pm.scalar_len > CCP_ECC_MODULUS_BYTES))
1933 				return -EINVAL;
1934 	}
1935 
1936 	if (!ecc->u.pm.result.x ||
1937 	    (ecc->u.pm.result.x_len < CCP_ECC_MODULUS_BYTES) ||
1938 	    !ecc->u.pm.result.y ||
1939 	    (ecc->u.pm.result.y_len < CCP_ECC_MODULUS_BYTES))
1940 		return -EINVAL;
1941 
1942 	memset(&op, 0, sizeof(op));
1943 	op.cmd_q = cmd_q;
1944 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1945 
1946 	/* Concatenate the modulus and the operands. Both the modulus and
1947 	 * the operands must be in little endian format.  Since the input
1948 	 * is in big endian format it must be converted and placed in a
1949 	 * fixed length buffer.
1950 	 */
1951 	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
1952 				   DMA_TO_DEVICE);
1953 	if (ret)
1954 		return ret;
1955 
1956 	/* Save the workarea address since it is updated in order to perform
1957 	 * the concatenation
1958 	 */
1959 	save = src.address;
1960 
1961 	/* Copy the ECC modulus */
1962 	ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
1963 				CCP_ECC_OPERAND_SIZE, false);
1964 	src.address += CCP_ECC_OPERAND_SIZE;
1965 
1966 	/* Copy the first point X and Y coordinate */
1967 	ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.x,
1968 				ecc->u.pm.point_1.x_len,
1969 				CCP_ECC_OPERAND_SIZE, false);
1970 	src.address += CCP_ECC_OPERAND_SIZE;
1971 	ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.y,
1972 				ecc->u.pm.point_1.y_len,
1973 				CCP_ECC_OPERAND_SIZE, false);
1974 	src.address += CCP_ECC_OPERAND_SIZE;
1975 
1976 	/* Set the first point Z coordianate to 1 */
1977 	*(src.address) = 0x01;
1978 	src.address += CCP_ECC_OPERAND_SIZE;
1979 
1980 	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
1981 		/* Copy the second point X and Y coordinate */
1982 		ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.x,
1983 					ecc->u.pm.point_2.x_len,
1984 					CCP_ECC_OPERAND_SIZE, false);
1985 		src.address += CCP_ECC_OPERAND_SIZE;
1986 		ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.y,
1987 					ecc->u.pm.point_2.y_len,
1988 					CCP_ECC_OPERAND_SIZE, false);
1989 		src.address += CCP_ECC_OPERAND_SIZE;
1990 
1991 		/* Set the second point Z coordianate to 1 */
1992 		*(src.address) = 0x01;
1993 		src.address += CCP_ECC_OPERAND_SIZE;
1994 	} else {
1995 		/* Copy the Domain "a" parameter */
1996 		ccp_reverse_set_dm_area(&src, ecc->u.pm.domain_a,
1997 					ecc->u.pm.domain_a_len,
1998 					CCP_ECC_OPERAND_SIZE, false);
1999 		src.address += CCP_ECC_OPERAND_SIZE;
2000 
2001 		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT) {
2002 			/* Copy the scalar value */
2003 			ccp_reverse_set_dm_area(&src, ecc->u.pm.scalar,
2004 						ecc->u.pm.scalar_len,
2005 						CCP_ECC_OPERAND_SIZE, false);
2006 			src.address += CCP_ECC_OPERAND_SIZE;
2007 		}
2008 	}
2009 
2010 	/* Restore the workarea address */
2011 	src.address = save;
2012 
2013 	/* Prepare the output area for the operation */
2014 	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
2015 				   DMA_FROM_DEVICE);
2016 	if (ret)
2017 		goto e_src;
2018 
2019 	op.soc = 1;
2020 	op.src.u.dma.address = src.dma.address;
2021 	op.src.u.dma.offset = 0;
2022 	op.src.u.dma.length = src.length;
2023 	op.dst.u.dma.address = dst.dma.address;
2024 	op.dst.u.dma.offset = 0;
2025 	op.dst.u.dma.length = dst.length;
2026 
2027 	op.u.ecc.function = cmd->u.ecc.function;
2028 
2029 	ret = ccp_perform_ecc(&op);
2030 	if (ret) {
2031 		cmd->engine_error = cmd_q->cmd_error;
2032 		goto e_dst;
2033 	}
2034 
2035 	ecc->ecc_result = le16_to_cpup(
2036 		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
2037 	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
2038 		ret = -EIO;
2039 		goto e_dst;
2040 	}
2041 
2042 	/* Save the workarea address since it is updated as we walk through
2043 	 * to copy the point math result
2044 	 */
2045 	save = dst.address;
2046 
2047 	/* Save the ECC result X and Y coordinates */
2048 	ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.x,
2049 				CCP_ECC_MODULUS_BYTES);
2050 	dst.address += CCP_ECC_OUTPUT_SIZE;
2051 	ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.y,
2052 				CCP_ECC_MODULUS_BYTES);
2053 	dst.address += CCP_ECC_OUTPUT_SIZE;
2054 
2055 	/* Restore the workarea address */
2056 	dst.address = save;
2057 
2058 e_dst:
2059 	ccp_dm_free(&dst);
2060 
2061 e_src:
2062 	ccp_dm_free(&src);
2063 
2064 	return ret;
2065 }
2066 
2067 static int ccp_run_ecc_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
2068 {
2069 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
2070 
2071 	ecc->ecc_result = 0;
2072 
2073 	if (!ecc->mod ||
2074 	    (ecc->mod_len > CCP_ECC_MODULUS_BYTES))
2075 		return -EINVAL;
2076 
2077 	switch (ecc->function) {
2078 	case CCP_ECC_FUNCTION_MMUL_384BIT:
2079 	case CCP_ECC_FUNCTION_MADD_384BIT:
2080 	case CCP_ECC_FUNCTION_MINV_384BIT:
2081 		return ccp_run_ecc_mm_cmd(cmd_q, cmd);
2082 
2083 	case CCP_ECC_FUNCTION_PADD_384BIT:
2084 	case CCP_ECC_FUNCTION_PMUL_384BIT:
2085 	case CCP_ECC_FUNCTION_PDBL_384BIT:
2086 		return ccp_run_ecc_pm_cmd(cmd_q, cmd);
2087 
2088 	default:
2089 		return -EINVAL;
2090 	}
2091 }
2092 
2093 int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
2094 {
2095 	int ret;
2096 
2097 	cmd->engine_error = 0;
2098 	cmd_q->cmd_error = 0;
2099 	cmd_q->int_rcvd = 0;
2100 	cmd_q->free_slots = CMD_Q_DEPTH(ioread32(cmd_q->reg_status));
2101 
2102 	switch (cmd->engine) {
2103 	case CCP_ENGINE_AES:
2104 		ret = ccp_run_aes_cmd(cmd_q, cmd);
2105 		break;
2106 	case CCP_ENGINE_XTS_AES_128:
2107 		ret = ccp_run_xts_aes_cmd(cmd_q, cmd);
2108 		break;
2109 	case CCP_ENGINE_SHA:
2110 		ret = ccp_run_sha_cmd(cmd_q, cmd);
2111 		break;
2112 	case CCP_ENGINE_RSA:
2113 		ret = ccp_run_rsa_cmd(cmd_q, cmd);
2114 		break;
2115 	case CCP_ENGINE_PASSTHRU:
2116 		ret = ccp_run_passthru_cmd(cmd_q, cmd);
2117 		break;
2118 	case CCP_ENGINE_ECC:
2119 		ret = ccp_run_ecc_cmd(cmd_q, cmd);
2120 		break;
2121 	default:
2122 		ret = -EINVAL;
2123 	}
2124 
2125 	return ret;
2126 }
2127