xref: /openbmc/linux/drivers/crypto/ccp/ccp-dev-v3.c (revision 6c7c3245)
/*
 * AMD Cryptographic Coprocessor (CCP) driver
 *
 * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
 *
 * Author: Tom Lendacky <thomas.lendacky@amd.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/kthread.h>
#include <linux/interrupt.h>
#include <linux/ccp.h>

#include "ccp-dev.h"

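/* Submit an operation by writing the prepared command register values.
 * CMD_REQ1 through CMD_REQx are written first; writing CMD_REQ0 then
 * starts the job.  If an interrupt on completion was requested, wait
 * for it and handle any error or stop-on-complete condition.
 */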
static int ccp_do_cmd(struct ccp_op *op, u32 *cr, unsigned int cr_count)
{
	struct ccp_cmd_queue *cmd_q = op->cmd_q;
	struct ccp_device *ccp = cmd_q->ccp;
	void __iomem *cr_addr;
	u32 cr0, cmd;
	unsigned int i;
	int ret = 0;

	/* We could read a status register to see how many free slots
	 * are actually available, but reading that register resets it
	 * and you could lose some error information.
	 */
	cmd_q->free_slots--;

	cr0 = (cmd_q->id << REQ0_CMD_Q_SHIFT)
	      | (op->jobid << REQ0_JOBID_SHIFT)
	      | REQ0_WAIT_FOR_WRITE;

	if (op->soc)
		cr0 |= REQ0_STOP_ON_COMPLETE
		       | REQ0_INT_ON_COMPLETE;

	if (op->ioc || !cmd_q->free_slots)
		cr0 |= REQ0_INT_ON_COMPLETE;

	/* Start at CMD_REQ1 */
	cr_addr = ccp->io_regs + CMD_REQ0 + CMD_REQ_INCR;

	mutex_lock(&ccp->req_mutex);

	/* Write CMD_REQ1 through CMD_REQx first */
	for (i = 0; i < cr_count; i++, cr_addr += CMD_REQ_INCR)
		iowrite32(*(cr + i), cr_addr);

	/* Tell the CCP to start */
	wmb();
	iowrite32(cr0, ccp->io_regs + CMD_REQ0);

	mutex_unlock(&ccp->req_mutex);

	if (cr0 & REQ0_INT_ON_COMPLETE) {
		/* Wait for the job to complete */
		ret = wait_event_interruptible(cmd_q->int_queue,
					       cmd_q->int_rcvd);
		if (ret || cmd_q->cmd_error) {
			/* On error delete all related jobs from the queue */
			cmd = (cmd_q->id << DEL_Q_ID_SHIFT)
			      | op->jobid;

			iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);

			if (!ret)
				ret = -EIO;
		} else if (op->soc) {
			/* Delete just head job from the queue on SoC */
			cmd = DEL_Q_ACTIVE
			      | (cmd_q->id << DEL_Q_ID_SHIFT)
			      | op->jobid;

			iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);
		}

		cmd_q->free_slots = CMD_Q_DEPTH(cmd_q->q_status);

		cmd_q->int_rcvd = 0;
	}

	return ret;
}

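/* Build the CMD_REQ register values for an AES operation and submit it */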
static int ccp_perform_aes(struct ccp_op *op)
{
	u32 cr[6];

	/* Fill out the register contents for REQ1 through REQ6 */
	cr[0] = (CCP_ENGINE_AES << REQ1_ENGINE_SHIFT)
		| (op->u.aes.type << REQ1_AES_TYPE_SHIFT)
		| (op->u.aes.mode << REQ1_AES_MODE_SHIFT)
		| (op->u.aes.action << REQ1_AES_ACTION_SHIFT)
		| (op->ksb_key << REQ1_KEY_KSB_SHIFT);
	cr[1] = op->src.u.dma.length - 1;
	cr[2] = ccp_addr_lo(&op->src.u.dma);
	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->src.u.dma);
	cr[4] = ccp_addr_lo(&op->dst.u.dma);
	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->dst.u.dma);

	if (op->u.aes.mode == CCP_AES_MODE_CFB)
		cr[0] |= ((0x7f) << REQ1_AES_CFB_SIZE_SHIFT);

	if (op->eom)
		cr[0] |= REQ1_EOM;

	if (op->init)
		cr[0] |= REQ1_INIT;

	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}

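/* Build the CMD_REQ register values for an XTS-AES operation and submit it */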
static int ccp_perform_xts_aes(struct ccp_op *op)
{
	u32 cr[6];

	/* Fill out the register contents for REQ1 through REQ6 */
	cr[0] = (CCP_ENGINE_XTS_AES_128 << REQ1_ENGINE_SHIFT)
		| (op->u.xts.action << REQ1_AES_ACTION_SHIFT)
		| (op->u.xts.unit_size << REQ1_XTS_AES_SIZE_SHIFT)
		| (op->ksb_key << REQ1_KEY_KSB_SHIFT);
	cr[1] = op->src.u.dma.length - 1;
	cr[2] = ccp_addr_lo(&op->src.u.dma);
	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->src.u.dma);
	cr[4] = ccp_addr_lo(&op->dst.u.dma);
	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->dst.u.dma);

	if (op->eom)
		cr[0] |= REQ1_EOM;

	if (op->init)
		cr[0] |= REQ1_INIT;

	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}

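/* Build the CMD_REQ register values for a SHA operation; the message
 * bit count is only supplied on the final (EOM) pass.
 */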
static int ccp_perform_sha(struct ccp_op *op)
{
	u32 cr[6];

	/* Fill out the register contents for REQ1 through REQ6 */
	cr[0] = (CCP_ENGINE_SHA << REQ1_ENGINE_SHIFT)
		| (op->u.sha.type << REQ1_SHA_TYPE_SHIFT)
		| REQ1_INIT;
	cr[1] = op->src.u.dma.length - 1;
	cr[2] = ccp_addr_lo(&op->src.u.dma);
	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->src.u.dma);

	if (op->eom) {
		cr[0] |= REQ1_EOM;
		cr[4] = lower_32_bits(op->u.sha.msg_bits);
		cr[5] = upper_32_bits(op->u.sha.msg_bits);
	} else {
		cr[4] = 0;
		cr[5] = 0;
	}

	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}

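/* Build the CMD_REQ register values for an RSA operation and submit it */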
static int ccp_perform_rsa(struct ccp_op *op)
{
	u32 cr[6];

	/* Fill out the register contents for REQ1 through REQ6 */
	cr[0] = (CCP_ENGINE_RSA << REQ1_ENGINE_SHIFT)
		| (op->u.rsa.mod_size << REQ1_RSA_MOD_SIZE_SHIFT)
		| (op->ksb_key << REQ1_KEY_KSB_SHIFT)
		| REQ1_EOM;
	cr[1] = op->u.rsa.input_len - 1;
	cr[2] = ccp_addr_lo(&op->src.u.dma);
	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->src.u.dma);
	cr[4] = ccp_addr_lo(&op->dst.u.dma);
	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->dst.u.dma);

	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}

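/* Build the CMD_REQ register values for a pass-through operation; the
 * source and destination can each be either system memory or the KSB.
 */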
static int ccp_perform_passthru(struct ccp_op *op)
{
	u32 cr[6];

	/* Fill out the register contents for REQ1 through REQ6 */
	cr[0] = (CCP_ENGINE_PASSTHRU << REQ1_ENGINE_SHIFT)
		| (op->u.passthru.bit_mod << REQ1_PT_BW_SHIFT)
		| (op->u.passthru.byte_swap << REQ1_PT_BS_SHIFT);

	if (op->src.type == CCP_MEMTYPE_SYSTEM)
		cr[1] = op->src.u.dma.length - 1;
	else
		cr[1] = op->dst.u.dma.length - 1;

	if (op->src.type == CCP_MEMTYPE_SYSTEM) {
		cr[2] = ccp_addr_lo(&op->src.u.dma);
		cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
			| ccp_addr_hi(&op->src.u.dma);

		if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
			cr[3] |= (op->ksb_key << REQ4_KSB_SHIFT);
	} else {
		cr[2] = op->src.u.ksb * CCP_KSB_BYTES;
		cr[3] = (CCP_MEMTYPE_KSB << REQ4_MEMTYPE_SHIFT);
	}

	if (op->dst.type == CCP_MEMTYPE_SYSTEM) {
		cr[4] = ccp_addr_lo(&op->dst.u.dma);
		cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
			| ccp_addr_hi(&op->dst.u.dma);
	} else {
		cr[4] = op->dst.u.ksb * CCP_KSB_BYTES;
		cr[5] = (CCP_MEMTYPE_KSB << REQ6_MEMTYPE_SHIFT);
	}

	if (op->eom)
		cr[0] |= REQ1_EOM;

	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}

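/* Build the CMD_REQ register values for an ECC operation and submit it */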
static int ccp_perform_ecc(struct ccp_op *op)
{
	u32 cr[6];

	/* Fill out the register contents for REQ1 through REQ6 */
	cr[0] = REQ1_ECC_AFFINE_CONVERT
		| (CCP_ENGINE_ECC << REQ1_ENGINE_SHIFT)
		| (op->u.ecc.function << REQ1_ECC_FUNCTION_SHIFT)
		| REQ1_EOM;
	cr[1] = op->src.u.dma.length - 1;
	cr[2] = ccp_addr_lo(&op->src.u.dma);
	cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->src.u.dma);
	cr[4] = ccp_addr_lo(&op->dst.u.dma);
	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->dst.u.dma);

	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}

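/* hwrng read callback: return up to four bytes from the TRNG output register */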
static int ccp_trng_read(struct hwrng *rng, void *data, size_t max, bool wait)
{
	struct ccp_device *ccp = container_of(rng, struct ccp_device, hwrng);
	u32 trng_value;
	int len = min_t(int, sizeof(trng_value), max);

	/*
	 * Locking is provided by the caller so we can update device
	 * hwrng-related fields safely
	 */
	trng_value = ioread32(ccp->io_regs + TRNG_OUT_REG);
	if (!trng_value) {
		/* Zero is returned if no data is available or if a
		 * bad-entropy error is present. Assume an error if
		 * we exceed TRNG_RETRIES reads of zero.
		 */
		if (ccp->hwrng_retries++ > TRNG_RETRIES)
			return -EIO;

		return 0;
	}

	/* Reset the counter and save the rng value */
	ccp->hwrng_retries = 0;
	memcpy(data, &trng_value, len);

	return len;
}

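/*
 * Initialize a version 3 CCP device: set up each available command queue
 * (DMA pool, KSB regions, per-queue registers), request the IRQ, start a
 * kthread per queue, register the hwrng and DMA engine interfaces, and
 * finally enable the command queue interrupts.
 */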
static int ccp_init(struct ccp_device *ccp)
{
	struct device *dev = ccp->dev;
	struct ccp_cmd_queue *cmd_q;
	struct dma_pool *dma_pool;
	char dma_pool_name[MAX_DMAPOOL_NAME_LEN];
	unsigned int qmr, qim, i;
	int ret;

	/* Find available queues */
	qim = 0;
	qmr = ioread32(ccp->io_regs + Q_MASK_REG);
	for (i = 0; i < MAX_HW_QUEUES; i++) {
		if (!(qmr & (1 << i)))
			continue;

		/* Allocate a dma pool for this queue */
		snprintf(dma_pool_name, sizeof(dma_pool_name), "%s_q%d",
			 ccp->name, i);
		dma_pool = dma_pool_create(dma_pool_name, dev,
					   CCP_DMAPOOL_MAX_SIZE,
					   CCP_DMAPOOL_ALIGN, 0);
		if (!dma_pool) {
			dev_err(dev, "unable to allocate dma pool\n");
			ret = -ENOMEM;
			goto e_pool;
		}

		cmd_q = &ccp->cmd_q[ccp->cmd_q_count];
		ccp->cmd_q_count++;

		cmd_q->ccp = ccp;
		cmd_q->id = i;
		cmd_q->dma_pool = dma_pool;

		/* Reserve 2 KSB regions for the queue */
		cmd_q->ksb_key = KSB_START + ccp->ksb_start++;
		cmd_q->ksb_ctx = KSB_START + ccp->ksb_start++;
		ccp->ksb_count -= 2;

		/* Preset some register values and masks that are queue
		 * number dependent
		 */
		cmd_q->reg_status = ccp->io_regs + CMD_Q_STATUS_BASE +
				    (CMD_Q_STATUS_INCR * i);
		cmd_q->reg_int_status = ccp->io_regs + CMD_Q_INT_STATUS_BASE +
					(CMD_Q_STATUS_INCR * i);
		cmd_q->int_ok = 1 << (i * 2);
		cmd_q->int_err = 1 << ((i * 2) + 1);

		cmd_q->free_slots = CMD_Q_DEPTH(ioread32(cmd_q->reg_status));

		init_waitqueue_head(&cmd_q->int_queue);

		/* Build queue interrupt mask (two interrupts per queue) */
		qim |= cmd_q->int_ok | cmd_q->int_err;

#ifdef CONFIG_ARM64
		/* For arm64 set the recommended queue cache settings */
		iowrite32(ccp->axcache, ccp->io_regs + CMD_Q_CACHE_BASE +
			  (CMD_Q_CACHE_INC * i));
#endif

		dev_dbg(dev, "queue #%u available\n", i);
	}
	if (ccp->cmd_q_count == 0) {
		dev_notice(dev, "no command queues available\n");
		ret = -EIO;
		goto e_pool;
	}
	dev_notice(dev, "%u command queues available\n", ccp->cmd_q_count);

	/* Disable and clear interrupts until ready */
	iowrite32(0x00, ccp->io_regs + IRQ_MASK_REG);
	for (i = 0; i < ccp->cmd_q_count; i++) {
		cmd_q = &ccp->cmd_q[i];

		ioread32(cmd_q->reg_int_status);
		ioread32(cmd_q->reg_status);
	}
	iowrite32(qim, ccp->io_regs + IRQ_STATUS_REG);

	/* Request an irq */
	ret = ccp->get_irq(ccp);
	if (ret) {
		dev_err(dev, "unable to allocate an IRQ\n");
		goto e_pool;
	}

	/* Initialize the queues used to wait for KSB space and suspend */
	init_waitqueue_head(&ccp->ksb_queue);
	init_waitqueue_head(&ccp->suspend_queue);

	/* Create a kthread for each queue */
	for (i = 0; i < ccp->cmd_q_count; i++) {
		struct task_struct *kthread;

		cmd_q = &ccp->cmd_q[i];

		kthread = kthread_create(ccp_cmd_queue_thread, cmd_q,
					 "%s-q%u", ccp->name, cmd_q->id);
		if (IS_ERR(kthread)) {
			dev_err(dev, "error creating queue thread (%ld)\n",
				PTR_ERR(kthread));
			ret = PTR_ERR(kthread);
			goto e_kthread;
		}

		cmd_q->kthread = kthread;
		wake_up_process(kthread);
	}

	/* Register the RNG */
	ccp->hwrng.name = ccp->rngname;
	ccp->hwrng.read = ccp_trng_read;
	ret = hwrng_register(&ccp->hwrng);
	if (ret) {
		dev_err(dev, "error registering hwrng (%d)\n", ret);
		goto e_kthread;
	}

	/* Register the DMA engine support */
	ret = ccp_dmaengine_register(ccp);
	if (ret)
		goto e_hwrng;

	ccp_add_device(ccp);

	/* Enable interrupts */
	iowrite32(qim, ccp->io_regs + IRQ_MASK_REG);

	return 0;

e_hwrng:
	hwrng_unregister(&ccp->hwrng);

e_kthread:
	for (i = 0; i < ccp->cmd_q_count; i++)
		if (ccp->cmd_q[i].kthread)
			kthread_stop(ccp->cmd_q[i].kthread);

	ccp->free_irq(ccp);

e_pool:
	for (i = 0; i < ccp->cmd_q_count; i++)
		dma_pool_destroy(ccp->cmd_q[i].dma_pool);

	return ret;
}

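/* Tear down a device, undoing the work of ccp_init() and failing any
 * commands still on the cmd and backlog lists with -ENODEV.
 */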
static void ccp_destroy(struct ccp_device *ccp)
{
	struct ccp_cmd_queue *cmd_q;
	struct ccp_cmd *cmd;
	unsigned int qim, i;

	/* Remove this device from the list of available units first */
	ccp_del_device(ccp);

	/* Unregister the DMA engine */
	ccp_dmaengine_unregister(ccp);

	/* Unregister the RNG */
	hwrng_unregister(&ccp->hwrng);

	/* Stop the queue kthreads */
	for (i = 0; i < ccp->cmd_q_count; i++)
		if (ccp->cmd_q[i].kthread)
			kthread_stop(ccp->cmd_q[i].kthread);

	/* Build queue interrupt mask (two interrupt masks per queue) */
	qim = 0;
	for (i = 0; i < ccp->cmd_q_count; i++) {
		cmd_q = &ccp->cmd_q[i];
		qim |= cmd_q->int_ok | cmd_q->int_err;
	}

	/* Disable and clear interrupts */
	iowrite32(0x00, ccp->io_regs + IRQ_MASK_REG);
	for (i = 0; i < ccp->cmd_q_count; i++) {
		cmd_q = &ccp->cmd_q[i];

		ioread32(cmd_q->reg_int_status);
		ioread32(cmd_q->reg_status);
	}
	iowrite32(qim, ccp->io_regs + IRQ_STATUS_REG);

	ccp->free_irq(ccp);

	for (i = 0; i < ccp->cmd_q_count; i++)
		dma_pool_destroy(ccp->cmd_q[i].dma_pool);

	/* Flush the cmd and backlog queue */
	while (!list_empty(&ccp->cmd)) {
		/* Invoke the callback directly with an error code */
		cmd = list_first_entry(&ccp->cmd, struct ccp_cmd, entry);
		list_del(&cmd->entry);
		cmd->callback(cmd->data, -ENODEV);
	}
	while (!list_empty(&ccp->backlog)) {
		/* Invoke the callback directly with an error code */
		cmd = list_first_entry(&ccp->backlog, struct ccp_cmd, entry);
		list_del(&cmd->entry);
		cmd->callback(cmd->data, -ENODEV);
	}
}

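/* Interrupt handler: for each queue that raised an interrupt, record its
 * status, acknowledge the interrupt and wake the queue's waiter.
 */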
static irqreturn_t ccp_irq_handler(int irq, void *data)
{
	struct device *dev = data;
	struct ccp_device *ccp = dev_get_drvdata(dev);
	struct ccp_cmd_queue *cmd_q;
	u32 q_int, status;
	unsigned int i;

	status = ioread32(ccp->io_regs + IRQ_STATUS_REG);

	for (i = 0; i < ccp->cmd_q_count; i++) {
		cmd_q = &ccp->cmd_q[i];

		q_int = status & (cmd_q->int_ok | cmd_q->int_err);
		if (q_int) {
			cmd_q->int_status = status;
			cmd_q->q_status = ioread32(cmd_q->reg_status);
			cmd_q->q_int_status = ioread32(cmd_q->reg_int_status);

			/* On error, only save the first error value */
			if ((q_int & cmd_q->int_err) && !cmd_q->cmd_error)
				cmd_q->cmd_error = CMD_Q_ERROR(cmd_q->q_status);

			cmd_q->int_rcvd = 1;

			/* Acknowledge the interrupt and wake the kthread */
			iowrite32(q_int, ccp->io_regs + IRQ_STATUS_REG);
			wake_up_interruptible(&cmd_q->int_queue);
		}
	}

	return IRQ_HANDLED;
}

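/* Operation dispatch table and version data for version 3 CCP devices */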
static const struct ccp_actions ccp3_actions = {
	.perform_aes = ccp_perform_aes,
	.perform_xts_aes = ccp_perform_xts_aes,
	.perform_sha = ccp_perform_sha,
	.perform_rsa = ccp_perform_rsa,
	.perform_passthru = ccp_perform_passthru,
	.perform_ecc = ccp_perform_ecc,
	.init = ccp_init,
	.destroy = ccp_destroy,
	.irqhandler = ccp_irq_handler,
};

struct ccp_vdata ccpv3 = {
	.version = CCP_VERSION(3, 0),
	.perform = &ccp3_actions,
};