1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * AMD Cryptographic Coprocessor (CCP) driver 4 * 5 * Copyright (C) 2013-2019 Advanced Micro Devices, Inc. 6 * 7 * Author: Tom Lendacky <thomas.lendacky@amd.com> 8 * Author: Gary R Hook <gary.hook@amd.com> 9 * 10 * This program is free software; you can redistribute it and/or modify 11 * it under the terms of the GNU General Public License version 2 as 12 * published by the Free Software Foundation. 13 */ 14 15 #include <linux/module.h> 16 #include <linux/kernel.h> 17 #include <linux/pci.h> 18 #include <linux/interrupt.h> 19 #include <crypto/scatterwalk.h> 20 #include <crypto/des.h> 21 #include <linux/ccp.h> 22 23 #include "ccp-dev.h" 24 25 /* SHA initial context values */ 26 static const __be32 ccp_sha1_init[SHA1_DIGEST_SIZE / sizeof(__be32)] = { 27 cpu_to_be32(SHA1_H0), cpu_to_be32(SHA1_H1), 28 cpu_to_be32(SHA1_H2), cpu_to_be32(SHA1_H3), 29 cpu_to_be32(SHA1_H4), 30 }; 31 32 static const __be32 ccp_sha224_init[SHA256_DIGEST_SIZE / sizeof(__be32)] = { 33 cpu_to_be32(SHA224_H0), cpu_to_be32(SHA224_H1), 34 cpu_to_be32(SHA224_H2), cpu_to_be32(SHA224_H3), 35 cpu_to_be32(SHA224_H4), cpu_to_be32(SHA224_H5), 36 cpu_to_be32(SHA224_H6), cpu_to_be32(SHA224_H7), 37 }; 38 39 static const __be32 ccp_sha256_init[SHA256_DIGEST_SIZE / sizeof(__be32)] = { 40 cpu_to_be32(SHA256_H0), cpu_to_be32(SHA256_H1), 41 cpu_to_be32(SHA256_H2), cpu_to_be32(SHA256_H3), 42 cpu_to_be32(SHA256_H4), cpu_to_be32(SHA256_H5), 43 cpu_to_be32(SHA256_H6), cpu_to_be32(SHA256_H7), 44 }; 45 46 static const __be64 ccp_sha384_init[SHA512_DIGEST_SIZE / sizeof(__be64)] = { 47 cpu_to_be64(SHA384_H0), cpu_to_be64(SHA384_H1), 48 cpu_to_be64(SHA384_H2), cpu_to_be64(SHA384_H3), 49 cpu_to_be64(SHA384_H4), cpu_to_be64(SHA384_H5), 50 cpu_to_be64(SHA384_H6), cpu_to_be64(SHA384_H7), 51 }; 52 53 static const __be64 ccp_sha512_init[SHA512_DIGEST_SIZE / sizeof(__be64)] = { 54 cpu_to_be64(SHA512_H0), cpu_to_be64(SHA512_H1), 55 cpu_to_be64(SHA512_H2), cpu_to_be64(SHA512_H3), 56 cpu_to_be64(SHA512_H4), cpu_to_be64(SHA512_H5), 57 cpu_to_be64(SHA512_H6), cpu_to_be64(SHA512_H7), 58 }; 59 60 #define CCP_NEW_JOBID(ccp) ((ccp->vdata->version == CCP_VERSION(3, 0)) ? 
\ 61 ccp_gen_jobid(ccp) : 0) 62 63 static u32 ccp_gen_jobid(struct ccp_device *ccp) 64 { 65 return atomic_inc_return(&ccp->current_id) & CCP_JOBID_MASK; 66 } 67 68 static void ccp_sg_free(struct ccp_sg_workarea *wa) 69 { 70 if (wa->dma_count) 71 dma_unmap_sg(wa->dma_dev, wa->dma_sg, wa->nents, wa->dma_dir); 72 73 wa->dma_count = 0; 74 } 75 76 static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev, 77 struct scatterlist *sg, u64 len, 78 enum dma_data_direction dma_dir) 79 { 80 memset(wa, 0, sizeof(*wa)); 81 82 wa->sg = sg; 83 if (!sg) 84 return 0; 85 86 wa->nents = sg_nents_for_len(sg, len); 87 if (wa->nents < 0) 88 return wa->nents; 89 90 wa->bytes_left = len; 91 wa->sg_used = 0; 92 93 if (len == 0) 94 return 0; 95 96 if (dma_dir == DMA_NONE) 97 return 0; 98 99 wa->dma_sg = sg; 100 wa->dma_dev = dev; 101 wa->dma_dir = dma_dir; 102 wa->dma_count = dma_map_sg(dev, sg, wa->nents, dma_dir); 103 if (!wa->dma_count) 104 return -ENOMEM; 105 106 return 0; 107 } 108 109 static void ccp_update_sg_workarea(struct ccp_sg_workarea *wa, unsigned int len) 110 { 111 unsigned int nbytes = min_t(u64, len, wa->bytes_left); 112 113 if (!wa->sg) 114 return; 115 116 wa->sg_used += nbytes; 117 wa->bytes_left -= nbytes; 118 if (wa->sg_used == wa->sg->length) { 119 wa->sg = sg_next(wa->sg); 120 wa->sg_used = 0; 121 } 122 } 123 124 static void ccp_dm_free(struct ccp_dm_workarea *wa) 125 { 126 if (wa->length <= CCP_DMAPOOL_MAX_SIZE) { 127 if (wa->address) 128 dma_pool_free(wa->dma_pool, wa->address, 129 wa->dma.address); 130 } else { 131 if (wa->dma.address) 132 dma_unmap_single(wa->dev, wa->dma.address, wa->length, 133 wa->dma.dir); 134 kfree(wa->address); 135 } 136 137 wa->address = NULL; 138 wa->dma.address = 0; 139 } 140 141 static int ccp_init_dm_workarea(struct ccp_dm_workarea *wa, 142 struct ccp_cmd_queue *cmd_q, 143 unsigned int len, 144 enum dma_data_direction dir) 145 { 146 memset(wa, 0, sizeof(*wa)); 147 148 if (!len) 149 return 0; 150 151 wa->dev = cmd_q->ccp->dev; 152 wa->length = len; 153 154 if (len <= CCP_DMAPOOL_MAX_SIZE) { 155 wa->dma_pool = cmd_q->dma_pool; 156 157 wa->address = dma_pool_alloc(wa->dma_pool, GFP_KERNEL, 158 &wa->dma.address); 159 if (!wa->address) 160 return -ENOMEM; 161 162 wa->dma.length = CCP_DMAPOOL_MAX_SIZE; 163 164 memset(wa->address, 0, CCP_DMAPOOL_MAX_SIZE); 165 } else { 166 wa->address = kzalloc(len, GFP_KERNEL); 167 if (!wa->address) 168 return -ENOMEM; 169 170 wa->dma.address = dma_map_single(wa->dev, wa->address, len, 171 dir); 172 if (dma_mapping_error(wa->dev, wa->dma.address)) 173 return -ENOMEM; 174 175 wa->dma.length = len; 176 } 177 wa->dma.dir = dir; 178 179 return 0; 180 } 181 182 static int ccp_set_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset, 183 struct scatterlist *sg, unsigned int sg_offset, 184 unsigned int len) 185 { 186 WARN_ON(!wa->address); 187 188 if (len > (wa->length - wa_offset)) 189 return -EINVAL; 190 191 scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len, 192 0); 193 return 0; 194 } 195 196 static void ccp_get_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset, 197 struct scatterlist *sg, unsigned int sg_offset, 198 unsigned int len) 199 { 200 WARN_ON(!wa->address); 201 202 scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len, 203 1); 204 } 205 206 static int ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa, 207 unsigned int wa_offset, 208 struct scatterlist *sg, 209 unsigned int sg_offset, 210 unsigned int len) 211 { 212 u8 *p, *q; 213 int rc; 214 215 rc = 
ccp_set_dm_area(wa, wa_offset, sg, sg_offset, len); 216 if (rc) 217 return rc; 218 219 p = wa->address + wa_offset; 220 q = p + len - 1; 221 while (p < q) { 222 *p = *p ^ *q; 223 *q = *p ^ *q; 224 *p = *p ^ *q; 225 p++; 226 q--; 227 } 228 return 0; 229 } 230 231 static void ccp_reverse_get_dm_area(struct ccp_dm_workarea *wa, 232 unsigned int wa_offset, 233 struct scatterlist *sg, 234 unsigned int sg_offset, 235 unsigned int len) 236 { 237 u8 *p, *q; 238 239 p = wa->address + wa_offset; 240 q = p + len - 1; 241 while (p < q) { 242 *p = *p ^ *q; 243 *q = *p ^ *q; 244 *p = *p ^ *q; 245 p++; 246 q--; 247 } 248 249 ccp_get_dm_area(wa, wa_offset, sg, sg_offset, len); 250 } 251 252 static void ccp_free_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q) 253 { 254 ccp_dm_free(&data->dm_wa); 255 ccp_sg_free(&data->sg_wa); 256 } 257 258 static int ccp_init_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q, 259 struct scatterlist *sg, u64 sg_len, 260 unsigned int dm_len, 261 enum dma_data_direction dir) 262 { 263 int ret; 264 265 memset(data, 0, sizeof(*data)); 266 267 ret = ccp_init_sg_workarea(&data->sg_wa, cmd_q->ccp->dev, sg, sg_len, 268 dir); 269 if (ret) 270 goto e_err; 271 272 ret = ccp_init_dm_workarea(&data->dm_wa, cmd_q, dm_len, dir); 273 if (ret) 274 goto e_err; 275 276 return 0; 277 278 e_err: 279 ccp_free_data(data, cmd_q); 280 281 return ret; 282 } 283 284 static unsigned int ccp_queue_buf(struct ccp_data *data, unsigned int from) 285 { 286 struct ccp_sg_workarea *sg_wa = &data->sg_wa; 287 struct ccp_dm_workarea *dm_wa = &data->dm_wa; 288 unsigned int buf_count, nbytes; 289 290 /* Clear the buffer if setting it */ 291 if (!from) 292 memset(dm_wa->address, 0, dm_wa->length); 293 294 if (!sg_wa->sg) 295 return 0; 296 297 /* Perform the copy operation 298 * nbytes will always be <= UINT_MAX because dm_wa->length is 299 * an unsigned int 300 */ 301 nbytes = min_t(u64, sg_wa->bytes_left, dm_wa->length); 302 scatterwalk_map_and_copy(dm_wa->address, sg_wa->sg, sg_wa->sg_used, 303 nbytes, from); 304 305 /* Update the structures and generate the count */ 306 buf_count = 0; 307 while (sg_wa->bytes_left && (buf_count < dm_wa->length)) { 308 nbytes = min(sg_wa->sg->length - sg_wa->sg_used, 309 dm_wa->length - buf_count); 310 nbytes = min_t(u64, sg_wa->bytes_left, nbytes); 311 312 buf_count += nbytes; 313 ccp_update_sg_workarea(sg_wa, nbytes); 314 } 315 316 return buf_count; 317 } 318 319 static unsigned int ccp_fill_queue_buf(struct ccp_data *data) 320 { 321 return ccp_queue_buf(data, 0); 322 } 323 324 static unsigned int ccp_empty_queue_buf(struct ccp_data *data) 325 { 326 return ccp_queue_buf(data, 1); 327 } 328 329 static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst, 330 struct ccp_op *op, unsigned int block_size, 331 bool blocksize_op) 332 { 333 unsigned int sg_src_len, sg_dst_len, op_len; 334 335 /* The CCP can only DMA from/to one address each per operation. This 336 * requires that we find the smallest DMA area between the source 337 * and destination. The resulting len values will always be <= UINT_MAX 338 * because the dma length is an unsigned int. 
339 */ 340 sg_src_len = sg_dma_len(src->sg_wa.sg) - src->sg_wa.sg_used; 341 sg_src_len = min_t(u64, src->sg_wa.bytes_left, sg_src_len); 342 343 if (dst) { 344 sg_dst_len = sg_dma_len(dst->sg_wa.sg) - dst->sg_wa.sg_used; 345 sg_dst_len = min_t(u64, src->sg_wa.bytes_left, sg_dst_len); 346 op_len = min(sg_src_len, sg_dst_len); 347 } else { 348 op_len = sg_src_len; 349 } 350 351 /* The data operation length will be at least block_size in length 352 * or the smaller of available sg room remaining for the source or 353 * the destination 354 */ 355 op_len = max(op_len, block_size); 356 357 /* Unless we have to buffer data, there's no reason to wait */ 358 op->soc = 0; 359 360 if (sg_src_len < block_size) { 361 /* Not enough data in the sg element, so it 362 * needs to be buffered into a blocksize chunk 363 */ 364 int cp_len = ccp_fill_queue_buf(src); 365 366 op->soc = 1; 367 op->src.u.dma.address = src->dm_wa.dma.address; 368 op->src.u.dma.offset = 0; 369 op->src.u.dma.length = (blocksize_op) ? block_size : cp_len; 370 } else { 371 /* Enough data in the sg element, but we need to 372 * adjust for any previously copied data 373 */ 374 op->src.u.dma.address = sg_dma_address(src->sg_wa.sg); 375 op->src.u.dma.offset = src->sg_wa.sg_used; 376 op->src.u.dma.length = op_len & ~(block_size - 1); 377 378 ccp_update_sg_workarea(&src->sg_wa, op->src.u.dma.length); 379 } 380 381 if (dst) { 382 if (sg_dst_len < block_size) { 383 /* Not enough room in the sg element or we're on the 384 * last piece of data (when using padding), so the 385 * output needs to be buffered into a blocksize chunk 386 */ 387 op->soc = 1; 388 op->dst.u.dma.address = dst->dm_wa.dma.address; 389 op->dst.u.dma.offset = 0; 390 op->dst.u.dma.length = op->src.u.dma.length; 391 } else { 392 /* Enough room in the sg element, but we need to 393 * adjust for any previously used area 394 */ 395 op->dst.u.dma.address = sg_dma_address(dst->sg_wa.sg); 396 op->dst.u.dma.offset = dst->sg_wa.sg_used; 397 op->dst.u.dma.length = op->src.u.dma.length; 398 } 399 } 400 } 401 402 static void ccp_process_data(struct ccp_data *src, struct ccp_data *dst, 403 struct ccp_op *op) 404 { 405 op->init = 0; 406 407 if (dst) { 408 if (op->dst.u.dma.address == dst->dm_wa.dma.address) 409 ccp_empty_queue_buf(dst); 410 else 411 ccp_update_sg_workarea(&dst->sg_wa, 412 op->dst.u.dma.length); 413 } 414 } 415 416 static int ccp_copy_to_from_sb(struct ccp_cmd_queue *cmd_q, 417 struct ccp_dm_workarea *wa, u32 jobid, u32 sb, 418 u32 byte_swap, bool from) 419 { 420 struct ccp_op op; 421 422 memset(&op, 0, sizeof(op)); 423 424 op.cmd_q = cmd_q; 425 op.jobid = jobid; 426 op.eom = 1; 427 428 if (from) { 429 op.soc = 1; 430 op.src.type = CCP_MEMTYPE_SB; 431 op.src.u.sb = sb; 432 op.dst.type = CCP_MEMTYPE_SYSTEM; 433 op.dst.u.dma.address = wa->dma.address; 434 op.dst.u.dma.length = wa->length; 435 } else { 436 op.src.type = CCP_MEMTYPE_SYSTEM; 437 op.src.u.dma.address = wa->dma.address; 438 op.src.u.dma.length = wa->length; 439 op.dst.type = CCP_MEMTYPE_SB; 440 op.dst.u.sb = sb; 441 } 442 443 op.u.passthru.byte_swap = byte_swap; 444 445 return cmd_q->ccp->vdata->perform->passthru(&op); 446 } 447 448 static int ccp_copy_to_sb(struct ccp_cmd_queue *cmd_q, 449 struct ccp_dm_workarea *wa, u32 jobid, u32 sb, 450 u32 byte_swap) 451 { 452 return ccp_copy_to_from_sb(cmd_q, wa, jobid, sb, byte_swap, false); 453 } 454 455 static int ccp_copy_from_sb(struct ccp_cmd_queue *cmd_q, 456 struct ccp_dm_workarea *wa, u32 jobid, u32 sb, 457 u32 byte_swap) 458 { 459 return ccp_copy_to_from_sb(cmd_q, wa, 
jobid, sb, byte_swap, true); 460 } 461 462 static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q, 463 struct ccp_cmd *cmd) 464 { 465 struct ccp_aes_engine *aes = &cmd->u.aes; 466 struct ccp_dm_workarea key, ctx; 467 struct ccp_data src; 468 struct ccp_op op; 469 unsigned int dm_offset; 470 int ret; 471 472 if (!((aes->key_len == AES_KEYSIZE_128) || 473 (aes->key_len == AES_KEYSIZE_192) || 474 (aes->key_len == AES_KEYSIZE_256))) 475 return -EINVAL; 476 477 if (aes->src_len & (AES_BLOCK_SIZE - 1)) 478 return -EINVAL; 479 480 if (aes->iv_len != AES_BLOCK_SIZE) 481 return -EINVAL; 482 483 if (!aes->key || !aes->iv || !aes->src) 484 return -EINVAL; 485 486 if (aes->cmac_final) { 487 if (aes->cmac_key_len != AES_BLOCK_SIZE) 488 return -EINVAL; 489 490 if (!aes->cmac_key) 491 return -EINVAL; 492 } 493 494 BUILD_BUG_ON(CCP_AES_KEY_SB_COUNT != 1); 495 BUILD_BUG_ON(CCP_AES_CTX_SB_COUNT != 1); 496 497 ret = -EIO; 498 memset(&op, 0, sizeof(op)); 499 op.cmd_q = cmd_q; 500 op.jobid = CCP_NEW_JOBID(cmd_q->ccp); 501 op.sb_key = cmd_q->sb_key; 502 op.sb_ctx = cmd_q->sb_ctx; 503 op.init = 1; 504 op.u.aes.type = aes->type; 505 op.u.aes.mode = aes->mode; 506 op.u.aes.action = aes->action; 507 508 /* All supported key sizes fit in a single (32-byte) SB entry 509 * and must be in little endian format. Use the 256-bit byte 510 * swap passthru option to convert from big endian to little 511 * endian. 512 */ 513 ret = ccp_init_dm_workarea(&key, cmd_q, 514 CCP_AES_KEY_SB_COUNT * CCP_SB_BYTES, 515 DMA_TO_DEVICE); 516 if (ret) 517 return ret; 518 519 dm_offset = CCP_SB_BYTES - aes->key_len; 520 ret = ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len); 521 if (ret) 522 goto e_key; 523 ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key, 524 CCP_PASSTHRU_BYTESWAP_256BIT); 525 if (ret) { 526 cmd->engine_error = cmd_q->cmd_error; 527 goto e_key; 528 } 529 530 /* The AES context fits in a single (32-byte) SB entry and 531 * must be in little endian format. Use the 256-bit byte swap 532 * passthru option to convert from big endian to little endian. 
533 */ 534 ret = ccp_init_dm_workarea(&ctx, cmd_q, 535 CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES, 536 DMA_BIDIRECTIONAL); 537 if (ret) 538 goto e_key; 539 540 dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE; 541 ret = ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len); 542 if (ret) 543 goto e_ctx; 544 ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx, 545 CCP_PASSTHRU_BYTESWAP_256BIT); 546 if (ret) { 547 cmd->engine_error = cmd_q->cmd_error; 548 goto e_ctx; 549 } 550 551 /* Send data to the CCP AES engine */ 552 ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len, 553 AES_BLOCK_SIZE, DMA_TO_DEVICE); 554 if (ret) 555 goto e_ctx; 556 557 while (src.sg_wa.bytes_left) { 558 ccp_prepare_data(&src, NULL, &op, AES_BLOCK_SIZE, true); 559 if (aes->cmac_final && !src.sg_wa.bytes_left) { 560 op.eom = 1; 561 562 /* Push the K1/K2 key to the CCP now */ 563 ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, 564 op.sb_ctx, 565 CCP_PASSTHRU_BYTESWAP_256BIT); 566 if (ret) { 567 cmd->engine_error = cmd_q->cmd_error; 568 goto e_src; 569 } 570 571 ret = ccp_set_dm_area(&ctx, 0, aes->cmac_key, 0, 572 aes->cmac_key_len); 573 if (ret) 574 goto e_src; 575 ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx, 576 CCP_PASSTHRU_BYTESWAP_256BIT); 577 if (ret) { 578 cmd->engine_error = cmd_q->cmd_error; 579 goto e_src; 580 } 581 } 582 583 ret = cmd_q->ccp->vdata->perform->aes(&op); 584 if (ret) { 585 cmd->engine_error = cmd_q->cmd_error; 586 goto e_src; 587 } 588 589 ccp_process_data(&src, NULL, &op); 590 } 591 592 /* Retrieve the AES context - convert from LE to BE using 593 * 32-byte (256-bit) byteswapping 594 */ 595 ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx, 596 CCP_PASSTHRU_BYTESWAP_256BIT); 597 if (ret) { 598 cmd->engine_error = cmd_q->cmd_error; 599 goto e_src; 600 } 601 602 /* ...but we only need AES_BLOCK_SIZE bytes */ 603 dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE; 604 ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len); 605 606 e_src: 607 ccp_free_data(&src, cmd_q); 608 609 e_ctx: 610 ccp_dm_free(&ctx); 611 612 e_key: 613 ccp_dm_free(&key); 614 615 return ret; 616 } 617 618 static int ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q, 619 struct ccp_cmd *cmd) 620 { 621 struct ccp_aes_engine *aes = &cmd->u.aes; 622 struct ccp_dm_workarea key, ctx, final_wa, tag; 623 struct ccp_data src, dst; 624 struct ccp_data aad; 625 struct ccp_op op; 626 627 unsigned long long *final; 628 unsigned int dm_offset; 629 unsigned int ilen; 630 bool in_place = true; /* Default value */ 631 int ret; 632 633 struct scatterlist *p_inp, sg_inp[2]; 634 struct scatterlist *p_tag, sg_tag[2]; 635 struct scatterlist *p_outp, sg_outp[2]; 636 struct scatterlist *p_aad; 637 638 if (!aes->iv) 639 return -EINVAL; 640 641 if (!((aes->key_len == AES_KEYSIZE_128) || 642 (aes->key_len == AES_KEYSIZE_192) || 643 (aes->key_len == AES_KEYSIZE_256))) 644 return -EINVAL; 645 646 if (!aes->key) /* Gotta have a key SGL */ 647 return -EINVAL; 648 649 /* First, decompose the source buffer into AAD & PT, 650 * and the destination buffer into AAD, CT & tag, or 651 * the input into CT & tag. 652 * It is expected that the input and output SGs will 653 * be valid, even if the AAD and input lengths are 0. 
654 */ 655 p_aad = aes->src; 656 p_inp = scatterwalk_ffwd(sg_inp, aes->src, aes->aad_len); 657 p_outp = scatterwalk_ffwd(sg_outp, aes->dst, aes->aad_len); 658 if (aes->action == CCP_AES_ACTION_ENCRYPT) { 659 ilen = aes->src_len; 660 p_tag = scatterwalk_ffwd(sg_tag, p_outp, ilen); 661 } else { 662 /* Input length for decryption includes tag */ 663 ilen = aes->src_len - AES_BLOCK_SIZE; 664 p_tag = scatterwalk_ffwd(sg_tag, p_inp, ilen); 665 } 666 667 memset(&op, 0, sizeof(op)); 668 op.cmd_q = cmd_q; 669 op.jobid = CCP_NEW_JOBID(cmd_q->ccp); 670 op.sb_key = cmd_q->sb_key; /* Pre-allocated */ 671 op.sb_ctx = cmd_q->sb_ctx; /* Pre-allocated */ 672 op.init = 1; 673 op.u.aes.type = aes->type; 674 675 /* Copy the key to the LSB */ 676 ret = ccp_init_dm_workarea(&key, cmd_q, 677 CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES, 678 DMA_TO_DEVICE); 679 if (ret) 680 return ret; 681 682 dm_offset = CCP_SB_BYTES - aes->key_len; 683 ret = ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len); 684 if (ret) 685 goto e_key; 686 ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key, 687 CCP_PASSTHRU_BYTESWAP_256BIT); 688 if (ret) { 689 cmd->engine_error = cmd_q->cmd_error; 690 goto e_key; 691 } 692 693 /* Copy the context (IV) to the LSB. 694 * There is an assumption here that the IV is 96 bits in length, plus 695 * a nonce of 32 bits. If no IV is present, use a zeroed buffer. 696 */ 697 ret = ccp_init_dm_workarea(&ctx, cmd_q, 698 CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES, 699 DMA_BIDIRECTIONAL); 700 if (ret) 701 goto e_key; 702 703 dm_offset = CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES - aes->iv_len; 704 ret = ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len); 705 if (ret) 706 goto e_ctx; 707 708 ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx, 709 CCP_PASSTHRU_BYTESWAP_256BIT); 710 if (ret) { 711 cmd->engine_error = cmd_q->cmd_error; 712 goto e_ctx; 713 } 714 715 op.init = 1; 716 if (aes->aad_len > 0) { 717 /* Step 1: Run a GHASH over the Additional Authenticated Data */ 718 ret = ccp_init_data(&aad, cmd_q, p_aad, aes->aad_len, 719 AES_BLOCK_SIZE, 720 DMA_TO_DEVICE); 721 if (ret) 722 goto e_ctx; 723 724 op.u.aes.mode = CCP_AES_MODE_GHASH; 725 op.u.aes.action = CCP_AES_GHASHAAD; 726 727 while (aad.sg_wa.bytes_left) { 728 ccp_prepare_data(&aad, NULL, &op, AES_BLOCK_SIZE, true); 729 730 ret = cmd_q->ccp->vdata->perform->aes(&op); 731 if (ret) { 732 cmd->engine_error = cmd_q->cmd_error; 733 goto e_aad; 734 } 735 736 ccp_process_data(&aad, NULL, &op); 737 op.init = 0; 738 } 739 } 740 741 op.u.aes.mode = CCP_AES_MODE_GCTR; 742 op.u.aes.action = aes->action; 743 744 if (ilen > 0) { 745 /* Step 2: Run a GCTR over the plaintext */ 746 in_place = (sg_virt(p_inp) == sg_virt(p_outp)) ? true : false; 747 748 ret = ccp_init_data(&src, cmd_q, p_inp, ilen, 749 AES_BLOCK_SIZE, 750 in_place ? 
DMA_BIDIRECTIONAL 751 : DMA_TO_DEVICE); 752 if (ret) 753 goto e_ctx; 754 755 if (in_place) { 756 dst = src; 757 } else { 758 ret = ccp_init_data(&dst, cmd_q, p_outp, ilen, 759 AES_BLOCK_SIZE, DMA_FROM_DEVICE); 760 if (ret) 761 goto e_src; 762 } 763 764 op.soc = 0; 765 op.eom = 0; 766 op.init = 1; 767 while (src.sg_wa.bytes_left) { 768 ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true); 769 if (!src.sg_wa.bytes_left) { 770 unsigned int nbytes = aes->src_len 771 % AES_BLOCK_SIZE; 772 773 if (nbytes) { 774 op.eom = 1; 775 op.u.aes.size = (nbytes * 8) - 1; 776 } 777 } 778 779 ret = cmd_q->ccp->vdata->perform->aes(&op); 780 if (ret) { 781 cmd->engine_error = cmd_q->cmd_error; 782 goto e_dst; 783 } 784 785 ccp_process_data(&src, &dst, &op); 786 op.init = 0; 787 } 788 } 789 790 /* Step 3: Update the IV portion of the context with the original IV */ 791 ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx, 792 CCP_PASSTHRU_BYTESWAP_256BIT); 793 if (ret) { 794 cmd->engine_error = cmd_q->cmd_error; 795 goto e_dst; 796 } 797 798 ret = ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len); 799 if (ret) 800 goto e_dst; 801 802 ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx, 803 CCP_PASSTHRU_BYTESWAP_256BIT); 804 if (ret) { 805 cmd->engine_error = cmd_q->cmd_error; 806 goto e_dst; 807 } 808 809 /* Step 4: Concatenate the lengths of the AAD and source, and 810 * hash that 16 byte buffer. 811 */ 812 ret = ccp_init_dm_workarea(&final_wa, cmd_q, AES_BLOCK_SIZE, 813 DMA_BIDIRECTIONAL); 814 if (ret) 815 goto e_dst; 816 final = (unsigned long long *) final_wa.address; 817 final[0] = cpu_to_be64(aes->aad_len * 8); 818 final[1] = cpu_to_be64(ilen * 8); 819 820 op.u.aes.mode = CCP_AES_MODE_GHASH; 821 op.u.aes.action = CCP_AES_GHASHFINAL; 822 op.src.type = CCP_MEMTYPE_SYSTEM; 823 op.src.u.dma.address = final_wa.dma.address; 824 op.src.u.dma.length = AES_BLOCK_SIZE; 825 op.dst.type = CCP_MEMTYPE_SYSTEM; 826 op.dst.u.dma.address = final_wa.dma.address; 827 op.dst.u.dma.length = AES_BLOCK_SIZE; 828 op.eom = 1; 829 op.u.aes.size = 0; 830 ret = cmd_q->ccp->vdata->perform->aes(&op); 831 if (ret) 832 goto e_dst; 833 834 if (aes->action == CCP_AES_ACTION_ENCRYPT) { 835 /* Put the ciphered tag after the ciphertext. */ 836 ccp_get_dm_area(&final_wa, 0, p_tag, 0, AES_BLOCK_SIZE); 837 } else { 838 /* Does this ciphered tag match the input? 
*/ 839 ret = ccp_init_dm_workarea(&tag, cmd_q, AES_BLOCK_SIZE, 840 DMA_BIDIRECTIONAL); 841 if (ret) 842 goto e_tag; 843 ret = ccp_set_dm_area(&tag, 0, p_tag, 0, AES_BLOCK_SIZE); 844 if (ret) 845 goto e_tag; 846 847 ret = memcmp(tag.address, final_wa.address, AES_BLOCK_SIZE); 848 ccp_dm_free(&tag); 849 } 850 851 e_tag: 852 ccp_dm_free(&final_wa); 853 854 e_dst: 855 if (aes->src_len && !in_place) 856 ccp_free_data(&dst, cmd_q); 857 858 e_src: 859 if (aes->src_len) 860 ccp_free_data(&src, cmd_q); 861 862 e_aad: 863 if (aes->aad_len) 864 ccp_free_data(&aad, cmd_q); 865 866 e_ctx: 867 ccp_dm_free(&ctx); 868 869 e_key: 870 ccp_dm_free(&key); 871 872 return ret; 873 } 874 875 static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) 876 { 877 struct ccp_aes_engine *aes = &cmd->u.aes; 878 struct ccp_dm_workarea key, ctx; 879 struct ccp_data src, dst; 880 struct ccp_op op; 881 unsigned int dm_offset; 882 bool in_place = false; 883 int ret; 884 885 if (aes->mode == CCP_AES_MODE_CMAC) 886 return ccp_run_aes_cmac_cmd(cmd_q, cmd); 887 888 if (aes->mode == CCP_AES_MODE_GCM) 889 return ccp_run_aes_gcm_cmd(cmd_q, cmd); 890 891 if (!((aes->key_len == AES_KEYSIZE_128) || 892 (aes->key_len == AES_KEYSIZE_192) || 893 (aes->key_len == AES_KEYSIZE_256))) 894 return -EINVAL; 895 896 if (((aes->mode == CCP_AES_MODE_ECB) || 897 (aes->mode == CCP_AES_MODE_CBC)) && 898 (aes->src_len & (AES_BLOCK_SIZE - 1))) 899 return -EINVAL; 900 901 if (!aes->key || !aes->src || !aes->dst) 902 return -EINVAL; 903 904 if (aes->mode != CCP_AES_MODE_ECB) { 905 if (aes->iv_len != AES_BLOCK_SIZE) 906 return -EINVAL; 907 908 if (!aes->iv) 909 return -EINVAL; 910 } 911 912 BUILD_BUG_ON(CCP_AES_KEY_SB_COUNT != 1); 913 BUILD_BUG_ON(CCP_AES_CTX_SB_COUNT != 1); 914 915 ret = -EIO; 916 memset(&op, 0, sizeof(op)); 917 op.cmd_q = cmd_q; 918 op.jobid = CCP_NEW_JOBID(cmd_q->ccp); 919 op.sb_key = cmd_q->sb_key; 920 op.sb_ctx = cmd_q->sb_ctx; 921 op.init = (aes->mode == CCP_AES_MODE_ECB) ? 0 : 1; 922 op.u.aes.type = aes->type; 923 op.u.aes.mode = aes->mode; 924 op.u.aes.action = aes->action; 925 926 /* All supported key sizes fit in a single (32-byte) SB entry 927 * and must be in little endian format. Use the 256-bit byte 928 * swap passthru option to convert from big endian to little 929 * endian. 930 */ 931 ret = ccp_init_dm_workarea(&key, cmd_q, 932 CCP_AES_KEY_SB_COUNT * CCP_SB_BYTES, 933 DMA_TO_DEVICE); 934 if (ret) 935 return ret; 936 937 dm_offset = CCP_SB_BYTES - aes->key_len; 938 ret = ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len); 939 if (ret) 940 goto e_key; 941 ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key, 942 CCP_PASSTHRU_BYTESWAP_256BIT); 943 if (ret) { 944 cmd->engine_error = cmd_q->cmd_error; 945 goto e_key; 946 } 947 948 /* The AES context fits in a single (32-byte) SB entry and 949 * must be in little endian format. Use the 256-bit byte swap 950 * passthru option to convert from big endian to little endian. 
951 */ 952 ret = ccp_init_dm_workarea(&ctx, cmd_q, 953 CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES, 954 DMA_BIDIRECTIONAL); 955 if (ret) 956 goto e_key; 957 958 if (aes->mode != CCP_AES_MODE_ECB) { 959 /* Load the AES context - convert to LE */ 960 dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE; 961 ret = ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len); 962 if (ret) 963 goto e_ctx; 964 ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx, 965 CCP_PASSTHRU_BYTESWAP_256BIT); 966 if (ret) { 967 cmd->engine_error = cmd_q->cmd_error; 968 goto e_ctx; 969 } 970 } 971 switch (aes->mode) { 972 case CCP_AES_MODE_CFB: /* CFB128 only */ 973 case CCP_AES_MODE_CTR: 974 op.u.aes.size = AES_BLOCK_SIZE * BITS_PER_BYTE - 1; 975 break; 976 default: 977 op.u.aes.size = 0; 978 } 979 980 /* Prepare the input and output data workareas. For in-place 981 * operations we need to set the dma direction to BIDIRECTIONAL 982 * and copy the src workarea to the dst workarea. 983 */ 984 if (sg_virt(aes->src) == sg_virt(aes->dst)) 985 in_place = true; 986 987 ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len, 988 AES_BLOCK_SIZE, 989 in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE); 990 if (ret) 991 goto e_ctx; 992 993 if (in_place) { 994 dst = src; 995 } else { 996 ret = ccp_init_data(&dst, cmd_q, aes->dst, aes->src_len, 997 AES_BLOCK_SIZE, DMA_FROM_DEVICE); 998 if (ret) 999 goto e_src; 1000 } 1001 1002 /* Send data to the CCP AES engine */ 1003 while (src.sg_wa.bytes_left) { 1004 ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true); 1005 if (!src.sg_wa.bytes_left) { 1006 op.eom = 1; 1007 1008 /* Since we don't retrieve the AES context in ECB 1009 * mode we have to wait for the operation to complete 1010 * on the last piece of data 1011 */ 1012 if (aes->mode == CCP_AES_MODE_ECB) 1013 op.soc = 1; 1014 } 1015 1016 ret = cmd_q->ccp->vdata->perform->aes(&op); 1017 if (ret) { 1018 cmd->engine_error = cmd_q->cmd_error; 1019 goto e_dst; 1020 } 1021 1022 ccp_process_data(&src, &dst, &op); 1023 } 1024 1025 if (aes->mode != CCP_AES_MODE_ECB) { 1026 /* Retrieve the AES context - convert from LE to BE using 1027 * 32-byte (256-bit) byteswapping 1028 */ 1029 ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx, 1030 CCP_PASSTHRU_BYTESWAP_256BIT); 1031 if (ret) { 1032 cmd->engine_error = cmd_q->cmd_error; 1033 goto e_dst; 1034 } 1035 1036 /* ...but we only need AES_BLOCK_SIZE bytes */ 1037 dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE; 1038 ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len); 1039 } 1040 1041 e_dst: 1042 if (!in_place) 1043 ccp_free_data(&dst, cmd_q); 1044 1045 e_src: 1046 ccp_free_data(&src, cmd_q); 1047 1048 e_ctx: 1049 ccp_dm_free(&ctx); 1050 1051 e_key: 1052 ccp_dm_free(&key); 1053 1054 return ret; 1055 } 1056 1057 static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q, 1058 struct ccp_cmd *cmd) 1059 { 1060 struct ccp_xts_aes_engine *xts = &cmd->u.xts; 1061 struct ccp_dm_workarea key, ctx; 1062 struct ccp_data src, dst; 1063 struct ccp_op op; 1064 unsigned int unit_size, dm_offset; 1065 bool in_place = false; 1066 unsigned int sb_count; 1067 enum ccp_aes_type aestype; 1068 int ret; 1069 1070 switch (xts->unit_size) { 1071 case CCP_XTS_AES_UNIT_SIZE_16: 1072 unit_size = 16; 1073 break; 1074 case CCP_XTS_AES_UNIT_SIZE_512: 1075 unit_size = 512; 1076 break; 1077 case CCP_XTS_AES_UNIT_SIZE_1024: 1078 unit_size = 1024; 1079 break; 1080 case CCP_XTS_AES_UNIT_SIZE_2048: 1081 unit_size = 2048; 1082 break; 1083 case CCP_XTS_AES_UNIT_SIZE_4096: 1084 unit_size = 4096; 1085 break; 1086 1087 default: 1088 return 
-EINVAL; 1089 } 1090 1091 if (xts->key_len == AES_KEYSIZE_128) 1092 aestype = CCP_AES_TYPE_128; 1093 else if (xts->key_len == AES_KEYSIZE_256) 1094 aestype = CCP_AES_TYPE_256; 1095 else 1096 return -EINVAL; 1097 1098 if (!xts->final && (xts->src_len & (AES_BLOCK_SIZE - 1))) 1099 return -EINVAL; 1100 1101 if (xts->iv_len != AES_BLOCK_SIZE) 1102 return -EINVAL; 1103 1104 if (!xts->key || !xts->iv || !xts->src || !xts->dst) 1105 return -EINVAL; 1106 1107 BUILD_BUG_ON(CCP_XTS_AES_KEY_SB_COUNT != 1); 1108 BUILD_BUG_ON(CCP_XTS_AES_CTX_SB_COUNT != 1); 1109 1110 ret = -EIO; 1111 memset(&op, 0, sizeof(op)); 1112 op.cmd_q = cmd_q; 1113 op.jobid = CCP_NEW_JOBID(cmd_q->ccp); 1114 op.sb_key = cmd_q->sb_key; 1115 op.sb_ctx = cmd_q->sb_ctx; 1116 op.init = 1; 1117 op.u.xts.type = aestype; 1118 op.u.xts.action = xts->action; 1119 op.u.xts.unit_size = xts->unit_size; 1120 1121 /* A version 3 device only supports 128-bit keys, which fits into a 1122 * single SB entry. A version 5 device uses a 512-bit vector, so two 1123 * SB entries. 1124 */ 1125 if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0)) 1126 sb_count = CCP_XTS_AES_KEY_SB_COUNT; 1127 else 1128 sb_count = CCP5_XTS_AES_KEY_SB_COUNT; 1129 ret = ccp_init_dm_workarea(&key, cmd_q, 1130 sb_count * CCP_SB_BYTES, 1131 DMA_TO_DEVICE); 1132 if (ret) 1133 return ret; 1134 1135 if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0)) { 1136 /* All supported key sizes must be in little endian format. 1137 * Use the 256-bit byte swap passthru option to convert from 1138 * big endian to little endian. 1139 */ 1140 dm_offset = CCP_SB_BYTES - AES_KEYSIZE_128; 1141 ret = ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len); 1142 if (ret) 1143 goto e_key; 1144 ret = ccp_set_dm_area(&key, 0, xts->key, xts->key_len, xts->key_len); 1145 if (ret) 1146 goto e_key; 1147 } else { 1148 /* Version 5 CCPs use a 512-bit space for the key: each portion 1149 * occupies 256 bits, or one entire slot, and is zero-padded. 1150 */ 1151 unsigned int pad; 1152 1153 dm_offset = CCP_SB_BYTES; 1154 pad = dm_offset - xts->key_len; 1155 ret = ccp_set_dm_area(&key, pad, xts->key, 0, xts->key_len); 1156 if (ret) 1157 goto e_key; 1158 ret = ccp_set_dm_area(&key, dm_offset + pad, xts->key, 1159 xts->key_len, xts->key_len); 1160 if (ret) 1161 goto e_key; 1162 } 1163 ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key, 1164 CCP_PASSTHRU_BYTESWAP_256BIT); 1165 if (ret) { 1166 cmd->engine_error = cmd_q->cmd_error; 1167 goto e_key; 1168 } 1169 1170 /* The AES context fits in a single (32-byte) SB entry and 1171 * for XTS is already in little endian format so no byte swapping 1172 * is needed. 1173 */ 1174 ret = ccp_init_dm_workarea(&ctx, cmd_q, 1175 CCP_XTS_AES_CTX_SB_COUNT * CCP_SB_BYTES, 1176 DMA_BIDIRECTIONAL); 1177 if (ret) 1178 goto e_key; 1179 1180 ret = ccp_set_dm_area(&ctx, 0, xts->iv, 0, xts->iv_len); 1181 if (ret) 1182 goto e_ctx; 1183 ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx, 1184 CCP_PASSTHRU_BYTESWAP_NOOP); 1185 if (ret) { 1186 cmd->engine_error = cmd_q->cmd_error; 1187 goto e_ctx; 1188 } 1189 1190 /* Prepare the input and output data workareas. For in-place 1191 * operations we need to set the dma direction to BIDIRECTIONAL 1192 * and copy the src workarea to the dst workarea. 1193 */ 1194 if (sg_virt(xts->src) == sg_virt(xts->dst)) 1195 in_place = true; 1196 1197 ret = ccp_init_data(&src, cmd_q, xts->src, xts->src_len, 1198 unit_size, 1199 in_place ? 
DMA_BIDIRECTIONAL : DMA_TO_DEVICE); 1200 if (ret) 1201 goto e_ctx; 1202 1203 if (in_place) { 1204 dst = src; 1205 } else { 1206 ret = ccp_init_data(&dst, cmd_q, xts->dst, xts->src_len, 1207 unit_size, DMA_FROM_DEVICE); 1208 if (ret) 1209 goto e_src; 1210 } 1211 1212 /* Send data to the CCP AES engine */ 1213 while (src.sg_wa.bytes_left) { 1214 ccp_prepare_data(&src, &dst, &op, unit_size, true); 1215 if (!src.sg_wa.bytes_left) 1216 op.eom = 1; 1217 1218 ret = cmd_q->ccp->vdata->perform->xts_aes(&op); 1219 if (ret) { 1220 cmd->engine_error = cmd_q->cmd_error; 1221 goto e_dst; 1222 } 1223 1224 ccp_process_data(&src, &dst, &op); 1225 } 1226 1227 /* Retrieve the AES context - convert from LE to BE using 1228 * 32-byte (256-bit) byteswapping 1229 */ 1230 ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx, 1231 CCP_PASSTHRU_BYTESWAP_256BIT); 1232 if (ret) { 1233 cmd->engine_error = cmd_q->cmd_error; 1234 goto e_dst; 1235 } 1236 1237 /* ...but we only need AES_BLOCK_SIZE bytes */ 1238 dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE; 1239 ccp_get_dm_area(&ctx, dm_offset, xts->iv, 0, xts->iv_len); 1240 1241 e_dst: 1242 if (!in_place) 1243 ccp_free_data(&dst, cmd_q); 1244 1245 e_src: 1246 ccp_free_data(&src, cmd_q); 1247 1248 e_ctx: 1249 ccp_dm_free(&ctx); 1250 1251 e_key: 1252 ccp_dm_free(&key); 1253 1254 return ret; 1255 } 1256 1257 static int ccp_run_des3_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) 1258 { 1259 struct ccp_des3_engine *des3 = &cmd->u.des3; 1260 1261 struct ccp_dm_workarea key, ctx; 1262 struct ccp_data src, dst; 1263 struct ccp_op op; 1264 unsigned int dm_offset; 1265 unsigned int len_singlekey; 1266 bool in_place = false; 1267 int ret; 1268 1269 /* Error checks */ 1270 if (cmd_q->ccp->vdata->version < CCP_VERSION(5, 0)) 1271 return -EINVAL; 1272 1273 if (!cmd_q->ccp->vdata->perform->des3) 1274 return -EINVAL; 1275 1276 if (des3->key_len != DES3_EDE_KEY_SIZE) 1277 return -EINVAL; 1278 1279 if (((des3->mode == CCP_DES3_MODE_ECB) || 1280 (des3->mode == CCP_DES3_MODE_CBC)) && 1281 (des3->src_len & (DES3_EDE_BLOCK_SIZE - 1))) 1282 return -EINVAL; 1283 1284 if (!des3->key || !des3->src || !des3->dst) 1285 return -EINVAL; 1286 1287 if (des3->mode != CCP_DES3_MODE_ECB) { 1288 if (des3->iv_len != DES3_EDE_BLOCK_SIZE) 1289 return -EINVAL; 1290 1291 if (!des3->iv) 1292 return -EINVAL; 1293 } 1294 1295 ret = -EIO; 1296 /* Zero out all the fields of the command desc */ 1297 memset(&op, 0, sizeof(op)); 1298 1299 /* Set up the Function field */ 1300 op.cmd_q = cmd_q; 1301 op.jobid = CCP_NEW_JOBID(cmd_q->ccp); 1302 op.sb_key = cmd_q->sb_key; 1303 1304 op.init = (des3->mode == CCP_DES3_MODE_ECB) ? 0 : 1; 1305 op.u.des3.type = des3->type; 1306 op.u.des3.mode = des3->mode; 1307 op.u.des3.action = des3->action; 1308 1309 /* 1310 * All supported key sizes fit in a single (32-byte) KSB entry and 1311 * (like AES) must be in little endian format. Use the 256-bit byte 1312 * swap passthru option to convert from big endian to little endian. 1313 */ 1314 ret = ccp_init_dm_workarea(&key, cmd_q, 1315 CCP_DES3_KEY_SB_COUNT * CCP_SB_BYTES, 1316 DMA_TO_DEVICE); 1317 if (ret) 1318 return ret; 1319 1320 /* 1321 * The contents of the key triplet are in the reverse order of what 1322 * is required by the engine. Copy the 3 pieces individually to put 1323 * them where they belong. 
1324 */ 1325 dm_offset = CCP_SB_BYTES - des3->key_len; /* Basic offset */ 1326 1327 len_singlekey = des3->key_len / 3; 1328 ret = ccp_set_dm_area(&key, dm_offset + 2 * len_singlekey, 1329 des3->key, 0, len_singlekey); 1330 if (ret) 1331 goto e_key; 1332 ret = ccp_set_dm_area(&key, dm_offset + len_singlekey, 1333 des3->key, len_singlekey, len_singlekey); 1334 if (ret) 1335 goto e_key; 1336 ret = ccp_set_dm_area(&key, dm_offset, 1337 des3->key, 2 * len_singlekey, len_singlekey); 1338 if (ret) 1339 goto e_key; 1340 1341 /* Copy the key to the SB */ 1342 ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key, 1343 CCP_PASSTHRU_BYTESWAP_256BIT); 1344 if (ret) { 1345 cmd->engine_error = cmd_q->cmd_error; 1346 goto e_key; 1347 } 1348 1349 /* 1350 * The DES3 context fits in a single (32-byte) KSB entry and 1351 * must be in little endian format. Use the 256-bit byte swap 1352 * passthru option to convert from big endian to little endian. 1353 */ 1354 if (des3->mode != CCP_DES3_MODE_ECB) { 1355 op.sb_ctx = cmd_q->sb_ctx; 1356 1357 ret = ccp_init_dm_workarea(&ctx, cmd_q, 1358 CCP_DES3_CTX_SB_COUNT * CCP_SB_BYTES, 1359 DMA_BIDIRECTIONAL); 1360 if (ret) 1361 goto e_key; 1362 1363 /* Load the context into the LSB */ 1364 dm_offset = CCP_SB_BYTES - des3->iv_len; 1365 ret = ccp_set_dm_area(&ctx, dm_offset, des3->iv, 0, 1366 des3->iv_len); 1367 if (ret) 1368 goto e_ctx; 1369 1370 ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx, 1371 CCP_PASSTHRU_BYTESWAP_256BIT); 1372 if (ret) { 1373 cmd->engine_error = cmd_q->cmd_error; 1374 goto e_ctx; 1375 } 1376 } 1377 1378 /* 1379 * Prepare the input and output data workareas. For in-place 1380 * operations we need to set the dma direction to BIDIRECTIONAL 1381 * and copy the src workarea to the dst workarea. 1382 */ 1383 if (sg_virt(des3->src) == sg_virt(des3->dst)) 1384 in_place = true; 1385 1386 ret = ccp_init_data(&src, cmd_q, des3->src, des3->src_len, 1387 DES3_EDE_BLOCK_SIZE, 1388 in_place ? 
DMA_BIDIRECTIONAL : DMA_TO_DEVICE); 1389 if (ret) 1390 goto e_ctx; 1391 1392 if (in_place) 1393 dst = src; 1394 else { 1395 ret = ccp_init_data(&dst, cmd_q, des3->dst, des3->src_len, 1396 DES3_EDE_BLOCK_SIZE, DMA_FROM_DEVICE); 1397 if (ret) 1398 goto e_src; 1399 } 1400 1401 /* Send data to the CCP DES3 engine */ 1402 while (src.sg_wa.bytes_left) { 1403 ccp_prepare_data(&src, &dst, &op, DES3_EDE_BLOCK_SIZE, true); 1404 if (!src.sg_wa.bytes_left) { 1405 op.eom = 1; 1406 1407 /* Since we don't retrieve the context in ECB mode 1408 * we have to wait for the operation to complete 1409 * on the last piece of data 1410 */ 1411 op.soc = 0; 1412 } 1413 1414 ret = cmd_q->ccp->vdata->perform->des3(&op); 1415 if (ret) { 1416 cmd->engine_error = cmd_q->cmd_error; 1417 goto e_dst; 1418 } 1419 1420 ccp_process_data(&src, &dst, &op); 1421 } 1422 1423 if (des3->mode != CCP_DES3_MODE_ECB) { 1424 /* Retrieve the context and make BE */ 1425 ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx, 1426 CCP_PASSTHRU_BYTESWAP_256BIT); 1427 if (ret) { 1428 cmd->engine_error = cmd_q->cmd_error; 1429 goto e_dst; 1430 } 1431 1432 /* ...but we only need the last DES3_EDE_BLOCK_SIZE bytes */ 1433 ccp_get_dm_area(&ctx, dm_offset, des3->iv, 0, 1434 DES3_EDE_BLOCK_SIZE); 1435 } 1436 e_dst: 1437 if (!in_place) 1438 ccp_free_data(&dst, cmd_q); 1439 1440 e_src: 1441 ccp_free_data(&src, cmd_q); 1442 1443 e_ctx: 1444 if (des3->mode != CCP_DES3_MODE_ECB) 1445 ccp_dm_free(&ctx); 1446 1447 e_key: 1448 ccp_dm_free(&key); 1449 1450 return ret; 1451 } 1452 1453 static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) 1454 { 1455 struct ccp_sha_engine *sha = &cmd->u.sha; 1456 struct ccp_dm_workarea ctx; 1457 struct ccp_data src; 1458 struct ccp_op op; 1459 unsigned int ioffset, ooffset; 1460 unsigned int digest_size; 1461 int sb_count; 1462 const void *init; 1463 u64 block_size; 1464 int ctx_size; 1465 int ret; 1466 1467 switch (sha->type) { 1468 case CCP_SHA_TYPE_1: 1469 if (sha->ctx_len < SHA1_DIGEST_SIZE) 1470 return -EINVAL; 1471 block_size = SHA1_BLOCK_SIZE; 1472 break; 1473 case CCP_SHA_TYPE_224: 1474 if (sha->ctx_len < SHA224_DIGEST_SIZE) 1475 return -EINVAL; 1476 block_size = SHA224_BLOCK_SIZE; 1477 break; 1478 case CCP_SHA_TYPE_256: 1479 if (sha->ctx_len < SHA256_DIGEST_SIZE) 1480 return -EINVAL; 1481 block_size = SHA256_BLOCK_SIZE; 1482 break; 1483 case CCP_SHA_TYPE_384: 1484 if (cmd_q->ccp->vdata->version < CCP_VERSION(4, 0) 1485 || sha->ctx_len < SHA384_DIGEST_SIZE) 1486 return -EINVAL; 1487 block_size = SHA384_BLOCK_SIZE; 1488 break; 1489 case CCP_SHA_TYPE_512: 1490 if (cmd_q->ccp->vdata->version < CCP_VERSION(4, 0) 1491 || sha->ctx_len < SHA512_DIGEST_SIZE) 1492 return -EINVAL; 1493 block_size = SHA512_BLOCK_SIZE; 1494 break; 1495 default: 1496 return -EINVAL; 1497 } 1498 1499 if (!sha->ctx) 1500 return -EINVAL; 1501 1502 if (!sha->final && (sha->src_len & (block_size - 1))) 1503 return -EINVAL; 1504 1505 /* The version 3 device can't handle zero-length input */ 1506 if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0)) { 1507 1508 if (!sha->src_len) { 1509 unsigned int digest_len; 1510 const u8 *sha_zero; 1511 1512 /* Not final, just return */ 1513 if (!sha->final) 1514 return 0; 1515 1516 /* CCP can't do a zero length sha operation so the 1517 * caller must buffer the data. 1518 */ 1519 if (sha->msg_bits) 1520 return -EINVAL; 1521 1522 /* The CCP cannot perform zero-length sha operations 1523 * so the caller is required to buffer data for the 1524 * final operation. 
However, a sha operation for a 1525 * message with a total length of zero is valid so 1526 * known values are required to supply the result. 1527 */ 1528 switch (sha->type) { 1529 case CCP_SHA_TYPE_1: 1530 sha_zero = sha1_zero_message_hash; 1531 digest_len = SHA1_DIGEST_SIZE; 1532 break; 1533 case CCP_SHA_TYPE_224: 1534 sha_zero = sha224_zero_message_hash; 1535 digest_len = SHA224_DIGEST_SIZE; 1536 break; 1537 case CCP_SHA_TYPE_256: 1538 sha_zero = sha256_zero_message_hash; 1539 digest_len = SHA256_DIGEST_SIZE; 1540 break; 1541 default: 1542 return -EINVAL; 1543 } 1544 1545 scatterwalk_map_and_copy((void *)sha_zero, sha->ctx, 0, 1546 digest_len, 1); 1547 1548 return 0; 1549 } 1550 } 1551 1552 /* Set variables used throughout */ 1553 switch (sha->type) { 1554 case CCP_SHA_TYPE_1: 1555 digest_size = SHA1_DIGEST_SIZE; 1556 init = (void *) ccp_sha1_init; 1557 ctx_size = SHA1_DIGEST_SIZE; 1558 sb_count = 1; 1559 if (cmd_q->ccp->vdata->version != CCP_VERSION(3, 0)) 1560 ooffset = ioffset = CCP_SB_BYTES - SHA1_DIGEST_SIZE; 1561 else 1562 ooffset = ioffset = 0; 1563 break; 1564 case CCP_SHA_TYPE_224: 1565 digest_size = SHA224_DIGEST_SIZE; 1566 init = (void *) ccp_sha224_init; 1567 ctx_size = SHA256_DIGEST_SIZE; 1568 sb_count = 1; 1569 ioffset = 0; 1570 if (cmd_q->ccp->vdata->version != CCP_VERSION(3, 0)) 1571 ooffset = CCP_SB_BYTES - SHA224_DIGEST_SIZE; 1572 else 1573 ooffset = 0; 1574 break; 1575 case CCP_SHA_TYPE_256: 1576 digest_size = SHA256_DIGEST_SIZE; 1577 init = (void *) ccp_sha256_init; 1578 ctx_size = SHA256_DIGEST_SIZE; 1579 sb_count = 1; 1580 ooffset = ioffset = 0; 1581 break; 1582 case CCP_SHA_TYPE_384: 1583 digest_size = SHA384_DIGEST_SIZE; 1584 init = (void *) ccp_sha384_init; 1585 ctx_size = SHA512_DIGEST_SIZE; 1586 sb_count = 2; 1587 ioffset = 0; 1588 ooffset = 2 * CCP_SB_BYTES - SHA384_DIGEST_SIZE; 1589 break; 1590 case CCP_SHA_TYPE_512: 1591 digest_size = SHA512_DIGEST_SIZE; 1592 init = (void *) ccp_sha512_init; 1593 ctx_size = SHA512_DIGEST_SIZE; 1594 sb_count = 2; 1595 ooffset = ioffset = 0; 1596 break; 1597 default: 1598 ret = -EINVAL; 1599 goto e_data; 1600 } 1601 1602 /* For zero-length plaintext the src pointer is ignored; 1603 * otherwise both parts must be valid 1604 */ 1605 if (sha->src_len && !sha->src) 1606 return -EINVAL; 1607 1608 memset(&op, 0, sizeof(op)); 1609 op.cmd_q = cmd_q; 1610 op.jobid = CCP_NEW_JOBID(cmd_q->ccp); 1611 op.sb_ctx = cmd_q->sb_ctx; /* Pre-allocated */ 1612 op.u.sha.type = sha->type; 1613 op.u.sha.msg_bits = sha->msg_bits; 1614 1615 /* For SHA1/224/256 the context fits in a single (32-byte) SB entry; 1616 * SHA384/512 require 2 adjacent SB slots, with the right half in the 1617 * first slot, and the left half in the second. Each portion must then 1618 * be in little endian format: use the 256-bit byte swap option. 
1619 */ 1620 ret = ccp_init_dm_workarea(&ctx, cmd_q, sb_count * CCP_SB_BYTES, 1621 DMA_BIDIRECTIONAL); 1622 if (ret) 1623 return ret; 1624 if (sha->first) { 1625 switch (sha->type) { 1626 case CCP_SHA_TYPE_1: 1627 case CCP_SHA_TYPE_224: 1628 case CCP_SHA_TYPE_256: 1629 memcpy(ctx.address + ioffset, init, ctx_size); 1630 break; 1631 case CCP_SHA_TYPE_384: 1632 case CCP_SHA_TYPE_512: 1633 memcpy(ctx.address + ctx_size / 2, init, 1634 ctx_size / 2); 1635 memcpy(ctx.address, init + ctx_size / 2, 1636 ctx_size / 2); 1637 break; 1638 default: 1639 ret = -EINVAL; 1640 goto e_ctx; 1641 } 1642 } else { 1643 /* Restore the context */ 1644 ret = ccp_set_dm_area(&ctx, 0, sha->ctx, 0, 1645 sb_count * CCP_SB_BYTES); 1646 if (ret) 1647 goto e_ctx; 1648 } 1649 1650 ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx, 1651 CCP_PASSTHRU_BYTESWAP_256BIT); 1652 if (ret) { 1653 cmd->engine_error = cmd_q->cmd_error; 1654 goto e_ctx; 1655 } 1656 1657 if (sha->src) { 1658 /* Send data to the CCP SHA engine; block_size is set above */ 1659 ret = ccp_init_data(&src, cmd_q, sha->src, sha->src_len, 1660 block_size, DMA_TO_DEVICE); 1661 if (ret) 1662 goto e_ctx; 1663 1664 while (src.sg_wa.bytes_left) { 1665 ccp_prepare_data(&src, NULL, &op, block_size, false); 1666 if (sha->final && !src.sg_wa.bytes_left) 1667 op.eom = 1; 1668 1669 ret = cmd_q->ccp->vdata->perform->sha(&op); 1670 if (ret) { 1671 cmd->engine_error = cmd_q->cmd_error; 1672 goto e_data; 1673 } 1674 1675 ccp_process_data(&src, NULL, &op); 1676 } 1677 } else { 1678 op.eom = 1; 1679 ret = cmd_q->ccp->vdata->perform->sha(&op); 1680 if (ret) { 1681 cmd->engine_error = cmd_q->cmd_error; 1682 goto e_data; 1683 } 1684 } 1685 1686 /* Retrieve the SHA context - convert from LE to BE using 1687 * 32-byte (256-bit) byteswapping to BE 1688 */ 1689 ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx, 1690 CCP_PASSTHRU_BYTESWAP_256BIT); 1691 if (ret) { 1692 cmd->engine_error = cmd_q->cmd_error; 1693 goto e_data; 1694 } 1695 1696 if (sha->final) { 1697 /* Finishing up, so get the digest */ 1698 switch (sha->type) { 1699 case CCP_SHA_TYPE_1: 1700 case CCP_SHA_TYPE_224: 1701 case CCP_SHA_TYPE_256: 1702 ccp_get_dm_area(&ctx, ooffset, 1703 sha->ctx, 0, 1704 digest_size); 1705 break; 1706 case CCP_SHA_TYPE_384: 1707 case CCP_SHA_TYPE_512: 1708 ccp_get_dm_area(&ctx, 0, 1709 sha->ctx, LSB_ITEM_SIZE - ooffset, 1710 LSB_ITEM_SIZE); 1711 ccp_get_dm_area(&ctx, LSB_ITEM_SIZE + ooffset, 1712 sha->ctx, 0, 1713 LSB_ITEM_SIZE - ooffset); 1714 break; 1715 default: 1716 ret = -EINVAL; 1717 goto e_ctx; 1718 } 1719 } else { 1720 /* Stash the context */ 1721 ccp_get_dm_area(&ctx, 0, sha->ctx, 0, 1722 sb_count * CCP_SB_BYTES); 1723 } 1724 1725 if (sha->final && sha->opad) { 1726 /* HMAC operation, recursively perform final SHA */ 1727 struct ccp_cmd hmac_cmd; 1728 struct scatterlist sg; 1729 u8 *hmac_buf; 1730 1731 if (sha->opad_len != block_size) { 1732 ret = -EINVAL; 1733 goto e_data; 1734 } 1735 1736 hmac_buf = kmalloc(block_size + digest_size, GFP_KERNEL); 1737 if (!hmac_buf) { 1738 ret = -ENOMEM; 1739 goto e_data; 1740 } 1741 sg_init_one(&sg, hmac_buf, block_size + digest_size); 1742 1743 scatterwalk_map_and_copy(hmac_buf, sha->opad, 0, block_size, 0); 1744 switch (sha->type) { 1745 case CCP_SHA_TYPE_1: 1746 case CCP_SHA_TYPE_224: 1747 case CCP_SHA_TYPE_256: 1748 memcpy(hmac_buf + block_size, 1749 ctx.address + ooffset, 1750 digest_size); 1751 break; 1752 case CCP_SHA_TYPE_384: 1753 case CCP_SHA_TYPE_512: 1754 memcpy(hmac_buf + block_size, 1755 ctx.address + LSB_ITEM_SIZE + 
ooffset, 1756 LSB_ITEM_SIZE); 1757 memcpy(hmac_buf + block_size + 1758 (LSB_ITEM_SIZE - ooffset), 1759 ctx.address, 1760 LSB_ITEM_SIZE); 1761 break; 1762 default: 1763 ret = -EINVAL; 1764 goto e_ctx; 1765 } 1766 1767 memset(&hmac_cmd, 0, sizeof(hmac_cmd)); 1768 hmac_cmd.engine = CCP_ENGINE_SHA; 1769 hmac_cmd.u.sha.type = sha->type; 1770 hmac_cmd.u.sha.ctx = sha->ctx; 1771 hmac_cmd.u.sha.ctx_len = sha->ctx_len; 1772 hmac_cmd.u.sha.src = &sg; 1773 hmac_cmd.u.sha.src_len = block_size + digest_size; 1774 hmac_cmd.u.sha.opad = NULL; 1775 hmac_cmd.u.sha.opad_len = 0; 1776 hmac_cmd.u.sha.first = 1; 1777 hmac_cmd.u.sha.final = 1; 1778 hmac_cmd.u.sha.msg_bits = (block_size + digest_size) << 3; 1779 1780 ret = ccp_run_sha_cmd(cmd_q, &hmac_cmd); 1781 if (ret) 1782 cmd->engine_error = hmac_cmd.engine_error; 1783 1784 kfree(hmac_buf); 1785 } 1786 1787 e_data: 1788 if (sha->src) 1789 ccp_free_data(&src, cmd_q); 1790 1791 e_ctx: 1792 ccp_dm_free(&ctx); 1793 1794 return ret; 1795 } 1796 1797 static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) 1798 { 1799 struct ccp_rsa_engine *rsa = &cmd->u.rsa; 1800 struct ccp_dm_workarea exp, src, dst; 1801 struct ccp_op op; 1802 unsigned int sb_count, i_len, o_len; 1803 int ret; 1804 1805 /* Check against the maximum allowable size, in bits */ 1806 if (rsa->key_size > cmd_q->ccp->vdata->rsamax) 1807 return -EINVAL; 1808 1809 if (!rsa->exp || !rsa->mod || !rsa->src || !rsa->dst) 1810 return -EINVAL; 1811 1812 memset(&op, 0, sizeof(op)); 1813 op.cmd_q = cmd_q; 1814 op.jobid = CCP_NEW_JOBID(cmd_q->ccp); 1815 1816 /* The RSA modulus must precede the message being acted upon, so 1817 * it must be copied to a DMA area where the message and the 1818 * modulus can be concatenated. Therefore the input buffer 1819 * length required is twice the output buffer length (which 1820 * must be a multiple of 256-bits). Compute o_len, i_len in bytes. 1821 * Buffer sizes must be a multiple of 32 bytes; rounding up may be 1822 * required. 1823 */ 1824 o_len = 32 * ((rsa->key_size + 255) / 256); 1825 i_len = o_len * 2; 1826 1827 sb_count = 0; 1828 if (cmd_q->ccp->vdata->version < CCP_VERSION(5, 0)) { 1829 /* sb_count is the number of storage block slots required 1830 * for the modulus. 1831 */ 1832 sb_count = o_len / CCP_SB_BYTES; 1833 op.sb_key = cmd_q->ccp->vdata->perform->sballoc(cmd_q, 1834 sb_count); 1835 if (!op.sb_key) 1836 return -EIO; 1837 } else { 1838 /* A version 5 device allows a modulus size that will not fit 1839 * in the LSB, so the command will transfer it from memory. 1840 * Set the sb key to the default, even though it's not used. 1841 */ 1842 op.sb_key = cmd_q->sb_key; 1843 } 1844 1845 /* The RSA exponent must be in little endian format. Reverse its 1846 * byte order. 1847 */ 1848 ret = ccp_init_dm_workarea(&exp, cmd_q, o_len, DMA_TO_DEVICE); 1849 if (ret) 1850 goto e_sb; 1851 1852 ret = ccp_reverse_set_dm_area(&exp, 0, rsa->exp, 0, rsa->exp_len); 1853 if (ret) 1854 goto e_exp; 1855 1856 if (cmd_q->ccp->vdata->version < CCP_VERSION(5, 0)) { 1857 /* Copy the exponent to the local storage block, using 1858 * as many 32-byte blocks as were allocated above. It's 1859 * already little endian, so no further change is required. 1860 */ 1861 ret = ccp_copy_to_sb(cmd_q, &exp, op.jobid, op.sb_key, 1862 CCP_PASSTHRU_BYTESWAP_NOOP); 1863 if (ret) { 1864 cmd->engine_error = cmd_q->cmd_error; 1865 goto e_exp; 1866 } 1867 } else { 1868 /* The exponent can be retrieved from memory via DMA. 
*/ 1869 op.exp.u.dma.address = exp.dma.address; 1870 op.exp.u.dma.offset = 0; 1871 } 1872 1873 /* Concatenate the modulus and the message. Both the modulus and 1874 * the operands must be in little endian format. Since the input 1875 * is in big endian format it must be converted. 1876 */ 1877 ret = ccp_init_dm_workarea(&src, cmd_q, i_len, DMA_TO_DEVICE); 1878 if (ret) 1879 goto e_exp; 1880 1881 ret = ccp_reverse_set_dm_area(&src, 0, rsa->mod, 0, rsa->mod_len); 1882 if (ret) 1883 goto e_src; 1884 ret = ccp_reverse_set_dm_area(&src, o_len, rsa->src, 0, rsa->src_len); 1885 if (ret) 1886 goto e_src; 1887 1888 /* Prepare the output area for the operation */ 1889 ret = ccp_init_dm_workarea(&dst, cmd_q, o_len, DMA_FROM_DEVICE); 1890 if (ret) 1891 goto e_src; 1892 1893 op.soc = 1; 1894 op.src.u.dma.address = src.dma.address; 1895 op.src.u.dma.offset = 0; 1896 op.src.u.dma.length = i_len; 1897 op.dst.u.dma.address = dst.dma.address; 1898 op.dst.u.dma.offset = 0; 1899 op.dst.u.dma.length = o_len; 1900 1901 op.u.rsa.mod_size = rsa->key_size; 1902 op.u.rsa.input_len = i_len; 1903 1904 ret = cmd_q->ccp->vdata->perform->rsa(&op); 1905 if (ret) { 1906 cmd->engine_error = cmd_q->cmd_error; 1907 goto e_dst; 1908 } 1909 1910 ccp_reverse_get_dm_area(&dst, 0, rsa->dst, 0, rsa->mod_len); 1911 1912 e_dst: 1913 ccp_dm_free(&dst); 1914 1915 e_src: 1916 ccp_dm_free(&src); 1917 1918 e_exp: 1919 ccp_dm_free(&exp); 1920 1921 e_sb: 1922 if (sb_count) 1923 cmd_q->ccp->vdata->perform->sbfree(cmd_q, op.sb_key, sb_count); 1924 1925 return ret; 1926 } 1927 1928 static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q, 1929 struct ccp_cmd *cmd) 1930 { 1931 struct ccp_passthru_engine *pt = &cmd->u.passthru; 1932 struct ccp_dm_workarea mask; 1933 struct ccp_data src, dst; 1934 struct ccp_op op; 1935 bool in_place = false; 1936 unsigned int i; 1937 int ret = 0; 1938 1939 if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1))) 1940 return -EINVAL; 1941 1942 if (!pt->src || !pt->dst) 1943 return -EINVAL; 1944 1945 if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) { 1946 if (pt->mask_len != CCP_PASSTHRU_MASKSIZE) 1947 return -EINVAL; 1948 if (!pt->mask) 1949 return -EINVAL; 1950 } 1951 1952 BUILD_BUG_ON(CCP_PASSTHRU_SB_COUNT != 1); 1953 1954 memset(&op, 0, sizeof(op)); 1955 op.cmd_q = cmd_q; 1956 op.jobid = CCP_NEW_JOBID(cmd_q->ccp); 1957 1958 if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) { 1959 /* Load the mask */ 1960 op.sb_key = cmd_q->sb_key; 1961 1962 ret = ccp_init_dm_workarea(&mask, cmd_q, 1963 CCP_PASSTHRU_SB_COUNT * 1964 CCP_SB_BYTES, 1965 DMA_TO_DEVICE); 1966 if (ret) 1967 return ret; 1968 1969 ret = ccp_set_dm_area(&mask, 0, pt->mask, 0, pt->mask_len); 1970 if (ret) 1971 goto e_mask; 1972 ret = ccp_copy_to_sb(cmd_q, &mask, op.jobid, op.sb_key, 1973 CCP_PASSTHRU_BYTESWAP_NOOP); 1974 if (ret) { 1975 cmd->engine_error = cmd_q->cmd_error; 1976 goto e_mask; 1977 } 1978 } 1979 1980 /* Prepare the input and output data workareas. For in-place 1981 * operations we need to set the dma direction to BIDIRECTIONAL 1982 * and copy the src workarea to the dst workarea. 1983 */ 1984 if (sg_virt(pt->src) == sg_virt(pt->dst)) 1985 in_place = true; 1986 1987 ret = ccp_init_data(&src, cmd_q, pt->src, pt->src_len, 1988 CCP_PASSTHRU_MASKSIZE, 1989 in_place ? 
DMA_BIDIRECTIONAL : DMA_TO_DEVICE); 1990 if (ret) 1991 goto e_mask; 1992 1993 if (in_place) { 1994 dst = src; 1995 } else { 1996 ret = ccp_init_data(&dst, cmd_q, pt->dst, pt->src_len, 1997 CCP_PASSTHRU_MASKSIZE, DMA_FROM_DEVICE); 1998 if (ret) 1999 goto e_src; 2000 } 2001 2002 /* Send data to the CCP Passthru engine 2003 * Because the CCP engine works on a single source and destination 2004 * dma address at a time, each entry in the source scatterlist 2005 * (after the dma_map_sg call) must be less than or equal to the 2006 * (remaining) length in the destination scatterlist entry and the 2007 * length must be a multiple of CCP_PASSTHRU_BLOCKSIZE 2008 */ 2009 dst.sg_wa.sg_used = 0; 2010 for (i = 1; i <= src.sg_wa.dma_count; i++) { 2011 if (!dst.sg_wa.sg || 2012 (dst.sg_wa.sg->length < src.sg_wa.sg->length)) { 2013 ret = -EINVAL; 2014 goto e_dst; 2015 } 2016 2017 if (i == src.sg_wa.dma_count) { 2018 op.eom = 1; 2019 op.soc = 1; 2020 } 2021 2022 op.src.type = CCP_MEMTYPE_SYSTEM; 2023 op.src.u.dma.address = sg_dma_address(src.sg_wa.sg); 2024 op.src.u.dma.offset = 0; 2025 op.src.u.dma.length = sg_dma_len(src.sg_wa.sg); 2026 2027 op.dst.type = CCP_MEMTYPE_SYSTEM; 2028 op.dst.u.dma.address = sg_dma_address(dst.sg_wa.sg); 2029 op.dst.u.dma.offset = dst.sg_wa.sg_used; 2030 op.dst.u.dma.length = op.src.u.dma.length; 2031 2032 ret = cmd_q->ccp->vdata->perform->passthru(&op); 2033 if (ret) { 2034 cmd->engine_error = cmd_q->cmd_error; 2035 goto e_dst; 2036 } 2037 2038 dst.sg_wa.sg_used += src.sg_wa.sg->length; 2039 if (dst.sg_wa.sg_used == dst.sg_wa.sg->length) { 2040 dst.sg_wa.sg = sg_next(dst.sg_wa.sg); 2041 dst.sg_wa.sg_used = 0; 2042 } 2043 src.sg_wa.sg = sg_next(src.sg_wa.sg); 2044 } 2045 2046 e_dst: 2047 if (!in_place) 2048 ccp_free_data(&dst, cmd_q); 2049 2050 e_src: 2051 ccp_free_data(&src, cmd_q); 2052 2053 e_mask: 2054 if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) 2055 ccp_dm_free(&mask); 2056 2057 return ret; 2058 } 2059 2060 static int ccp_run_passthru_nomap_cmd(struct ccp_cmd_queue *cmd_q, 2061 struct ccp_cmd *cmd) 2062 { 2063 struct ccp_passthru_nomap_engine *pt = &cmd->u.passthru_nomap; 2064 struct ccp_dm_workarea mask; 2065 struct ccp_op op; 2066 int ret; 2067 2068 if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1))) 2069 return -EINVAL; 2070 2071 if (!pt->src_dma || !pt->dst_dma) 2072 return -EINVAL; 2073 2074 if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) { 2075 if (pt->mask_len != CCP_PASSTHRU_MASKSIZE) 2076 return -EINVAL; 2077 if (!pt->mask) 2078 return -EINVAL; 2079 } 2080 2081 BUILD_BUG_ON(CCP_PASSTHRU_SB_COUNT != 1); 2082 2083 memset(&op, 0, sizeof(op)); 2084 op.cmd_q = cmd_q; 2085 op.jobid = CCP_NEW_JOBID(cmd_q->ccp); 2086 2087 if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) { 2088 /* Load the mask */ 2089 op.sb_key = cmd_q->sb_key; 2090 2091 mask.length = pt->mask_len; 2092 mask.dma.address = pt->mask; 2093 mask.dma.length = pt->mask_len; 2094 2095 ret = ccp_copy_to_sb(cmd_q, &mask, op.jobid, op.sb_key, 2096 CCP_PASSTHRU_BYTESWAP_NOOP); 2097 if (ret) { 2098 cmd->engine_error = cmd_q->cmd_error; 2099 return ret; 2100 } 2101 } 2102 2103 /* Send data to the CCP Passthru engine */ 2104 op.eom = 1; 2105 op.soc = 1; 2106 2107 op.src.type = CCP_MEMTYPE_SYSTEM; 2108 op.src.u.dma.address = pt->src_dma; 2109 op.src.u.dma.offset = 0; 2110 op.src.u.dma.length = pt->src_len; 2111 2112 op.dst.type = CCP_MEMTYPE_SYSTEM; 2113 op.dst.u.dma.address = pt->dst_dma; 2114 op.dst.u.dma.offset = 0; 2115 op.dst.u.dma.length = pt->src_len; 2116 2117 ret = 
static int ccp_run_passthru_nomap_cmd(struct ccp_cmd_queue *cmd_q,
				      struct ccp_cmd *cmd)
{
	struct ccp_passthru_nomap_engine *pt = &cmd->u.passthru_nomap;
	struct ccp_dm_workarea mask;
	struct ccp_op op;
	int ret;

	if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1)))
		return -EINVAL;

	if (!pt->src_dma || !pt->dst_dma)
		return -EINVAL;

	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
		if (pt->mask_len != CCP_PASSTHRU_MASKSIZE)
			return -EINVAL;
		if (!pt->mask)
			return -EINVAL;
	}

	BUILD_BUG_ON(CCP_PASSTHRU_SB_COUNT != 1);

	memset(&op, 0, sizeof(op));
	op.cmd_q = cmd_q;
	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);

	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
		/* Load the mask */
		op.sb_key = cmd_q->sb_key;

		mask.length = pt->mask_len;
		mask.dma.address = pt->mask;
		mask.dma.length = pt->mask_len;

		ret = ccp_copy_to_sb(cmd_q, &mask, op.jobid, op.sb_key,
				     CCP_PASSTHRU_BYTESWAP_NOOP);
		if (ret) {
			cmd->engine_error = cmd_q->cmd_error;
			return ret;
		}
	}

	/* Send data to the CCP Passthru engine */
	op.eom = 1;
	op.soc = 1;

	op.src.type = CCP_MEMTYPE_SYSTEM;
	op.src.u.dma.address = pt->src_dma;
	op.src.u.dma.offset = 0;
	op.src.u.dma.length = pt->src_len;

	op.dst.type = CCP_MEMTYPE_SYSTEM;
	op.dst.u.dma.address = pt->dst_dma;
	op.dst.u.dma.offset = 0;
	op.dst.u.dma.length = pt->src_len;

	ret = cmd_q->ccp->vdata->perform->passthru(&op);
	if (ret)
		cmd->engine_error = cmd_q->cmd_error;

	return ret;
}

static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
	struct ccp_dm_workarea src, dst;
	struct ccp_op op;
	int ret;
	u8 *save;

	if (!ecc->u.mm.operand_1 ||
	    (ecc->u.mm.operand_1_len > CCP_ECC_MODULUS_BYTES))
		return -EINVAL;

	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT)
		if (!ecc->u.mm.operand_2 ||
		    (ecc->u.mm.operand_2_len > CCP_ECC_MODULUS_BYTES))
			return -EINVAL;

	if (!ecc->u.mm.result ||
	    (ecc->u.mm.result_len < CCP_ECC_MODULUS_BYTES))
		return -EINVAL;

	memset(&op, 0, sizeof(op));
	op.cmd_q = cmd_q;
	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);

	/* Concatenate the modulus and the operands. Both the modulus and
	 * the operands must be in little endian format. Since the input
	 * is in big endian format it must be converted and placed in a
	 * fixed length buffer.
	 */
	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
				   DMA_TO_DEVICE);
	if (ret)
		return ret;

	/* Save the workarea address since it is updated in order to perform
	 * the concatenation
	 */
	save = src.address;

	/* Copy the ECC modulus */
	ret = ccp_reverse_set_dm_area(&src, 0, ecc->mod, 0, ecc->mod_len);
	if (ret)
		goto e_src;
	src.address += CCP_ECC_OPERAND_SIZE;

	/* Copy the first operand */
	ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.mm.operand_1, 0,
				      ecc->u.mm.operand_1_len);
	if (ret)
		goto e_src;
	src.address += CCP_ECC_OPERAND_SIZE;

	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT) {
		/* Copy the second operand */
		ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.mm.operand_2, 0,
					      ecc->u.mm.operand_2_len);
		if (ret)
			goto e_src;
		src.address += CCP_ECC_OPERAND_SIZE;
	}

	/* Restore the workarea address */
	src.address = save;

	/* Prepare the output area for the operation */
	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
				   DMA_FROM_DEVICE);
	if (ret)
		goto e_src;

	op.soc = 1;
	op.src.u.dma.address = src.dma.address;
	op.src.u.dma.offset = 0;
	op.src.u.dma.length = src.length;
	op.dst.u.dma.address = dst.dma.address;
	op.dst.u.dma.offset = 0;
	op.dst.u.dma.length = dst.length;

	op.u.ecc.function = cmd->u.ecc.function;

	ret = cmd_q->ccp->vdata->perform->ecc(&op);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_dst;
	}

	ecc->ecc_result = le16_to_cpup(
		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
		ret = -EIO;
		goto e_dst;
	}

	/* Save the ECC result */
	ccp_reverse_get_dm_area(&dst, 0, ecc->u.mm.result, 0,
				CCP_ECC_MODULUS_BYTES);

e_dst:
	ccp_dm_free(&dst);

e_src:
	ccp_dm_free(&src);

	return ret;
}
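
/* ECC point-math operation (point add, multiply or double on a 384-bit
 * curve). The point coordinates are converted to little endian, placed
 * into fixed-size operand slots with an implicit Z coordinate of 1, and
 * the resulting X/Y coordinates are converted back to big endian for
 * the caller.
 */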
static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
	struct ccp_dm_workarea src, dst;
	struct ccp_op op;
	int ret;
	u8 *save;

	if (!ecc->u.pm.point_1.x ||
	    (ecc->u.pm.point_1.x_len > CCP_ECC_MODULUS_BYTES) ||
	    !ecc->u.pm.point_1.y ||
	    (ecc->u.pm.point_1.y_len > CCP_ECC_MODULUS_BYTES))
		return -EINVAL;

	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
		if (!ecc->u.pm.point_2.x ||
		    (ecc->u.pm.point_2.x_len > CCP_ECC_MODULUS_BYTES) ||
		    !ecc->u.pm.point_2.y ||
		    (ecc->u.pm.point_2.y_len > CCP_ECC_MODULUS_BYTES))
			return -EINVAL;
	} else {
		if (!ecc->u.pm.domain_a ||
		    (ecc->u.pm.domain_a_len > CCP_ECC_MODULUS_BYTES))
			return -EINVAL;

		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT)
			if (!ecc->u.pm.scalar ||
			    (ecc->u.pm.scalar_len > CCP_ECC_MODULUS_BYTES))
				return -EINVAL;
	}

	if (!ecc->u.pm.result.x ||
	    (ecc->u.pm.result.x_len < CCP_ECC_MODULUS_BYTES) ||
	    !ecc->u.pm.result.y ||
	    (ecc->u.pm.result.y_len < CCP_ECC_MODULUS_BYTES))
		return -EINVAL;

	memset(&op, 0, sizeof(op));
	op.cmd_q = cmd_q;
	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);

	/* Concatenate the modulus and the operands. Both the modulus and
	 * the operands must be in little endian format. Since the input
	 * is in big endian format it must be converted and placed in a
	 * fixed length buffer.
	 */
	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
				   DMA_TO_DEVICE);
	if (ret)
		return ret;

	/* Save the workarea address since it is updated in order to perform
	 * the concatenation
	 */
	save = src.address;

	/* Copy the ECC modulus */
	ret = ccp_reverse_set_dm_area(&src, 0, ecc->mod, 0, ecc->mod_len);
	if (ret)
		goto e_src;
	src.address += CCP_ECC_OPERAND_SIZE;

	/* Copy the first point X and Y coordinate */
	ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_1.x, 0,
				      ecc->u.pm.point_1.x_len);
	if (ret)
		goto e_src;
	src.address += CCP_ECC_OPERAND_SIZE;
	ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_1.y, 0,
				      ecc->u.pm.point_1.y_len);
	if (ret)
		goto e_src;
	src.address += CCP_ECC_OPERAND_SIZE;

	/* Set the first point Z coordinate to 1 */
	*src.address = 0x01;
	src.address += CCP_ECC_OPERAND_SIZE;

	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
		/* Copy the second point X and Y coordinate */
		ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_2.x, 0,
					      ecc->u.pm.point_2.x_len);
		if (ret)
			goto e_src;
		src.address += CCP_ECC_OPERAND_SIZE;
		ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_2.y, 0,
					      ecc->u.pm.point_2.y_len);
		if (ret)
			goto e_src;
		src.address += CCP_ECC_OPERAND_SIZE;

		/* Set the second point Z coordinate to 1 */
		*src.address = 0x01;
		src.address += CCP_ECC_OPERAND_SIZE;
	} else {
		/* Copy the Domain "a" parameter */
		ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.domain_a, 0,
					      ecc->u.pm.domain_a_len);
		if (ret)
			goto e_src;
		src.address += CCP_ECC_OPERAND_SIZE;

		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT) {
			/* Copy the scalar value */
			ret = ccp_reverse_set_dm_area(&src, 0,
						      ecc->u.pm.scalar, 0,
						      ecc->u.pm.scalar_len);
			if (ret)
				goto e_src;
			src.address += CCP_ECC_OPERAND_SIZE;
		}
	}

	/* Restore the workarea address */
	src.address = save;

	/* Prepare the output area for the operation */
	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
				   DMA_FROM_DEVICE);
	if (ret)
		goto e_src;

	op.soc = 1;
	op.src.u.dma.address = src.dma.address;
	op.src.u.dma.offset = 0;
	op.src.u.dma.length = src.length;
	op.dst.u.dma.address = dst.dma.address;
	op.dst.u.dma.offset = 0;
	op.dst.u.dma.length = dst.length;

	op.u.ecc.function = cmd->u.ecc.function;

	ret = cmd_q->ccp->vdata->perform->ecc(&op);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_dst;
	}

	ecc->ecc_result = le16_to_cpup(
		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
		ret = -EIO;
		goto e_dst;
	}

	/* Save the workarea address since it is updated as we walk through
	 * to copy the point math result
	 */
	save = dst.address;

	/* Save the ECC result X and Y coordinates */
	ccp_reverse_get_dm_area(&dst, 0, ecc->u.pm.result.x, 0,
				CCP_ECC_MODULUS_BYTES);
	dst.address += CCP_ECC_OUTPUT_SIZE;
	ccp_reverse_get_dm_area(&dst, 0, ecc->u.pm.result.y, 0,
				CCP_ECC_MODULUS_BYTES);
	dst.address += CCP_ECC_OUTPUT_SIZE;

	/* Restore the workarea address */
	dst.address = save;

e_dst:
	ccp_dm_free(&dst);

e_src:
	ccp_dm_free(&src);

	return ret;
}

static int ccp_run_ecc_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
	struct ccp_ecc_engine *ecc = &cmd->u.ecc;

	ecc->ecc_result = 0;

	if (!ecc->mod ||
	    (ecc->mod_len > CCP_ECC_MODULUS_BYTES))
		return -EINVAL;

	switch (ecc->function) {
	case CCP_ECC_FUNCTION_MMUL_384BIT:
	case CCP_ECC_FUNCTION_MADD_384BIT:
	case CCP_ECC_FUNCTION_MINV_384BIT:
		return ccp_run_ecc_mm_cmd(cmd_q, cmd);

	case CCP_ECC_FUNCTION_PADD_384BIT:
	case CCP_ECC_FUNCTION_PMUL_384BIT:
	case CCP_ECC_FUNCTION_PDBL_384BIT:
		return ccp_run_ecc_pm_cmd(cmd_q, cmd);

	default:
		return -EINVAL;
	}
}
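
/**
 * ccp_run_cmd - run a command through a CCP command queue
 * @cmd_q: the command queue to use
 * @cmd: the command to execute
 *
 * Clears the per-command error state, refreshes the queue's free-slot
 * count and dispatches the command to the handler for the engine
 * selected by cmd->engine.  Returns 0 on success or a negative errno
 * (-EINVAL for an unsupported engine or a malformed request).
 */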
int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
	int ret;

	cmd->engine_error = 0;
	cmd_q->cmd_error = 0;
	cmd_q->int_rcvd = 0;
	cmd_q->free_slots = cmd_q->ccp->vdata->perform->get_free_slots(cmd_q);

	switch (cmd->engine) {
	case CCP_ENGINE_AES:
		ret = ccp_run_aes_cmd(cmd_q, cmd);
		break;
	case CCP_ENGINE_XTS_AES_128:
		ret = ccp_run_xts_aes_cmd(cmd_q, cmd);
		break;
	case CCP_ENGINE_DES3:
		ret = ccp_run_des3_cmd(cmd_q, cmd);
		break;
	case CCP_ENGINE_SHA:
		ret = ccp_run_sha_cmd(cmd_q, cmd);
		break;
	case CCP_ENGINE_RSA:
		ret = ccp_run_rsa_cmd(cmd_q, cmd);
		break;
	case CCP_ENGINE_PASSTHRU:
		if (cmd->flags & CCP_CMD_PASSTHRU_NO_DMA_MAP)
			ret = ccp_run_passthru_nomap_cmd(cmd_q, cmd);
		else
			ret = ccp_run_passthru_cmd(cmd_q, cmd);
		break;
	case CCP_ENGINE_ECC:
		ret = ccp_run_ecc_cmd(cmd_q, cmd);
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}
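
/*
 * Illustrative sketch: roughly how a client might build and submit a
 * pass-through copy.  This assumes the public API from <linux/ccp.h>
 * (struct ccp_cmd and ccp_enqueue_cmd()); the scatterlists sg_src/sg_dst,
 * the length len and the completion callback/context are placeholders
 * that the caller would supply.
 *
 *	struct ccp_cmd cmd;
 *
 *	memset(&cmd, 0, sizeof(cmd));
 *	cmd.engine = CCP_ENGINE_PASSTHRU;
 *	cmd.u.passthru.bit_mod = CCP_PASSTHRU_BITWISE_NOOP;
 *	cmd.u.passthru.byte_swap = CCP_PASSTHRU_BYTESWAP_NOOP;
 *	cmd.u.passthru.src = sg_src;
 *	cmd.u.passthru.dst = sg_dst;
 *	cmd.u.passthru.src_len = len;
 *	cmd.u.passthru.final = 1;
 *	cmd.callback = my_callback;
 *	cmd.data = my_context;
 *
 *	ret = ccp_enqueue_cmd(&cmd);
 */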