1 // SPDX-License-Identifier: GPL-2.0-only 2 /* Copyright (C) 2020 Marvell. */ 3 4 #include "otx2_cptvf.h" 5 #include "otx2_cpt_common.h" 6 7 /* SG list header size in bytes */ 8 #define SG_LIST_HDR_SIZE 8 9 10 /* Default timeout when waiting for free pending entry in us */ 11 #define CPT_PENTRY_TIMEOUT 1000 12 #define CPT_PENTRY_STEP 50 13 14 /* Default threshold for stopping and resuming sender requests */ 15 #define CPT_IQ_STOP_MARGIN 128 16 #define CPT_IQ_RESUME_MARGIN 512 17 18 /* Default command timeout in seconds */ 19 #define CPT_COMMAND_TIMEOUT 4 20 #define CPT_TIME_IN_RESET_COUNT 5 21 22 static void otx2_cpt_dump_sg_list(struct pci_dev *pdev, 23 struct otx2_cpt_req_info *req) 24 { 25 int i; 26 27 pr_debug("Gather list size %d\n", req->in_cnt); 28 for (i = 0; i < req->in_cnt; i++) { 29 pr_debug("Buffer %d size %d, vptr 0x%p, dmaptr 0x%p\n", i, 30 req->in[i].size, req->in[i].vptr, 31 (void *) req->in[i].dma_addr); 32 pr_debug("Buffer hexdump (%d bytes)\n", 33 req->in[i].size); 34 print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, 35 req->in[i].vptr, req->in[i].size, false); 36 } 37 pr_debug("Scatter list size %d\n", req->out_cnt); 38 for (i = 0; i < req->out_cnt; i++) { 39 pr_debug("Buffer %d size %d, vptr 0x%p, dmaptr 0x%p\n", i, 40 req->out[i].size, req->out[i].vptr, 41 (void *) req->out[i].dma_addr); 42 pr_debug("Buffer hexdump (%d bytes)\n", req->out[i].size); 43 print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, 44 req->out[i].vptr, req->out[i].size, false); 45 } 46 } 47 48 static inline struct otx2_cpt_pending_entry *get_free_pending_entry( 49 struct otx2_cpt_pending_queue *q, 50 int qlen) 51 { 52 struct otx2_cpt_pending_entry *ent = NULL; 53 54 ent = &q->head[q->rear]; 55 if (unlikely(ent->busy)) 56 return NULL; 57 58 q->rear++; 59 if (unlikely(q->rear == qlen)) 60 q->rear = 0; 61 62 return ent; 63 } 64 65 static inline u32 modulo_inc(u32 index, u32 length, u32 inc) 66 { 67 if (WARN_ON(inc > length)) 68 inc = length; 69 70 index += inc; 71 if (unlikely(index >= length)) 72 index -= length; 73 74 return index; 75 } 76 77 static inline void free_pentry(struct otx2_cpt_pending_entry *pentry) 78 { 79 pentry->completion_addr = NULL; 80 pentry->info = NULL; 81 pentry->callback = NULL; 82 pentry->areq = NULL; 83 pentry->resume_sender = false; 84 pentry->busy = false; 85 } 86 87 static inline int setup_sgio_components(struct pci_dev *pdev, 88 struct otx2_cpt_buf_ptr *list, 89 int buf_count, u8 *buffer) 90 { 91 struct otx2_cpt_sglist_component *sg_ptr = NULL; 92 int ret = 0, i, j; 93 int components; 94 95 if (unlikely(!list)) { 96 dev_err(&pdev->dev, "Input list pointer is NULL\n"); 97 return -EFAULT; 98 } 99 100 for (i = 0; i < buf_count; i++) { 101 if (unlikely(!list[i].vptr)) 102 continue; 103 list[i].dma_addr = dma_map_single(&pdev->dev, list[i].vptr, 104 list[i].size, 105 DMA_BIDIRECTIONAL); 106 if (unlikely(dma_mapping_error(&pdev->dev, list[i].dma_addr))) { 107 dev_err(&pdev->dev, "Dma mapping failed\n"); 108 ret = -EIO; 109 goto sg_cleanup; 110 } 111 } 112 components = buf_count / 4; 113 sg_ptr = (struct otx2_cpt_sglist_component *)buffer; 114 for (i = 0; i < components; i++) { 115 sg_ptr->len0 = cpu_to_be16(list[i * 4 + 0].size); 116 sg_ptr->len1 = cpu_to_be16(list[i * 4 + 1].size); 117 sg_ptr->len2 = cpu_to_be16(list[i * 4 + 2].size); 118 sg_ptr->len3 = cpu_to_be16(list[i * 4 + 3].size); 119 sg_ptr->ptr0 = cpu_to_be64(list[i * 4 + 0].dma_addr); 120 sg_ptr->ptr1 = cpu_to_be64(list[i * 4 + 1].dma_addr); 121 sg_ptr->ptr2 = cpu_to_be64(list[i * 4 + 2].dma_addr); 122 sg_ptr->ptr3 = cpu_to_be64(list[i * 4 + 3].dma_addr); 123 sg_ptr++; 124 } 125 components = buf_count % 4; 126 127 switch (components) { 128 case 3: 129 sg_ptr->len2 = cpu_to_be16(list[i * 4 + 2].size); 130 sg_ptr->ptr2 = cpu_to_be64(list[i * 4 + 2].dma_addr); 131 fallthrough; 132 case 2: 133 sg_ptr->len1 = cpu_to_be16(list[i * 4 + 1].size); 134 sg_ptr->ptr1 = cpu_to_be64(list[i * 4 + 1].dma_addr); 135 fallthrough; 136 case 1: 137 sg_ptr->len0 = cpu_to_be16(list[i * 4 + 0].size); 138 sg_ptr->ptr0 = cpu_to_be64(list[i * 4 + 0].dma_addr); 139 break; 140 default: 141 break; 142 } 143 return ret; 144 145 sg_cleanup: 146 for (j = 0; j < i; j++) { 147 if (list[j].dma_addr) { 148 dma_unmap_single(&pdev->dev, list[j].dma_addr, 149 list[j].size, DMA_BIDIRECTIONAL); 150 } 151 152 list[j].dma_addr = 0; 153 } 154 return ret; 155 } 156 157 static inline struct otx2_cpt_inst_info *info_create(struct pci_dev *pdev, 158 struct otx2_cpt_req_info *req, 159 gfp_t gfp) 160 { 161 int align = OTX2_CPT_DMA_MINALIGN; 162 struct otx2_cpt_inst_info *info; 163 u32 dlen, align_dlen, info_len; 164 u16 g_sz_bytes, s_sz_bytes; 165 u32 total_mem_len; 166 167 if (unlikely(req->in_cnt > OTX2_CPT_MAX_SG_IN_CNT || 168 req->out_cnt > OTX2_CPT_MAX_SG_OUT_CNT)) { 169 dev_err(&pdev->dev, "Error too many sg components\n"); 170 return NULL; 171 } 172 173 g_sz_bytes = ((req->in_cnt + 3) / 4) * 174 sizeof(struct otx2_cpt_sglist_component); 175 s_sz_bytes = ((req->out_cnt + 3) / 4) * 176 sizeof(struct otx2_cpt_sglist_component); 177 178 dlen = g_sz_bytes + s_sz_bytes + SG_LIST_HDR_SIZE; 179 align_dlen = ALIGN(dlen, align); 180 info_len = ALIGN(sizeof(*info), align); 181 total_mem_len = align_dlen + info_len + sizeof(union otx2_cpt_res_s); 182 183 info = kzalloc(total_mem_len, gfp); 184 if (unlikely(!info)) 185 return NULL; 186 187 info->dlen = dlen; 188 info->in_buffer = (u8 *)info + info_len; 189 190 ((u16 *)info->in_buffer)[0] = req->out_cnt; 191 ((u16 *)info->in_buffer)[1] = req->in_cnt; 192 ((u16 *)info->in_buffer)[2] = 0; 193 ((u16 *)info->in_buffer)[3] = 0; 194 cpu_to_be64s((u64 *)info->in_buffer); 195 196 /* Setup gather (input) components */ 197 if (setup_sgio_components(pdev, req->in, req->in_cnt, 198 &info->in_buffer[8])) { 199 dev_err(&pdev->dev, "Failed to setup gather list\n"); 200 goto destroy_info; 201 } 202 203 if (setup_sgio_components(pdev, req->out, req->out_cnt, 204 &info->in_buffer[8 + g_sz_bytes])) { 205 dev_err(&pdev->dev, "Failed to setup scatter list\n"); 206 goto destroy_info; 207 } 208 209 info->dma_len = total_mem_len - info_len; 210 info->dptr_baddr = dma_map_single(&pdev->dev, info->in_buffer, 211 info->dma_len, DMA_BIDIRECTIONAL); 212 if (unlikely(dma_mapping_error(&pdev->dev, info->dptr_baddr))) { 213 dev_err(&pdev->dev, "DMA Mapping failed for cpt req\n"); 214 goto destroy_info; 215 } 216 /* 217 * Get buffer for union otx2_cpt_res_s response 218 * structure and its physical address 219 */ 220 info->completion_addr = info->in_buffer + align_dlen; 221 info->comp_baddr = info->dptr_baddr + align_dlen; 222 223 return info; 224 225 destroy_info: 226 otx2_cpt_info_destroy(pdev, info); 227 return NULL; 228 } 229 230 static int process_request(struct pci_dev *pdev, struct otx2_cpt_req_info *req, 231 struct otx2_cpt_pending_queue *pqueue, 232 struct otx2_cptlf_info *lf) 233 { 234 struct otx2_cptvf_request *cpt_req = &req->req; 235 struct otx2_cpt_pending_entry *pentry = NULL; 236 union otx2_cpt_ctrl_info *ctrl = &req->ctrl; 237 struct otx2_cpt_inst_info *info = NULL; 238 union otx2_cpt_res_s *result = NULL; 239 struct otx2_cpt_iq_command iq_cmd; 240 union otx2_cpt_inst_s cptinst; 241 int retry, ret = 0; 242 u8 resume_sender; 243 gfp_t gfp; 244 245 gfp = (req->areq->flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL : 246 GFP_ATOMIC; 247 if (unlikely(!otx2_cptlf_started(lf->lfs))) 248 return -ENODEV; 249 250 info = info_create(pdev, req, gfp); 251 if (unlikely(!info)) { 252 dev_err(&pdev->dev, "Setting up cpt inst info failed"); 253 return -ENOMEM; 254 } 255 cpt_req->dlen = info->dlen; 256 257 result = info->completion_addr; 258 result->s.compcode = OTX2_CPT_COMPLETION_CODE_INIT; 259 260 spin_lock_bh(&pqueue->lock); 261 pentry = get_free_pending_entry(pqueue, pqueue->qlen); 262 retry = CPT_PENTRY_TIMEOUT / CPT_PENTRY_STEP; 263 while (unlikely(!pentry) && retry--) { 264 spin_unlock_bh(&pqueue->lock); 265 udelay(CPT_PENTRY_STEP); 266 spin_lock_bh(&pqueue->lock); 267 pentry = get_free_pending_entry(pqueue, pqueue->qlen); 268 } 269 270 if (unlikely(!pentry)) { 271 ret = -ENOSPC; 272 goto destroy_info; 273 } 274 275 /* 276 * Check if we are close to filling in entire pending queue, 277 * if so then tell the sender to stop/sleep by returning -EBUSY 278 * We do it only for context which can sleep (GFP_KERNEL) 279 */ 280 if (gfp == GFP_KERNEL && 281 pqueue->pending_count > (pqueue->qlen - CPT_IQ_STOP_MARGIN)) { 282 pentry->resume_sender = true; 283 } else 284 pentry->resume_sender = false; 285 resume_sender = pentry->resume_sender; 286 pqueue->pending_count++; 287 288 pentry->completion_addr = info->completion_addr; 289 pentry->info = info; 290 pentry->callback = req->callback; 291 pentry->areq = req->areq; 292 pentry->busy = true; 293 info->pentry = pentry; 294 info->time_in = jiffies; 295 info->req = req; 296 297 /* Fill in the command */ 298 iq_cmd.cmd.u = 0; 299 iq_cmd.cmd.s.opcode = cpu_to_be16(cpt_req->opcode.flags); 300 iq_cmd.cmd.s.param1 = cpu_to_be16(cpt_req->param1); 301 iq_cmd.cmd.s.param2 = cpu_to_be16(cpt_req->param2); 302 iq_cmd.cmd.s.dlen = cpu_to_be16(cpt_req->dlen); 303 304 /* 64-bit swap for microcode data reads, not needed for addresses*/ 305 cpu_to_be64s(&iq_cmd.cmd.u); 306 iq_cmd.dptr = info->dptr_baddr; 307 iq_cmd.rptr = 0; 308 iq_cmd.cptr.u = 0; 309 iq_cmd.cptr.s.grp = ctrl->s.grp; 310 311 /* Fill in the CPT_INST_S type command for HW interpretation */ 312 otx2_cpt_fill_inst(&cptinst, &iq_cmd, info->comp_baddr); 313 314 /* Print debug info if enabled */ 315 otx2_cpt_dump_sg_list(pdev, req); 316 pr_debug("Cpt_inst_s hexdump (%d bytes)\n", OTX2_CPT_INST_SIZE); 317 print_hex_dump_debug("", 0, 16, 1, &cptinst, OTX2_CPT_INST_SIZE, false); 318 pr_debug("Dptr hexdump (%d bytes)\n", cpt_req->dlen); 319 print_hex_dump_debug("", 0, 16, 1, info->in_buffer, 320 cpt_req->dlen, false); 321 322 /* Send CPT command */ 323 otx2_cpt_send_cmd(&cptinst, 1, lf); 324 325 /* 326 * We allocate and prepare pending queue entry in critical section 327 * together with submitting CPT instruction to CPT instruction queue 328 * to make sure that order of CPT requests is the same in both 329 * pending and instruction queues 330 */ 331 spin_unlock_bh(&pqueue->lock); 332 333 ret = resume_sender ? -EBUSY : -EINPROGRESS; 334 return ret; 335 336 destroy_info: 337 spin_unlock_bh(&pqueue->lock); 338 otx2_cpt_info_destroy(pdev, info); 339 return ret; 340 } 341 342 int otx2_cpt_do_request(struct pci_dev *pdev, struct otx2_cpt_req_info *req, 343 int cpu_num) 344 { 345 struct otx2_cptvf_dev *cptvf = pci_get_drvdata(pdev); 346 struct otx2_cptlfs_info *lfs = &cptvf->lfs; 347 348 return process_request(lfs->pdev, req, &lfs->lf[cpu_num].pqueue, 349 &lfs->lf[cpu_num]); 350 } 351 352 static int cpt_process_ccode(struct pci_dev *pdev, 353 union otx2_cpt_res_s *cpt_status, 354 struct otx2_cpt_inst_info *info, 355 u32 *res_code) 356 { 357 u8 uc_ccode = cpt_status->s.uc_compcode; 358 u8 ccode = cpt_status->s.compcode; 359 360 switch (ccode) { 361 case OTX2_CPT_COMP_E_FAULT: 362 dev_err(&pdev->dev, 363 "Request failed with DMA fault\n"); 364 otx2_cpt_dump_sg_list(pdev, info->req); 365 break; 366 367 case OTX2_CPT_COMP_E_HWERR: 368 dev_err(&pdev->dev, 369 "Request failed with hardware error\n"); 370 otx2_cpt_dump_sg_list(pdev, info->req); 371 break; 372 373 case OTX2_CPT_COMP_E_INSTERR: 374 dev_err(&pdev->dev, 375 "Request failed with instruction error\n"); 376 otx2_cpt_dump_sg_list(pdev, info->req); 377 break; 378 379 case OTX2_CPT_COMP_E_NOTDONE: 380 /* check for timeout */ 381 if (time_after_eq(jiffies, info->time_in + 382 CPT_COMMAND_TIMEOUT * HZ)) 383 dev_warn(&pdev->dev, 384 "Request timed out 0x%p", info->req); 385 else if (info->extra_time < CPT_TIME_IN_RESET_COUNT) { 386 info->time_in = jiffies; 387 info->extra_time++; 388 } 389 return 1; 390 391 case OTX2_CPT_COMP_E_GOOD: 392 /* 393 * Check microcode completion code, it is only valid 394 * when completion code is CPT_COMP_E::GOOD 395 */ 396 if (uc_ccode != OTX2_CPT_UCC_SUCCESS) { 397 /* 398 * If requested hmac is truncated and ucode returns 399 * s/g write length error then we report success 400 * because ucode writes as many bytes of calculated 401 * hmac as available in gather buffer and reports 402 * s/g write length error if number of bytes in gather 403 * buffer is less than full hmac size. 404 */ 405 if (info->req->is_trunc_hmac && 406 uc_ccode == OTX2_CPT_UCC_SG_WRITE_LENGTH) { 407 *res_code = 0; 408 break; 409 } 410 411 dev_err(&pdev->dev, 412 "Request failed with software error code 0x%x\n", 413 cpt_status->s.uc_compcode); 414 otx2_cpt_dump_sg_list(pdev, info->req); 415 break; 416 } 417 /* Request has been processed with success */ 418 *res_code = 0; 419 break; 420 421 default: 422 dev_err(&pdev->dev, 423 "Request returned invalid status %d\n", ccode); 424 break; 425 } 426 return 0; 427 } 428 429 static inline void process_pending_queue(struct pci_dev *pdev, 430 struct otx2_cpt_pending_queue *pqueue) 431 { 432 struct otx2_cpt_pending_entry *resume_pentry = NULL; 433 void (*callback)(int status, void *arg, void *req); 434 struct otx2_cpt_pending_entry *pentry = NULL; 435 union otx2_cpt_res_s *cpt_status = NULL; 436 struct otx2_cpt_inst_info *info = NULL; 437 struct otx2_cpt_req_info *req = NULL; 438 struct crypto_async_request *areq; 439 u32 res_code, resume_index; 440 441 while (1) { 442 spin_lock_bh(&pqueue->lock); 443 pentry = &pqueue->head[pqueue->front]; 444 445 if (WARN_ON(!pentry)) { 446 spin_unlock_bh(&pqueue->lock); 447 break; 448 } 449 450 res_code = -EINVAL; 451 if (unlikely(!pentry->busy)) { 452 spin_unlock_bh(&pqueue->lock); 453 break; 454 } 455 456 if (unlikely(!pentry->callback)) { 457 dev_err(&pdev->dev, "Callback NULL\n"); 458 goto process_pentry; 459 } 460 461 info = pentry->info; 462 if (unlikely(!info)) { 463 dev_err(&pdev->dev, "Pending entry post arg NULL\n"); 464 goto process_pentry; 465 } 466 467 req = info->req; 468 if (unlikely(!req)) { 469 dev_err(&pdev->dev, "Request NULL\n"); 470 goto process_pentry; 471 } 472 473 cpt_status = pentry->completion_addr; 474 if (unlikely(!cpt_status)) { 475 dev_err(&pdev->dev, "Completion address NULL\n"); 476 goto process_pentry; 477 } 478 479 if (cpt_process_ccode(pdev, cpt_status, info, &res_code)) { 480 spin_unlock_bh(&pqueue->lock); 481 return; 482 } 483 info->pdev = pdev; 484 485 process_pentry: 486 /* 487 * Check if we should inform sending side to resume 488 * We do it CPT_IQ_RESUME_MARGIN elements in advance before 489 * pending queue becomes empty 490 */ 491 resume_index = modulo_inc(pqueue->front, pqueue->qlen, 492 CPT_IQ_RESUME_MARGIN); 493 resume_pentry = &pqueue->head[resume_index]; 494 if (resume_pentry && 495 resume_pentry->resume_sender) { 496 resume_pentry->resume_sender = false; 497 callback = resume_pentry->callback; 498 areq = resume_pentry->areq; 499 500 if (callback) { 501 spin_unlock_bh(&pqueue->lock); 502 503 /* 504 * EINPROGRESS is an indication for sending 505 * side that it can resume sending requests 506 */ 507 callback(-EINPROGRESS, areq, info); 508 spin_lock_bh(&pqueue->lock); 509 } 510 } 511 512 callback = pentry->callback; 513 areq = pentry->areq; 514 free_pentry(pentry); 515 516 pqueue->pending_count--; 517 pqueue->front = modulo_inc(pqueue->front, pqueue->qlen, 1); 518 spin_unlock_bh(&pqueue->lock); 519 520 /* 521 * Call callback after current pending entry has been 522 * processed, we don't do it if the callback pointer is 523 * invalid. 524 */ 525 if (callback) 526 callback(res_code, areq, info); 527 } 528 } 529 530 void otx2_cpt_post_process(struct otx2_cptlf_wqe *wqe) 531 { 532 process_pending_queue(wqe->lfs->pdev, 533 &wqe->lfs->lf[wqe->lf_num].pqueue); 534 } 535 536 int otx2_cpt_get_kcrypto_eng_grp_num(struct pci_dev *pdev) 537 { 538 struct otx2_cptvf_dev *cptvf = pci_get_drvdata(pdev); 539 540 return cptvf->lfs.kcrypto_eng_grp_num; 541 } 542