// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause

/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
/*          Kai Shen <kaishen@linux.alibaba.com> */
/* Copyright (c) 2020-2022, Alibaba Group. */

#include "erdma.h"

static void arm_cmdq_cq(struct erdma_cmdq *cmdq)
{
	struct erdma_dev *dev = container_of(cmdq, struct erdma_dev, cmdq);
	u64 db_data = FIELD_PREP(ERDMA_CQDB_CI_MASK, cmdq->cq.ci) |
		      FIELD_PREP(ERDMA_CQDB_ARM_MASK, 1) |
		      FIELD_PREP(ERDMA_CQDB_CMDSN_MASK, cmdq->cq.cmdsn) |
		      FIELD_PREP(ERDMA_CQDB_IDX_MASK, cmdq->cq.cmdsn);

	*cmdq->cq.db_record = db_data;
	writeq(db_data, dev->func_bar + ERDMA_CMDQ_CQDB_REG);

	atomic64_inc(&cmdq->cq.armed_num);
}

static void kick_cmdq_db(struct erdma_cmdq *cmdq)
{
	struct erdma_dev *dev = container_of(cmdq, struct erdma_dev, cmdq);
	u64 db_data = FIELD_PREP(ERDMA_CMD_HDR_WQEBB_INDEX_MASK, cmdq->sq.pi);

	*cmdq->sq.db_record = db_data;
	writeq(db_data, dev->func_bar + ERDMA_CMDQ_SQDB_REG);
}

static struct erdma_comp_wait *get_comp_wait(struct erdma_cmdq *cmdq)
{
	int comp_idx;

	spin_lock(&cmdq->lock);
	comp_idx = find_first_zero_bit(cmdq->comp_wait_bitmap,
				       cmdq->max_outstandings);
	if (comp_idx == cmdq->max_outstandings) {
		spin_unlock(&cmdq->lock);
		return ERR_PTR(-ENOMEM);
	}

	__set_bit(comp_idx, cmdq->comp_wait_bitmap);
	spin_unlock(&cmdq->lock);

	return &cmdq->wait_pool[comp_idx];
}

static void put_comp_wait(struct erdma_cmdq *cmdq,
			  struct erdma_comp_wait *comp_wait)
{
	int used;

	cmdq->wait_pool[comp_wait->ctx_id].cmd_status = ERDMA_CMD_STATUS_INIT;
	spin_lock(&cmdq->lock);
	used = __test_and_clear_bit(comp_wait->ctx_id, cmdq->comp_wait_bitmap);
	spin_unlock(&cmdq->lock);

	WARN_ON(!used);
}

static int erdma_cmdq_wait_res_init(struct erdma_dev *dev,
				    struct erdma_cmdq *cmdq)
{
	int i;

	cmdq->wait_pool =
		devm_kcalloc(&dev->pdev->dev, cmdq->max_outstandings,
			     sizeof(struct erdma_comp_wait), GFP_KERNEL);
	if (!cmdq->wait_pool)
		return -ENOMEM;

	spin_lock_init(&cmdq->lock);
	cmdq->comp_wait_bitmap = devm_bitmap_zalloc(
		&dev->pdev->dev, cmdq->max_outstandings, GFP_KERNEL);
	if (!cmdq->comp_wait_bitmap)
		return -ENOMEM;

	for (i = 0; i < cmdq->max_outstandings; i++) {
		init_completion(&cmdq->wait_pool[i].wait_event);
		cmdq->wait_pool[i].ctx_id = i;
	}

	return 0;
}

static int erdma_cmdq_sq_init(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;
	struct erdma_cmdq_sq *sq = &cmdq->sq;
	u32 buf_size;

	sq->wqebb_cnt = SQEBB_COUNT(ERDMA_CMDQ_SQE_SIZE);
	sq->depth = cmdq->max_outstandings * sq->wqebb_cnt;

	buf_size = sq->depth << SQEBB_SHIFT;

	sq->qbuf =
		dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size),
				   &sq->qbuf_dma_addr, GFP_KERNEL);
	if (!sq->qbuf)
		return -ENOMEM;

	sq->db_record = (u64 *)(sq->qbuf + buf_size);

	spin_lock_init(&sq->lock);

	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_SQ_ADDR_H_REG,
			  upper_32_bits(sq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_SQ_ADDR_L_REG,
			  lower_32_bits(sq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_DEPTH_REG, sq->depth);
	erdma_reg_write64(dev, ERDMA_CMDQ_SQ_DB_HOST_ADDR_REG,
			  sq->qbuf_dma_addr + buf_size);

	return 0;
}

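/* The CQ buffer uses the same layout as the SQ and EQ buffers: the queue
 * entries are followed by a 64-bit doorbell record, which is why
 * WARPPED_BUFSIZE() reserves extra space beyond buf_size. The DMA address
 * of that record (qbuf_dma_addr + buf_size) is programmed into the
 * *_DB_HOST_ADDR_REG register below, presumably so the device can read
 * the most recent doorbell value from host memory.
 */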
static int erdma_cmdq_cq_init(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;
	struct erdma_cmdq_cq *cq = &cmdq->cq;
	u32 buf_size;

	cq->depth = cmdq->sq.depth;
	buf_size = cq->depth << CQE_SHIFT;

	cq->qbuf =
		dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size),
				   &cq->qbuf_dma_addr, GFP_KERNEL | __GFP_ZERO);
	if (!cq->qbuf)
		return -ENOMEM;

	spin_lock_init(&cq->lock);

	cq->db_record = (u64 *)(cq->qbuf + buf_size);

	atomic64_set(&cq->armed_num, 0);

	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_CQ_ADDR_H_REG,
			  upper_32_bits(cq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_CQ_ADDR_L_REG,
			  lower_32_bits(cq->qbuf_dma_addr));
	erdma_reg_write64(dev, ERDMA_CMDQ_CQ_DB_HOST_ADDR_REG,
			  cq->qbuf_dma_addr + buf_size);

	return 0;
}

static int erdma_cmdq_eq_init(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;
	struct erdma_eq *eq = &cmdq->eq;
	u32 buf_size;

	eq->depth = cmdq->max_outstandings;
	buf_size = eq->depth << EQE_SHIFT;

	eq->qbuf =
		dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size),
				   &eq->qbuf_dma_addr, GFP_KERNEL | __GFP_ZERO);
	if (!eq->qbuf)
		return -ENOMEM;

	spin_lock_init(&eq->lock);
	atomic64_set(&eq->event_num, 0);

	eq->db_addr =
		(u64 __iomem *)(dev->func_bar + ERDMA_REGS_CEQ_DB_BASE_REG);
	eq->db_record = (u64 *)(eq->qbuf + buf_size);

	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_ADDR_H_REG,
			  upper_32_bits(eq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_ADDR_L_REG,
			  lower_32_bits(eq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_DEPTH_REG, eq->depth);
	erdma_reg_write64(dev, ERDMA_CMDQ_EQ_DB_HOST_ADDR_REG,
			  eq->qbuf_dma_addr + buf_size);

	return 0;
}

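/* Bring up the command queue: allocate the wait-context pool and the
 * SQ/CQ/EQ buffers, program their base addresses and depths into the
 * device, then set the INIT bit and poll the device status register
 * until the device reports init done (or time out).
 */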
int erdma_cmdq_init(struct erdma_dev *dev)
{
	int err, i;
	struct erdma_cmdq *cmdq = &dev->cmdq;
	u32 sts, ctrl;

	cmdq->max_outstandings = ERDMA_CMDQ_MAX_OUTSTANDING;
	cmdq->use_event = false;

	sema_init(&cmdq->credits, cmdq->max_outstandings);

	err = erdma_cmdq_wait_res_init(dev, cmdq);
	if (err)
		return err;

	err = erdma_cmdq_sq_init(dev);
	if (err)
		return err;

	err = erdma_cmdq_cq_init(dev);
	if (err)
		goto err_destroy_sq;

	err = erdma_cmdq_eq_init(dev);
	if (err)
		goto err_destroy_cq;

	ctrl = FIELD_PREP(ERDMA_REG_DEV_CTRL_INIT_MASK, 1);
	erdma_reg_write32(dev, ERDMA_REGS_DEV_CTRL_REG, ctrl);

	for (i = 0; i < ERDMA_WAIT_DEV_DONE_CNT; i++) {
		sts = erdma_reg_read32_filed(dev, ERDMA_REGS_DEV_ST_REG,
					     ERDMA_REG_DEV_ST_INIT_DONE_MASK);
		if (sts)
			break;

		msleep(ERDMA_REG_ACCESS_WAIT_MS);
	}

	if (i == ERDMA_WAIT_DEV_DONE_CNT) {
		dev_err(&dev->pdev->dev, "wait init done failed.\n");
		err = -ETIMEDOUT;
		goto err_destroy_eq;
	}

	set_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);

	return 0;

err_destroy_eq:
	dma_free_coherent(&dev->pdev->dev,
			  (cmdq->eq.depth << EQE_SHIFT) +
				  ERDMA_EXTRA_BUFFER_SIZE,
			  cmdq->eq.qbuf, cmdq->eq.qbuf_dma_addr);

err_destroy_cq:
	dma_free_coherent(&dev->pdev->dev,
			  (cmdq->cq.depth << CQE_SHIFT) +
				  ERDMA_EXTRA_BUFFER_SIZE,
			  cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr);

err_destroy_sq:
	dma_free_coherent(&dev->pdev->dev,
			  (cmdq->sq.depth << SQEBB_SHIFT) +
				  ERDMA_EXTRA_BUFFER_SIZE,
			  cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr);

	return err;
}

void erdma_finish_cmdq_init(struct erdma_dev *dev)
{
	/* After the device is initialized successfully, switch the cmdq to
	 * event mode.
	 */
	dev->cmdq.use_event = true;
	arm_cmdq_cq(&dev->cmdq);
}

void erdma_cmdq_destroy(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;

	clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);

	dma_free_coherent(&dev->pdev->dev,
			  (cmdq->eq.depth << EQE_SHIFT) +
				  ERDMA_EXTRA_BUFFER_SIZE,
			  cmdq->eq.qbuf, cmdq->eq.qbuf_dma_addr);
	dma_free_coherent(&dev->pdev->dev,
			  (cmdq->sq.depth << SQEBB_SHIFT) +
				  ERDMA_EXTRA_BUFFER_SIZE,
			  cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr);
	dma_free_coherent(&dev->pdev->dev,
			  (cmdq->cq.depth << CQE_SHIFT) +
				  ERDMA_EXTRA_BUFFER_SIZE,
			  cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr);
}

static void *get_next_valid_cmdq_cqe(struct erdma_cmdq *cmdq)
{
	__be32 *cqe = get_queue_entry(cmdq->cq.qbuf, cmdq->cq.ci,
				      cmdq->cq.depth, CQE_SHIFT);
	u32 owner = FIELD_GET(ERDMA_CQE_HDR_OWNER_MASK,
			      __be32_to_cpu(READ_ONCE(*cqe)));

	return owner ^ !!(cmdq->cq.ci & cmdq->cq.depth) ? cqe : NULL;
}

static void push_cmdq_sqe(struct erdma_cmdq *cmdq, u64 *req, size_t req_len,
			  struct erdma_comp_wait *comp_wait)
{
	__le64 *wqe;
	u64 hdr = *req;

	comp_wait->cmd_status = ERDMA_CMD_STATUS_ISSUED;
	reinit_completion(&comp_wait->wait_event);
	comp_wait->sq_pi = cmdq->sq.pi;

	wqe = get_queue_entry(cmdq->sq.qbuf, cmdq->sq.pi, cmdq->sq.depth,
			      SQEBB_SHIFT);
	memcpy(wqe, req, req_len);

	cmdq->sq.pi += cmdq->sq.wqebb_cnt;
	hdr |= FIELD_PREP(ERDMA_CMD_HDR_WQEBB_INDEX_MASK, cmdq->sq.pi) |
	       FIELD_PREP(ERDMA_CMD_HDR_CONTEXT_COOKIE_MASK,
			  comp_wait->ctx_id) |
	       FIELD_PREP(ERDMA_CMD_HDR_WQEBB_CNT_MASK, cmdq->sq.wqebb_cnt - 1);
	*wqe = cpu_to_le64(hdr);

	kick_cmdq_db(cmdq);
}

static int erdma_poll_single_cmd_completion(struct erdma_cmdq *cmdq)
{
	struct erdma_comp_wait *comp_wait;
	u32 hdr0, sqe_idx;
	__be32 *cqe;
	u16 ctx_id;
	u64 *sqe;
	int i;

	cqe = get_next_valid_cmdq_cqe(cmdq);
	if (!cqe)
		return -EAGAIN;

	cmdq->cq.ci++;

	dma_rmb();
	hdr0 = __be32_to_cpu(*cqe);
	sqe_idx = __be32_to_cpu(*(cqe + 1));

	sqe = get_queue_entry(cmdq->sq.qbuf, sqe_idx, cmdq->sq.depth,
			      SQEBB_SHIFT);
	ctx_id = FIELD_GET(ERDMA_CMD_HDR_CONTEXT_COOKIE_MASK, *sqe);
	comp_wait = &cmdq->wait_pool[ctx_id];
	if (comp_wait->cmd_status != ERDMA_CMD_STATUS_ISSUED)
		return -EIO;

	comp_wait->cmd_status = ERDMA_CMD_STATUS_FINISHED;
	comp_wait->comp_status = FIELD_GET(ERDMA_CQE_HDR_SYNDROME_MASK, hdr0);
	cmdq->sq.ci += cmdq->sq.wqebb_cnt;

	for (i = 0; i < 4; i++)
		comp_wait->comp_data[i] = __be32_to_cpu(*(cqe + 2 + i));

	if (cmdq->use_event)
		complete(&comp_wait->wait_event);

	return 0;
}

static void erdma_polling_cmd_completions(struct erdma_cmdq *cmdq)
{
	unsigned long flags;
	u16 comp_num;

	spin_lock_irqsave(&cmdq->cq.lock, flags);

	/* At most max_outstandings commands can be in flight, so we can
	 * never find more than that many completions in one pass.
	 */
	for (comp_num = 0; comp_num < cmdq->max_outstandings; comp_num++)
		if (erdma_poll_single_cmd_completion(cmdq))
			break;

	if (comp_num && cmdq->use_event)
		arm_cmdq_cq(cmdq);

	spin_unlock_irqrestore(&cmdq->cq.lock, flags);
}

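/* Handle command-completion events in event mode (expected to be invoked
 * from the cmdq EQ interrupt path): drain pending EQEs, bump the command
 * sequence number, reap the completed CQEs, and re-notify the EQ.
 */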
void erdma_cmdq_completion_handler(struct erdma_cmdq *cmdq)
{
	int got_event = 0;

	if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state) ||
	    !cmdq->use_event)
		return;

	while (get_next_valid_eqe(&cmdq->eq)) {
		cmdq->eq.ci++;
		got_event++;
	}

	if (got_event) {
		cmdq->cq.cmdsn++;
		erdma_polling_cmd_completions(cmdq);
	}

	notify_eq(&cmdq->eq);
}

static int erdma_poll_cmd_completion(struct erdma_comp_wait *comp_ctx,
				     struct erdma_cmdq *cmdq, u32 timeout)
{
	unsigned long comp_timeout = jiffies + msecs_to_jiffies(timeout);

	while (1) {
		erdma_polling_cmd_completions(cmdq);
		if (comp_ctx->cmd_status != ERDMA_CMD_STATUS_ISSUED)
			break;

		if (time_is_before_jiffies(comp_timeout))
			return -ETIME;

		msleep(20);
	}

	return 0;
}

static int erdma_wait_cmd_completion(struct erdma_comp_wait *comp_ctx,
				     struct erdma_cmdq *cmdq, u32 timeout)
{
	unsigned long flags = 0;

	wait_for_completion_timeout(&comp_ctx->wait_event,
				    msecs_to_jiffies(timeout));

	if (unlikely(comp_ctx->cmd_status != ERDMA_CMD_STATUS_FINISHED)) {
		spin_lock_irqsave(&cmdq->cq.lock, flags);
		comp_ctx->cmd_status = ERDMA_CMD_STATUS_TIMEOUT;
		spin_unlock_irqrestore(&cmdq->cq.lock, flags);
		return -ETIME;
	}

	return 0;
}

void erdma_cmdq_build_reqhdr(u64 *hdr, u32 mod, u32 op)
{
	*hdr = FIELD_PREP(ERDMA_CMD_HDR_SUB_MOD_MASK, mod) |
	       FIELD_PREP(ERDMA_CMD_HDR_OPCODE_MASK, op);
}

int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, void *req, u32 req_size,
			u64 *resp0, u64 *resp1)
{
	struct erdma_comp_wait *comp_wait;
	int ret;

	if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state))
		return -ENODEV;

	down(&cmdq->credits);

	comp_wait = get_comp_wait(cmdq);
	if (IS_ERR(comp_wait)) {
		clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);
		set_bit(ERDMA_CMDQ_STATE_CTX_ERR_BIT, &cmdq->state);
		up(&cmdq->credits);
		return PTR_ERR(comp_wait);
	}

	spin_lock(&cmdq->sq.lock);
	push_cmdq_sqe(cmdq, req, req_size, comp_wait);
	spin_unlock(&cmdq->sq.lock);

	if (cmdq->use_event)
		ret = erdma_wait_cmd_completion(comp_wait, cmdq,
						ERDMA_CMDQ_TIMEOUT_MS);
	else
		ret = erdma_poll_cmd_completion(comp_wait, cmdq,
						ERDMA_CMDQ_TIMEOUT_MS);

	if (ret) {
		set_bit(ERDMA_CMDQ_STATE_TIMEOUT_BIT, &cmdq->state);
		clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);
		goto out;
	}

	if (comp_wait->comp_status)
		ret = -EIO;

	if (resp0 && resp1) {
		*resp0 = *((u64 *)&comp_wait->comp_data[0]);
		*resp1 = *((u64 *)&comp_wait->comp_data[2]);
	}
	put_comp_wait(cmdq, comp_wait);

out:
	up(&cmdq->credits);

	return ret;
}