// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause

/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
/*          Kai Shen <kaishen@linux.alibaba.com> */
/* Copyright (c) 2020-2022, Alibaba Group. */

#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/types.h>

#include "erdma.h"
#include "erdma_hw.h"
#include "erdma_verbs.h"

static void arm_cmdq_cq(struct erdma_cmdq *cmdq)
{
	struct erdma_dev *dev = container_of(cmdq, struct erdma_dev, cmdq);
	u64 db_data = FIELD_PREP(ERDMA_CQDB_CI_MASK, cmdq->cq.ci) |
		      FIELD_PREP(ERDMA_CQDB_ARM_MASK, 1) |
		      FIELD_PREP(ERDMA_CQDB_CMDSN_MASK, cmdq->cq.cmdsn) |
		      FIELD_PREP(ERDMA_CQDB_IDX_MASK, cmdq->cq.cmdsn);

	*cmdq->cq.db_record = db_data;
	writeq(db_data, dev->func_bar + ERDMA_CMDQ_CQDB_REG);

	atomic64_inc(&cmdq->cq.armed_num);
}

static void kick_cmdq_db(struct erdma_cmdq *cmdq)
{
	struct erdma_dev *dev = container_of(cmdq, struct erdma_dev, cmdq);
	u64 db_data = FIELD_PREP(ERDMA_CMD_HDR_WQEBB_INDEX_MASK, cmdq->sq.pi);

	*cmdq->sq.db_record = db_data;
	writeq(db_data, dev->func_bar + ERDMA_CMDQ_SQDB_REG);
}

static struct erdma_comp_wait *get_comp_wait(struct erdma_cmdq *cmdq)
{
	int comp_idx;

	spin_lock(&cmdq->lock);
	comp_idx = find_first_zero_bit(cmdq->comp_wait_bitmap,
				       cmdq->max_outstandings);
	if (comp_idx == cmdq->max_outstandings) {
		spin_unlock(&cmdq->lock);
		return ERR_PTR(-ENOMEM);
	}

	__set_bit(comp_idx, cmdq->comp_wait_bitmap);
	spin_unlock(&cmdq->lock);

	return &cmdq->wait_pool[comp_idx];
}

static void put_comp_wait(struct erdma_cmdq *cmdq,
			  struct erdma_comp_wait *comp_wait)
{
	int used;

	cmdq->wait_pool[comp_wait->ctx_id].cmd_status = ERDMA_CMD_STATUS_INIT;
	spin_lock(&cmdq->lock);
	used = __test_and_clear_bit(comp_wait->ctx_id, cmdq->comp_wait_bitmap);
	spin_unlock(&cmdq->lock);

	WARN_ON(!used);
}

static int erdma_cmdq_wait_res_init(struct erdma_dev *dev,
				    struct erdma_cmdq *cmdq)
{
	int i;

	cmdq->wait_pool =
		devm_kcalloc(&dev->pdev->dev, cmdq->max_outstandings,
			     sizeof(struct erdma_comp_wait), GFP_KERNEL);
	if (!cmdq->wait_pool)
		return -ENOMEM;

	spin_lock_init(&cmdq->lock);
	cmdq->comp_wait_bitmap = devm_bitmap_zalloc(
		&dev->pdev->dev, cmdq->max_outstandings, GFP_KERNEL);
	if (!cmdq->comp_wait_bitmap)
		return -ENOMEM;

	for (i = 0; i < cmdq->max_outstandings; i++) {
		init_completion(&cmdq->wait_pool[i].wait_event);
		cmdq->wait_pool[i].ctx_id = i;
	}

	return 0;
}
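/*
 * Note on queue buffer layout (a sketch, inferred from this file only):
 * each of the SQ/CQ/EQ init helpers below allocates one DMA-coherent
 * buffer of WARPPED_BUFSIZE(buf_size) bytes and points db_record at its
 * tail, so the layout is presumably:
 *
 *   qbuf_dma_addr                     qbuf_dma_addr + buf_size
 *   |<------ depth << shift -------->|<--- doorbell record --->|
 *   |        queue entries           | read back by HW via the |
 *   |                                | *_DB_HOST_ADDR_REG regs |
 *
 * The exact tail padding is defined by WARPPED_BUFSIZE() in erdma_hw.h,
 * which is not shown here.
 */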
static int erdma_cmdq_sq_init(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;
	struct erdma_cmdq_sq *sq = &cmdq->sq;
	u32 buf_size;

	sq->wqebb_cnt = SQEBB_COUNT(ERDMA_CMDQ_SQE_SIZE);
	sq->depth = cmdq->max_outstandings * sq->wqebb_cnt;

	buf_size = sq->depth << SQEBB_SHIFT;

	sq->qbuf =
		dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size),
				   &sq->qbuf_dma_addr, GFP_KERNEL);
	if (!sq->qbuf)
		return -ENOMEM;

	sq->db_record = (u64 *)(sq->qbuf + buf_size);

	spin_lock_init(&sq->lock);

	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_SQ_ADDR_H_REG,
			  upper_32_bits(sq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_SQ_ADDR_L_REG,
			  lower_32_bits(sq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_DEPTH_REG, sq->depth);
	erdma_reg_write64(dev, ERDMA_CMDQ_SQ_DB_HOST_ADDR_REG,
			  sq->qbuf_dma_addr + buf_size);

	return 0;
}

static int erdma_cmdq_cq_init(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;
	struct erdma_cmdq_cq *cq = &cmdq->cq;
	u32 buf_size;

	cq->depth = cmdq->sq.depth;
	buf_size = cq->depth << CQE_SHIFT;

	cq->qbuf =
		dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size),
				   &cq->qbuf_dma_addr, GFP_KERNEL | __GFP_ZERO);
	if (!cq->qbuf)
		return -ENOMEM;

	spin_lock_init(&cq->lock);

	cq->db_record = (u64 *)(cq->qbuf + buf_size);

	atomic64_set(&cq->armed_num, 0);

	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_CQ_ADDR_H_REG,
			  upper_32_bits(cq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_CQ_ADDR_L_REG,
			  lower_32_bits(cq->qbuf_dma_addr));
	erdma_reg_write64(dev, ERDMA_CMDQ_CQ_DB_HOST_ADDR_REG,
			  cq->qbuf_dma_addr + buf_size);

	return 0;
}

static int erdma_cmdq_eq_init(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;
	struct erdma_eq *eq = &cmdq->eq;
	u32 buf_size;

	eq->depth = cmdq->max_outstandings;
	buf_size = eq->depth << EQE_SHIFT;

	eq->qbuf =
		dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size),
				   &eq->qbuf_dma_addr, GFP_KERNEL | __GFP_ZERO);
	if (!eq->qbuf)
		return -ENOMEM;

	spin_lock_init(&eq->lock);
	atomic64_set(&eq->event_num, 0);

	eq->db_addr =
		(u64 __iomem *)(dev->func_bar + ERDMA_REGS_CEQ_DB_BASE_REG);
	eq->db_record = (u64 *)(eq->qbuf + buf_size);

	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_ADDR_H_REG,
			  upper_32_bits(eq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_ADDR_L_REG,
			  lower_32_bits(eq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_DEPTH_REG, eq->depth);
	erdma_reg_write64(dev, ERDMA_CMDQ_EQ_DB_HOST_ADDR_REG,
			  eq->qbuf_dma_addr + buf_size);

	return 0;
}
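/*
 * Bring-up flow as far as this file shows it; the probe path itself lives
 * elsewhere in the driver, so the ordering below is an assumption:
 *
 *	erdma_cmdq_init(dev);		// queues ready, polling mode
 *	...				// presumably: enable the cmdq EQ irq
 *	erdma_finish_cmdq_init(dev);	// switch to event mode, arm the CQ
 *
 * Until erdma_finish_cmdq_init() runs, use_event is false and
 * erdma_post_cmd_wait() busy-polls for completions.
 */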
int erdma_cmdq_init(struct erdma_dev *dev)
{
	int err, i;
	struct erdma_cmdq *cmdq = &dev->cmdq;
	u32 sts, ctrl;

	cmdq->max_outstandings = ERDMA_CMDQ_MAX_OUTSTANDING;
	cmdq->use_event = false;

	sema_init(&cmdq->credits, cmdq->max_outstandings);

	err = erdma_cmdq_wait_res_init(dev, cmdq);
	if (err)
		return err;

	err = erdma_cmdq_sq_init(dev);
	if (err)
		return err;

	err = erdma_cmdq_cq_init(dev);
	if (err)
		goto err_destroy_sq;

	err = erdma_cmdq_eq_init(dev);
	if (err)
		goto err_destroy_cq;

	ctrl = FIELD_PREP(ERDMA_REG_DEV_CTRL_INIT_MASK, 1);
	erdma_reg_write32(dev, ERDMA_REGS_DEV_CTRL_REG, ctrl);

	for (i = 0; i < ERDMA_WAIT_DEV_DONE_CNT; i++) {
		sts = erdma_reg_read32_filed(dev, ERDMA_REGS_DEV_ST_REG,
					     ERDMA_REG_DEV_ST_INIT_DONE_MASK);
		if (sts)
			break;

		msleep(ERDMA_REG_ACCESS_WAIT_MS);
	}

	if (i == ERDMA_WAIT_DEV_DONE_CNT) {
		dev_err(&dev->pdev->dev, "wait init done failed.\n");
		err = -ETIMEDOUT;
		goto err_destroy_eq;
	}

	set_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);

	return 0;

err_destroy_eq:
	dma_free_coherent(&dev->pdev->dev,
			  (cmdq->eq.depth << EQE_SHIFT) +
				  ERDMA_EXTRA_BUFFER_SIZE,
			  cmdq->eq.qbuf, cmdq->eq.qbuf_dma_addr);

err_destroy_cq:
	dma_free_coherent(&dev->pdev->dev,
			  (cmdq->cq.depth << CQE_SHIFT) +
				  ERDMA_EXTRA_BUFFER_SIZE,
			  cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr);

err_destroy_sq:
	dma_free_coherent(&dev->pdev->dev,
			  (cmdq->sq.depth << SQEBB_SHIFT) +
				  ERDMA_EXTRA_BUFFER_SIZE,
			  cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr);

	return err;
}

void erdma_finish_cmdq_init(struct erdma_dev *dev)
{
	/* after device init successfully, change cmdq to event mode. */
	dev->cmdq.use_event = true;
	arm_cmdq_cq(&dev->cmdq);
}

void erdma_cmdq_destroy(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;

	clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);

	dma_free_coherent(&dev->pdev->dev,
			  (cmdq->eq.depth << EQE_SHIFT) +
				  ERDMA_EXTRA_BUFFER_SIZE,
			  cmdq->eq.qbuf, cmdq->eq.qbuf_dma_addr);
	dma_free_coherent(&dev->pdev->dev,
			  (cmdq->sq.depth << SQEBB_SHIFT) +
				  ERDMA_EXTRA_BUFFER_SIZE,
			  cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr);
	dma_free_coherent(&dev->pdev->dev,
			  (cmdq->cq.depth << CQE_SHIFT) +
				  ERDMA_EXTRA_BUFFER_SIZE,
			  cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr);
}
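/*
 * CQE validity check, as I read it: the hardware toggles the OWNER bit in
 * each CQE every time it wraps around the (power-of-two sized) queue, and
 * "ci & depth" recovers the software-side phase from the monotonically
 * increasing consumer index. A worked example, assuming depth = 8:
 *
 *	ci = 0..7  -> ci & depth == 0, CQEs with OWNER == 1 are valid
 *	ci = 8..15 -> ci & depth == 8, CQEs with OWNER == 0 are valid
 *
 * so a CQE is consumable exactly when OWNER differs from the phase bit,
 * which is what the owner ^ !!(ci & depth) expression below tests.
 */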
static void *get_next_valid_cmdq_cqe(struct erdma_cmdq *cmdq)
{
	__be32 *cqe = get_queue_entry(cmdq->cq.qbuf, cmdq->cq.ci,
				      cmdq->cq.depth, CQE_SHIFT);
	u32 owner = FIELD_GET(ERDMA_CQE_HDR_OWNER_MASK,
			      __be32_to_cpu(READ_ONCE(*cqe)));

	return owner ^ !!(cmdq->cq.ci & cmdq->cq.depth) ? cqe : NULL;
}

static void push_cmdq_sqe(struct erdma_cmdq *cmdq, u64 *req, size_t req_len,
			  struct erdma_comp_wait *comp_wait)
{
	__le64 *wqe;
	u64 hdr = *req;

	comp_wait->cmd_status = ERDMA_CMD_STATUS_ISSUED;
	reinit_completion(&comp_wait->wait_event);
	comp_wait->sq_pi = cmdq->sq.pi;

	wqe = get_queue_entry(cmdq->sq.qbuf, cmdq->sq.pi, cmdq->sq.depth,
			      SQEBB_SHIFT);
	memcpy(wqe, req, req_len);

	cmdq->sq.pi += cmdq->sq.wqebb_cnt;
	hdr |= FIELD_PREP(ERDMA_CMD_HDR_WQEBB_INDEX_MASK, cmdq->sq.pi) |
	       FIELD_PREP(ERDMA_CMD_HDR_CONTEXT_COOKIE_MASK,
			  comp_wait->ctx_id) |
	       FIELD_PREP(ERDMA_CMD_HDR_WQEBB_CNT_MASK, cmdq->sq.wqebb_cnt - 1);
	*wqe = cpu_to_le64(hdr);

	kick_cmdq_db(cmdq);
}

static int erdma_poll_single_cmd_completion(struct erdma_cmdq *cmdq)
{
	struct erdma_comp_wait *comp_wait;
	u32 hdr0, sqe_idx;
	__be32 *cqe;
	u16 ctx_id;
	u64 *sqe;
	int i;

	cqe = get_next_valid_cmdq_cqe(cmdq);
	if (!cqe)
		return -EAGAIN;

	cmdq->cq.ci++;

	dma_rmb();
	hdr0 = __be32_to_cpu(*cqe);
	sqe_idx = __be32_to_cpu(*(cqe + 1));

	sqe = get_queue_entry(cmdq->sq.qbuf, sqe_idx, cmdq->sq.depth,
			      SQEBB_SHIFT);
	ctx_id = FIELD_GET(ERDMA_CMD_HDR_CONTEXT_COOKIE_MASK, *sqe);
	comp_wait = &cmdq->wait_pool[ctx_id];
	if (comp_wait->cmd_status != ERDMA_CMD_STATUS_ISSUED)
		return -EIO;

	comp_wait->cmd_status = ERDMA_CMD_STATUS_FINISHED;
	comp_wait->comp_status = FIELD_GET(ERDMA_CQE_HDR_SYNDROME_MASK, hdr0);
	cmdq->sq.ci += cmdq->sq.wqebb_cnt;

	for (i = 0; i < 4; i++)
		comp_wait->comp_data[i] = __be32_to_cpu(*(cqe + 2 + i));

	if (cmdq->use_event)
		complete(&comp_wait->wait_event);

	return 0;
}

static void erdma_polling_cmd_completions(struct erdma_cmdq *cmdq)
{
	unsigned long flags;
	u16 comp_num;

	spin_lock_irqsave(&cmdq->cq.lock, flags);

	/* There can be at most max_outstandings commands in flight, so at
	 * most that many completions to reap in one pass.
	 */
	for (comp_num = 0; comp_num < cmdq->max_outstandings; comp_num++)
		if (erdma_poll_single_cmd_completion(cmdq))
			break;

	if (comp_num && cmdq->use_event)
		arm_cmdq_cq(cmdq);

	spin_unlock_irqrestore(&cmdq->cq.lock, flags);
}

void erdma_cmdq_completion_handler(struct erdma_cmdq *cmdq)
{
	int got_event = 0;

	if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state) ||
	    !cmdq->use_event)
		return;

	while (get_next_valid_eqe(&cmdq->eq)) {
		cmdq->eq.ci++;
		got_event++;
	}

	if (got_event) {
		cmdq->cq.cmdsn++;
		erdma_polling_cmd_completions(cmdq);
	}

	notify_eq(&cmdq->eq);
}

static int erdma_poll_cmd_completion(struct erdma_comp_wait *comp_ctx,
				     struct erdma_cmdq *cmdq, u32 timeout)
{
	unsigned long comp_timeout = jiffies + msecs_to_jiffies(timeout);

	while (1) {
		erdma_polling_cmd_completions(cmdq);
		if (comp_ctx->cmd_status != ERDMA_CMD_STATUS_ISSUED)
			break;

		if (time_is_before_jiffies(comp_timeout))
			return -ETIME;

		msleep(20);
	}

	return 0;
}

static int erdma_wait_cmd_completion(struct erdma_comp_wait *comp_ctx,
				     struct erdma_cmdq *cmdq, u32 timeout)
{
	unsigned long flags = 0;

	wait_for_completion_timeout(&comp_ctx->wait_event,
				    msecs_to_jiffies(timeout));

	if (unlikely(comp_ctx->cmd_status != ERDMA_CMD_STATUS_FINISHED)) {
		spin_lock_irqsave(&cmdq->cq.lock, flags);
		comp_ctx->cmd_status = ERDMA_CMD_STATUS_TIMEOUT;
		spin_unlock_irqrestore(&cmdq->cq.lock, flags);
		return -ETIME;
	}

	return 0;
}

void erdma_cmdq_build_reqhdr(u64 *hdr, u32 mod, u32 op)
{
	*hdr = FIELD_PREP(ERDMA_CMD_HDR_SUB_MOD_MASK, mod) |
	       FIELD_PREP(ERDMA_CMD_HDR_OPCODE_MASK, op);
}

int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, u64 *req, u32 req_size,
			u64 *resp0, u64 *resp1)
{
	struct erdma_comp_wait *comp_wait;
	int ret;

	if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state))
		return -ENODEV;

	down(&cmdq->credits);

	comp_wait = get_comp_wait(cmdq);
	if (IS_ERR(comp_wait)) {
		clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);
		set_bit(ERDMA_CMDQ_STATE_CTX_ERR_BIT, &cmdq->state);
		up(&cmdq->credits);
		return PTR_ERR(comp_wait);
	}

	spin_lock(&cmdq->sq.lock);
	push_cmdq_sqe(cmdq, req, req_size, comp_wait);
	spin_unlock(&cmdq->sq.lock);

	if (cmdq->use_event)
		ret = erdma_wait_cmd_completion(comp_wait, cmdq,
						ERDMA_CMDQ_TIMEOUT_MS);
	else
		ret = erdma_poll_cmd_completion(comp_wait, cmdq,
						ERDMA_CMDQ_TIMEOUT_MS);

	if (ret) {
		set_bit(ERDMA_CMDQ_STATE_TIMEOUT_BIT, &cmdq->state);
		clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);
		goto out;
	}

	if (comp_wait->comp_status)
		ret = -EIO;

	if (resp0 && resp1) {
		*resp0 = *((u64 *)&comp_wait->comp_data[0]);
		*resp1 = *((u64 *)&comp_wait->comp_data[2]);
	}
	put_comp_wait(cmdq, comp_wait);

out:
	up(&cmdq->credits);

	return ret;
}
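/*
 * Usage sketch (illustrative only, not part of the driver): a caller builds
 * a request whose first u64 is the command header, then posts it and waits.
 * The request struct and the sub-module/opcode names below are hypothetical
 * placeholders, not real erdma_hw.h definitions.
 *
 *	struct example_cmdq_req {
 *		u64 hdr;
 *		u32 cfg0;
 *		u32 cfg1;
 *	} req = {};
 *	u64 resp0, resp1;
 *	int ret;
 *
 *	erdma_cmdq_build_reqhdr(&req.hdr, EXAMPLE_SUB_MOD, EXAMPLE_OPCODE);
 *	req.cfg0 = ...;
 *	ret = erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req),
 *				  &resp0, &resp1);
 *
 * push_cmdq_sqe() overwrites the WQEBB_INDEX/CONTEXT_COOKIE/WQEBB_CNT fields
 * of the header it copies in, so callers only fill sub-module and opcode.
 */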