xref: /openbmc/linux/drivers/infiniband/hw/erdma/erdma_cmdq.c (revision de19ec778c7a4ee2fe2112126c5d05a10db2d582)
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause

/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
/*          Kai Shen <kaishen@linux.alibaba.com> */
/* Copyright (c) 2020-2022, Alibaba Group. */

#include "erdma.h"

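/*
 * Ring the CMDQ completion queue doorbell with the ARM bit set so the
 * device reports the next completion, mirroring the doorbell value into
 * the CQ's host-memory db_record first.
 */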
static void arm_cmdq_cq(struct erdma_cmdq *cmdq)
{
	struct erdma_dev *dev = container_of(cmdq, struct erdma_dev, cmdq);
	u64 db_data = FIELD_PREP(ERDMA_CQDB_CI_MASK, cmdq->cq.ci) |
		      FIELD_PREP(ERDMA_CQDB_ARM_MASK, 1) |
		      FIELD_PREP(ERDMA_CQDB_CMDSN_MASK, cmdq->cq.cmdsn) |
		      FIELD_PREP(ERDMA_CQDB_IDX_MASK, cmdq->cq.cmdsn);

	*cmdq->cq.db_record = db_data;
	writeq(db_data, dev->func_bar + ERDMA_CMDQ_CQDB_REG);

	atomic64_inc(&cmdq->cq.armed_num);
}

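/*
 * Ring the CMDQ submission queue doorbell: publish the current producer
 * index to the device, mirroring it into the SQ's db_record first.
 */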
static void kick_cmdq_db(struct erdma_cmdq *cmdq)
{
	struct erdma_dev *dev = container_of(cmdq, struct erdma_dev, cmdq);
	u64 db_data = FIELD_PREP(ERDMA_CMD_HDR_WQEBB_INDEX_MASK, cmdq->sq.pi);

	*cmdq->sq.db_record = db_data;
	writeq(db_data, dev->func_bar + ERDMA_CMDQ_SQDB_REG);
}

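/*
 * Grab a free completion-wait context from the pool, or return
 * ERR_PTR(-ENOMEM) if all max_outstandings contexts are in use.
 */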
static struct erdma_comp_wait *get_comp_wait(struct erdma_cmdq *cmdq)
{
	int comp_idx;

	spin_lock(&cmdq->lock);
	comp_idx = find_first_zero_bit(cmdq->comp_wait_bitmap,
				       cmdq->max_outstandings);
	if (comp_idx == cmdq->max_outstandings) {
		spin_unlock(&cmdq->lock);
		return ERR_PTR(-ENOMEM);
	}

	__set_bit(comp_idx, cmdq->comp_wait_bitmap);
	spin_unlock(&cmdq->lock);

	return &cmdq->wait_pool[comp_idx];
}

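/* Return a completion-wait context to the pool and reset its status. */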
static void put_comp_wait(struct erdma_cmdq *cmdq,
			  struct erdma_comp_wait *comp_wait)
{
	int used;

	cmdq->wait_pool[comp_wait->ctx_id].cmd_status = ERDMA_CMD_STATUS_INIT;
	spin_lock(&cmdq->lock);
	used = __test_and_clear_bit(comp_wait->ctx_id, cmdq->comp_wait_bitmap);
	spin_unlock(&cmdq->lock);

	WARN_ON(!used);
}

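/*
 * Allocate the completion-wait context pool and the bitmap tracking which
 * contexts are in use. Both allocations are devres-managed.
 */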
static int erdma_cmdq_wait_res_init(struct erdma_dev *dev,
				    struct erdma_cmdq *cmdq)
{
	int i;

	cmdq->wait_pool =
		devm_kcalloc(&dev->pdev->dev, cmdq->max_outstandings,
			     sizeof(struct erdma_comp_wait), GFP_KERNEL);
	if (!cmdq->wait_pool)
		return -ENOMEM;

	spin_lock_init(&cmdq->lock);
	cmdq->comp_wait_bitmap = devm_bitmap_zalloc(
		&dev->pdev->dev, cmdq->max_outstandings, GFP_KERNEL);
	if (!cmdq->comp_wait_bitmap)
		return -ENOMEM;

	for (i = 0; i < cmdq->max_outstandings; i++) {
		init_completion(&cmdq->wait_pool[i].wait_event);
		cmdq->wait_pool[i].ctx_id = i;
	}

	return 0;
}

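/*
 * Set up the CMDQ submission queue: allocate its DMA buffer (with extra
 * room after the WQEBB ring for the doorbell record) and program the
 * queue address, depth and doorbell host address into device registers.
 */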
static int erdma_cmdq_sq_init(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;
	struct erdma_cmdq_sq *sq = &cmdq->sq;
	u32 buf_size;

	sq->wqebb_cnt = SQEBB_COUNT(ERDMA_CMDQ_SQE_SIZE);
	sq->depth = cmdq->max_outstandings * sq->wqebb_cnt;

	buf_size = sq->depth << SQEBB_SHIFT;

	sq->qbuf =
		dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size),
				   &sq->qbuf_dma_addr, GFP_KERNEL);
	if (!sq->qbuf)
		return -ENOMEM;

	sq->db_record = (u64 *)(sq->qbuf + buf_size);

	spin_lock_init(&sq->lock);

	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_SQ_ADDR_H_REG,
			  upper_32_bits(sq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_SQ_ADDR_L_REG,
			  lower_32_bits(sq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_DEPTH_REG, sq->depth);
	erdma_reg_write64(dev, ERDMA_CMDQ_SQ_DB_HOST_ADDR_REG,
			  sq->qbuf_dma_addr + buf_size);

	return 0;
}

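/*
 * Set up the CMDQ completion queue. Its depth matches the SQ depth, and
 * the doorbell record lives right after the CQE ring in the same DMA
 * allocation.
 */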
static int erdma_cmdq_cq_init(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;
	struct erdma_cmdq_cq *cq = &cmdq->cq;
	u32 buf_size;

	cq->depth = cmdq->sq.depth;
	buf_size = cq->depth << CQE_SHIFT;

	cq->qbuf =
		dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size),
				   &cq->qbuf_dma_addr, GFP_KERNEL | __GFP_ZERO);
	if (!cq->qbuf)
		return -ENOMEM;

	spin_lock_init(&cq->lock);

	cq->db_record = (u64 *)(cq->qbuf + buf_size);

	atomic64_set(&cq->armed_num, 0);

	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_CQ_ADDR_H_REG,
			  upper_32_bits(cq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_CQ_ADDR_L_REG,
			  lower_32_bits(cq->qbuf_dma_addr));
	erdma_reg_write64(dev, ERDMA_CMDQ_CQ_DB_HOST_ADDR_REG,
			  cq->qbuf_dma_addr + buf_size);

	return 0;
}

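/*
 * Set up the CMDQ event queue used in event mode; its depth equals
 * max_outstandings, matching the maximum number of in-flight commands.
 */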
static int erdma_cmdq_eq_init(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;
	struct erdma_eq *eq = &cmdq->eq;
	u32 buf_size;

	eq->depth = cmdq->max_outstandings;
	buf_size = eq->depth << EQE_SHIFT;

	eq->qbuf =
		dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size),
				   &eq->qbuf_dma_addr, GFP_KERNEL | __GFP_ZERO);
	if (!eq->qbuf)
		return -ENOMEM;

	spin_lock_init(&eq->lock);
	atomic64_set(&eq->event_num, 0);

	eq->db_addr =
		(u64 __iomem *)(dev->func_bar + ERDMA_REGS_CEQ_DB_BASE_REG);
	eq->db_record = (u64 *)(eq->qbuf + buf_size);

	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_ADDR_H_REG,
			  upper_32_bits(eq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_ADDR_L_REG,
			  lower_32_bits(eq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_DEPTH_REG, eq->depth);
	erdma_reg_write64(dev, ERDMA_CMDQ_EQ_DB_HOST_ADDR_REG,
			  eq->qbuf_dma_addr + buf_size);

	return 0;
}

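/*
 * Bring up the command queue: allocate the wait contexts and the SQ, CQ
 * and EQ rings, trigger device initialization and poll the status
 * register until the INIT_DONE bit is set (or time out). The cmdq starts
 * in polling mode; erdma_finish_cmdq_init() switches it to event mode.
 */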
int erdma_cmdq_init(struct erdma_dev *dev)
{
	int err, i;
	struct erdma_cmdq *cmdq = &dev->cmdq;
	u32 sts, ctrl;

	cmdq->max_outstandings = ERDMA_CMDQ_MAX_OUTSTANDING;
	cmdq->use_event = false;

	sema_init(&cmdq->credits, cmdq->max_outstandings);

	err = erdma_cmdq_wait_res_init(dev, cmdq);
	if (err)
		return err;

	err = erdma_cmdq_sq_init(dev);
	if (err)
		return err;

	err = erdma_cmdq_cq_init(dev);
	if (err)
		goto err_destroy_sq;

	err = erdma_cmdq_eq_init(dev);
	if (err)
		goto err_destroy_cq;

	ctrl = FIELD_PREP(ERDMA_REG_DEV_CTRL_INIT_MASK, 1);
	erdma_reg_write32(dev, ERDMA_REGS_DEV_CTRL_REG, ctrl);

	for (i = 0; i < ERDMA_WAIT_DEV_DONE_CNT; i++) {
		sts = erdma_reg_read32_filed(dev, ERDMA_REGS_DEV_ST_REG,
					     ERDMA_REG_DEV_ST_INIT_DONE_MASK);
		if (sts)
			break;

		msleep(ERDMA_REG_ACCESS_WAIT_MS);
	}

	if (i == ERDMA_WAIT_DEV_DONE_CNT) {
		dev_err(&dev->pdev->dev, "wait init done failed.\n");
		err = -ETIMEDOUT;
		goto err_destroy_eq;
	}

	set_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);

	return 0;

err_destroy_eq:
	dma_free_coherent(&dev->pdev->dev,
			  (cmdq->eq.depth << EQE_SHIFT) +
				  ERDMA_EXTRA_BUFFER_SIZE,
			  cmdq->eq.qbuf, cmdq->eq.qbuf_dma_addr);

err_destroy_cq:
	dma_free_coherent(&dev->pdev->dev,
			  (cmdq->cq.depth << CQE_SHIFT) +
				  ERDMA_EXTRA_BUFFER_SIZE,
			  cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr);

err_destroy_sq:
	dma_free_coherent(&dev->pdev->dev,
			  (cmdq->sq.depth << SQEBB_SHIFT) +
				  ERDMA_EXTRA_BUFFER_SIZE,
			  cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr);

	return err;
}

void erdma_finish_cmdq_init(struct erdma_dev *dev)
{
	/* After device init succeeds, switch the cmdq to event mode. */
	dev->cmdq.use_event = true;
	arm_cmdq_cq(&dev->cmdq);
}

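/*
 * Tear down the command queue: mark it as no longer usable and free the
 * EQ, SQ and CQ DMA buffers (each ring plus its extra doorbell-record
 * space).
 */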
void erdma_cmdq_destroy(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;

	clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);

	dma_free_coherent(&dev->pdev->dev,
			  (cmdq->eq.depth << EQE_SHIFT) +
				  ERDMA_EXTRA_BUFFER_SIZE,
			  cmdq->eq.qbuf, cmdq->eq.qbuf_dma_addr);
	dma_free_coherent(&dev->pdev->dev,
			  (cmdq->sq.depth << SQEBB_SHIFT) +
				  ERDMA_EXTRA_BUFFER_SIZE,
			  cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr);
	dma_free_coherent(&dev->pdev->dev,
			  (cmdq->cq.depth << CQE_SHIFT) +
				  ERDMA_EXTRA_BUFFER_SIZE,
			  cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr);
}

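/*
 * Return the CQE at the current consumer index if its owner bit shows the
 * device has produced it for the current pass over the ring, otherwise
 * NULL.
 */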
static void *get_next_valid_cmdq_cqe(struct erdma_cmdq *cmdq)
{
	__be32 *cqe = get_queue_entry(cmdq->cq.qbuf, cmdq->cq.ci,
				      cmdq->cq.depth, CQE_SHIFT);
	u32 owner = FIELD_GET(ERDMA_CQE_HDR_OWNER_MASK,
			      be32_to_cpu(READ_ONCE(*cqe)));

	return owner ^ !!(cmdq->cq.ci & cmdq->cq.depth) ? cqe : NULL;
}

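/*
 * Copy a request into the next SQ slot, patch its header with the updated
 * producer index, the wait-context cookie and the WQEBB count, then ring
 * the SQ doorbell. Called with sq.lock held.
 */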
static void push_cmdq_sqe(struct erdma_cmdq *cmdq, u64 *req, size_t req_len,
			  struct erdma_comp_wait *comp_wait)
{
	__le64 *wqe;
	u64 hdr = *req;

	comp_wait->cmd_status = ERDMA_CMD_STATUS_ISSUED;
	reinit_completion(&comp_wait->wait_event);
	comp_wait->sq_pi = cmdq->sq.pi;

	wqe = get_queue_entry(cmdq->sq.qbuf, cmdq->sq.pi, cmdq->sq.depth,
			      SQEBB_SHIFT);
	memcpy(wqe, req, req_len);

	cmdq->sq.pi += cmdq->sq.wqebb_cnt;
	hdr |= FIELD_PREP(ERDMA_CMD_HDR_WQEBB_INDEX_MASK, cmdq->sq.pi) |
	       FIELD_PREP(ERDMA_CMD_HDR_CONTEXT_COOKIE_MASK,
			  comp_wait->ctx_id) |
	       FIELD_PREP(ERDMA_CMD_HDR_WQEBB_CNT_MASK, cmdq->sq.wqebb_cnt - 1);
	*wqe = cpu_to_le64(hdr);

	kick_cmdq_db(cmdq);
}

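/*
 * Consume one CQE if available: find the issuing wait context through the
 * cookie stored in the SQE header, record the completion status and the
 * 16 bytes of completion data, and wake the waiter in event mode.
 * Returns -EAGAIN when no new CQE is ready. Called with cq.lock held.
 */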
static int erdma_poll_single_cmd_completion(struct erdma_cmdq *cmdq)
{
	struct erdma_comp_wait *comp_wait;
	u32 hdr0, sqe_idx;
	__be32 *cqe;
	u16 ctx_id;
	u64 *sqe;

	cqe = get_next_valid_cmdq_cqe(cmdq);
	if (!cqe)
		return -EAGAIN;

	cmdq->cq.ci++;

	dma_rmb();
	hdr0 = be32_to_cpu(*cqe);
	sqe_idx = be32_to_cpu(*(cqe + 1));

	sqe = get_queue_entry(cmdq->sq.qbuf, sqe_idx, cmdq->sq.depth,
			      SQEBB_SHIFT);
	ctx_id = FIELD_GET(ERDMA_CMD_HDR_CONTEXT_COOKIE_MASK, *sqe);
	comp_wait = &cmdq->wait_pool[ctx_id];
	if (comp_wait->cmd_status != ERDMA_CMD_STATUS_ISSUED)
		return -EIO;

	comp_wait->cmd_status = ERDMA_CMD_STATUS_FINISHED;
	comp_wait->comp_status = FIELD_GET(ERDMA_CQE_HDR_SYNDROME_MASK, hdr0);
	cmdq->sq.ci += cmdq->sq.wqebb_cnt;
	/* Copy the 16B of completion data after the CQE header to the waiter. */
	be32_to_cpu_array(comp_wait->comp_data, cqe + 2, 4);

	if (cmdq->use_event)
		complete(&comp_wait->wait_event);

	return 0;
}

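/*
 * Reap all pending command completions under cq.lock and, in event mode,
 * re-arm the CQ so that further completions raise events.
 */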
static void erdma_polling_cmd_completions(struct erdma_cmdq *cmdq)
{
	unsigned long flags;
	u16 comp_num;

	spin_lock_irqsave(&cmdq->cq.lock, flags);

	/* At most max_outstandings commands can be in flight, so at most
	 * that many completions can be reaped in one pass.
	 */
	for (comp_num = 0; comp_num < cmdq->max_outstandings; comp_num++)
		if (erdma_poll_single_cmd_completion(cmdq))
			break;

	if (comp_num && cmdq->use_event)
		arm_cmdq_cq(cmdq);

	spin_unlock_irqrestore(&cmdq->cq.lock, flags);
}

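/*
 * Event-mode handler for the CMDQ EQ: consume pending EQEs, bump the
 * command sequence number, reap the corresponding completions and notify
 * the EQ.
 */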
void erdma_cmdq_completion_handler(struct erdma_cmdq *cmdq)
{
	int got_event = 0;

	if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state) ||
	    !cmdq->use_event)
		return;

	while (get_next_valid_eqe(&cmdq->eq)) {
		cmdq->eq.ci++;
		got_event++;
	}

	if (got_event) {
		cmdq->cq.cmdsn++;
		erdma_polling_cmd_completions(cmdq);
	}

	notify_eq(&cmdq->eq);
}

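/*
 * Polling-mode wait: reap completions until this command leaves the
 * ISSUED state, sleeping between passes, or fail with -ETIME once the
 * timeout expires.
 */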
static int erdma_poll_cmd_completion(struct erdma_comp_wait *comp_ctx,
				     struct erdma_cmdq *cmdq, u32 timeout)
{
	unsigned long comp_timeout = jiffies + msecs_to_jiffies(timeout);

	while (1) {
		erdma_polling_cmd_completions(cmdq);
		if (comp_ctx->cmd_status != ERDMA_CMD_STATUS_ISSUED)
			break;

		if (time_is_before_jiffies(comp_timeout))
			return -ETIME;

		msleep(20);
	}

	return 0;
}

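/*
 * Event-mode wait: sleep on the context's completion. If the command has
 * not reached FINISHED when the wait returns, mark it TIMEOUT under
 * cq.lock and report -ETIME.
 */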
static int erdma_wait_cmd_completion(struct erdma_comp_wait *comp_ctx,
				     struct erdma_cmdq *cmdq, u32 timeout)
{
	unsigned long flags = 0;

	wait_for_completion_timeout(&comp_ctx->wait_event,
				    msecs_to_jiffies(timeout));

	if (unlikely(comp_ctx->cmd_status != ERDMA_CMD_STATUS_FINISHED)) {
		spin_lock_irqsave(&cmdq->cq.lock, flags);
		comp_ctx->cmd_status = ERDMA_CMD_STATUS_TIMEOUT;
		spin_unlock_irqrestore(&cmdq->cq.lock, flags);
		return -ETIME;
	}

	return 0;
}

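/* Fill the sub-module and opcode fields of a command request header. */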
void erdma_cmdq_build_reqhdr(u64 *hdr, u32 mod, u32 op)
{
	*hdr = FIELD_PREP(ERDMA_CMD_HDR_SUB_MOD_MASK, mod) |
	       FIELD_PREP(ERDMA_CMD_HDR_OPCODE_MASK, op);
}

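/*
 * Post a command and wait synchronously for its completion. The credits
 * semaphore limits the number of in-flight commands to max_outstandings,
 * and the wait is event based or polling based depending on use_event.
 * On timeout the cmdq is marked failed and the wait context is not
 * returned to the pool (the device may still complete it later).
 *
 * Typical usage (illustrative sketch only; the request struct and opcode
 * names below come from the driver's command definitions elsewhere, not
 * from this file):
 *
 *	struct erdma_cmdq_create_cq_req req;
 *
 *	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
 *				CMDQ_OPCODE_CREATE_CQ);
 *	// ... fill in the rest of req ...
 *	err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
 */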
int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, void *req, u32 req_size,
			u64 *resp0, u64 *resp1)
{
	struct erdma_comp_wait *comp_wait;
	int ret;

	if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state))
		return -ENODEV;

	down(&cmdq->credits);

	comp_wait = get_comp_wait(cmdq);
	if (IS_ERR(comp_wait)) {
		clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);
		set_bit(ERDMA_CMDQ_STATE_CTX_ERR_BIT, &cmdq->state);
		up(&cmdq->credits);
		return PTR_ERR(comp_wait);
	}

	spin_lock(&cmdq->sq.lock);
	push_cmdq_sqe(cmdq, req, req_size, comp_wait);
	spin_unlock(&cmdq->sq.lock);

	if (cmdq->use_event)
		ret = erdma_wait_cmd_completion(comp_wait, cmdq,
						ERDMA_CMDQ_TIMEOUT_MS);
	else
		ret = erdma_poll_cmd_completion(comp_wait, cmdq,
						ERDMA_CMDQ_TIMEOUT_MS);

	if (ret) {
		set_bit(ERDMA_CMDQ_STATE_TIMEOUT_BIT, &cmdq->state);
		clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);
		goto out;
	}

	if (comp_wait->comp_status)
		ret = -EIO;

	if (resp0 && resp1) {
		*resp0 = *((u64 *)&comp_wait->comp_data[0]);
		*resp1 = *((u64 *)&comp_wait->comp_data[2]);
	}
	put_comp_wait(cmdq, comp_wait);

out:
	up(&cmdq->credits);

	return ret;
}