// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause

/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
/*          Kai Shen <kaishen@linux.alibaba.com> */
/* Copyright (c) 2020-2022, Alibaba Group. */

#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/types.h>

#include "erdma.h"
#include "erdma_hw.h"
#include "erdma_verbs.h"

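/* Update the CQ doorbell record and ring the cmdq CQ doorbell with the ARM
 * bit set so that subsequent command completions are reported as events
 * (used once the cmdq has been switched to event mode).
 */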
static void arm_cmdq_cq(struct erdma_cmdq *cmdq)
{
	struct erdma_dev *dev = container_of(cmdq, struct erdma_dev, cmdq);
	u64 db_data = FIELD_PREP(ERDMA_CQDB_CI_MASK, cmdq->cq.ci) |
		      FIELD_PREP(ERDMA_CQDB_ARM_MASK, 1) |
		      FIELD_PREP(ERDMA_CQDB_CMDSN_MASK, cmdq->cq.cmdsn) |
		      FIELD_PREP(ERDMA_CQDB_IDX_MASK, cmdq->cq.cmdsn);

	*cmdq->cq.db_record = db_data;
	writeq(db_data, dev->func_bar + ERDMA_CMDQ_CQDB_REG);

	atomic64_inc(&cmdq->cq.armed_num);
}

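/* Publish the current SQ producer index through the doorbell record and
 * ring the cmdq SQ doorbell.
 */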
static void kick_cmdq_db(struct erdma_cmdq *cmdq)
{
	struct erdma_dev *dev = container_of(cmdq, struct erdma_dev, cmdq);
	u64 db_data = FIELD_PREP(ERDMA_CMD_HDR_WQEBB_INDEX_MASK, cmdq->sq.pi);

	*cmdq->sq.db_record = db_data;
	writeq(db_data, dev->func_bar + ERDMA_CMDQ_SQDB_REG);
}

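/* Reserve a free completion-wait context from the pool; returns
 * ERR_PTR(-ENOMEM) when all contexts are in use.
 */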
static struct erdma_comp_wait *get_comp_wait(struct erdma_cmdq *cmdq)
{
	int comp_idx;

	spin_lock(&cmdq->lock);
	comp_idx = find_first_zero_bit(cmdq->comp_wait_bitmap,
				       cmdq->max_outstandings);
	if (comp_idx == cmdq->max_outstandings) {
		spin_unlock(&cmdq->lock);
		return ERR_PTR(-ENOMEM);
	}

	__set_bit(comp_idx, cmdq->comp_wait_bitmap);
	spin_unlock(&cmdq->lock);

	return &cmdq->wait_pool[comp_idx];
}

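/* Reset a completion-wait context and return it to the pool. */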
static void put_comp_wait(struct erdma_cmdq *cmdq,
			  struct erdma_comp_wait *comp_wait)
{
	int used;

	cmdq->wait_pool[comp_wait->ctx_id].cmd_status = ERDMA_CMD_STATUS_INIT;
	spin_lock(&cmdq->lock);
	used = __test_and_clear_bit(comp_wait->ctx_id, cmdq->comp_wait_bitmap);
	spin_unlock(&cmdq->lock);

	WARN_ON(!used);
}

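/* Allocate the completion-wait context pool and its allocation bitmap, one
 * entry per possible outstanding command.
 */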
static int erdma_cmdq_wait_res_init(struct erdma_dev *dev,
				    struct erdma_cmdq *cmdq)
{
	int i;

	cmdq->wait_pool =
		devm_kcalloc(&dev->pdev->dev, cmdq->max_outstandings,
			     sizeof(struct erdma_comp_wait), GFP_KERNEL);
	if (!cmdq->wait_pool)
		return -ENOMEM;

	spin_lock_init(&cmdq->lock);
	cmdq->comp_wait_bitmap = devm_bitmap_zalloc(
		&dev->pdev->dev, cmdq->max_outstandings, GFP_KERNEL);
	if (!cmdq->comp_wait_bitmap)
		return -ENOMEM;

	for (i = 0; i < cmdq->max_outstandings; i++) {
		init_completion(&cmdq->wait_pool[i].wait_event);
		cmdq->wait_pool[i].ctx_id = i;
	}

	return 0;
}

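/* Allocate the cmdq SQ buffer (with its doorbell record area appended) and
 * program the queue address, depth and doorbell host address into the
 * device registers.
 */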
static int erdma_cmdq_sq_init(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;
	struct erdma_cmdq_sq *sq = &cmdq->sq;
	u32 buf_size;

	sq->wqebb_cnt = SQEBB_COUNT(ERDMA_CMDQ_SQE_SIZE);
	sq->depth = cmdq->max_outstandings * sq->wqebb_cnt;

	buf_size = sq->depth << SQEBB_SHIFT;

	sq->qbuf =
		dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size),
				   &sq->qbuf_dma_addr, GFP_KERNEL);
	if (!sq->qbuf)
		return -ENOMEM;

	sq->db_record = (u64 *)(sq->qbuf + buf_size);

	spin_lock_init(&sq->lock);

	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_SQ_ADDR_H_REG,
			  upper_32_bits(sq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_SQ_ADDR_L_REG,
			  lower_32_bits(sq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_DEPTH_REG, sq->depth);
	erdma_reg_write64(dev, ERDMA_CMDQ_SQ_DB_HOST_ADDR_REG,
			  sq->qbuf_dma_addr + buf_size);

	return 0;
}

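/* Allocate the cmdq CQ buffer (with its doorbell record area appended) and
 * program its base address and doorbell host address into the device.
 */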
static int erdma_cmdq_cq_init(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;
	struct erdma_cmdq_cq *cq = &cmdq->cq;
	u32 buf_size;

	cq->depth = cmdq->sq.depth;
	buf_size = cq->depth << CQE_SHIFT;

	cq->qbuf =
		dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size),
				   &cq->qbuf_dma_addr, GFP_KERNEL | __GFP_ZERO);
	if (!cq->qbuf)
		return -ENOMEM;

	spin_lock_init(&cq->lock);

	cq->db_record = (u64 *)(cq->qbuf + buf_size);

	atomic64_set(&cq->armed_num, 0);

	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_CQ_ADDR_H_REG,
			  upper_32_bits(cq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_CQ_ADDR_L_REG,
			  lower_32_bits(cq->qbuf_dma_addr));
	erdma_reg_write64(dev, ERDMA_CMDQ_CQ_DB_HOST_ADDR_REG,
			  cq->qbuf_dma_addr + buf_size);

	return 0;
}

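/* Allocate the cmdq EQ buffer, set up its doorbell and doorbell record, and
 * program its base address and depth into the device.
 */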
static int erdma_cmdq_eq_init(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;
	struct erdma_eq *eq = &cmdq->eq;
	u32 buf_size;

	eq->depth = cmdq->max_outstandings;
	buf_size = eq->depth << EQE_SHIFT;

	eq->qbuf =
		dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size),
				   &eq->qbuf_dma_addr, GFP_KERNEL | __GFP_ZERO);
	if (!eq->qbuf)
		return -ENOMEM;

	spin_lock_init(&eq->lock);
	atomic64_set(&eq->event_num, 0);

	eq->db_addr =
		(u64 __iomem *)(dev->func_bar + ERDMA_REGS_CEQ_DB_BASE_REG);
	eq->db_record = (u64 *)(eq->qbuf + buf_size);

	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_ADDR_H_REG,
			  upper_32_bits(eq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_ADDR_L_REG,
			  lower_32_bits(eq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_DEPTH_REG, eq->depth);
	erdma_reg_write64(dev, ERDMA_CMDQ_EQ_DB_HOST_ADDR_REG,
			  eq->qbuf_dma_addr + buf_size);

	return 0;
}

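/* Set up the command queue (wait contexts, SQ, CQ and EQ), then kick off
 * device initialization and poll the status register until the init-done
 * bit is set or the wait limit is reached.
 */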
int erdma_cmdq_init(struct erdma_dev *dev)
{
	int err, i;
	struct erdma_cmdq *cmdq = &dev->cmdq;
	u32 sts, ctrl;

	cmdq->max_outstandings = ERDMA_CMDQ_MAX_OUTSTANDING;
	cmdq->use_event = false;

	sema_init(&cmdq->credits, cmdq->max_outstandings);

	err = erdma_cmdq_wait_res_init(dev, cmdq);
	if (err)
		return err;

	err = erdma_cmdq_sq_init(dev);
	if (err)
		return err;

	err = erdma_cmdq_cq_init(dev);
	if (err)
		goto err_destroy_sq;

	err = erdma_cmdq_eq_init(dev);
	if (err)
		goto err_destroy_cq;

	ctrl = FIELD_PREP(ERDMA_REG_DEV_CTRL_INIT_MASK, 1);
	erdma_reg_write32(dev, ERDMA_REGS_DEV_CTRL_REG, ctrl);

	for (i = 0; i < ERDMA_WAIT_DEV_DONE_CNT; i++) {
		sts = erdma_reg_read32_filed(dev, ERDMA_REGS_DEV_ST_REG,
					     ERDMA_REG_DEV_ST_INIT_DONE_MASK);
		if (sts)
			break;

		msleep(ERDMA_REG_ACCESS_WAIT_MS);
	}

	if (i == ERDMA_WAIT_DEV_DONE_CNT) {
		dev_err(&dev->pdev->dev, "wait init done failed.\n");
		err = -ETIMEDOUT;
		goto err_destroy_eq;
	}

	set_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);

	return 0;

err_destroy_eq:
	dma_free_coherent(&dev->pdev->dev,
			  (cmdq->eq.depth << EQE_SHIFT) +
				  ERDMA_EXTRA_BUFFER_SIZE,
			  cmdq->eq.qbuf, cmdq->eq.qbuf_dma_addr);

err_destroy_cq:
	dma_free_coherent(&dev->pdev->dev,
			  (cmdq->cq.depth << CQE_SHIFT) +
				  ERDMA_EXTRA_BUFFER_SIZE,
			  cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr);

err_destroy_sq:
	dma_free_coherent(&dev->pdev->dev,
			  (cmdq->sq.depth << SQEBB_SHIFT) +
				  ERDMA_EXTRA_BUFFER_SIZE,
			  cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr);

	return err;
}

void erdma_finish_cmdq_init(struct erdma_dev *dev)
{
	/* After the device has been initialized successfully, switch the
	 * cmdq to event mode.
	 */
	dev->cmdq.use_event = true;
	arm_cmdq_cq(&dev->cmdq);
}

void erdma_cmdq_destroy(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;

	clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);

	dma_free_coherent(&dev->pdev->dev,
			  (cmdq->eq.depth << EQE_SHIFT) +
				  ERDMA_EXTRA_BUFFER_SIZE,
			  cmdq->eq.qbuf, cmdq->eq.qbuf_dma_addr);
	dma_free_coherent(&dev->pdev->dev,
			  (cmdq->sq.depth << SQEBB_SHIFT) +
				  ERDMA_EXTRA_BUFFER_SIZE,
			  cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr);
	dma_free_coherent(&dev->pdev->dev,
			  (cmdq->cq.depth << CQE_SHIFT) +
				  ERDMA_EXTRA_BUFFER_SIZE,
			  cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr);
}

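/* Return the CQE at the current consumer index if its owner bit shows it
 * was written by the hardware in the current pass over the ring, or NULL
 * if no new CQE is available.
 */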
static void *get_next_valid_cmdq_cqe(struct erdma_cmdq *cmdq)
{
	__be32 *cqe = get_queue_entry(cmdq->cq.qbuf, cmdq->cq.ci,
				      cmdq->cq.depth, CQE_SHIFT);
	u32 owner = FIELD_GET(ERDMA_CQE_HDR_OWNER_MASK,
			      __be32_to_cpu(READ_ONCE(*cqe)));

	return owner ^ !!(cmdq->cq.ci & cmdq->cq.depth) ? cqe : NULL;
}

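/* Copy the request into the next SQ entry, encode the WQEBB index, the
 * context cookie and the WQEBB count into the command header, and ring the
 * SQ doorbell. Called with sq.lock held.
 */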
static void push_cmdq_sqe(struct erdma_cmdq *cmdq, u64 *req, size_t req_len,
			  struct erdma_comp_wait *comp_wait)
{
	__le64 *wqe;
	u64 hdr = *req;

	comp_wait->cmd_status = ERDMA_CMD_STATUS_ISSUED;
	reinit_completion(&comp_wait->wait_event);
	comp_wait->sq_pi = cmdq->sq.pi;

	wqe = get_queue_entry(cmdq->sq.qbuf, cmdq->sq.pi, cmdq->sq.depth,
			      SQEBB_SHIFT);
	memcpy(wqe, req, req_len);

	cmdq->sq.pi += cmdq->sq.wqebb_cnt;
	hdr |= FIELD_PREP(ERDMA_CMD_HDR_WQEBB_INDEX_MASK, cmdq->sq.pi) |
	       FIELD_PREP(ERDMA_CMD_HDR_CONTEXT_COOKIE_MASK,
			  comp_wait->ctx_id) |
	       FIELD_PREP(ERDMA_CMD_HDR_WQEBB_CNT_MASK, cmdq->sq.wqebb_cnt - 1);
	*wqe = cpu_to_le64(hdr);

	kick_cmdq_db(cmdq);
}

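/* Consume one CQE: find the originating SQE and its wait context through
 * the context cookie, record the completion status and returned data, and
 * wake the waiter in event mode. Returns -EAGAIN when no new CQE is
 * available.
 */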
static int erdma_poll_single_cmd_completion(struct erdma_cmdq *cmdq)
{
	struct erdma_comp_wait *comp_wait;
	u32 hdr0, sqe_idx;
	__be32 *cqe;
	u16 ctx_id;
	u64 *sqe;
	int i;

	cqe = get_next_valid_cmdq_cqe(cmdq);
	if (!cqe)
		return -EAGAIN;

	cmdq->cq.ci++;

	dma_rmb();
	hdr0 = __be32_to_cpu(*cqe);
	sqe_idx = __be32_to_cpu(*(cqe + 1));

	sqe = get_queue_entry(cmdq->sq.qbuf, sqe_idx, cmdq->sq.depth,
			      SQEBB_SHIFT);
	ctx_id = FIELD_GET(ERDMA_CMD_HDR_CONTEXT_COOKIE_MASK, *sqe);
	comp_wait = &cmdq->wait_pool[ctx_id];
	if (comp_wait->cmd_status != ERDMA_CMD_STATUS_ISSUED)
		return -EIO;

	comp_wait->cmd_status = ERDMA_CMD_STATUS_FINISHED;
	comp_wait->comp_status = FIELD_GET(ERDMA_CQE_HDR_SYNDROME_MASK, hdr0);
	cmdq->sq.ci += cmdq->sq.wqebb_cnt;

	for (i = 0; i < 4; i++)
		comp_wait->comp_data[i] = __be32_to_cpu(*(cqe + 2 + i));

	if (cmdq->use_event)
		complete(&comp_wait->wait_event);

	return 0;
}

static void erdma_polling_cmd_completions(struct erdma_cmdq *cmdq)
{
	unsigned long flags;
	u16 comp_num;

	spin_lock_irqsave(&cmdq->cq.lock, flags);

	/* At most max_outstandings command completions can be pending at
	 * any time, so poll no more than that many CQEs here.
	 */
	for (comp_num = 0; comp_num < cmdq->max_outstandings; comp_num++)
		if (erdma_poll_single_cmd_completion(cmdq))
			break;

	if (comp_num && cmdq->use_event)
		arm_cmdq_cq(cmdq);

	spin_unlock_irqrestore(&cmdq->cq.lock, flags);
}

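/* Handle cmdq EQ events in event mode: consume the pending EQEs, poll the
 * corresponding CQ completions, and notify the EQ again.
 */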
void erdma_cmdq_completion_handler(struct erdma_cmdq *cmdq)
{
	int got_event = 0;

	if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state) ||
	    !cmdq->use_event)
		return;

	while (get_next_valid_eqe(&cmdq->eq)) {
		cmdq->eq.ci++;
		got_event++;
	}

	if (got_event) {
		cmdq->cq.cmdsn++;
		erdma_polling_cmd_completions(cmdq);
	}

	notify_eq(&cmdq->eq);
}

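/* Poll (with sleeps) for a command completion in polling mode, up to the
 * given timeout in milliseconds.
 */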
static int erdma_poll_cmd_completion(struct erdma_comp_wait *comp_ctx,
				     struct erdma_cmdq *cmdq, u32 timeout)
{
	unsigned long comp_timeout = jiffies + msecs_to_jiffies(timeout);

	while (1) {
		erdma_polling_cmd_completions(cmdq);
		if (comp_ctx->cmd_status != ERDMA_CMD_STATUS_ISSUED)
			break;

		if (time_is_before_jiffies(comp_timeout))
			return -ETIME;

		msleep(20);
	}

	return 0;
}

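/* Wait for a command completion in event mode; if it does not arrive in
 * time, mark the context as timed out under cq.lock and return -ETIME.
 */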
static int erdma_wait_cmd_completion(struct erdma_comp_wait *comp_ctx,
				     struct erdma_cmdq *cmdq, u32 timeout)
{
	unsigned long flags = 0;

	wait_for_completion_timeout(&comp_ctx->wait_event,
				    msecs_to_jiffies(timeout));

	if (unlikely(comp_ctx->cmd_status != ERDMA_CMD_STATUS_FINISHED)) {
		spin_lock_irqsave(&cmdq->cq.lock, flags);
		comp_ctx->cmd_status = ERDMA_CMD_STATUS_TIMEOUT;
		spin_unlock_irqrestore(&cmdq->cq.lock, flags);
		return -ETIME;
	}

	return 0;
}

void erdma_cmdq_build_reqhdr(u64 *hdr, u32 mod, u32 op)
{
	*hdr = FIELD_PREP(ERDMA_CMD_HDR_SUB_MOD_MASK, mod) |
	       FIELD_PREP(ERDMA_CMD_HDR_OPCODE_MASK, op);
}

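/* Post a command and wait for its completion, either by waiting for an
 * event (event mode) or by polling the CQ (polling mode). On success, the
 * two 64-bit response values are returned through resp0/resp1 when both
 * pointers are supplied.
 */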
int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, u64 *req, u32 req_size,
			u64 *resp0, u64 *resp1)
{
	struct erdma_comp_wait *comp_wait;
	int ret;

	if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state))
		return -ENODEV;

	down(&cmdq->credits);

	comp_wait = get_comp_wait(cmdq);
	if (IS_ERR(comp_wait)) {
		clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);
		set_bit(ERDMA_CMDQ_STATE_CTX_ERR_BIT, &cmdq->state);
		up(&cmdq->credits);
		return PTR_ERR(comp_wait);
	}

	spin_lock(&cmdq->sq.lock);
	push_cmdq_sqe(cmdq, req, req_size, comp_wait);
	spin_unlock(&cmdq->sq.lock);

	if (cmdq->use_event)
		ret = erdma_wait_cmd_completion(comp_wait, cmdq,
						ERDMA_CMDQ_TIMEOUT_MS);
	else
		ret = erdma_poll_cmd_completion(comp_wait, cmdq,
						ERDMA_CMDQ_TIMEOUT_MS);

	if (ret) {
		set_bit(ERDMA_CMDQ_STATE_TIMEOUT_BIT, &cmdq->state);
		clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);
		goto out;
	}

	if (comp_wait->comp_status)
		ret = -EIO;

	if (resp0 && resp1) {
		*resp0 = *((u64 *)&comp_wait->comp_data[0]);
		*resp1 = *((u64 *)&comp_wait->comp_data[2]);
	}
	put_comp_wait(cmdq, comp_wait);

out:
	up(&cmdq->credits);

	return ret;
}