1 // SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
2 /*
3  * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
4  */
5 
6 #include "efa_com.h"
7 #include "efa_regs_defs.h"
8 
/* Admin command completion timeout used when the device CAPS register reports 0 */
#define ADMIN_CMD_TIMEOUT_US 30000000 /* usecs */

/* Initial value of the register read timeout (mmio_read_timeout) */
#define EFA_REG_READ_TIMEOUT_US 50000 /* usecs */
/* Value efa_com_reg_read32() returns when a register read fails */
#define EFA_MMIO_READ_INVALID 0xffffffff

/* Sleep between admin CQ polls and between reset state checks */
#define EFA_POLL_INTERVAL_MS 100 /* msecs */

/* Depth of the async event notification queue */
#define EFA_ASYNC_QUEUE_DEPTH 16
/* Depth of the admin queue (shared by its submission and completion rings) */
#define EFA_ADMIN_QUEUE_DEPTH 32

/* Lowest device version efa_com_validate_version() accepts */
#define MIN_EFA_VER\
	((EFA_ADMIN_API_VERSION_MAJOR << EFA_REGS_VERSION_MAJOR_VERSION_SHIFT) | \
	 (EFA_ADMIN_API_VERSION_MINOR & EFA_REGS_VERSION_MINOR_VERSION_MASK))

/* Components of the lowest controller version the driver accepts */
#define EFA_CTRL_MAJOR          0
#define EFA_CTRL_MINOR          0
#define EFA_CTRL_SUB_MINOR      1

/* Lowest controller version, compared with the implementation ID masked out */
#define MIN_EFA_CTRL_VER \
	(((EFA_CTRL_MAJOR) << \
	(EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT)) | \
	((EFA_CTRL_MINOR) << \
	(EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT)) | \
	(EFA_CTRL_SUB_MINOR))

/* Split a DMA address into 32-bit halves; widened to u64 before shifting */
#define EFA_DMA_ADDR_TO_UINT32_LOW(x)   ((u32)((u64)(x)))
#define EFA_DMA_ADDR_TO_UINT32_HIGH(x)  ((u32)(((u64)(x)) >> 32))

/* Written to the interrupt mask register when admin polling mode is enabled */
#define EFA_REGS_ADMIN_INTR_MASK 1
38 
/* Progress of an admin command as recorded in its completion context */
enum efa_cmd_status {
	/* set when the command is placed on the submission queue */
	EFA_CMD_SUBMITTED,
	/* set when the matching completion queue entry has been handled */
	EFA_CMD_COMPLETED,
};
43 
/* Per-command bookkeeping that links a submitted admin command to its completion */
struct efa_comp_ctx {
	/* completed by the completion handler when not in polling mode */
	struct completion wait_event;
	/* caller buffer the completion entry is copied into (may be NULL) */
	struct efa_admin_acq_entry *user_cqe;
	/* number of bytes copied into user_cqe */
	u32 comp_size;
	/* EFA_CMD_SUBMITTED until the completion entry has been handled */
	enum efa_cmd_status status;
	/* status from the device */
	u8 comp_status;
	/* opcode of the submitted command, kept for log messages */
	u8 cmd_opcode;
	/* non-zero while the context is taken by an in-flight command */
	u8 occupied;
};
54 
55 static const char *efa_com_cmd_str(u8 cmd)
56 {
57 #define EFA_CMD_STR_CASE(_cmd) case EFA_ADMIN_##_cmd: return #_cmd
58 
59 	switch (cmd) {
60 	EFA_CMD_STR_CASE(CREATE_QP);
61 	EFA_CMD_STR_CASE(MODIFY_QP);
62 	EFA_CMD_STR_CASE(QUERY_QP);
63 	EFA_CMD_STR_CASE(DESTROY_QP);
64 	EFA_CMD_STR_CASE(CREATE_AH);
65 	EFA_CMD_STR_CASE(DESTROY_AH);
66 	EFA_CMD_STR_CASE(REG_MR);
67 	EFA_CMD_STR_CASE(DEREG_MR);
68 	EFA_CMD_STR_CASE(CREATE_CQ);
69 	EFA_CMD_STR_CASE(DESTROY_CQ);
70 	EFA_CMD_STR_CASE(GET_FEATURE);
71 	EFA_CMD_STR_CASE(SET_FEATURE);
72 	EFA_CMD_STR_CASE(GET_STATS);
73 	EFA_CMD_STR_CASE(ALLOC_PD);
74 	EFA_CMD_STR_CASE(DEALLOC_PD);
75 	EFA_CMD_STR_CASE(ALLOC_UAR);
76 	EFA_CMD_STR_CASE(DEALLOC_UAR);
77 	default: return "unknown command opcode";
78 	}
79 #undef EFA_CMD_STR_CASE
80 }
81 
82 static u32 efa_com_reg_read32(struct efa_com_dev *edev, u16 offset)
83 {
84 	struct efa_com_mmio_read *mmio_read = &edev->mmio_read;
85 	struct efa_admin_mmio_req_read_less_resp *read_resp;
86 	unsigned long exp_time;
87 	u32 mmio_read_reg;
88 	u32 err;
89 
90 	read_resp = mmio_read->read_resp;
91 
92 	spin_lock(&mmio_read->lock);
93 	mmio_read->seq_num++;
94 
95 	/* trash DMA req_id to identify when hardware is done */
96 	read_resp->req_id = mmio_read->seq_num + 0x9aL;
97 	mmio_read_reg = (offset << EFA_REGS_MMIO_REG_READ_REG_OFF_SHIFT) &
98 			EFA_REGS_MMIO_REG_READ_REG_OFF_MASK;
99 	mmio_read_reg |= mmio_read->seq_num &
100 			 EFA_REGS_MMIO_REG_READ_REQ_ID_MASK;
101 
102 	writel(mmio_read_reg, edev->reg_bar + EFA_REGS_MMIO_REG_READ_OFF);
103 
104 	exp_time = jiffies + usecs_to_jiffies(mmio_read->mmio_read_timeout);
105 	do {
106 		if (READ_ONCE(read_resp->req_id) == mmio_read->seq_num)
107 			break;
108 		udelay(1);
109 	} while (time_is_after_jiffies(exp_time));
110 
111 	if (read_resp->req_id != mmio_read->seq_num) {
112 		ibdev_err(edev->efa_dev,
113 			  "Reading register timed out. expected: req id[%u] offset[%#x] actual: req id[%u] offset[%#x]\n",
114 			  mmio_read->seq_num, offset, read_resp->req_id,
115 			  read_resp->reg_off);
116 		err = EFA_MMIO_READ_INVALID;
117 		goto out;
118 	}
119 
120 	if (read_resp->reg_off != offset) {
121 		ibdev_err(edev->efa_dev,
122 			  "Reading register failed: wrong offset provided\n");
123 		err = EFA_MMIO_READ_INVALID;
124 		goto out;
125 	}
126 
127 	err = read_resp->reg_val;
128 out:
129 	spin_unlock(&mmio_read->lock);
130 	return err;
131 }
132 
133 static int efa_com_admin_init_sq(struct efa_com_dev *edev)
134 {
135 	struct efa_com_admin_queue *aq = &edev->aq;
136 	struct efa_com_admin_sq *sq = &aq->sq;
137 	u16 size = aq->depth * sizeof(*sq->entries);
138 	u32 addr_high;
139 	u32 addr_low;
140 	u32 aq_caps;
141 
142 	sq->entries =
143 		dma_alloc_coherent(aq->dmadev, size, &sq->dma_addr, GFP_KERNEL);
144 	if (!sq->entries)
145 		return -ENOMEM;
146 
147 	spin_lock_init(&sq->lock);
148 
149 	sq->cc = 0;
150 	sq->pc = 0;
151 	sq->phase = 1;
152 
153 	sq->db_addr = (u32 __iomem *)(edev->reg_bar + EFA_REGS_AQ_PROD_DB_OFF);
154 
155 	addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(sq->dma_addr);
156 	addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(sq->dma_addr);
157 
158 	writel(addr_low, edev->reg_bar + EFA_REGS_AQ_BASE_LO_OFF);
159 	writel(addr_high, edev->reg_bar + EFA_REGS_AQ_BASE_HI_OFF);
160 
161 	aq_caps = aq->depth & EFA_REGS_AQ_CAPS_AQ_DEPTH_MASK;
162 	aq_caps |= (sizeof(struct efa_admin_aq_entry) <<
163 			EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_SHIFT) &
164 			EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK;
165 
166 	writel(aq_caps, edev->reg_bar + EFA_REGS_AQ_CAPS_OFF);
167 
168 	return 0;
169 }
170 
171 static int efa_com_admin_init_cq(struct efa_com_dev *edev)
172 {
173 	struct efa_com_admin_queue *aq = &edev->aq;
174 	struct efa_com_admin_cq *cq = &aq->cq;
175 	u16 size = aq->depth * sizeof(*cq->entries);
176 	u32 addr_high;
177 	u32 addr_low;
178 	u32 acq_caps;
179 
180 	cq->entries =
181 		dma_alloc_coherent(aq->dmadev, size, &cq->dma_addr, GFP_KERNEL);
182 	if (!cq->entries)
183 		return -ENOMEM;
184 
185 	spin_lock_init(&cq->lock);
186 
187 	cq->cc = 0;
188 	cq->phase = 1;
189 
190 	addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(cq->dma_addr);
191 	addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(cq->dma_addr);
192 
193 	writel(addr_low, edev->reg_bar + EFA_REGS_ACQ_BASE_LO_OFF);
194 	writel(addr_high, edev->reg_bar + EFA_REGS_ACQ_BASE_HI_OFF);
195 
196 	acq_caps = aq->depth & EFA_REGS_ACQ_CAPS_ACQ_DEPTH_MASK;
197 	acq_caps |= (sizeof(struct efa_admin_acq_entry) <<
198 			EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_SHIFT) &
199 			EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_MASK;
200 	acq_caps |= (aq->msix_vector_idx <<
201 			EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR_SHIFT) &
202 			EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR_MASK;
203 
204 	writel(acq_caps, edev->reg_bar + EFA_REGS_ACQ_CAPS_OFF);
205 
206 	return 0;
207 }
208 
209 static int efa_com_admin_init_aenq(struct efa_com_dev *edev,
210 				   struct efa_aenq_handlers *aenq_handlers)
211 {
212 	struct efa_com_aenq *aenq = &edev->aenq;
213 	u32 addr_low, addr_high, aenq_caps;
214 	u16 size;
215 
216 	if (!aenq_handlers) {
217 		ibdev_err(edev->efa_dev, "aenq handlers pointer is NULL\n");
218 		return -EINVAL;
219 	}
220 
221 	size = EFA_ASYNC_QUEUE_DEPTH * sizeof(*aenq->entries);
222 	aenq->entries = dma_alloc_coherent(edev->dmadev, size, &aenq->dma_addr,
223 					   GFP_KERNEL);
224 	if (!aenq->entries)
225 		return -ENOMEM;
226 
227 	aenq->aenq_handlers = aenq_handlers;
228 	aenq->depth = EFA_ASYNC_QUEUE_DEPTH;
229 	aenq->cc = 0;
230 	aenq->phase = 1;
231 
232 	addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(aenq->dma_addr);
233 	addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(aenq->dma_addr);
234 
235 	writel(addr_low, edev->reg_bar + EFA_REGS_AENQ_BASE_LO_OFF);
236 	writel(addr_high, edev->reg_bar + EFA_REGS_AENQ_BASE_HI_OFF);
237 
238 	aenq_caps = aenq->depth & EFA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK;
239 	aenq_caps |= (sizeof(struct efa_admin_aenq_entry) <<
240 		EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT) &
241 		EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK;
242 	aenq_caps |= (aenq->msix_vector_idx
243 		      << EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR_SHIFT) &
244 		     EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR_MASK;
245 	writel(aenq_caps, edev->reg_bar + EFA_REGS_AENQ_CAPS_OFF);
246 
247 	/*
248 	 * Init cons_db to mark that all entries in the queue
249 	 * are initially available
250 	 */
251 	writel(edev->aenq.cc, edev->reg_bar + EFA_REGS_AENQ_CONS_DB_OFF);
252 
253 	return 0;
254 }
255 
256 /* ID to be used with efa_com_get_comp_ctx */
257 static u16 efa_com_alloc_ctx_id(struct efa_com_admin_queue *aq)
258 {
259 	u16 ctx_id;
260 
261 	spin_lock(&aq->comp_ctx_lock);
262 	ctx_id = aq->comp_ctx_pool[aq->comp_ctx_pool_next];
263 	aq->comp_ctx_pool_next++;
264 	spin_unlock(&aq->comp_ctx_lock);
265 
266 	return ctx_id;
267 }
268 
269 static void efa_com_dealloc_ctx_id(struct efa_com_admin_queue *aq,
270 				   u16 ctx_id)
271 {
272 	spin_lock(&aq->comp_ctx_lock);
273 	aq->comp_ctx_pool_next--;
274 	aq->comp_ctx_pool[aq->comp_ctx_pool_next] = ctx_id;
275 	spin_unlock(&aq->comp_ctx_lock);
276 }
277 
278 static inline void efa_com_put_comp_ctx(struct efa_com_admin_queue *aq,
279 					struct efa_comp_ctx *comp_ctx)
280 {
281 	u16 cmd_id = comp_ctx->user_cqe->acq_common_descriptor.command &
282 		     EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK;
283 	u16 ctx_id = cmd_id & (aq->depth - 1);
284 
285 	ibdev_dbg(aq->efa_dev, "Put completion command_id %#x\n", cmd_id);
286 	comp_ctx->occupied = 0;
287 	efa_com_dealloc_ctx_id(aq, ctx_id);
288 }
289 
290 static struct efa_comp_ctx *efa_com_get_comp_ctx(struct efa_com_admin_queue *aq,
291 						 u16 cmd_id, bool capture)
292 {
293 	u16 ctx_id = cmd_id & (aq->depth - 1);
294 
295 	if (aq->comp_ctx[ctx_id].occupied && capture) {
296 		ibdev_err(aq->efa_dev,
297 			  "Completion context for command_id %#x is occupied\n",
298 			  cmd_id);
299 		return NULL;
300 	}
301 
302 	if (capture) {
303 		aq->comp_ctx[ctx_id].occupied = 1;
304 		ibdev_dbg(aq->efa_dev,
305 			  "Take completion ctxt for command_id %#x\n", cmd_id);
306 	}
307 
308 	return &aq->comp_ctx[ctx_id];
309 }
310 
311 static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queue *aq,
312 						       struct efa_admin_aq_entry *cmd,
313 						       size_t cmd_size_in_bytes,
314 						       struct efa_admin_acq_entry *comp,
315 						       size_t comp_size_in_bytes)
316 {
317 	struct efa_comp_ctx *comp_ctx;
318 	u16 queue_size_mask;
319 	u16 cmd_id;
320 	u16 ctx_id;
321 	u16 pi;
322 
323 	queue_size_mask = aq->depth - 1;
324 	pi = aq->sq.pc & queue_size_mask;
325 
326 	ctx_id = efa_com_alloc_ctx_id(aq);
327 
328 	/* cmd_id LSBs are the ctx_id and MSBs are entropy bits from pc */
329 	cmd_id = ctx_id & queue_size_mask;
330 	cmd_id |= aq->sq.pc & ~queue_size_mask;
331 	cmd_id &= EFA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK;
332 
333 	cmd->aq_common_descriptor.command_id = cmd_id;
334 	cmd->aq_common_descriptor.flags |= aq->sq.phase &
335 		EFA_ADMIN_AQ_COMMON_DESC_PHASE_MASK;
336 
337 	comp_ctx = efa_com_get_comp_ctx(aq, cmd_id, true);
338 	if (!comp_ctx) {
339 		efa_com_dealloc_ctx_id(aq, ctx_id);
340 		return ERR_PTR(-EINVAL);
341 	}
342 
343 	comp_ctx->status = EFA_CMD_SUBMITTED;
344 	comp_ctx->comp_size = comp_size_in_bytes;
345 	comp_ctx->user_cqe = comp;
346 	comp_ctx->cmd_opcode = cmd->aq_common_descriptor.opcode;
347 
348 	reinit_completion(&comp_ctx->wait_event);
349 
350 	memcpy(&aq->sq.entries[pi], cmd, cmd_size_in_bytes);
351 
352 	aq->sq.pc++;
353 	atomic64_inc(&aq->stats.submitted_cmd);
354 
355 	if ((aq->sq.pc & queue_size_mask) == 0)
356 		aq->sq.phase = !aq->sq.phase;
357 
358 	/* barrier not needed in case of writel */
359 	writel(aq->sq.pc, aq->sq.db_addr);
360 
361 	return comp_ctx;
362 }
363 
364 static inline int efa_com_init_comp_ctxt(struct efa_com_admin_queue *aq)
365 {
366 	size_t pool_size = aq->depth * sizeof(*aq->comp_ctx_pool);
367 	size_t size = aq->depth * sizeof(struct efa_comp_ctx);
368 	struct efa_comp_ctx *comp_ctx;
369 	u16 i;
370 
371 	aq->comp_ctx = devm_kzalloc(aq->dmadev, size, GFP_KERNEL);
372 	aq->comp_ctx_pool = devm_kzalloc(aq->dmadev, pool_size, GFP_KERNEL);
373 	if (!aq->comp_ctx || !aq->comp_ctx_pool) {
374 		devm_kfree(aq->dmadev, aq->comp_ctx_pool);
375 		devm_kfree(aq->dmadev, aq->comp_ctx);
376 		return -ENOMEM;
377 	}
378 
379 	for (i = 0; i < aq->depth; i++) {
380 		comp_ctx = efa_com_get_comp_ctx(aq, i, false);
381 		if (comp_ctx)
382 			init_completion(&comp_ctx->wait_event);
383 
384 		aq->comp_ctx_pool[i] = i;
385 	}
386 
387 	spin_lock_init(&aq->comp_ctx_lock);
388 
389 	aq->comp_ctx_pool_next = 0;
390 
391 	return 0;
392 }
393 
394 static struct efa_comp_ctx *efa_com_submit_admin_cmd(struct efa_com_admin_queue *aq,
395 						     struct efa_admin_aq_entry *cmd,
396 						     size_t cmd_size_in_bytes,
397 						     struct efa_admin_acq_entry *comp,
398 						     size_t comp_size_in_bytes)
399 {
400 	struct efa_comp_ctx *comp_ctx;
401 
402 	spin_lock(&aq->sq.lock);
403 	if (!test_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state)) {
404 		ibdev_err(aq->efa_dev, "Admin queue is closed\n");
405 		spin_unlock(&aq->sq.lock);
406 		return ERR_PTR(-ENODEV);
407 	}
408 
409 	comp_ctx = __efa_com_submit_admin_cmd(aq, cmd, cmd_size_in_bytes, comp,
410 					      comp_size_in_bytes);
411 	spin_unlock(&aq->sq.lock);
412 	if (IS_ERR(comp_ctx))
413 		clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
414 
415 	return comp_ctx;
416 }
417 
418 static void efa_com_handle_single_admin_completion(struct efa_com_admin_queue *aq,
419 						   struct efa_admin_acq_entry *cqe)
420 {
421 	struct efa_comp_ctx *comp_ctx;
422 	u16 cmd_id;
423 
424 	cmd_id = cqe->acq_common_descriptor.command &
425 		 EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK;
426 
427 	comp_ctx = efa_com_get_comp_ctx(aq, cmd_id, false);
428 	if (!comp_ctx) {
429 		ibdev_err(aq->efa_dev,
430 			  "comp_ctx is NULL. Changing the admin queue running state\n");
431 		clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
432 		return;
433 	}
434 
435 	comp_ctx->status = EFA_CMD_COMPLETED;
436 	comp_ctx->comp_status = cqe->acq_common_descriptor.status;
437 	if (comp_ctx->user_cqe)
438 		memcpy(comp_ctx->user_cqe, cqe, comp_ctx->comp_size);
439 
440 	if (!test_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state))
441 		complete(&comp_ctx->wait_event);
442 }
443 
444 static void efa_com_handle_admin_completion(struct efa_com_admin_queue *aq)
445 {
446 	struct efa_admin_acq_entry *cqe;
447 	u16 queue_size_mask;
448 	u16 comp_num = 0;
449 	u8 phase;
450 	u16 ci;
451 
452 	queue_size_mask = aq->depth - 1;
453 
454 	ci = aq->cq.cc & queue_size_mask;
455 	phase = aq->cq.phase;
456 
457 	cqe = &aq->cq.entries[ci];
458 
459 	/* Go over all the completions */
460 	while ((READ_ONCE(cqe->acq_common_descriptor.flags) &
461 		EFA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK) == phase) {
462 		/*
463 		 * Do not read the rest of the completion entry before the
464 		 * phase bit was validated
465 		 */
466 		dma_rmb();
467 		efa_com_handle_single_admin_completion(aq, cqe);
468 
469 		ci++;
470 		comp_num++;
471 		if (ci == aq->depth) {
472 			ci = 0;
473 			phase = !phase;
474 		}
475 
476 		cqe = &aq->cq.entries[ci];
477 	}
478 
479 	aq->cq.cc += comp_num;
480 	aq->cq.phase = phase;
481 	aq->sq.cc += comp_num;
482 	atomic64_add(comp_num, &aq->stats.completed_cmd);
483 }
484 
485 static int efa_com_comp_status_to_errno(u8 comp_status)
486 {
487 	switch (comp_status) {
488 	case EFA_ADMIN_SUCCESS:
489 		return 0;
490 	case EFA_ADMIN_RESOURCE_ALLOCATION_FAILURE:
491 		return -ENOMEM;
492 	case EFA_ADMIN_UNSUPPORTED_OPCODE:
493 		return -EOPNOTSUPP;
494 	case EFA_ADMIN_BAD_OPCODE:
495 	case EFA_ADMIN_MALFORMED_REQUEST:
496 	case EFA_ADMIN_ILLEGAL_PARAMETER:
497 	case EFA_ADMIN_UNKNOWN_ERROR:
498 		return -EINVAL;
499 	default:
500 		return -EINVAL;
501 	}
502 }
503 
504 static int efa_com_wait_and_process_admin_cq_polling(struct efa_comp_ctx *comp_ctx,
505 						     struct efa_com_admin_queue *aq)
506 {
507 	unsigned long timeout;
508 	unsigned long flags;
509 	int err;
510 
511 	timeout = jiffies + usecs_to_jiffies(aq->completion_timeout);
512 
513 	while (1) {
514 		spin_lock_irqsave(&aq->cq.lock, flags);
515 		efa_com_handle_admin_completion(aq);
516 		spin_unlock_irqrestore(&aq->cq.lock, flags);
517 
518 		if (comp_ctx->status != EFA_CMD_SUBMITTED)
519 			break;
520 
521 		if (time_is_before_jiffies(timeout)) {
522 			ibdev_err(aq->efa_dev,
523 				  "Wait for completion (polling) timeout\n");
524 			/* EFA didn't have any completion */
525 			atomic64_inc(&aq->stats.no_completion);
526 
527 			clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
528 			err = -ETIME;
529 			goto out;
530 		}
531 
532 		msleep(aq->poll_interval);
533 	}
534 
535 	err = efa_com_comp_status_to_errno(comp_ctx->comp_status);
536 out:
537 	efa_com_put_comp_ctx(aq, comp_ctx);
538 	return err;
539 }
540 
541 static int efa_com_wait_and_process_admin_cq_interrupts(struct efa_comp_ctx *comp_ctx,
542 							struct efa_com_admin_queue *aq)
543 {
544 	unsigned long flags;
545 	int err;
546 
547 	wait_for_completion_timeout(&comp_ctx->wait_event,
548 				    usecs_to_jiffies(aq->completion_timeout));
549 
550 	/*
551 	 * In case the command wasn't completed find out the root cause.
552 	 * There might be 2 kinds of errors
553 	 * 1) No completion (timeout reached)
554 	 * 2) There is completion but the device didn't get any msi-x interrupt.
555 	 */
556 	if (comp_ctx->status == EFA_CMD_SUBMITTED) {
557 		spin_lock_irqsave(&aq->cq.lock, flags);
558 		efa_com_handle_admin_completion(aq);
559 		spin_unlock_irqrestore(&aq->cq.lock, flags);
560 
561 		atomic64_inc(&aq->stats.no_completion);
562 
563 		if (comp_ctx->status == EFA_CMD_COMPLETED)
564 			ibdev_err(aq->efa_dev,
565 				  "The device sent a completion but the driver didn't receive any MSI-X interrupt for admin cmd %s(%d) status %d (ctx: 0x%p, sq producer: %d, sq consumer: %d, cq consumer: %d)\n",
566 				  efa_com_cmd_str(comp_ctx->cmd_opcode),
567 				  comp_ctx->cmd_opcode, comp_ctx->status,
568 				  comp_ctx, aq->sq.pc, aq->sq.cc, aq->cq.cc);
569 		else
570 			ibdev_err(aq->efa_dev,
571 				  "The device didn't send any completion for admin cmd %s(%d) status %d (ctx 0x%p, sq producer: %d, sq consumer: %d, cq consumer: %d)\n",
572 				  efa_com_cmd_str(comp_ctx->cmd_opcode),
573 				  comp_ctx->cmd_opcode, comp_ctx->status,
574 				  comp_ctx, aq->sq.pc, aq->sq.cc, aq->cq.cc);
575 
576 		clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
577 		err = -ETIME;
578 		goto out;
579 	}
580 
581 	err = efa_com_comp_status_to_errno(comp_ctx->comp_status);
582 out:
583 	efa_com_put_comp_ctx(aq, comp_ctx);
584 	return err;
585 }
586 
587 /*
588  * There are two types to wait for completion.
589  * Polling mode - wait until the completion is available.
590  * Async mode - wait on wait queue until the completion is ready
591  * (or the timeout expired).
592  * It is expected that the IRQ called efa_com_handle_admin_completion
593  * to mark the completions.
594  */
595 static int efa_com_wait_and_process_admin_cq(struct efa_comp_ctx *comp_ctx,
596 					     struct efa_com_admin_queue *aq)
597 {
598 	if (test_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state))
599 		return efa_com_wait_and_process_admin_cq_polling(comp_ctx, aq);
600 
601 	return efa_com_wait_and_process_admin_cq_interrupts(comp_ctx, aq);
602 }
603 
604 /**
605  * efa_com_cmd_exec - Execute admin command
606  * @aq: admin queue.
607  * @cmd: the admin command to execute.
608  * @cmd_size: the command size.
609  * @comp: command completion return entry.
610  * @comp_size: command completion size.
611  * Submit an admin command and then wait until the device will return a
612  * completion.
613  * The completion will be copied into comp.
614  *
615  * @return - 0 on success, negative value on failure.
616  */
617 int efa_com_cmd_exec(struct efa_com_admin_queue *aq,
618 		     struct efa_admin_aq_entry *cmd,
619 		     size_t cmd_size,
620 		     struct efa_admin_acq_entry *comp,
621 		     size_t comp_size)
622 {
623 	struct efa_comp_ctx *comp_ctx;
624 	int err;
625 
626 	might_sleep();
627 
628 	/* In case of queue FULL */
629 	down(&aq->avail_cmds);
630 
631 	ibdev_dbg(aq->efa_dev, "%s (opcode %d)\n",
632 		  efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
633 		  cmd->aq_common_descriptor.opcode);
634 	comp_ctx = efa_com_submit_admin_cmd(aq, cmd, cmd_size, comp, comp_size);
635 	if (IS_ERR(comp_ctx)) {
636 		ibdev_err(aq->efa_dev,
637 			  "Failed to submit command %s (opcode %u) err %ld\n",
638 			  efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
639 			  cmd->aq_common_descriptor.opcode, PTR_ERR(comp_ctx));
640 
641 		up(&aq->avail_cmds);
642 		return PTR_ERR(comp_ctx);
643 	}
644 
645 	err = efa_com_wait_and_process_admin_cq(comp_ctx, aq);
646 	if (err)
647 		ibdev_err(aq->efa_dev,
648 			  "Failed to process command %s (opcode %u) comp_status %d err %d\n",
649 			  efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
650 			  cmd->aq_common_descriptor.opcode,
651 			  comp_ctx->comp_status, err);
652 
653 	up(&aq->avail_cmds);
654 
655 	return err;
656 }
657 
658 /**
659  * efa_com_admin_destroy - Destroy the admin and the async events queues.
660  * @edev: EFA communication layer struct
661  */
662 void efa_com_admin_destroy(struct efa_com_dev *edev)
663 {
664 	struct efa_com_admin_queue *aq = &edev->aq;
665 	struct efa_com_aenq *aenq = &edev->aenq;
666 	struct efa_com_admin_cq *cq = &aq->cq;
667 	struct efa_com_admin_sq *sq = &aq->sq;
668 	u16 size;
669 
670 	clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
671 
672 	devm_kfree(edev->dmadev, aq->comp_ctx_pool);
673 	devm_kfree(edev->dmadev, aq->comp_ctx);
674 
675 	size = aq->depth * sizeof(*sq->entries);
676 	dma_free_coherent(edev->dmadev, size, sq->entries, sq->dma_addr);
677 
678 	size = aq->depth * sizeof(*cq->entries);
679 	dma_free_coherent(edev->dmadev, size, cq->entries, cq->dma_addr);
680 
681 	size = aenq->depth * sizeof(*aenq->entries);
682 	dma_free_coherent(edev->dmadev, size, aenq->entries, aenq->dma_addr);
683 }
684 
685 /**
686  * efa_com_set_admin_polling_mode - Set the admin completion queue polling mode
687  * @edev: EFA communication layer struct
688  * @polling: Enable/Disable polling mode
689  *
690  * Set the admin completion mode.
691  */
692 void efa_com_set_admin_polling_mode(struct efa_com_dev *edev, bool polling)
693 {
694 	u32 mask_value = 0;
695 
696 	if (polling)
697 		mask_value = EFA_REGS_ADMIN_INTR_MASK;
698 
699 	writel(mask_value, edev->reg_bar + EFA_REGS_INTR_MASK_OFF);
700 	if (polling)
701 		set_bit(EFA_AQ_STATE_POLLING_BIT, &edev->aq.state);
702 	else
703 		clear_bit(EFA_AQ_STATE_POLLING_BIT, &edev->aq.state);
704 }
705 
706 static void efa_com_stats_init(struct efa_com_dev *edev)
707 {
708 	atomic64_t *s = (atomic64_t *)&edev->aq.stats;
709 	int i;
710 
711 	for (i = 0; i < sizeof(edev->aq.stats) / sizeof(*s); i++, s++)
712 		atomic64_set(s, 0);
713 }
714 
715 /**
716  * efa_com_admin_init - Init the admin and the async queues
717  * @edev: EFA communication layer struct
718  * @aenq_handlers: Those handlers to be called upon event.
719  *
720  * Initialize the admin submission and completion queues.
721  * Initialize the asynchronous events notification queues.
722  *
723  * @return - 0 on success, negative value on failure.
724  */
725 int efa_com_admin_init(struct efa_com_dev *edev,
726 		       struct efa_aenq_handlers *aenq_handlers)
727 {
728 	struct efa_com_admin_queue *aq = &edev->aq;
729 	u32 timeout;
730 	u32 dev_sts;
731 	u32 cap;
732 	int err;
733 
734 	dev_sts = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF);
735 	if (!(dev_sts & EFA_REGS_DEV_STS_READY_MASK)) {
736 		ibdev_err(edev->efa_dev,
737 			  "Device isn't ready, abort com init %#x\n", dev_sts);
738 		return -ENODEV;
739 	}
740 
741 	aq->depth = EFA_ADMIN_QUEUE_DEPTH;
742 
743 	aq->dmadev = edev->dmadev;
744 	aq->efa_dev = edev->efa_dev;
745 	set_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state);
746 
747 	sema_init(&aq->avail_cmds, aq->depth);
748 
749 	efa_com_stats_init(edev);
750 
751 	err = efa_com_init_comp_ctxt(aq);
752 	if (err)
753 		return err;
754 
755 	err = efa_com_admin_init_sq(edev);
756 	if (err)
757 		goto err_destroy_comp_ctxt;
758 
759 	err = efa_com_admin_init_cq(edev);
760 	if (err)
761 		goto err_destroy_sq;
762 
763 	efa_com_set_admin_polling_mode(edev, false);
764 
765 	err = efa_com_admin_init_aenq(edev, aenq_handlers);
766 	if (err)
767 		goto err_destroy_cq;
768 
769 	cap = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF);
770 	timeout = (cap & EFA_REGS_CAPS_ADMIN_CMD_TO_MASK) >>
771 		  EFA_REGS_CAPS_ADMIN_CMD_TO_SHIFT;
772 	if (timeout)
773 		/* the resolution of timeout reg is 100ms */
774 		aq->completion_timeout = timeout * 100000;
775 	else
776 		aq->completion_timeout = ADMIN_CMD_TIMEOUT_US;
777 
778 	aq->poll_interval = EFA_POLL_INTERVAL_MS;
779 
780 	set_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
781 
782 	return 0;
783 
784 err_destroy_cq:
785 	dma_free_coherent(edev->dmadev, aq->depth * sizeof(*aq->cq.entries),
786 			  aq->cq.entries, aq->cq.dma_addr);
787 err_destroy_sq:
788 	dma_free_coherent(edev->dmadev, aq->depth * sizeof(*aq->sq.entries),
789 			  aq->sq.entries, aq->sq.dma_addr);
790 err_destroy_comp_ctxt:
791 	devm_kfree(edev->dmadev, aq->comp_ctx);
792 
793 	return err;
794 }
795 
796 /**
797  * efa_com_admin_q_comp_intr_handler - admin queue interrupt handler
798  * @edev: EFA communication layer struct
799  *
800  * This method goes over the admin completion queue and wakes up
801  * all the pending threads that wait on the commands wait event.
802  *
803  * @note: Should be called after MSI-X interrupt.
804  */
805 void efa_com_admin_q_comp_intr_handler(struct efa_com_dev *edev)
806 {
807 	unsigned long flags;
808 
809 	spin_lock_irqsave(&edev->aq.cq.lock, flags);
810 	efa_com_handle_admin_completion(&edev->aq);
811 	spin_unlock_irqrestore(&edev->aq.cq.lock, flags);
812 }
813 
814 /*
815  * efa_handle_specific_aenq_event:
816  * return the handler that is relevant to the specific event group
817  */
818 static efa_aenq_handler efa_com_get_specific_aenq_cb(struct efa_com_dev *edev,
819 						     u16 group)
820 {
821 	struct efa_aenq_handlers *aenq_handlers = edev->aenq.aenq_handlers;
822 
823 	if (group < EFA_MAX_HANDLERS && aenq_handlers->handlers[group])
824 		return aenq_handlers->handlers[group];
825 
826 	return aenq_handlers->unimplemented_handler;
827 }
828 
829 /**
830  * efa_com_aenq_intr_handler - AENQ interrupt handler
831  * @edev: EFA communication layer struct
832  * @data: Data of interrupt handler.
833  *
834  * Go over the async event notification queue and call the proper aenq handler.
835  */
836 void efa_com_aenq_intr_handler(struct efa_com_dev *edev, void *data)
837 {
838 	struct efa_admin_aenq_common_desc *aenq_common;
839 	struct efa_com_aenq *aenq = &edev->aenq;
840 	struct efa_admin_aenq_entry *aenq_e;
841 	efa_aenq_handler handler_cb;
842 	u32 processed = 0;
843 	u8 phase;
844 	u32 ci;
845 
846 	ci = aenq->cc & (aenq->depth - 1);
847 	phase = aenq->phase;
848 	aenq_e = &aenq->entries[ci]; /* Get first entry */
849 	aenq_common = &aenq_e->aenq_common_desc;
850 
851 	/* Go over all the events */
852 	while ((READ_ONCE(aenq_common->flags) &
853 		EFA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK) == phase) {
854 		/*
855 		 * Do not read the rest of the completion entry before the
856 		 * phase bit was validated
857 		 */
858 		dma_rmb();
859 
860 		/* Handle specific event*/
861 		handler_cb = efa_com_get_specific_aenq_cb(edev,
862 							  aenq_common->group);
863 		handler_cb(data, aenq_e); /* call the actual event handler*/
864 
865 		/* Get next event entry */
866 		ci++;
867 		processed++;
868 
869 		if (ci == aenq->depth) {
870 			ci = 0;
871 			phase = !phase;
872 		}
873 		aenq_e = &aenq->entries[ci];
874 		aenq_common = &aenq_e->aenq_common_desc;
875 	}
876 
877 	aenq->cc += processed;
878 	aenq->phase = phase;
879 
880 	/* Don't update aenq doorbell if there weren't any processed events */
881 	if (!processed)
882 		return;
883 
884 	/* barrier not needed in case of writel */
885 	writel(aenq->cc, edev->reg_bar + EFA_REGS_AENQ_CONS_DB_OFF);
886 }
887 
888 static void efa_com_mmio_reg_read_resp_addr_init(struct efa_com_dev *edev)
889 {
890 	struct efa_com_mmio_read *mmio_read = &edev->mmio_read;
891 	u32 addr_high;
892 	u32 addr_low;
893 
894 	/* dma_addr_bits is unknown at this point */
895 	addr_high = (mmio_read->read_resp_dma_addr >> 32) & GENMASK(31, 0);
896 	addr_low = mmio_read->read_resp_dma_addr & GENMASK(31, 0);
897 
898 	writel(addr_high, edev->reg_bar + EFA_REGS_MMIO_RESP_HI_OFF);
899 	writel(addr_low, edev->reg_bar + EFA_REGS_MMIO_RESP_LO_OFF);
900 }
901 
902 int efa_com_mmio_reg_read_init(struct efa_com_dev *edev)
903 {
904 	struct efa_com_mmio_read *mmio_read = &edev->mmio_read;
905 
906 	spin_lock_init(&mmio_read->lock);
907 	mmio_read->read_resp =
908 		dma_alloc_coherent(edev->dmadev, sizeof(*mmio_read->read_resp),
909 				   &mmio_read->read_resp_dma_addr, GFP_KERNEL);
910 	if (!mmio_read->read_resp)
911 		return -ENOMEM;
912 
913 	efa_com_mmio_reg_read_resp_addr_init(edev);
914 
915 	mmio_read->read_resp->req_id = 0;
916 	mmio_read->seq_num = 0;
917 	mmio_read->mmio_read_timeout = EFA_REG_READ_TIMEOUT_US;
918 
919 	return 0;
920 }
921 
922 void efa_com_mmio_reg_read_destroy(struct efa_com_dev *edev)
923 {
924 	struct efa_com_mmio_read *mmio_read = &edev->mmio_read;
925 
926 	dma_free_coherent(edev->dmadev, sizeof(*mmio_read->read_resp),
927 			  mmio_read->read_resp, mmio_read->read_resp_dma_addr);
928 }
929 
930 int efa_com_validate_version(struct efa_com_dev *edev)
931 {
932 	u32 ctrl_ver_masked;
933 	u32 ctrl_ver;
934 	u32 ver;
935 
936 	/*
937 	 * Make sure the EFA version and the controller version are at least
938 	 * as the driver expects
939 	 */
940 	ver = efa_com_reg_read32(edev, EFA_REGS_VERSION_OFF);
941 	ctrl_ver = efa_com_reg_read32(edev,
942 				      EFA_REGS_CONTROLLER_VERSION_OFF);
943 
944 	ibdev_dbg(edev->efa_dev, "efa device version: %d.%d\n",
945 		  (ver & EFA_REGS_VERSION_MAJOR_VERSION_MASK) >>
946 			  EFA_REGS_VERSION_MAJOR_VERSION_SHIFT,
947 		  ver & EFA_REGS_VERSION_MINOR_VERSION_MASK);
948 
949 	if (ver < MIN_EFA_VER) {
950 		ibdev_err(edev->efa_dev,
951 			  "EFA version is lower than the minimal version the driver supports\n");
952 		return -EOPNOTSUPP;
953 	}
954 
955 	ibdev_dbg(edev->efa_dev,
956 		  "efa controller version: %d.%d.%d implementation version %d\n",
957 		  (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) >>
958 			  EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT,
959 		  (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) >>
960 			  EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT,
961 		  (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK),
962 		  (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_IMPL_ID_MASK) >>
963 			  EFA_REGS_CONTROLLER_VERSION_IMPL_ID_SHIFT);
964 
965 	ctrl_ver_masked =
966 		(ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) |
967 		(ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) |
968 		(ctrl_ver & EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK);
969 
970 	/* Validate the ctrl version without the implementation ID */
971 	if (ctrl_ver_masked < MIN_EFA_CTRL_VER) {
972 		ibdev_err(edev->efa_dev,
973 			  "EFA ctrl version is lower than the minimal ctrl version the driver supports\n");
974 		return -EOPNOTSUPP;
975 	}
976 
977 	return 0;
978 }
979 
980 /**
981  * efa_com_get_dma_width - Retrieve physical dma address width the device
982  * supports.
983  * @edev: EFA communication layer struct
984  *
985  * Retrieve the maximum physical address bits the device can handle.
986  *
987  * @return: > 0 on Success and negative value otherwise.
988  */
989 int efa_com_get_dma_width(struct efa_com_dev *edev)
990 {
991 	u32 caps = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF);
992 	int width;
993 
994 	width = (caps & EFA_REGS_CAPS_DMA_ADDR_WIDTH_MASK) >>
995 		EFA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT;
996 
997 	ibdev_dbg(edev->efa_dev, "DMA width: %d\n", width);
998 
999 	if (width < 32 || width > 64) {
1000 		ibdev_err(edev->efa_dev, "DMA width illegal value: %d\n", width);
1001 		return -EINVAL;
1002 	}
1003 
1004 	edev->dma_addr_bits = width;
1005 
1006 	return width;
1007 }
1008 
1009 static int wait_for_reset_state(struct efa_com_dev *edev, u32 timeout,
1010 				u16 exp_state)
1011 {
1012 	u32 val, i;
1013 
1014 	for (i = 0; i < timeout; i++) {
1015 		val = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF);
1016 
1017 		if ((val & EFA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK) ==
1018 		    exp_state)
1019 			return 0;
1020 
1021 		ibdev_dbg(edev->efa_dev, "Reset indication val %d\n", val);
1022 		msleep(EFA_POLL_INTERVAL_MS);
1023 	}
1024 
1025 	return -ETIME;
1026 }
1027 
1028 /**
1029  * efa_com_dev_reset - Perform device FLR to the device.
1030  * @edev: EFA communication layer struct
1031  * @reset_reason: Specify what is the trigger for the reset in case of an error.
1032  *
1033  * @return - 0 on success, negative value on failure.
1034  */
1035 int efa_com_dev_reset(struct efa_com_dev *edev,
1036 		      enum efa_regs_reset_reason_types reset_reason)
1037 {
1038 	u32 stat, timeout, cap, reset_val;
1039 	int err;
1040 
1041 	stat = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF);
1042 	cap = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF);
1043 
1044 	if (!(stat & EFA_REGS_DEV_STS_READY_MASK)) {
1045 		ibdev_err(edev->efa_dev,
1046 			  "Device isn't ready, can't reset device\n");
1047 		return -EINVAL;
1048 	}
1049 
1050 	timeout = (cap & EFA_REGS_CAPS_RESET_TIMEOUT_MASK) >>
1051 		  EFA_REGS_CAPS_RESET_TIMEOUT_SHIFT;
1052 	if (!timeout) {
1053 		ibdev_err(edev->efa_dev, "Invalid timeout value\n");
1054 		return -EINVAL;
1055 	}
1056 
1057 	/* start reset */
1058 	reset_val = EFA_REGS_DEV_CTL_DEV_RESET_MASK;
1059 	reset_val |= (reset_reason << EFA_REGS_DEV_CTL_RESET_REASON_SHIFT) &
1060 		     EFA_REGS_DEV_CTL_RESET_REASON_MASK;
1061 	writel(reset_val, edev->reg_bar + EFA_REGS_DEV_CTL_OFF);
1062 
1063 	/* reset clears the mmio readless address, restore it */
1064 	efa_com_mmio_reg_read_resp_addr_init(edev);
1065 
1066 	err = wait_for_reset_state(edev, timeout,
1067 				   EFA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK);
1068 	if (err) {
1069 		ibdev_err(edev->efa_dev, "Reset indication didn't turn on\n");
1070 		return err;
1071 	}
1072 
1073 	/* reset done */
1074 	writel(0, edev->reg_bar + EFA_REGS_DEV_CTL_OFF);
1075 	err = wait_for_reset_state(edev, timeout, 0);
1076 	if (err) {
1077 		ibdev_err(edev->efa_dev, "Reset indication didn't turn off\n");
1078 		return err;
1079 	}
1080 
1081 	timeout = (cap & EFA_REGS_CAPS_ADMIN_CMD_TO_MASK) >>
1082 		  EFA_REGS_CAPS_ADMIN_CMD_TO_SHIFT;
1083 	if (timeout)
1084 		/* the resolution of timeout reg is 100ms */
1085 		edev->aq.completion_timeout = timeout * 100000;
1086 	else
1087 		edev->aq.completion_timeout = ADMIN_CMD_TIMEOUT_US;
1088 
1089 	return 0;
1090 }
1091