xref: /openbmc/linux/drivers/infiniband/hw/bnxt_re/main.c (revision 4baf4a2919b2a13a7f67d63f34b03f823ea7c0e8)
1  /*
2   * Broadcom NetXtreme-E RoCE driver.
3   *
4   * Copyright (c) 2016 - 2017, Broadcom. All rights reserved.  The term
5   * Broadcom refers to Broadcom Limited and/or its subsidiaries.
6   *
7   * This software is available to you under a choice of one of two
8   * licenses.  You may choose to be licensed under the terms of the GNU
9   * General Public License (GPL) Version 2, available from the file
10   * COPYING in the main directory of this source tree, or the
11   * BSD license below:
12   *
13   * Redistribution and use in source and binary forms, with or without
14   * modification, are permitted provided that the following conditions
15   * are met:
16   *
17   * 1. Redistributions of source code must retain the above copyright
18   *    notice, this list of conditions and the following disclaimer.
19   * 2. Redistributions in binary form must reproduce the above copyright
20   *    notice, this list of conditions and the following disclaimer in
21   *    the documentation and/or other materials provided with the
22   *    distribution.
23   *
24   * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
25   * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
26   * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
27   * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
28   * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29   * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30   * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
31   * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
32   * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
33   * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
34   * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35   *
36   * Description: Main component of the bnxt_re driver
37   */
38  
39  #include <linux/module.h>
40  #include <linux/netdevice.h>
41  #include <linux/ethtool.h>
42  #include <linux/mutex.h>
43  #include <linux/list.h>
44  #include <linux/rculist.h>
45  #include <linux/spinlock.h>
46  #include <linux/pci.h>
47  #include <net/dcbnl.h>
48  #include <net/ipv6.h>
49  #include <net/addrconf.h>
50  #include <linux/if_ether.h>
51  #include <linux/auxiliary_bus.h>
52  
53  #include <rdma/ib_verbs.h>
54  #include <rdma/ib_user_verbs.h>
55  #include <rdma/ib_umem.h>
56  #include <rdma/ib_addr.h>
57  
58  #include "bnxt_ulp.h"
59  #include "roce_hsi.h"
60  #include "qplib_res.h"
61  #include "qplib_sp.h"
62  #include "qplib_fp.h"
63  #include "qplib_rcfw.h"
64  #include "bnxt_re.h"
65  #include "ib_verbs.h"
66  #include <rdma/bnxt_re-abi.h>
67  #include "bnxt.h"
68  #include "hw_counters.h"
69  
70  static char version[] =
71  		BNXT_RE_DESC "\n";
72  
73  MODULE_AUTHOR("Eddie Wai <eddie.wai@broadcom.com>");
74  MODULE_DESCRIPTION(BNXT_RE_DESC);
75  MODULE_LICENSE("Dual BSD/GPL");
76  
77  /* globals */
78  static DEFINE_MUTEX(bnxt_re_mutex);
79  
80  static void bnxt_re_stop_irq(void *handle);
81  static void bnxt_re_dev_stop(struct bnxt_re_dev *rdev);
82  static int bnxt_re_netdev_event(struct notifier_block *notifier,
83  				unsigned long event, void *ptr);
84  static struct bnxt_re_dev *bnxt_re_from_netdev(struct net_device *netdev);
85  static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev);
86  static int bnxt_re_hwrm_qcaps(struct bnxt_re_dev *rdev);
87  
88  static int bnxt_re_hwrm_qcfg(struct bnxt_re_dev *rdev, u32 *db_len,
89  			     u32 *offset);
90  static void bnxt_re_set_db_offset(struct bnxt_re_dev *rdev)
91  {
92  	struct bnxt_qplib_chip_ctx *cctx;
93  	struct bnxt_en_dev *en_dev;
94  	struct bnxt_qplib_res *res;
95  	u32 l2db_len = 0;
96  	u32 offset = 0;
97  	u32 barlen;
98  	int rc;
99  
100  	res = &rdev->qplib_res;
101  	en_dev = rdev->en_dev;
102  	cctx = rdev->chip_ctx;
103  
104  	/* Issue qcfg */
105  	rc = bnxt_re_hwrm_qcfg(rdev, &l2db_len, &offset);
106  	if (rc)
107  		dev_info(rdev_to_dev(rdev),
108  			 "Couldn't get DB bar size, Low latency framework is disabled\n");
109  	/* set register offsets for both UC and WC */
110  	if (bnxt_qplib_is_chip_gen_p7(cctx)) {
111  		res->dpi_tbl.ucreg.offset = offset;
112  		res->dpi_tbl.wcreg.offset = en_dev->l2_db_size;
113  	} else {
114  		res->dpi_tbl.ucreg.offset = res->is_vf ? BNXT_QPLIB_DBR_VF_DB_OFFSET :
115  							 BNXT_QPLIB_DBR_PF_DB_OFFSET;
116  		res->dpi_tbl.wcreg.offset = res->dpi_tbl.ucreg.offset;
117  	}
118  
119  	/* If WC mapping is disabled by the L2 driver then en_dev->l2_db_size
120  	 * is equal to the actual DB-Bar size. This indicates that L2 is
121  	 * mapping the entire bar as UC. The RoCE driver can't enable WC mapping
122  	 * in such cases and DB-push will be disabled.
123  	 */
124  	barlen = pci_resource_len(res->pdev, RCFW_DBR_PCI_BAR_REGION);
125  	if (cctx->modes.db_push && l2db_len && en_dev->l2_db_size != barlen) {
126  		res->dpi_tbl.wcreg.offset = en_dev->l2_db_size;
127  		dev_info(rdev_to_dev(rdev),  "Low latency framework is enabled\n");
128  	}
129  }
130  
131  static void bnxt_re_set_drv_mode(struct bnxt_re_dev *rdev)
132  {
133  	struct bnxt_qplib_chip_ctx *cctx;
134  
135  	cctx = rdev->chip_ctx;
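	/* Gen P7 adapters support the variable-size WQE format; earlier
	 * generations are limited to the static (fixed-size) WQE mode.
	 */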
136  	cctx->modes.wqe_mode = bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx) ?
137  			       BNXT_QPLIB_WQE_MODE_VARIABLE : BNXT_QPLIB_WQE_MODE_STATIC;
138  	if (bnxt_re_hwrm_qcaps(rdev))
139  		dev_err(rdev_to_dev(rdev),
140  			"Failed to query hwrm qcaps\n");
141  }
142  
143  static void bnxt_re_destroy_chip_ctx(struct bnxt_re_dev *rdev)
144  {
145  	struct bnxt_qplib_chip_ctx *chip_ctx;
146  
147  	if (!rdev->chip_ctx)
148  		return;
149  	chip_ctx = rdev->chip_ctx;
150  	rdev->chip_ctx = NULL;
151  	rdev->rcfw.res = NULL;
152  	rdev->qplib_res.cctx = NULL;
153  	rdev->qplib_res.pdev = NULL;
154  	rdev->qplib_res.netdev = NULL;
155  	kfree(chip_ctx);
156  }
157  
158  static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev)
159  {
160  	struct bnxt_qplib_chip_ctx *chip_ctx;
161  	struct bnxt_en_dev *en_dev;
162  	int rc;
163  
164  	en_dev = rdev->en_dev;
165  
166  	chip_ctx = kzalloc(sizeof(*chip_ctx), GFP_KERNEL);
167  	if (!chip_ctx)
168  		return -ENOMEM;
169  	chip_ctx->chip_num = en_dev->chip_num;
170  	chip_ctx->hw_stats_size = en_dev->hw_ring_stats_size;
171  
172  	rdev->chip_ctx = chip_ctx;
173  	/* rest of the members to follow eventually */
174  
175  	rdev->qplib_res.cctx = rdev->chip_ctx;
176  	rdev->rcfw.res = &rdev->qplib_res;
177  	rdev->qplib_res.dattr = &rdev->dev_attr;
178  	rdev->qplib_res.is_vf = BNXT_EN_VF(en_dev);
179  
180  	bnxt_re_set_drv_mode(rdev);
181  
182  	bnxt_re_set_db_offset(rdev);
183  	rc = bnxt_qplib_map_db_bar(&rdev->qplib_res);
184  	if (rc) {
185  		kfree(rdev->chip_ctx);
186  		rdev->chip_ctx = NULL;
187  		return rc;
188  	}
189  
190  	if (bnxt_qplib_determine_atomics(en_dev->pdev))
191  		ibdev_info(&rdev->ibdev,
192  			   "platform doesn't support global atomics.");
193  	return 0;
194  }
195  
196  /* SR-IOV helper functions */
197  
198  static void bnxt_re_get_sriov_func_type(struct bnxt_re_dev *rdev)
199  {
200  	if (BNXT_EN_VF(rdev->en_dev))
201  		rdev->is_virtfn = 1;
202  }
203  
204  /* Set the maximum number of each resource that the driver actually wants
205   * to allocate. This may be up to the maximum number the firmware has
206   * reserved for the function. The driver may choose to allocate fewer
207   * resources than the firmware maximum.
208   */
209  static void bnxt_re_limit_pf_res(struct bnxt_re_dev *rdev)
210  {
211  	struct bnxt_qplib_dev_attr *attr;
212  	struct bnxt_qplib_ctx *ctx;
213  	int i;
214  
215  	attr = &rdev->dev_attr;
216  	ctx = &rdev->qplib_ctx;
217  
218  	ctx->qpc_count = min_t(u32, BNXT_RE_MAX_QPC_COUNT,
219  			       attr->max_qp);
220  	ctx->mrw_count = BNXT_RE_MAX_MRW_COUNT_256K;
221  	/* Use max_mr from fw since max_mrw does not get set */
222  	ctx->mrw_count = min_t(u32, ctx->mrw_count, attr->max_mr);
223  	ctx->srqc_count = min_t(u32, BNXT_RE_MAX_SRQC_COUNT,
224  				attr->max_srq);
225  	ctx->cq_count = min_t(u32, BNXT_RE_MAX_CQ_COUNT, attr->max_cq);
226  	if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))
227  		for (i = 0; i < MAX_TQM_ALLOC_REQ; i++)
228  			rdev->qplib_ctx.tqm_ctx.qcount[i] =
229  			rdev->dev_attr.tqm_alloc_reqs[i];
230  }
231  
232  static void bnxt_re_limit_vf_res(struct bnxt_qplib_ctx *qplib_ctx, u32 num_vf)
233  {
234  	struct bnxt_qplib_vf_res *vf_res;
235  	u32 mrws = 0;
236  	u32 vf_pct;
237  	u32 nvfs;
238  
239  	vf_res = &qplib_ctx->vf_res;
240  	/*
241  	 * Reserve a set of resources for the PF. Divide the remaining
242  	 * resources among the VFs
243  	 */
244  	vf_pct = 100 - BNXT_RE_PCT_RSVD_FOR_PF;
245  	nvfs = num_vf;
246  	num_vf = 100 * num_vf;
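	/* num_vf is scaled by 100 above so that applying the percentage and
	 * dividing per VF collapse into a single integer division below.
	 */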
247  	vf_res->max_qp_per_vf = (qplib_ctx->qpc_count * vf_pct) / num_vf;
248  	vf_res->max_srq_per_vf = (qplib_ctx->srqc_count * vf_pct) / num_vf;
249  	vf_res->max_cq_per_vf = (qplib_ctx->cq_count * vf_pct) / num_vf;
250  	/*
251  	 * The driver allows many more MRs than other resources. If the
252  	 * firmware does also, then reserve a fixed amount for the PF and
253  	 * divide the rest among VFs. VFs may use many MRs for NFS
254  	 * mounts, ISER, NVME applications, etc. If the firmware severely
255  	 * restricts the number of MRs, then let PF have half and divide
256  	 * the rest among VFs, as for the other resource types.
257  	 */
258  	if (qplib_ctx->mrw_count < BNXT_RE_MAX_MRW_COUNT_64K) {
259  		mrws = qplib_ctx->mrw_count * vf_pct;
260  		nvfs = num_vf;
261  	} else {
262  		mrws = qplib_ctx->mrw_count - BNXT_RE_RESVD_MR_FOR_PF;
263  	}
264  	vf_res->max_mrw_per_vf = (mrws / nvfs);
265  	vf_res->max_gid_per_vf = BNXT_RE_MAX_GID_PER_VF;
266  }
267  
268  static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev)
269  {
270  	u32 num_vfs;
271  
272  	memset(&rdev->qplib_ctx.vf_res, 0, sizeof(struct bnxt_qplib_vf_res));
273  	bnxt_re_limit_pf_res(rdev);
274  
275  	num_vfs =  bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx) ?
276  			BNXT_RE_GEN_P5_MAX_VF : rdev->num_vfs;
277  	if (num_vfs)
278  		bnxt_re_limit_vf_res(&rdev->qplib_ctx, num_vfs);
279  }
280  
281  static void bnxt_re_vf_res_config(struct bnxt_re_dev *rdev)
282  {
283  
284  	if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags))
285  		return;
286  	rdev->num_vfs = pci_sriov_get_totalvfs(rdev->en_dev->pdev);
287  	if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) {
288  		bnxt_re_set_resource_limits(rdev);
289  		bnxt_qplib_set_func_resources(&rdev->qplib_res, &rdev->rcfw,
290  					      &rdev->qplib_ctx);
291  	}
292  }
293  
294  static void bnxt_re_shutdown(struct auxiliary_device *adev)
295  {
296  	struct bnxt_re_dev *rdev = auxiliary_get_drvdata(adev);
297  
298  	if (!rdev)
299  		return;
300  	ib_unregister_device(&rdev->ibdev);
301  	bnxt_re_dev_uninit(rdev);
302  }
303  
304  static void bnxt_re_stop_irq(void *handle)
305  {
306  	struct bnxt_re_dev *rdev = (struct bnxt_re_dev *)handle;
307  	struct bnxt_qplib_rcfw *rcfw = &rdev->rcfw;
308  	struct bnxt_qplib_nq *nq;
309  	int indx;
310  
311  	for (indx = BNXT_RE_NQ_IDX; indx < rdev->num_msix; indx++) {
312  		nq = &rdev->nq[indx - 1];
313  		bnxt_qplib_nq_stop_irq(nq, false);
314  	}
315  
316  	bnxt_qplib_rcfw_stop_irq(rcfw, false);
317  }
318  
319  static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent)
320  {
321  	struct bnxt_re_dev *rdev = (struct bnxt_re_dev *)handle;
322  	struct bnxt_msix_entry *msix_ent = rdev->en_dev->msix_entries;
323  	struct bnxt_qplib_rcfw *rcfw = &rdev->rcfw;
324  	struct bnxt_qplib_nq *nq;
325  	int indx, rc;
326  
327  	if (!ent) {
328  		/* Not setting the f/w timeout bit in rcfw.
329  		 * During the driver unload the first command
330  		 * to f/w will timeout and that will set the
331  		 * timeout bit.
332  		 */
333  		ibdev_err(&rdev->ibdev, "Failed to re-start IRQs\n");
334  		return;
335  	}
336  
337  	/* Vectors may change after restart, so update with new vectors
338  	 * in device structure.
339  	 */
340  	for (indx = 0; indx < rdev->num_msix; indx++)
341  		rdev->en_dev->msix_entries[indx].vector = ent[indx].vector;
342  
343  	rc = bnxt_qplib_rcfw_start_irq(rcfw, msix_ent[BNXT_RE_AEQ_IDX].vector,
344  				       false);
345  	if (rc) {
346  		ibdev_warn(&rdev->ibdev, "Failed to reinit CREQ\n");
347  		return;
348  	}
349  	for (indx = BNXT_RE_NQ_IDX ; indx < rdev->num_msix; indx++) {
350  		nq = &rdev->nq[indx - 1];
351  		rc = bnxt_qplib_nq_start_irq(nq, indx - 1,
352  					     msix_ent[indx].vector, false);
353  		if (rc) {
354  			ibdev_warn(&rdev->ibdev, "Failed to reinit NQ index %d\n",
355  				   indx - 1);
356  			return;
357  		}
358  	}
359  }
360  
361  static struct bnxt_ulp_ops bnxt_re_ulp_ops = {
362  	.ulp_irq_stop = bnxt_re_stop_irq,
363  	.ulp_irq_restart = bnxt_re_start_irq
364  };
365  
366  /* RoCE -> Net driver */
367  
368  static int bnxt_re_register_netdev(struct bnxt_re_dev *rdev)
369  {
370  	struct bnxt_en_dev *en_dev;
371  	int rc;
372  
373  	en_dev = rdev->en_dev;
374  
375  	rc = bnxt_register_dev(en_dev, &bnxt_re_ulp_ops, rdev);
376  	if (!rc)
377  		rdev->qplib_res.pdev = rdev->en_dev->pdev;
378  	return rc;
379  }
380  
381  static void bnxt_re_init_hwrm_hdr(struct input *hdr, u16 opcd)
382  {
383  	hdr->req_type = cpu_to_le16(opcd);
384  	hdr->cmpl_ring = cpu_to_le16(-1);
385  	hdr->target_id = cpu_to_le16(-1);
386  }
387  
388  static void bnxt_re_fill_fw_msg(struct bnxt_fw_msg *fw_msg, void *msg,
389  				int msg_len, void *resp, int resp_max_len,
390  				int timeout)
391  {
392  	fw_msg->msg = msg;
393  	fw_msg->msg_len = msg_len;
394  	fw_msg->resp = resp;
395  	fw_msg->resp_max_len = resp_max_len;
396  	fw_msg->timeout = timeout;
397  }
398  
399  /* Query device config using common hwrm */
400  static int bnxt_re_hwrm_qcfg(struct bnxt_re_dev *rdev, u32 *db_len,
401  			     u32 *offset)
402  {
403  	struct bnxt_en_dev *en_dev = rdev->en_dev;
404  	struct hwrm_func_qcfg_output resp = {0};
405  	struct hwrm_func_qcfg_input req = {0};
406  	struct bnxt_fw_msg fw_msg = {};
407  	int rc;
408  
409  	bnxt_re_init_hwrm_hdr((void *)&req, HWRM_FUNC_QCFG);
410  	req.fid = cpu_to_le16(0xffff);
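	/* fid 0xffff asks the firmware for the configuration of the
	 * function issuing this command (self-query).
	 */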
411  	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
412  			    sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
413  	rc = bnxt_send_msg(en_dev, &fw_msg);
414  	if (!rc) {
415  		*db_len = PAGE_ALIGN(le16_to_cpu(resp.l2_doorbell_bar_size_kb) * 1024);
416  		*offset = PAGE_ALIGN(le16_to_cpu(resp.legacy_l2_db_size_kb) * 1024);
417  	}
418  	return rc;
419  }
420  
421  /* Query function capabilities using common hwrm */
422  int bnxt_re_hwrm_qcaps(struct bnxt_re_dev *rdev)
423  {
424  	struct bnxt_en_dev *en_dev = rdev->en_dev;
425  	struct hwrm_func_qcaps_output resp = {};
426  	struct hwrm_func_qcaps_input req = {};
427  	struct bnxt_qplib_chip_ctx *cctx;
428  	struct bnxt_fw_msg fw_msg = {};
429  	int rc;
430  
431  	cctx = rdev->chip_ctx;
432  	bnxt_re_init_hwrm_hdr((void *)&req, HWRM_FUNC_QCAPS);
433  	req.fid = cpu_to_le16(0xffff);
434  	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
435  			    sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
436  
437  	rc = bnxt_send_msg(en_dev, &fw_msg);
438  	if (rc)
439  		return rc;
440  	cctx->modes.db_push = le32_to_cpu(resp.flags) & FUNC_QCAPS_RESP_FLAGS_WCB_PUSH_MODE;
441  
442  	cctx->modes.dbr_pacing =
443  		le32_to_cpu(resp.flags_ext2) &
444  		FUNC_QCAPS_RESP_FLAGS_EXT2_DBR_PACING_EXT_SUPPORTED;
445  	return 0;
446  }
447  
448  static int bnxt_re_hwrm_dbr_pacing_qcfg(struct bnxt_re_dev *rdev)
449  {
450  	struct hwrm_func_dbr_pacing_qcfg_output resp = {};
451  	struct hwrm_func_dbr_pacing_qcfg_input req = {};
452  	struct bnxt_en_dev *en_dev = rdev->en_dev;
453  	struct bnxt_qplib_chip_ctx *cctx;
454  	struct bnxt_fw_msg fw_msg = {};
455  	int rc;
456  
457  	cctx = rdev->chip_ctx;
458  	bnxt_re_init_hwrm_hdr((void *)&req, HWRM_FUNC_DBR_PACING_QCFG);
459  	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
460  			    sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
461  	rc = bnxt_send_msg(en_dev, &fw_msg);
462  	if (rc)
463  		return rc;
464  
465  	if ((le32_to_cpu(resp.dbr_stat_db_fifo_reg) &
466  	    FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_MASK) ==
467  		FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_GRC)
468  		cctx->dbr_stat_db_fifo =
469  			le32_to_cpu(resp.dbr_stat_db_fifo_reg) &
470  			~FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_MASK;
471  	return 0;
472  }
473  
474  /* Update the pacing tunable parameters to the default values */
475  static void bnxt_re_set_default_pacing_data(struct bnxt_re_dev *rdev)
476  {
477  	struct bnxt_qplib_db_pacing_data *pacing_data = rdev->qplib_res.pacing_data;
478  
479  	pacing_data->do_pacing = rdev->pacing.dbr_def_do_pacing;
480  	pacing_data->pacing_th = rdev->pacing.pacing_algo_th;
481  	pacing_data->alarm_th =
482  		pacing_data->pacing_th * BNXT_RE_PACING_ALARM_TH_MULTIPLE;
483  }
484  
485  static void __wait_for_fifo_occupancy_below_th(struct bnxt_re_dev *rdev)
486  {
487  	u32 read_val, fifo_occup;
488  	struct bnxt_qplib_db_pacing_data *pacing_data = rdev->qplib_res.pacing_data;
489  	u32 retry_fifo_check = 1000;
490  
491  	/* The loop shouldn't run infinitely as the occupancy usually goes
492  	 * below pacing algo threshold as soon as pacing kicks in.
493  	 */
494  	while (1) {
495  		read_val = readl(rdev->en_dev->bar0 + rdev->pacing.dbr_db_fifo_reg_off);
496  		fifo_occup = BNXT_RE_MAX_FIFO_DEPTH -
497  			((read_val & BNXT_RE_DB_FIFO_ROOM_MASK) >>
498  			 BNXT_RE_DB_FIFO_ROOM_SHIFT);
499  		/* FIFO occupancy cannot be greater than the MAX FIFO depth */
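		/* A larger value most likely means the register read failed
		 * (e.g. the device is being torn down), so stop polling.
		 */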
500  		if (fifo_occup > BNXT_RE_MAX_FIFO_DEPTH)
501  			break;
502  
503  		if (fifo_occup < rdev->qplib_res.pacing_data->pacing_th)
504  			break;
505  		if (!retry_fifo_check--) {
506  			dev_info_once(rdev_to_dev(rdev),
507  				      "%s: fifo_occup = 0x%xfifo_max_depth = 0x%x pacing_th = 0x%x\n",
508  				      __func__, fifo_occup, pacing_data->fifo_max_depth,
509  					pacing_data->pacing_th);
510  			break;
511  		}
512  
513  	}
514  }
515  
516  static void bnxt_re_db_fifo_check(struct work_struct *work)
517  {
518  	struct bnxt_re_dev *rdev = container_of(work, struct bnxt_re_dev,
519  			dbq_fifo_check_work);
520  	struct bnxt_qplib_db_pacing_data *pacing_data;
521  	u32 pacing_save;
522  
523  	if (!mutex_trylock(&rdev->pacing.dbq_lock))
524  		return;
525  	pacing_data = rdev->qplib_res.pacing_data;
526  	pacing_save = rdev->pacing.do_pacing_save;
527  	__wait_for_fifo_occupancy_below_th(rdev);
528  	cancel_delayed_work_sync(&rdev->dbq_pacing_work);
529  	if (pacing_save > rdev->pacing.dbr_def_do_pacing) {
530  		/* Double the do_pacing value during the congestion */
531  		pacing_save = pacing_save << 1;
532  	} else {
533  		/*
534  		 * When new congestion is detected, increase the do_pacing
535  		 * by 8 times and also increase the pacing_th by 4 times. The
536  		 * reason to increase pacing_th is to give more space for the
537  		 * queue to oscillate down without getting empty, but also more
538  		 * room for the queue to increase without causing another alarm.
539  		 */
540  		pacing_save = pacing_save << 3;
541  		pacing_data->pacing_th = rdev->pacing.pacing_algo_th * 4;
542  	}
543  
544  	if (pacing_save > BNXT_RE_MAX_DBR_DO_PACING)
545  		pacing_save = BNXT_RE_MAX_DBR_DO_PACING;
546  
547  	pacing_data->do_pacing = pacing_save;
548  	rdev->pacing.do_pacing_save = pacing_data->do_pacing;
549  	pacing_data->alarm_th =
550  		pacing_data->pacing_th * BNXT_RE_PACING_ALARM_TH_MULTIPLE;
551  	schedule_delayed_work(&rdev->dbq_pacing_work,
552  			      msecs_to_jiffies(rdev->pacing.dbq_pacing_time));
553  	rdev->stats.pacing.alerts++;
554  	mutex_unlock(&rdev->pacing.dbq_lock);
555  }
556  
557  static void bnxt_re_pacing_timer_exp(struct work_struct *work)
558  {
559  	struct bnxt_re_dev *rdev = container_of(work, struct bnxt_re_dev,
560  			dbq_pacing_work.work);
561  	struct bnxt_qplib_db_pacing_data *pacing_data;
562  	u32 read_val, fifo_occup;
563  
564  	if (!mutex_trylock(&rdev->pacing.dbq_lock))
565  		return;
566  
567  	pacing_data = rdev->qplib_res.pacing_data;
568  	read_val = readl(rdev->en_dev->bar0 + rdev->pacing.dbr_db_fifo_reg_off);
569  	fifo_occup = BNXT_RE_MAX_FIFO_DEPTH -
570  		((read_val & BNXT_RE_DB_FIFO_ROOM_MASK) >>
571  		 BNXT_RE_DB_FIFO_ROOM_SHIFT);
572  
573  	if (fifo_occup > pacing_data->pacing_th)
574  		goto restart_timer;
575  
576  	/*
577  	 * Instead of immediately going back to the default do_pacing
578  	 * reduce it by 1/8 times and restart the timer.
579  	 */
580  	pacing_data->do_pacing = pacing_data->do_pacing - (pacing_data->do_pacing >> 3);
581  	pacing_data->do_pacing = max_t(u32, rdev->pacing.dbr_def_do_pacing, pacing_data->do_pacing);
582  	if (pacing_data->do_pacing <= rdev->pacing.dbr_def_do_pacing) {
583  		bnxt_re_set_default_pacing_data(rdev);
584  		rdev->stats.pacing.complete++;
585  		goto dbq_unlock;
586  	}
587  
588  restart_timer:
589  	schedule_delayed_work(&rdev->dbq_pacing_work,
590  			      msecs_to_jiffies(rdev->pacing.dbq_pacing_time));
591  	rdev->stats.pacing.resched++;
592  dbq_unlock:
593  	rdev->pacing.do_pacing_save = pacing_data->do_pacing;
594  	mutex_unlock(&rdev->pacing.dbq_lock);
595  }
596  
597  void bnxt_re_pacing_alert(struct bnxt_re_dev *rdev)
598  {
599  	struct bnxt_qplib_db_pacing_data *pacing_data;
600  
601  	if (!rdev->pacing.dbr_pacing)
602  		return;
603  	mutex_lock(&rdev->pacing.dbq_lock);
604  	pacing_data = rdev->qplib_res.pacing_data;
605  
606  	/*
607  	 * Increase the alarm_th to max so that other user lib instances do not
608  	 * keep alerting the driver.
609  	 */
610  	pacing_data->alarm_th = BNXT_RE_MAX_FIFO_DEPTH;
611  	pacing_data->do_pacing = BNXT_RE_MAX_DBR_DO_PACING;
612  	cancel_work_sync(&rdev->dbq_fifo_check_work);
613  	schedule_work(&rdev->dbq_fifo_check_work);
614  	mutex_unlock(&rdev->pacing.dbq_lock);
615  }
616  
617  static int bnxt_re_initialize_dbr_pacing(struct bnxt_re_dev *rdev)
618  {
619  	if (bnxt_re_hwrm_dbr_pacing_qcfg(rdev))
620  		return -EIO;
621  
622  	/* Allocate a page for app use */
623  	rdev->pacing.dbr_page = (void *)__get_free_page(GFP_KERNEL);
624  	if (!rdev->pacing.dbr_page)
625  		return -ENOMEM;
626  
627  	memset((u8 *)rdev->pacing.dbr_page, 0, PAGE_SIZE);
628  	rdev->qplib_res.pacing_data = (struct bnxt_qplib_db_pacing_data *)rdev->pacing.dbr_page;
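	/* This page is shared with user-space libraries so they can read the
	 * current do_pacing/alarm_th values and throttle their own doorbells
	 * (see bnxt_re_pacing_alert()).
	 */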
629  
630  	/* MAP HW window 2 for reading db fifo depth */
631  	writel(rdev->chip_ctx->dbr_stat_db_fifo & BNXT_GRC_BASE_MASK,
632  	       rdev->en_dev->bar0 + BNXT_GRCPF_REG_WINDOW_BASE_OUT + 4);
633  	rdev->pacing.dbr_db_fifo_reg_off =
634  		(rdev->chip_ctx->dbr_stat_db_fifo & BNXT_GRC_OFFSET_MASK) +
635  		 BNXT_RE_GRC_FIFO_REG_BASE;
636  	rdev->pacing.dbr_bar_addr =
637  		pci_resource_start(rdev->qplib_res.pdev, 0) + rdev->pacing.dbr_db_fifo_reg_off;
638  
639  	rdev->pacing.pacing_algo_th = BNXT_RE_PACING_ALGO_THRESHOLD;
640  	rdev->pacing.dbq_pacing_time = BNXT_RE_DBR_PACING_TIME;
641  	rdev->pacing.dbr_def_do_pacing = BNXT_RE_DBR_DO_PACING_NO_CONGESTION;
642  	rdev->pacing.do_pacing_save = rdev->pacing.dbr_def_do_pacing;
643  	rdev->qplib_res.pacing_data->fifo_max_depth = BNXT_RE_MAX_FIFO_DEPTH;
644  	rdev->qplib_res.pacing_data->fifo_room_mask = BNXT_RE_DB_FIFO_ROOM_MASK;
645  	rdev->qplib_res.pacing_data->fifo_room_shift = BNXT_RE_DB_FIFO_ROOM_SHIFT;
646  	rdev->qplib_res.pacing_data->grc_reg_offset = rdev->pacing.dbr_db_fifo_reg_off;
647  	bnxt_re_set_default_pacing_data(rdev);
648  	/* Initialize worker for DBR Pacing */
649  	INIT_WORK(&rdev->dbq_fifo_check_work, bnxt_re_db_fifo_check);
650  	INIT_DELAYED_WORK(&rdev->dbq_pacing_work, bnxt_re_pacing_timer_exp);
651  	return 0;
652  }
653  
654  static void bnxt_re_deinitialize_dbr_pacing(struct bnxt_re_dev *rdev)
655  {
656  	cancel_work_sync(&rdev->dbq_fifo_check_work);
657  	cancel_delayed_work_sync(&rdev->dbq_pacing_work);
658  	if (rdev->pacing.dbr_page)
659  		free_page((u64)rdev->pacing.dbr_page);
660  
661  	rdev->pacing.dbr_page = NULL;
662  	rdev->pacing.dbr_pacing = false;
663  }
664  
665  static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev,
666  				 u16 fw_ring_id, int type)
667  {
668  	struct bnxt_en_dev *en_dev;
669  	struct hwrm_ring_free_input req = {};
670  	struct hwrm_ring_free_output resp;
671  	struct bnxt_fw_msg fw_msg = {};
672  	int rc = -EINVAL;
673  
674  	if (!rdev)
675  		return rc;
676  
677  	en_dev = rdev->en_dev;
678  
679  	if (!en_dev)
680  		return rc;
681  
682  	if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags))
683  		return 0;
684  
685  	bnxt_re_init_hwrm_hdr((void *)&req, HWRM_RING_FREE);
686  	req.ring_type = type;
687  	req.ring_id = cpu_to_le16(fw_ring_id);
688  	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
689  			    sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
690  	rc = bnxt_send_msg(en_dev, &fw_msg);
691  	if (rc)
692  		ibdev_err(&rdev->ibdev, "Failed to free HW ring:%d :%#x",
693  			  req.ring_id, rc);
694  	return rc;
695  }
696  
697  static int bnxt_re_net_ring_alloc(struct bnxt_re_dev *rdev,
698  				  struct bnxt_re_ring_attr *ring_attr,
699  				  u16 *fw_ring_id)
700  {
701  	struct bnxt_en_dev *en_dev = rdev->en_dev;
702  	struct hwrm_ring_alloc_input req = {};
703  	struct hwrm_ring_alloc_output resp;
704  	struct bnxt_fw_msg fw_msg = {};
705  	int rc = -EINVAL;
706  
707  	if (!en_dev)
708  		return rc;
709  
710  	bnxt_re_init_hwrm_hdr((void *)&req, HWRM_RING_ALLOC);
711  	req.enables = 0;
712  	req.page_tbl_addr =  cpu_to_le64(ring_attr->dma_arr[0]);
713  	if (ring_attr->pages > 1) {
714  		/* Page size is in log2 units */
715  		req.page_size = BNXT_PAGE_SHIFT;
716  		req.page_tbl_depth = 1;
717  	}
718  	req.fbo = 0;
719  	/* Association of ring index with doorbell index and MSIX number */
720  	req.logical_id = cpu_to_le16(ring_attr->lrid);
721  	req.length = cpu_to_le32(ring_attr->depth + 1);
722  	req.ring_type = ring_attr->type;
723  	req.int_mode = ring_attr->mode;
724  	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
725  			    sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
726  	rc = bnxt_send_msg(en_dev, &fw_msg);
727  	if (!rc)
728  		*fw_ring_id = le16_to_cpu(resp.ring_id);
729  
730  	return rc;
731  }
732  
733  static int bnxt_re_net_stats_ctx_free(struct bnxt_re_dev *rdev,
734  				      u32 fw_stats_ctx_id)
735  {
736  	struct bnxt_en_dev *en_dev = rdev->en_dev;
737  	struct hwrm_stat_ctx_free_input req = {};
738  	struct hwrm_stat_ctx_free_output resp = {};
739  	struct bnxt_fw_msg fw_msg = {};
740  	int rc = -EINVAL;
741  
742  	if (!en_dev)
743  		return rc;
744  
745  	if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags))
746  		return 0;
747  
748  	bnxt_re_init_hwrm_hdr((void *)&req, HWRM_STAT_CTX_FREE);
749  	req.stat_ctx_id = cpu_to_le32(fw_stats_ctx_id);
750  	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
751  			    sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
752  	rc = bnxt_send_msg(en_dev, &fw_msg);
753  	if (rc)
754  		ibdev_err(&rdev->ibdev, "Failed to free HW stats context %#x",
755  			  rc);
756  
757  	return rc;
758  }
759  
760  static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev,
761  				       dma_addr_t dma_map,
762  				       u32 *fw_stats_ctx_id)
763  {
764  	struct bnxt_qplib_chip_ctx *chip_ctx = rdev->chip_ctx;
765  	struct hwrm_stat_ctx_alloc_output resp = {};
766  	struct hwrm_stat_ctx_alloc_input req = {};
767  	struct bnxt_en_dev *en_dev = rdev->en_dev;
768  	struct bnxt_fw_msg fw_msg = {};
769  	int rc = -EINVAL;
770  
771  	*fw_stats_ctx_id = INVALID_STATS_CTX_ID;
772  
773  	if (!en_dev)
774  		return rc;
775  
776  	bnxt_re_init_hwrm_hdr((void *)&req, HWRM_STAT_CTX_ALLOC);
777  	req.update_period_ms = cpu_to_le32(1000);
778  	req.stats_dma_addr = cpu_to_le64(dma_map);
779  	req.stats_dma_length = cpu_to_le16(chip_ctx->hw_stats_size);
780  	req.stat_ctx_flags = STAT_CTX_ALLOC_REQ_STAT_CTX_FLAGS_ROCE;
781  	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
782  			    sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
783  	rc = bnxt_send_msg(en_dev, &fw_msg);
784  	if (!rc)
785  		*fw_stats_ctx_id = le32_to_cpu(resp.stat_ctx_id);
786  
787  	return rc;
788  }
789  
790  static void bnxt_re_disassociate_ucontext(struct ib_ucontext *ibcontext)
791  {
792  }
793  
794  /* Device */
795  
796  static struct bnxt_re_dev *bnxt_re_from_netdev(struct net_device *netdev)
797  {
798  	struct ib_device *ibdev =
799  		ib_device_get_by_netdev(netdev, RDMA_DRIVER_BNXT_RE);
800  	if (!ibdev)
801  		return NULL;
802  
803  	return container_of(ibdev, struct bnxt_re_dev, ibdev);
804  }
805  
806  static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
807  			   char *buf)
808  {
809  	struct bnxt_re_dev *rdev =
810  		rdma_device_to_drv_device(device, struct bnxt_re_dev, ibdev);
811  
812  	return sysfs_emit(buf, "0x%x\n", rdev->en_dev->pdev->vendor);
813  }
814  static DEVICE_ATTR_RO(hw_rev);
815  
816  static ssize_t hca_type_show(struct device *device,
817  			     struct device_attribute *attr, char *buf)
818  {
819  	struct bnxt_re_dev *rdev =
820  		rdma_device_to_drv_device(device, struct bnxt_re_dev, ibdev);
821  
822  	return sysfs_emit(buf, "%s\n", rdev->ibdev.node_desc);
823  }
824  static DEVICE_ATTR_RO(hca_type);
825  
826  static struct attribute *bnxt_re_attributes[] = {
827  	&dev_attr_hw_rev.attr,
828  	&dev_attr_hca_type.attr,
829  	NULL
830  };
831  
832  static const struct attribute_group bnxt_re_dev_attr_group = {
833  	.attrs = bnxt_re_attributes,
834  };
835  
836  static const struct ib_device_ops bnxt_re_dev_ops = {
837  	.owner = THIS_MODULE,
838  	.driver_id = RDMA_DRIVER_BNXT_RE,
839  	.uverbs_abi_ver = BNXT_RE_ABI_VERSION,
840  
841  	.add_gid = bnxt_re_add_gid,
842  	.alloc_hw_port_stats = bnxt_re_ib_alloc_hw_port_stats,
843  	.alloc_mr = bnxt_re_alloc_mr,
844  	.alloc_pd = bnxt_re_alloc_pd,
845  	.alloc_ucontext = bnxt_re_alloc_ucontext,
846  	.create_ah = bnxt_re_create_ah,
847  	.create_cq = bnxt_re_create_cq,
848  	.create_qp = bnxt_re_create_qp,
849  	.create_srq = bnxt_re_create_srq,
850  	.create_user_ah = bnxt_re_create_ah,
851  	.dealloc_pd = bnxt_re_dealloc_pd,
852  	.dealloc_ucontext = bnxt_re_dealloc_ucontext,
853  	.del_gid = bnxt_re_del_gid,
854  	.dereg_mr = bnxt_re_dereg_mr,
855  	.destroy_ah = bnxt_re_destroy_ah,
856  	.destroy_cq = bnxt_re_destroy_cq,
857  	.destroy_qp = bnxt_re_destroy_qp,
858  	.destroy_srq = bnxt_re_destroy_srq,
859  	.device_group = &bnxt_re_dev_attr_group,
860  	.disassociate_ucontext = bnxt_re_disassociate_ucontext,
861  	.get_dev_fw_str = bnxt_re_query_fw_str,
862  	.get_dma_mr = bnxt_re_get_dma_mr,
863  	.get_hw_stats = bnxt_re_ib_get_hw_stats,
864  	.get_link_layer = bnxt_re_get_link_layer,
865  	.get_port_immutable = bnxt_re_get_port_immutable,
866  	.map_mr_sg = bnxt_re_map_mr_sg,
867  	.mmap = bnxt_re_mmap,
868  	.mmap_free = bnxt_re_mmap_free,
869  	.modify_qp = bnxt_re_modify_qp,
870  	.modify_srq = bnxt_re_modify_srq,
871  	.poll_cq = bnxt_re_poll_cq,
872  	.post_recv = bnxt_re_post_recv,
873  	.post_send = bnxt_re_post_send,
874  	.post_srq_recv = bnxt_re_post_srq_recv,
875  	.query_ah = bnxt_re_query_ah,
876  	.query_device = bnxt_re_query_device,
877  	.query_pkey = bnxt_re_query_pkey,
878  	.query_port = bnxt_re_query_port,
879  	.query_qp = bnxt_re_query_qp,
880  	.query_srq = bnxt_re_query_srq,
881  	.reg_user_mr = bnxt_re_reg_user_mr,
882  	.reg_user_mr_dmabuf = bnxt_re_reg_user_mr_dmabuf,
883  	.req_notify_cq = bnxt_re_req_notify_cq,
884  	.resize_cq = bnxt_re_resize_cq,
885  	INIT_RDMA_OBJ_SIZE(ib_ah, bnxt_re_ah, ib_ah),
886  	INIT_RDMA_OBJ_SIZE(ib_cq, bnxt_re_cq, ib_cq),
887  	INIT_RDMA_OBJ_SIZE(ib_pd, bnxt_re_pd, ib_pd),
888  	INIT_RDMA_OBJ_SIZE(ib_qp, bnxt_re_qp, ib_qp),
889  	INIT_RDMA_OBJ_SIZE(ib_srq, bnxt_re_srq, ib_srq),
890  	INIT_RDMA_OBJ_SIZE(ib_ucontext, bnxt_re_ucontext, ib_uctx),
891  };
892  
893  static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
894  {
895  	struct ib_device *ibdev = &rdev->ibdev;
896  	int ret;
897  
898  	/* ib device init */
899  	ibdev->node_type = RDMA_NODE_IB_CA;
900  	strscpy(ibdev->node_desc, BNXT_RE_DESC " HCA",
901  		strlen(BNXT_RE_DESC) + 5);
902  	ibdev->phys_port_cnt = 1;
903  
904  	addrconf_addr_eui48((u8 *)&ibdev->node_guid, rdev->netdev->dev_addr);
905  
906  	ibdev->num_comp_vectors	= rdev->num_msix - 1;
907  	ibdev->dev.parent = &rdev->en_dev->pdev->dev;
908  	ibdev->local_dma_lkey = BNXT_QPLIB_RSVD_LKEY;
909  
910  	if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS))
911  		ibdev->driver_def = bnxt_re_uapi_defs;
912  
913  	ib_set_device_ops(ibdev, &bnxt_re_dev_ops);
914  	ret = ib_device_set_netdev(&rdev->ibdev, rdev->netdev, 1);
915  	if (ret)
916  		return ret;
917  
918  	dma_set_max_seg_size(&rdev->en_dev->pdev->dev, UINT_MAX);
919  	ibdev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ);
920  	return ib_register_device(ibdev, "bnxt_re%d", &rdev->en_dev->pdev->dev);
921  }
922  
923  static struct bnxt_re_dev *bnxt_re_dev_add(struct bnxt_aux_priv *aux_priv,
924  					   struct bnxt_en_dev *en_dev)
925  {
926  	struct bnxt_re_dev *rdev;
927  
928  	/* Allocate bnxt_re_dev instance here */
929  	rdev = ib_alloc_device(bnxt_re_dev, ibdev);
930  	if (!rdev) {
931  		ibdev_err(NULL, "%s: bnxt_re_dev allocation failure!",
932  			  ROCE_DRV_MODULE_NAME);
933  		return NULL;
934  	}
935  	/* Default values */
936  	rdev->nb.notifier_call = NULL;
937  	rdev->netdev = en_dev->net;
938  	rdev->en_dev = en_dev;
939  	rdev->id = rdev->en_dev->pdev->devfn;
940  	INIT_LIST_HEAD(&rdev->qp_list);
941  	mutex_init(&rdev->qp_lock);
942  	mutex_init(&rdev->pacing.dbq_lock);
943  	atomic_set(&rdev->stats.res.qp_count, 0);
944  	atomic_set(&rdev->stats.res.cq_count, 0);
945  	atomic_set(&rdev->stats.res.srq_count, 0);
946  	atomic_set(&rdev->stats.res.mr_count, 0);
947  	atomic_set(&rdev->stats.res.mw_count, 0);
948  	atomic_set(&rdev->stats.res.ah_count, 0);
949  	atomic_set(&rdev->stats.res.pd_count, 0);
950  	rdev->cosq[0] = 0xFFFF;
951  	rdev->cosq[1] = 0xFFFF;
952  
953  	return rdev;
954  }
955  
956  static int bnxt_re_handle_unaffi_async_event(struct creq_func_event
957  					     *unaffi_async)
958  {
959  	switch (unaffi_async->event) {
960  	case CREQ_FUNC_EVENT_EVENT_TX_WQE_ERROR:
961  		break;
962  	case CREQ_FUNC_EVENT_EVENT_TX_DATA_ERROR:
963  		break;
964  	case CREQ_FUNC_EVENT_EVENT_RX_WQE_ERROR:
965  		break;
966  	case CREQ_FUNC_EVENT_EVENT_RX_DATA_ERROR:
967  		break;
968  	case CREQ_FUNC_EVENT_EVENT_CQ_ERROR:
969  		break;
970  	case CREQ_FUNC_EVENT_EVENT_TQM_ERROR:
971  		break;
972  	case CREQ_FUNC_EVENT_EVENT_CFCQ_ERROR:
973  		break;
974  	case CREQ_FUNC_EVENT_EVENT_CFCS_ERROR:
975  		break;
976  	case CREQ_FUNC_EVENT_EVENT_CFCC_ERROR:
977  		break;
978  	case CREQ_FUNC_EVENT_EVENT_CFCM_ERROR:
979  		break;
980  	case CREQ_FUNC_EVENT_EVENT_TIM_ERROR:
981  		break;
982  	default:
983  		return -EINVAL;
984  	}
985  	return 0;
986  }
987  
988  static int bnxt_re_handle_qp_async_event(struct creq_qp_event *qp_event,
989  					 struct bnxt_re_qp *qp)
990  {
991  	struct ib_event event = {};
992  	unsigned int flags;
993  
994  	if (qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_ERR &&
995  	    rdma_is_kernel_res(&qp->ib_qp.res)) {
996  		flags = bnxt_re_lock_cqs(qp);
997  		bnxt_qplib_add_flush_qp(&qp->qplib_qp);
998  		bnxt_re_unlock_cqs(qp, flags);
999  	}
1000  
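	/* A QP that is associated with an SRQ and has moved to the error state
	 * must raise IB_EVENT_QP_LAST_WQE_REACHED so the consumer knows this
	 * QP will consume no more WQEs from the SRQ.
	 */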
1001  	if (qp->qplib_qp.srq) {
1002  		event.device = &qp->rdev->ibdev;
1003  		event.element.qp = &qp->ib_qp;
1004  		event.event = IB_EVENT_QP_LAST_WQE_REACHED;
1005  	}
1006  
1007  	if (event.device && qp->ib_qp.event_handler)
1008  		qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);
1009  
1010  	return 0;
1011  }
1012  
1013  static int bnxt_re_handle_affi_async_event(struct creq_qp_event *affi_async,
1014  					   void *obj)
1015  {
1016  	int rc = 0;
1017  	u8 event;
1018  
1019  	if (!obj)
1020  		return rc; /* QP was already dead, still return success */
1021  
1022  	event = affi_async->event;
1023  	if (event == CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION) {
1024  		struct bnxt_qplib_qp *lib_qp = obj;
1025  		struct bnxt_re_qp *qp = container_of(lib_qp, struct bnxt_re_qp,
1026  						     qplib_qp);
1027  		rc = bnxt_re_handle_qp_async_event(affi_async, qp);
1028  	}
1029  	return rc;
1030  }
1031  
1032  static int bnxt_re_aeq_handler(struct bnxt_qplib_rcfw *rcfw,
1033  			       void *aeqe, void *obj)
1034  {
1035  	struct creq_qp_event *affi_async;
1036  	struct creq_func_event *unaffi_async;
1037  	u8 type;
1038  	int rc;
1039  
1040  	type = ((struct creq_base *)aeqe)->type;
1041  	if (type == CREQ_BASE_TYPE_FUNC_EVENT) {
1042  		unaffi_async = aeqe;
1043  		rc = bnxt_re_handle_unaffi_async_event(unaffi_async);
1044  	} else {
1045  		affi_async = aeqe;
1046  		rc = bnxt_re_handle_affi_async_event(affi_async, obj);
1047  	}
1048  
1049  	return rc;
1050  }
1051  
1052  static int bnxt_re_srqn_handler(struct bnxt_qplib_nq *nq,
1053  				struct bnxt_qplib_srq *handle, u8 event)
1054  {
1055  	struct bnxt_re_srq *srq = container_of(handle, struct bnxt_re_srq,
1056  					       qplib_srq);
1057  	struct ib_event ib_event;
1058  
1059  	ib_event.device = &srq->rdev->ibdev;
1060  	ib_event.element.srq = &srq->ib_srq;
1061  	if (event == NQ_SRQ_EVENT_EVENT_SRQ_THRESHOLD_EVENT)
1062  		ib_event.event = IB_EVENT_SRQ_LIMIT_REACHED;
1063  	else
1064  		ib_event.event = IB_EVENT_SRQ_ERR;
1065  
1066  	if (srq->ib_srq.event_handler) {
1067  		/* Lock event_handler? */
1068  		(*srq->ib_srq.event_handler)(&ib_event,
1069  					     srq->ib_srq.srq_context);
1070  	}
1071  	return 0;
1072  }
1073  
1074  static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq,
1075  			       struct bnxt_qplib_cq *handle)
1076  {
1077  	struct bnxt_re_cq *cq = container_of(handle, struct bnxt_re_cq,
1078  					     qplib_cq);
1079  
1080  	if (cq->ib_cq.comp_handler) {
1081  		/* Lock comp_handler? */
1082  		(*cq->ib_cq.comp_handler)(&cq->ib_cq, cq->ib_cq.cq_context);
1083  	}
1084  
1085  	return 0;
1086  }
1087  
1088  static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev)
1089  {
1090  	int i;
1091  
1092  	for (i = 1; i < rdev->num_msix; i++)
1093  		bnxt_qplib_disable_nq(&rdev->nq[i - 1]);
1094  
1095  	if (rdev->qplib_res.rcfw)
1096  		bnxt_qplib_cleanup_res(&rdev->qplib_res);
1097  }
1098  
1099  static int bnxt_re_init_res(struct bnxt_re_dev *rdev)
1100  {
1101  	int num_vec_enabled = 0;
1102  	int rc = 0, i;
1103  	u32 db_offt;
1104  
1105  	bnxt_qplib_init_res(&rdev->qplib_res);
1106  
1107  	for (i = 1; i < rdev->num_msix ; i++) {
1108  		db_offt = rdev->en_dev->msix_entries[i].db_offset;
1109  		rc = bnxt_qplib_enable_nq(rdev->en_dev->pdev, &rdev->nq[i - 1],
1110  					  i - 1, rdev->en_dev->msix_entries[i].vector,
1111  					  db_offt, &bnxt_re_cqn_handler,
1112  					  &bnxt_re_srqn_handler);
1113  		if (rc) {
1114  			ibdev_err(&rdev->ibdev,
1115  				  "Failed to enable NQ with rc = 0x%x", rc);
1116  			goto fail;
1117  		}
1118  		num_vec_enabled++;
1119  	}
1120  	return 0;
1121  fail:
1122  	for (i = num_vec_enabled; i >= 0; i--)
1123  		bnxt_qplib_disable_nq(&rdev->nq[i]);
1124  	return rc;
1125  }
1126  
1127  static void bnxt_re_free_nq_res(struct bnxt_re_dev *rdev)
1128  {
1129  	u8 type;
1130  	int i;
1131  
1132  	for (i = 0; i < rdev->num_msix - 1; i++) {
1133  		type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
1134  		bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id, type);
1135  		bnxt_qplib_free_nq(&rdev->nq[i]);
1136  		rdev->nq[i].res = NULL;
1137  	}
1138  }
1139  
1140  static void bnxt_re_free_res(struct bnxt_re_dev *rdev)
1141  {
1142  	bnxt_re_free_nq_res(rdev);
1143  
1144  	if (rdev->qplib_res.dpi_tbl.max) {
1145  		bnxt_qplib_dealloc_dpi(&rdev->qplib_res,
1146  				       &rdev->dpi_privileged);
1147  	}
1148  	if (rdev->qplib_res.rcfw) {
1149  		bnxt_qplib_free_res(&rdev->qplib_res);
1150  		rdev->qplib_res.rcfw = NULL;
1151  	}
1152  }
1153  
1154  static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
1155  {
1156  	struct bnxt_re_ring_attr rattr = {};
1157  	int num_vec_created = 0;
1158  	int rc, i;
1159  	u8 type;
1160  
1161  	/* Configure and allocate resources for qplib */
1162  	rdev->qplib_res.rcfw = &rdev->rcfw;
1163  	rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr);
1164  	if (rc)
1165  		goto fail;
1166  
1167  	rc = bnxt_qplib_alloc_res(&rdev->qplib_res, rdev->en_dev->pdev,
1168  				  rdev->netdev, &rdev->dev_attr);
1169  	if (rc)
1170  		goto fail;
1171  
1172  	rc = bnxt_qplib_alloc_dpi(&rdev->qplib_res,
1173  				  &rdev->dpi_privileged,
1174  				  rdev, BNXT_QPLIB_DPI_TYPE_KERNEL);
1175  	if (rc)
1176  		goto dealloc_res;
1177  
1178  	for (i = 0; i < rdev->num_msix - 1; i++) {
1179  		struct bnxt_qplib_nq *nq;
1180  
1181  		nq = &rdev->nq[i];
1182  		nq->hwq.max_elements = BNXT_QPLIB_NQE_MAX_CNT;
1183  		rc = bnxt_qplib_alloc_nq(&rdev->qplib_res, &rdev->nq[i]);
1184  		if (rc) {
1185  			ibdev_err(&rdev->ibdev, "Alloc Failed NQ%d rc:%#x",
1186  				  i, rc);
1187  			goto free_nq;
1188  		}
1189  		type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
1190  		rattr.dma_arr = nq->hwq.pbl[PBL_LVL_0].pg_map_arr;
1191  		rattr.pages = nq->hwq.pbl[rdev->nq[i].hwq.level].pg_count;
1192  		rattr.type = type;
1193  		rattr.mode = RING_ALLOC_REQ_INT_MODE_MSIX;
1194  		rattr.depth = BNXT_QPLIB_NQE_MAX_CNT - 1;
1195  		rattr.lrid = rdev->en_dev->msix_entries[i + 1].ring_idx;
1196  		rc = bnxt_re_net_ring_alloc(rdev, &rattr, &nq->ring_id);
1197  		if (rc) {
1198  			ibdev_err(&rdev->ibdev,
1199  				  "Failed to allocate NQ fw id with rc = 0x%x",
1200  				  rc);
1201  			bnxt_qplib_free_nq(&rdev->nq[i]);
1202  			goto free_nq;
1203  		}
1204  		num_vec_created++;
1205  	}
1206  	return 0;
1207  free_nq:
1208  	for (i = num_vec_created - 1; i >= 0; i--) {
1209  		type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
1210  		bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id, type);
1211  		bnxt_qplib_free_nq(&rdev->nq[i]);
1212  	}
1213  	bnxt_qplib_dealloc_dpi(&rdev->qplib_res,
1214  			       &rdev->dpi_privileged);
1215  dealloc_res:
1216  	bnxt_qplib_free_res(&rdev->qplib_res);
1217  
1218  fail:
1219  	rdev->qplib_res.rcfw = NULL;
1220  	return rc;
1221  }
1222  
1223  static void bnxt_re_dispatch_event(struct ib_device *ibdev, struct ib_qp *qp,
1224  				   u8 port_num, enum ib_event_type event)
1225  {
1226  	struct ib_event ib_event;
1227  
1228  	ib_event.device = ibdev;
1229  	if (qp) {
1230  		ib_event.element.qp = qp;
1231  		ib_event.event = event;
1232  		if (qp->event_handler)
1233  			qp->event_handler(&ib_event, qp->qp_context);
1234  
1235  	} else {
1236  		ib_event.element.port_num = port_num;
1237  		ib_event.event = event;
1238  		ib_dispatch_event(&ib_event);
1239  	}
1240  }
1241  
1242  static bool bnxt_re_is_qp1_or_shadow_qp(struct bnxt_re_dev *rdev,
1243  					struct bnxt_re_qp *qp)
1244  {
1245  	return (qp->ib_qp.qp_type == IB_QPT_GSI) ||
1246  	       (qp == rdev->gsi_ctx.gsi_sqp);
1247  }
1248  
1249  static void bnxt_re_dev_stop(struct bnxt_re_dev *rdev)
1250  {
1251  	int mask = IB_QP_STATE;
1252  	struct ib_qp_attr qp_attr;
1253  	struct bnxt_re_qp *qp;
1254  
1255  	qp_attr.qp_state = IB_QPS_ERR;
1256  	mutex_lock(&rdev->qp_lock);
1257  	list_for_each_entry(qp, &rdev->qp_list, list) {
1258  		/* Modify the state of all QPs except QP1/Shadow QP */
1259  		if (!bnxt_re_is_qp1_or_shadow_qp(rdev, qp)) {
1260  			if (qp->qplib_qp.state !=
1261  			    CMDQ_MODIFY_QP_NEW_STATE_RESET &&
1262  			    qp->qplib_qp.state !=
1263  			    CMDQ_MODIFY_QP_NEW_STATE_ERR) {
1264  				bnxt_re_dispatch_event(&rdev->ibdev, &qp->ib_qp,
1265  						       1, IB_EVENT_QP_FATAL);
1266  				bnxt_re_modify_qp(&qp->ib_qp, &qp_attr, mask,
1267  						  NULL);
1268  			}
1269  		}
1270  	}
1271  	mutex_unlock(&rdev->qp_lock);
1272  }
1273  
1274  static int bnxt_re_update_gid(struct bnxt_re_dev *rdev)
1275  {
1276  	struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl;
1277  	struct bnxt_qplib_gid gid;
1278  	u16 gid_idx, index;
1279  	int rc = 0;
1280  
1281  	if (!ib_device_try_get(&rdev->ibdev))
1282  		return 0;
1283  
1284  	for (index = 0; index < sgid_tbl->active; index++) {
1285  		gid_idx = sgid_tbl->hw_id[index];
1286  
1287  		if (!memcmp(&sgid_tbl->tbl[index], &bnxt_qplib_gid_zero,
1288  			    sizeof(bnxt_qplib_gid_zero)))
1289  			continue;
1290  		/* Need to modify the VLAN enable setting of non-VLAN GIDs only,
1291  		 * as the setting is already done for VLAN GIDs while adding a GID.
1292  		 */
1293  		if (sgid_tbl->vlan[index])
1294  			continue;
1295  
1296  		memcpy(&gid, &sgid_tbl->tbl[index], sizeof(gid));
1297  
1298  		rc = bnxt_qplib_update_sgid(sgid_tbl, &gid, gid_idx,
1299  					    rdev->qplib_res.netdev->dev_addr);
1300  	}
1301  
1302  	ib_device_put(&rdev->ibdev);
1303  	return rc;
1304  }
1305  
1306  static u32 bnxt_re_get_priority_mask(struct bnxt_re_dev *rdev)
1307  {
1308  	u32 prio_map = 0, tmp_map = 0;
1309  	struct net_device *netdev;
1310  	struct dcb_app app = {};
1311  
1312  	netdev = rdev->netdev;
1313  
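	/* Merge the 802.1Qaz APP-table priorities registered for the RoCE v1
	 * ethertype and for the RoCE v2 UDP destination port into one mask.
	 */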
1314  	app.selector = IEEE_8021QAZ_APP_SEL_ETHERTYPE;
1315  	app.protocol = ETH_P_IBOE;
1316  	tmp_map = dcb_ieee_getapp_mask(netdev, &app);
1317  	prio_map = tmp_map;
1318  
1319  	app.selector = IEEE_8021QAZ_APP_SEL_DGRAM;
1320  	app.protocol = ROCE_V2_UDP_DPORT;
1321  	tmp_map = dcb_ieee_getapp_mask(netdev, &app);
1322  	prio_map |= tmp_map;
1323  
1324  	return prio_map;
1325  }
1326  
1327  static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev)
1328  {
1329  	u8 prio_map = 0;
1330  
1331  	/* Get priority for roce */
1332  	prio_map = bnxt_re_get_priority_mask(rdev);
1333  
1334  	if (prio_map == rdev->cur_prio_map)
1335  		return 0;
1336  	rdev->cur_prio_map = prio_map;
1337  	/* Actual priorities are not programmed as they are already
1338  	 * done by L2 driver; just enable or disable priority vlan tagging
1339  	 */
1340  	if ((prio_map == 0 && rdev->qplib_res.prio) ||
1341  	    (prio_map != 0 && !rdev->qplib_res.prio)) {
1342  		rdev->qplib_res.prio = prio_map;
1343  		bnxt_re_update_gid(rdev);
1344  	}
1345  
1346  	return 0;
1347  }
1348  
1349  static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev)
1350  {
1351  	struct bnxt_en_dev *en_dev = rdev->en_dev;
1352  	struct hwrm_ver_get_output resp = {};
1353  	struct hwrm_ver_get_input req = {};
1354  	struct bnxt_qplib_chip_ctx *cctx;
1355  	struct bnxt_fw_msg fw_msg = {};
1356  	int rc;
1357  
1358  	bnxt_re_init_hwrm_hdr((void *)&req, HWRM_VER_GET);
1359  	req.hwrm_intf_maj = HWRM_VERSION_MAJOR;
1360  	req.hwrm_intf_min = HWRM_VERSION_MINOR;
1361  	req.hwrm_intf_upd = HWRM_VERSION_UPDATE;
1362  	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
1363  			    sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
1364  	rc = bnxt_send_msg(en_dev, &fw_msg);
1365  	if (rc) {
1366  		ibdev_err(&rdev->ibdev, "Failed to query HW version, rc = 0x%x",
1367  			  rc);
1368  		return;
1369  	}
1370  
1371  	cctx = rdev->chip_ctx;
1372  	cctx->hwrm_intf_ver =
1373  		(u64)le16_to_cpu(resp.hwrm_intf_major) << 48 |
1374  		(u64)le16_to_cpu(resp.hwrm_intf_minor) << 32 |
1375  		(u64)le16_to_cpu(resp.hwrm_intf_build) << 16 |
1376  		le16_to_cpu(resp.hwrm_intf_patch);
1377  
1378  	cctx->hwrm_cmd_max_timeout = le16_to_cpu(resp.max_req_timeout);
1379  
1380  	if (!cctx->hwrm_cmd_max_timeout)
1381  		cctx->hwrm_cmd_max_timeout = RCFW_FW_STALL_MAX_TIMEOUT;
1382  }
1383  
1384  static int bnxt_re_ib_init(struct bnxt_re_dev *rdev)
1385  {
1386  	int rc;
1387  	u32 event;
1388  
1389  	/* Register ib dev */
1390  	rc = bnxt_re_register_ib(rdev);
1391  	if (rc) {
1392  		pr_err("Failed to register with IB: %#x\n", rc);
1393  		return rc;
1394  	}
1395  	dev_info(rdev_to_dev(rdev), "Device registered with IB successfully");
1396  	set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags);
1397  
1398  	event = netif_running(rdev->netdev) && netif_carrier_ok(rdev->netdev) ?
1399  		IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
1400  
1401  	bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, event);
1402  
1403  	return rc;
1404  }
1405  
1406  static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev)
1407  {
1408  	u8 type;
1409  	int rc;
1410  
1411  	if (test_and_clear_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags))
1412  		cancel_delayed_work_sync(&rdev->worker);
1413  
1414  	if (test_and_clear_bit(BNXT_RE_FLAG_RESOURCES_INITIALIZED,
1415  			       &rdev->flags))
1416  		bnxt_re_cleanup_res(rdev);
1417  	if (test_and_clear_bit(BNXT_RE_FLAG_RESOURCES_ALLOCATED, &rdev->flags))
1418  		bnxt_re_free_res(rdev);
1419  
1420  	if (test_and_clear_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags)) {
1421  		rc = bnxt_qplib_deinit_rcfw(&rdev->rcfw);
1422  		if (rc)
1423  			ibdev_warn(&rdev->ibdev,
1424  				   "Failed to deinitialize RCFW: %#x", rc);
1425  		bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id);
1426  		bnxt_qplib_free_ctx(&rdev->qplib_res, &rdev->qplib_ctx);
1427  		bnxt_qplib_disable_rcfw_channel(&rdev->rcfw);
1428  		type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
1429  		bnxt_re_net_ring_free(rdev, rdev->rcfw.creq.ring_id, type);
1430  		bnxt_qplib_free_rcfw_channel(&rdev->rcfw);
1431  	}
1432  
1433  	rdev->num_msix = 0;
1434  
1435  	if (rdev->pacing.dbr_pacing)
1436  		bnxt_re_deinitialize_dbr_pacing(rdev);
1437  
1438  	bnxt_re_destroy_chip_ctx(rdev);
1439  	if (test_and_clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags))
1440  		bnxt_unregister_dev(rdev->en_dev);
1441  }
1442  
1443  /* Worker thread for polling periodic events. Now used for QoS programming */
1444  static void bnxt_re_worker(struct work_struct *work)
1445  {
1446  	struct bnxt_re_dev *rdev = container_of(work, struct bnxt_re_dev,
1447  						worker.work);
1448  
1449  	bnxt_re_setup_qos(rdev);
1450  	schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000));
1451  }
1452  
1453  static int bnxt_re_dev_init(struct bnxt_re_dev *rdev)
1454  {
1455  	struct bnxt_re_ring_attr rattr = {};
1456  	struct bnxt_qplib_creq_ctx *creq;
1457  	u32 db_offt;
1458  	int vid;
1459  	u8 type;
1460  	int rc;
1461  
1462  	/* Register a new RoCE device instance with the netdev (L2 driver) */
1463  	rc = bnxt_re_register_netdev(rdev);
1464  	if (rc) {
1465  		ibdev_err(&rdev->ibdev,
1466  			  "Failed to register with netedev: %#x\n", rc);
1467  		return -EINVAL;
1468  	}
1469  	set_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
1470  
1471  	rc = bnxt_re_setup_chip_ctx(rdev);
1472  	if (rc) {
1473  		bnxt_unregister_dev(rdev->en_dev);
1474  		clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
1475  		ibdev_err(&rdev->ibdev, "Failed to get chip context\n");
1476  		return -EINVAL;
1477  	}
1478  
1479  	/* Check whether VF or PF */
1480  	bnxt_re_get_sriov_func_type(rdev);
1481  
1482  	if (!rdev->en_dev->ulp_tbl->msix_requested) {
1483  		ibdev_err(&rdev->ibdev,
1484  			  "Failed to get MSI-X vectors: %#x\n", rc);
1485  		rc = -EINVAL;
1486  		goto fail;
1487  	}
1488  	ibdev_dbg(&rdev->ibdev, "Got %d MSI-X vectors\n",
1489  		  rdev->en_dev->ulp_tbl->msix_requested);
1490  	rdev->num_msix = rdev->en_dev->ulp_tbl->msix_requested;
1491  
1492  	bnxt_re_query_hwrm_intf_version(rdev);
1493  
1494  	/* Establish RCFW Communication Channel to initialize the context
1495  	 * memory for the function and all child VFs
1496  	 */
1497  	rc = bnxt_qplib_alloc_rcfw_channel(&rdev->qplib_res, &rdev->rcfw,
1498  					   &rdev->qplib_ctx,
1499  					   BNXT_RE_MAX_QPC_COUNT);
1500  	if (rc) {
1501  		ibdev_err(&rdev->ibdev,
1502  			  "Failed to allocate RCFW Channel: %#x\n", rc);
1503  		goto fail;
1504  	}
1505  
1506  	type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
1507  	creq = &rdev->rcfw.creq;
1508  	rattr.dma_arr = creq->hwq.pbl[PBL_LVL_0].pg_map_arr;
1509  	rattr.pages = creq->hwq.pbl[creq->hwq.level].pg_count;
1510  	rattr.type = type;
1511  	rattr.mode = RING_ALLOC_REQ_INT_MODE_MSIX;
1512  	rattr.depth = BNXT_QPLIB_CREQE_MAX_CNT - 1;
1513  	rattr.lrid = rdev->en_dev->msix_entries[BNXT_RE_AEQ_IDX].ring_idx;
1514  	rc = bnxt_re_net_ring_alloc(rdev, &rattr, &creq->ring_id);
1515  	if (rc) {
1516  		ibdev_err(&rdev->ibdev, "Failed to allocate CREQ: %#x\n", rc);
1517  		goto free_rcfw;
1518  	}
1519  	db_offt = rdev->en_dev->msix_entries[BNXT_RE_AEQ_IDX].db_offset;
1520  	vid = rdev->en_dev->msix_entries[BNXT_RE_AEQ_IDX].vector;
1521  	rc = bnxt_qplib_enable_rcfw_channel(&rdev->rcfw,
1522  					    vid, db_offt,
1523  					    &bnxt_re_aeq_handler);
1524  	if (rc) {
1525  		ibdev_err(&rdev->ibdev, "Failed to enable RCFW channel: %#x\n",
1526  			  rc);
1527  		goto free_ring;
1528  	}
1529  
1530  	if (bnxt_qplib_dbr_pacing_en(rdev->chip_ctx)) {
1531  		rc = bnxt_re_initialize_dbr_pacing(rdev);
1532  		if (!rc) {
1533  			rdev->pacing.dbr_pacing = true;
1534  		} else {
1535  			ibdev_err(&rdev->ibdev,
1536  				  "DBR pacing disabled with error : %d\n", rc);
1537  			rdev->pacing.dbr_pacing = false;
1538  		}
1539  	}
1540  	rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr);
1541  	if (rc)
1542  		goto disable_rcfw;
1543  
1544  	bnxt_re_set_resource_limits(rdev);
1545  
1546  	rc = bnxt_qplib_alloc_ctx(&rdev->qplib_res, &rdev->qplib_ctx, 0,
1547  				  bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx));
1548  	if (rc) {
1549  		ibdev_err(&rdev->ibdev,
1550  			  "Failed to allocate QPLIB context: %#x\n", rc);
1551  		goto disable_rcfw;
1552  	}
1553  	rc = bnxt_re_net_stats_ctx_alloc(rdev,
1554  					 rdev->qplib_ctx.stats.dma_map,
1555  					 &rdev->qplib_ctx.stats.fw_id);
1556  	if (rc) {
1557  		ibdev_err(&rdev->ibdev,
1558  			  "Failed to allocate stats context: %#x\n", rc);
1559  		goto free_ctx;
1560  	}
1561  
1562  	rc = bnxt_qplib_init_rcfw(&rdev->rcfw, &rdev->qplib_ctx,
1563  				  rdev->is_virtfn);
1564  	if (rc) {
1565  		ibdev_err(&rdev->ibdev,
1566  			  "Failed to initialize RCFW: %#x\n", rc);
1567  		goto free_sctx;
1568  	}
1569  	set_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags);
1570  
1571  	/* Allocate RoCE resources based on the device capabilities queried above */
1572  	rc = bnxt_re_alloc_res(rdev);
1573  	if (rc) {
1574  		ibdev_err(&rdev->ibdev,
1575  			  "Failed to allocate resources: %#x\n", rc);
1576  		goto fail;
1577  	}
1578  	set_bit(BNXT_RE_FLAG_RESOURCES_ALLOCATED, &rdev->flags);
1579  	rc = bnxt_re_init_res(rdev);
1580  	if (rc) {
1581  		ibdev_err(&rdev->ibdev,
1582  			  "Failed to initialize resources: %#x\n", rc);
1583  		goto fail;
1584  	}
1585  
1586  	set_bit(BNXT_RE_FLAG_RESOURCES_INITIALIZED, &rdev->flags);
1587  
1588  	if (!rdev->is_virtfn) {
1589  		rc = bnxt_re_setup_qos(rdev);
1590  		if (rc)
1591  			ibdev_info(&rdev->ibdev,
1592  				   "RoCE priority not yet configured\n");
1593  
1594  		INIT_DELAYED_WORK(&rdev->worker, bnxt_re_worker);
1595  		set_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags);
1596  		schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000));
1597  		/*
1598  		 * Use the total VF count since the actual VF count may not be
1599  		 * available at this point.
1600  		 */
1601  		bnxt_re_vf_res_config(rdev);
1602  	}
1603  
1604  	return 0;
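	/*
	 * Error unwinding mirrors the setup order above; the fail label
	 * falls back to a full bnxt_re_dev_uninit().
	 */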
1605  free_sctx:
1606  	bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id);
1607  free_ctx:
1608  	bnxt_qplib_free_ctx(&rdev->qplib_res, &rdev->qplib_ctx);
1609  disable_rcfw:
1610  	bnxt_qplib_disable_rcfw_channel(&rdev->rcfw);
1611  free_ring:
1612  	type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
1613  	bnxt_re_net_ring_free(rdev, rdev->rcfw.creq.ring_id, type);
1614  free_rcfw:
1615  	bnxt_qplib_free_rcfw_channel(&rdev->rcfw);
1616  fail:
1617  	bnxt_re_dev_uninit(rdev);
1618  
1619  	return rc;
1620  }
1621  
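/*
 * Called with bnxt_re_mutex held from bnxt_re_probe(): allocate the rdev,
 * initialize the device (bnxt_re_dev_init) and register it with the IB
 * core (bnxt_re_ib_init), tearing down in reverse order on failure.
 */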
1622  static int bnxt_re_add_device(struct auxiliary_device *adev)
1623  {
1624  	struct bnxt_aux_priv *aux_priv =
1625  		container_of(adev, struct bnxt_aux_priv, aux_dev);
1626  	struct bnxt_en_dev *en_dev;
1627  	struct bnxt_re_dev *rdev;
1628  	int rc;
1629  
1630  	/* en_dev should never be NULL as long as adev and aux_dev are valid. */
1631  	en_dev = aux_priv->edev;
1632  
1633  	rdev = bnxt_re_dev_add(aux_priv, en_dev);
1634  	if (!rdev || !rdev_to_dev(rdev)) {
1635  		rc = -ENOMEM;
1636  		goto exit;
1637  	}
1638  
1639  	rc = bnxt_re_dev_init(rdev);
1640  	if (rc)
1641  		goto re_dev_dealloc;
1642  
1643  	rc = bnxt_re_ib_init(rdev);
1644  	if (rc) {
1645  		pr_err("Failed to register with IB: %s\n",
1646  			aux_priv->aux_dev.name);
1647  		goto re_dev_uninit;
1648  	}
1649  	auxiliary_set_drvdata(adev, rdev);
1650  
1651  	return 0;
1652  
1653  re_dev_uninit:
1654  	bnxt_re_dev_uninit(rdev);
1655  re_dev_dealloc:
1656  	ib_dealloc_device(&rdev->ibdev);
1657  exit:
1658  	return rc;
1659  }
1660  
1661  static void bnxt_re_setup_cc(struct bnxt_re_dev *rdev, bool enable)
1662  {
1663  	struct bnxt_qplib_cc_param cc_param = {};
1664  
1665  	/* Do not enable congestion control on VFs */
1666  	if (rdev->is_virtfn)
1667  		return;
1668  
1669  	/* Currently enabled only for Gen P5/P7 adapters */
1670  	if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))
1671  		return;
1672  
1673  	if (enable) {
1674  		cc_param.enable  = 1;
1675  		cc_param.cc_mode = CMDQ_MODIFY_ROCE_CC_CC_MODE_PROBABILISTIC_CC_MODE;
1676  	}
1677  
1678  	cc_param.mask = (CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_CC_MODE |
1679  			 CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC |
1680  			 CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_ECN);
1681  
1682  	if (bnxt_qplib_modify_cc(&rdev->qplib_res, &cc_param))
1683  		ibdev_err(&rdev->ibdev, "Failed to set up CC, enable = %d\n", enable);
1684  }
1685  
1686  /*
1687   * "Notifier chain callback can be invoked for the same chain from
1688   * different CPUs at the same time".
1689   *
1690   * For cases when the netdev is already present, our call to the
1691   * register_netdevice_notifier() will actually get the rtnl_lock()
1692   * before sending NETDEV_REGISTER and (if up) NETDEV_UP
1693   * events.
1694   *
1695   * But for cases when the netdev is not already present, the notifier
1696   * chain may be invoked from different CPUs simultaneously.
1697   *
1698   * This is protected by the netdev_mutex.
1699   */
1700  static int bnxt_re_netdev_event(struct notifier_block *notifier,
1701  				unsigned long event, void *ptr)
1702  {
1703  	struct net_device *real_dev, *netdev = netdev_notifier_info_to_dev(ptr);
1704  	struct bnxt_re_dev *rdev;
1705  
1706  	real_dev = rdma_vlan_dev_real_dev(netdev);
1707  	if (!real_dev)
1708  		real_dev = netdev;
1709  
1710  	if (real_dev != netdev)
1711  		goto exit;
1712  
1713  	rdev = bnxt_re_from_netdev(real_dev);
1714  	if (!rdev)
1715  		return NOTIFY_DONE;
1716  
1717  
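	/*
	 * Map netdev link events onto IB port events based on the current
	 * carrier state of the real device.
	 */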
1718  	switch (event) {
1719  	case NETDEV_UP:
1720  	case NETDEV_DOWN:
1721  	case NETDEV_CHANGE:
1722  		bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1,
1723  					netif_carrier_ok(real_dev) ?
1724  					IB_EVENT_PORT_ACTIVE :
1725  					IB_EVENT_PORT_ERR);
1726  		break;
1727  	default:
1728  		break;
1729  	}
1730  	ib_device_put(&rdev->ibdev);
1731  exit:
1732  	return NOTIFY_DONE;
1733  }
1734  
1735  #define BNXT_ADEV_NAME "bnxt_en"
1736  
1737  static void bnxt_re_remove(struct auxiliary_device *adev)
1738  {
1739  	struct bnxt_re_dev *rdev = auxiliary_get_drvdata(adev);
1740  
1741  	if (!rdev)
1742  		return;
1743  
1744  	mutex_lock(&bnxt_re_mutex);
1745  	if (rdev->nb.notifier_call) {
1746  		unregister_netdevice_notifier(&rdev->nb);
1747  		rdev->nb.notifier_call = NULL;
1748  	} else {
1749  		/* If the notifier is NULL, cleanup should have already
1750  		 * been done before coming here.
1751  		 */
1752  		goto skip_remove;
1753  	}
1754  	bnxt_re_setup_cc(rdev, false);
1755  	ib_unregister_device(&rdev->ibdev);
1756  	bnxt_re_dev_uninit(rdev);
1757  	ib_dealloc_device(&rdev->ibdev);
1758  skip_remove:
1759  	mutex_unlock(&bnxt_re_mutex);
1760  }
1761  
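/*
 * Auxiliary bus probe: add the RoCE device, hook up the netdevice notifier
 * and enable congestion control, all under bnxt_re_mutex. A notifier
 * registration failure triggers bnxt_re_remove() to undo the add.
 */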
1762  static int bnxt_re_probe(struct auxiliary_device *adev,
1763  			 const struct auxiliary_device_id *id)
1764  {
1765  	struct bnxt_re_dev *rdev;
1766  	int rc;
1767  
1768  	mutex_lock(&bnxt_re_mutex);
1769  
1770  	rc = bnxt_re_add_device(adev);
1771  	if (rc) {
1772  		mutex_unlock(&bnxt_re_mutex);
1773  		return rc;
1774  	}
1775  
1776  	rdev = auxiliary_get_drvdata(adev);
1777  
1778  	rdev->nb.notifier_call = bnxt_re_netdev_event;
1779  	rc = register_netdevice_notifier(&rdev->nb);
1780  	if (rc) {
1781  		rdev->nb.notifier_call = NULL;
1782  		pr_err("%s: Failed to register netdevice notifier\n",
1783  		       ROCE_DRV_MODULE_NAME);
1784  		goto err;
1785  	}
1786  
1787  	bnxt_re_setup_cc(rdev, true);
1788  	mutex_unlock(&bnxt_re_mutex);
1789  	return 0;
1790  
1791  err:
1792  	mutex_unlock(&bnxt_re_mutex);
1793  	bnxt_re_remove(adev);
1794  
1795  	return rc;
1796  }
1797  
1798  static int bnxt_re_suspend(struct auxiliary_device *adev, pm_message_t state)
1799  {
1800  	struct bnxt_re_dev *rdev = auxiliary_get_drvdata(adev);
1801  
1802  	if (!rdev)
1803  		return 0;
1804  
1805  	mutex_lock(&bnxt_re_mutex);
1806  	/* The L2 driver may invoke this callback during a device error/crash
1807  	 * or device reset. The current RoCE driver does not recover the
1808  	 * device in case of error. Handle the error by dispatching fatal
1809  	 * events to all QPs, i.e. by calling bnxt_re_dev_stop, and release
1810  	 * the MSI-X vectors since the L2 driver wants to modify the MSI-X table.
1811  	 */
1812  
1813  	ibdev_info(&rdev->ibdev, "Handle device suspend call");
1814  	/* Check the current device state from bnxt_en_dev and move the
1815  	 * device to detached state if FW_FATAL_COND is set.
1816  	 * This prevents sending further commands to the HW during clean-up,
1817  	 * in case the device is already in error.
1818  	 */
1819  	if (test_bit(BNXT_STATE_FW_FATAL_COND, &rdev->en_dev->en_state))
1820  		set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags);
1821  
1822  	bnxt_re_dev_stop(rdev);
1823  	bnxt_re_stop_irq(rdev);
1824  	/* Move the device state to detached to avoid sending any more
1825  	 * commands to the HW
1826  	 */
1827  	set_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags);
1828  	set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags);
1829  	wake_up_all(&rdev->rcfw.cmdq.waitq);
1830  	mutex_unlock(&bnxt_re_mutex);
1831  
1832  	return 0;
1833  }
1834  
1835  static int bnxt_re_resume(struct auxiliary_device *adev)
1836  {
1837  	struct bnxt_re_dev *rdev = auxiliary_get_drvdata(adev);
1838  
1839  	if (!rdev)
1840  		return 0;
1841  
1842  	mutex_lock(&bnxt_re_mutex);
1843  	/* The L2 driver may invoke this callback during device recovery or
1844  	 * resume after a reset. The current RoCE driver does not recover
1845  	 * the device in case of error, so there is no device state to
1846  	 * restore here; the resume event is only logged before returning
1847  	 * success to the L2 driver.
1848  	 */
1849  
1850  	ibdev_info(&rdev->ibdev, "Handle device resume call");
1851  	mutex_unlock(&bnxt_re_mutex);
1852  
1853  	return 0;
1854  }
1855  
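/* Bind to the "bnxt_en.rdma" auxiliary device exposed by the bnxt_en L2 driver. */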
1856  static const struct auxiliary_device_id bnxt_re_id_table[] = {
1857  	{ .name = BNXT_ADEV_NAME ".rdma", },
1858  	{},
1859  };
1860  
1861  MODULE_DEVICE_TABLE(auxiliary, bnxt_re_id_table);
1862  
1863  static struct auxiliary_driver bnxt_re_driver = {
1864  	.name = "rdma",
1865  	.probe = bnxt_re_probe,
1866  	.remove = bnxt_re_remove,
1867  	.shutdown = bnxt_re_shutdown,
1868  	.suspend = bnxt_re_suspend,
1869  	.resume = bnxt_re_resume,
1870  	.id_table = bnxt_re_id_table,
1871  };
1872  
1873  static int __init bnxt_re_mod_init(void)
1874  {
1875  	int rc;
1876  
1877  	pr_info("%s: %s", ROCE_DRV_MODULE_NAME, version);
1878  	rc = auxiliary_driver_register(&bnxt_re_driver);
1879  	if (rc) {
1880  		pr_err("%s: Failed to register auxiliary driver\n",
1881  			ROCE_DRV_MODULE_NAME);
1882  		return rc;
1883  	}
1884  	return 0;
1885  }
1886  
1887  static void __exit bnxt_re_mod_exit(void)
1888  {
1889  	auxiliary_driver_unregister(&bnxt_re_driver);
1890  }
1891  
1892  module_init(bnxt_re_mod_init);
1893  module_exit(bnxt_re_mod_exit);
1894