1  // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2  /* Copyright (c) 2020 Mellanox Technologies Ltd. */
3  
4  #include <linux/module.h>
5  #include <linux/vdpa.h>
6  #include <linux/vringh.h>
7  #include <uapi/linux/virtio_net.h>
8  #include <uapi/linux/virtio_ids.h>
9  #include <uapi/linux/vdpa.h>
10  #include <linux/virtio_config.h>
11  #include <linux/auxiliary_bus.h>
12  #include <linux/mlx5/cq.h>
13  #include <linux/mlx5/qp.h>
14  #include <linux/mlx5/device.h>
15  #include <linux/mlx5/driver.h>
16  #include <linux/mlx5/vport.h>
17  #include <linux/mlx5/fs.h>
18  #include <linux/mlx5/mlx5_ifc_vdpa.h>
19  #include <linux/mlx5/mpfs.h>
20  #include "mlx5_vdpa.h"
21  #include "mlx5_vnet.h"
22  
23  MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
24  MODULE_DESCRIPTION("Mellanox VDPA driver");
25  MODULE_LICENSE("Dual BSD/GPL");
26  
27  #define VALID_FEATURES_MASK                                                                        \
28  	(BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |                                   \
29  	 BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) |   \
30  	 BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |                             \
31  	 BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \
32  	 BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) |   \
33  	 BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |      \
34  	 BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) |                                 \
35  	 BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) |                      \
36  	 BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) |  \
37  	 BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) |           \
38  	 BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) |                          \
39  	 BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) |      \
40  	 BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV))
41  
42  #define VALID_STATUS_MASK                                                                          \
43  	(VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK |        \
44  	 VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)
45  
46  #define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature)))
47  
48  #define MLX5V_UNTAGGED 0x1000
49  
50  struct mlx5_vdpa_cq_buf {
51  	struct mlx5_frag_buf_ctrl fbc;
52  	struct mlx5_frag_buf frag_buf;
53  	int cqe_size;
54  	int nent;
55  };
56  
57  struct mlx5_vdpa_cq {
58  	struct mlx5_core_cq mcq;
59  	struct mlx5_vdpa_cq_buf buf;
60  	struct mlx5_db db;
61  	int cqe;
62  };
63  
64  struct mlx5_vdpa_umem {
65  	struct mlx5_frag_buf_ctrl fbc;
66  	struct mlx5_frag_buf frag_buf;
67  	int size;
68  	u32 id;
69  };
70  
71  struct mlx5_vdpa_qp {
72  	struct mlx5_core_qp mqp;
73  	struct mlx5_frag_buf frag_buf;
74  	struct mlx5_db db;
75  	u16 head;
76  	bool fw;
77  };
78  
79  struct mlx5_vq_restore_info {
80  	u32 num_ent;
81  	u64 desc_addr;
82  	u64 device_addr;
83  	u64 driver_addr;
84  	u16 avail_index;
85  	u16 used_index;
86  	struct msi_map map;
87  	bool ready;
88  	bool restore;
89  };
90  
91  struct mlx5_vdpa_virtqueue {
92  	bool ready;
93  	u64 desc_addr;
94  	u64 device_addr;
95  	u64 driver_addr;
96  	u32 num_ent;
97  
98  	/* Resources for implementing the notification channel from the device
99  	 * to the driver. fwqp is the firmware end of an RC connection; the
100  	 * other end is vqqp used by the driver. cq is where completions are
101  	 * reported.
102  	 */
103  	struct mlx5_vdpa_cq cq;
104  	struct mlx5_vdpa_qp fwqp;
105  	struct mlx5_vdpa_qp vqqp;
106  
107  	/* umem resources are required for the virtqueue operation. Their use
108  	 * is internal and they must be provided by the driver.
109  	 */
110  	struct mlx5_vdpa_umem umem1;
111  	struct mlx5_vdpa_umem umem2;
112  	struct mlx5_vdpa_umem umem3;
113  
114  	u32 counter_set_id;
115  	bool initialized;
116  	int index;
117  	u32 virtq_id;
118  	struct mlx5_vdpa_net *ndev;
119  	u16 avail_idx;
120  	u16 used_idx;
121  	int fw_state;
122  	struct msi_map map;
123  
124  	/* keep last in the struct */
125  	struct mlx5_vq_restore_info ri;
126  };
127  
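/* A virtqueue index is valid only within the negotiated feature set: without
 * VIRTIO_NET_F_MQ only the first RX/TX pair (indices 0 and 1) exists, plus
 * index 2 for the control VQ when VIRTIO_NET_F_CTRL_VQ was negotiated; with
 * MQ any index up to max_idx is accepted.
 */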
128  static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
129  {
130  	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) {
131  		if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
132  			return idx < 2;
133  		else
134  			return idx < 3;
135  	}
136  
137  	return idx <= mvdev->max_idx;
138  }
139  
140  static void free_resources(struct mlx5_vdpa_net *ndev);
141  static void init_mvqs(struct mlx5_vdpa_net *ndev);
142  static int setup_driver(struct mlx5_vdpa_dev *mvdev);
143  static void teardown_driver(struct mlx5_vdpa_net *ndev);
144  
145  static bool mlx5_vdpa_debug;
146  
147  #define MLX5_LOG_VIO_FLAG(_feature)                                                                \
148  	do {                                                                                       \
149  		if (features & BIT_ULL(_feature))                                                  \
150  			mlx5_vdpa_info(mvdev, "%s\n", #_feature);                                  \
151  	} while (0)
152  
153  #define MLX5_LOG_VIO_STAT(_status)                                                                 \
154  	do {                                                                                       \
155  		if (status & (_status))                                                            \
156  			mlx5_vdpa_info(mvdev, "%s\n", #_status);                                   \
157  	} while (0)
158  
159  /* TODO: cross-endian support */
160  static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
161  {
162  	return virtio_legacy_is_little_endian() ||
163  		(mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
164  }
165  
166  static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val)
167  {
168  	return __virtio16_to_cpu(mlx5_vdpa_is_little_endian(mvdev), val);
169  }
170  
171  static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
172  {
173  	return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
174  }
175  
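/* The control VQ follows the data VQs: index 2 when VIRTIO_NET_F_MQ is not
 * negotiated, otherwise max_vqs.
 */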
176  static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
177  {
178  	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
179  		return 2;
180  
181  	return mvdev->max_vqs;
182  }
183  
184  static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
185  {
186  	return idx == ctrl_vq_idx(mvdev);
187  }
188  
189  static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
190  {
191  	if (status & ~VALID_STATUS_MASK)
192  		mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n",
193  			       status & ~VALID_STATUS_MASK);
194  
195  	if (!mlx5_vdpa_debug)
196  		return;
197  
198  	mlx5_vdpa_info(mvdev, "driver status %s", set ? "set" : "get");
199  	if (set && !status) {
200  		mlx5_vdpa_info(mvdev, "driver resets the device\n");
201  		return;
202  	}
203  
204  	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE);
205  	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER);
206  	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK);
207  	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK);
208  	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET);
209  	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED);
210  }
211  
212  static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set)
213  {
214  	if (features & ~VALID_FEATURES_MASK)
215  		mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n",
216  			       features & ~VALID_FEATURES_MASK);
217  
218  	if (!mlx5_vdpa_debug)
219  		return;
220  
221  	mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads");
222  	if (!features)
223  		mlx5_vdpa_info(mvdev, "all feature bits are cleared\n");
224  
225  	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM);
226  	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM);
227  	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
228  	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU);
229  	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC);
230  	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4);
231  	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6);
232  	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN);
233  	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO);
234  	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4);
235  	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6);
236  	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN);
237  	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO);
238  	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF);
239  	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS);
240  	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ);
241  	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX);
242  	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN);
243  	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA);
244  	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE);
245  	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ);
246  	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR);
247  	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT);
248  	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS);
249  	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT);
250  	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY);
251  	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX);
252  	MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY);
253  	MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT);
254  	MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1);
255  	MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM);
256  	MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED);
257  	MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM);
258  	MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV);
259  }
260  
261  static int create_tis(struct mlx5_vdpa_net *ndev)
262  {
263  	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
264  	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
265  	void *tisc;
266  	int err;
267  
268  	tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
269  	MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn);
270  	err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn);
271  	if (err)
272  		mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err);
273  
274  	return err;
275  }
276  
277  static void destroy_tis(struct mlx5_vdpa_net *ndev)
278  {
279  	mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn);
280  }
281  
282  #define MLX5_VDPA_CQE_SIZE 64
283  #define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE)
284  
285  static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent)
286  {
287  	struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
288  	u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE;
289  	u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE;
290  	int err;
291  
292  	err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf,
293  				       ndev->mvdev.mdev->priv.numa_node);
294  	if (err)
295  		return err;
296  
297  	mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);
298  
299  	buf->cqe_size = MLX5_VDPA_CQE_SIZE;
300  	buf->nent = nent;
301  
302  	return 0;
303  }
304  
305  static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size)
306  {
307  	struct mlx5_frag_buf *frag_buf = &umem->frag_buf;
308  
309  	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf,
310  					ndev->mvdev.mdev->priv.numa_node);
311  }
312  
313  static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf)
314  {
315  	mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf);
316  }
317  
318  static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n)
319  {
320  	return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n);
321  }
322  
323  static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf)
324  {
325  	struct mlx5_cqe64 *cqe64;
326  	void *cqe;
327  	int i;
328  
329  	for (i = 0; i < buf->nent; i++) {
330  		cqe = get_cqe(vcq, i);
331  		cqe64 = cqe;
332  		cqe64->op_own = MLX5_CQE_INVALID << 4;
333  	}
334  }
335  
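/* Return the CQE at consumer index n if it is owned by software, i.e. its
 * opcode is valid and its ownership bit matches the current polarity of the
 * consumer index; otherwise return NULL.
 */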
336  static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n)
337  {
338  	struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1));
339  
340  	if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
341  	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe)))
342  		return cqe64;
343  
344  	return NULL;
345  }
346  
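/* Advance the RQ head by n entries and update the doorbell record so the
 * hardware sees the newly posted receive WQEs.
 */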
347  static void rx_post(struct mlx5_vdpa_qp *vqp, int n)
348  {
349  	vqp->head += n;
350  	vqp->db.db[0] = cpu_to_be32(vqp->head);
351  }
352  
353  static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in,
354  		       struct mlx5_vdpa_virtqueue *mvq, u32 num_ent)
355  {
356  	struct mlx5_vdpa_qp *vqp;
357  	__be64 *pas;
358  	void *qpc;
359  
360  	vqp = fw ? &mvq->fwqp : &mvq->vqqp;
361  	MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid);
362  	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
363  	if (vqp->fw) {
364  		/* Firmware QP is allocated by the driver for the firmware's
365  		 * use, so we can skip part of the params as they will be chosen by the firmware.
366  		 */
367  		qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
368  		MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
369  		MLX5_SET(qpc, qpc, no_sq, 1);
370  		return;
371  	}
372  
373  	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
374  	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
375  	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
376  	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
377  	MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index);
378  	MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
379  	MLX5_SET(qpc, qpc, no_sq, 1);
380  	MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn);
381  	MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent));
382  	MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
383  	pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas);
384  	mlx5_fill_page_frag_array(&vqp->frag_buf, pas);
385  }
386  
387  static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent)
388  {
389  	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev,
390  					num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf,
391  					ndev->mvdev.mdev->priv.numa_node);
392  }
393  
394  static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
395  {
396  	mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf);
397  }
398  
399  static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
400  		     struct mlx5_vdpa_qp *vqp)
401  {
402  	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
403  	int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
404  	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
405  	void *qpc;
406  	void *in;
407  	int err;
408  
409  	if (!vqp->fw) {
410  		vqp = &mvq->vqqp;
411  		err = rq_buf_alloc(ndev, vqp, mvq->num_ent);
412  		if (err)
413  			return err;
414  
415  		err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db);
416  		if (err)
417  			goto err_db;
418  		inlen += vqp->frag_buf.npages * sizeof(__be64);
419  	}
420  
421  	in = kzalloc(inlen, GFP_KERNEL);
422  	if (!in) {
423  		err = -ENOMEM;
424  		goto err_kzalloc;
425  	}
426  
427  	qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent);
428  	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
429  	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
430  	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
431  	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
432  	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
433  	if (!vqp->fw)
434  		MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma);
435  	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
436  	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
437  	kfree(in);
438  	if (err)
439  		goto err_kzalloc;
440  
441  	vqp->mqp.uid = ndev->mvdev.res.uid;
442  	vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);
443  
444  	if (!vqp->fw)
445  		rx_post(vqp, mvq->num_ent);
446  
447  	return 0;
448  
449  err_kzalloc:
450  	if (!vqp->fw)
451  		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
452  err_db:
453  	if (!vqp->fw)
454  		rq_buf_free(ndev, vqp);
455  
456  	return err;
457  }
458  
459  static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
460  {
461  	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
462  
463  	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
464  	MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
465  	MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
466  	if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
467  		mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn);
468  	if (!vqp->fw) {
469  		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
470  		rq_buf_free(ndev, vqp);
471  	}
472  }
473  
474  static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
475  {
476  	return get_sw_cqe(cq, cq->mcq.cons_index);
477  }
478  
479  static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
480  {
481  	struct mlx5_cqe64 *cqe64;
482  
483  	cqe64 = next_cqe_sw(vcq);
484  	if (!cqe64)
485  		return -EAGAIN;
486  
487  	vcq->mcq.cons_index++;
488  	return 0;
489  }
490  
491  static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
492  {
493  	struct mlx5_vdpa_net *ndev = mvq->ndev;
494  	struct vdpa_callback *event_cb;
495  
496  	event_cb = &ndev->event_cbs[mvq->index];
497  	mlx5_cq_set_ci(&mvq->cq.mcq);
498  
499  	/* make sure the CQ consumer update is visible to the hardware before updating
500  	 * RX doorbell record.
501  	 */
502  	dma_wmb();
503  	rx_post(&mvq->vqqp, num);
504  	if (event_cb->callback)
505  		event_cb->callback(event_cb->private);
506  }
507  
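/* CQ completion handler: drain software-owned CQEs, periodically repost RX
 * buffers and notify the vdpa core, then rearm the CQ for the next event.
 */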
508  static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
509  {
510  	struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
511  	struct mlx5_vdpa_net *ndev = mvq->ndev;
512  	void __iomem *uar_page = ndev->mvdev.res.uar->map;
513  	int num = 0;
514  
515  	while (!mlx5_vdpa_poll_one(&mvq->cq)) {
516  		num++;
517  		if (num > mvq->num_ent / 2) {
518  			/* If completions keep coming while we poll, we want to
519  			 * let the hardware know that we consumed them by
520  			 * updating the doorbell record.  We also let vdpa core
521  			 * know about this so it passes it on to the virtio
522  			 * driver in the guest.
523  			 */
524  			mlx5_vdpa_handle_completions(mvq, num);
525  			num = 0;
526  		}
527  	}
528  
529  	if (num)
530  		mlx5_vdpa_handle_completions(mvq, num);
531  
532  	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
533  }
534  
535  static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
536  {
537  	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
538  	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
539  	void __iomem *uar_page = ndev->mvdev.res.uar->map;
540  	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
541  	struct mlx5_vdpa_cq *vcq = &mvq->cq;
542  	__be64 *pas;
543  	int inlen;
544  	void *cqc;
545  	void *in;
546  	int err;
547  	int eqn;
548  
549  	err = mlx5_db_alloc(mdev, &vcq->db);
550  	if (err)
551  		return err;
552  
553  	vcq->mcq.set_ci_db = vcq->db.db;
554  	vcq->mcq.arm_db = vcq->db.db + 1;
555  	vcq->mcq.cqe_sz = 64;
556  
557  	err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
558  	if (err)
559  		goto err_db;
560  
561  	cq_frag_buf_init(vcq, &vcq->buf);
562  
563  	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
564  		MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages;
565  	in = kzalloc(inlen, GFP_KERNEL);
566  	if (!in) {
567  		err = -ENOMEM;
568  		goto err_vzalloc;
569  	}
570  
571  	MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid);
572  	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
573  	mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas);
574  
575  	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
576  	MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
577  
578  	/* Use vector 0 by default. Consider adding code to choose least used
579  	 * vector.
580  	 */
581  	err = mlx5_comp_eqn_get(mdev, 0, &eqn);
582  	if (err)
583  		goto err_vec;
584  
585  	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
586  	MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
587  	MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
588  	MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
589  	MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);
590  
591  	err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
592  	if (err)
593  		goto err_vec;
594  
595  	vcq->mcq.comp = mlx5_vdpa_cq_comp;
596  	vcq->cqe = num_ent;
597  	vcq->mcq.set_ci_db = vcq->db.db;
598  	vcq->mcq.arm_db = vcq->db.db + 1;
599  	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
600  	kfree(in);
601  	return 0;
602  
603  err_vec:
604  	kfree(in);
605  err_vzalloc:
606  	cq_frag_buf_free(ndev, &vcq->buf);
607  err_db:
608  	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
609  	return err;
610  }
611  
612  static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
613  {
614  	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
615  	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
616  	struct mlx5_vdpa_cq *vcq = &mvq->cq;
617  
618  	if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) {
619  		mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn);
620  		return;
621  	}
622  	cq_frag_buf_free(ndev, &vcq->buf);
623  	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
624  }
625  
626  static int read_umem_params(struct mlx5_vdpa_net *ndev)
627  {
628  	u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {};
629  	u16 opmod = (MLX5_CAP_VDPA_EMULATION << 1) | (HCA_CAP_OPMOD_GET_CUR & 0x01);
630  	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
631  	int out_size;
632  	void *caps;
633  	void *out;
634  	int err;
635  
636  	out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out);
637  	out = kzalloc(out_size, GFP_KERNEL);
638  	if (!out)
639  		return -ENOMEM;
640  
641  	MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
642  	MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
643  	err = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out);
644  	if (err) {
645  		mlx5_vdpa_warn(&ndev->mvdev,
646  			"Failed reading vdpa umem capabilities with err %d\n", err);
647  		goto out;
648  	}
649  
650  	caps =  MLX5_ADDR_OF(query_hca_cap_out, out, capability);
651  
652  	ndev->umem_1_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_a);
653  	ndev->umem_1_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_b);
654  
655  	ndev->umem_2_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_a);
656  	ndev->umem_2_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_b);
657  
658  	ndev->umem_3_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_a);
659  	ndev->umem_3_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_b);
660  
661  out:
662  	kfree(out);
663  	return 0;
664  }
665  
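/* umem sizes are linear in the queue depth: size = param_a * num_ent + param_b,
 * with the per-umem parameters taken from the device capabilities read in
 * read_umem_params().
 */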
666  static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
667  			  struct mlx5_vdpa_umem **umemp)
668  {
669  	u32 p_a;
670  	u32 p_b;
671  
672  	switch (num) {
673  	case 1:
674  		p_a = ndev->umem_1_buffer_param_a;
675  		p_b = ndev->umem_1_buffer_param_b;
676  		*umemp = &mvq->umem1;
677  		break;
678  	case 2:
679  		p_a = ndev->umem_2_buffer_param_a;
680  		p_b = ndev->umem_2_buffer_param_b;
681  		*umemp = &mvq->umem2;
682  		break;
683  	case 3:
684  		p_a = ndev->umem_3_buffer_param_a;
685  		p_b = ndev->umem_3_buffer_param_b;
686  		*umemp = &mvq->umem3;
687  		break;
688  	}
689  
690  	(*umemp)->size = p_a * mvq->num_ent + p_b;
691  }
692  
693  static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem)
694  {
695  	mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf);
696  }
697  
698  static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
699  {
700  	int inlen;
701  	u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {};
702  	void *um;
703  	void *in;
704  	int err;
705  	__be64 *pas;
706  	struct mlx5_vdpa_umem *umem;
707  
708  	set_umem_size(ndev, mvq, num, &umem);
709  	err = umem_frag_buf_alloc(ndev, umem, umem->size);
710  	if (err)
711  		return err;
712  
713  	inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages;
714  
715  	in = kzalloc(inlen, GFP_KERNEL);
716  	if (!in) {
717  		err = -ENOMEM;
718  		goto err_in;
719  	}
720  
721  	MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM);
722  	MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid);
723  	um = MLX5_ADDR_OF(create_umem_in, in, umem);
724  	MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
725  	MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages);
726  
727  	pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]);
728  	mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW);
729  
730  	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
731  	if (err) {
732  		mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err);
733  		goto err_cmd;
734  	}
735  
736  	kfree(in);
737  	umem->id = MLX5_GET(create_umem_out, out, umem_id);
738  
739  	return 0;
740  
741  err_cmd:
742  	kfree(in);
743  err_in:
744  	umem_frag_buf_free(ndev, umem);
745  	return err;
746  }
747  
748  static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
749  {
750  	u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {};
751  	u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {};
752  	struct mlx5_vdpa_umem *umem;
753  
754  	switch (num) {
755  	case 1:
756  		umem = &mvq->umem1;
757  		break;
758  	case 2:
759  		umem = &mvq->umem2;
760  		break;
761  	case 3:
762  		umem = &mvq->umem3;
763  		break;
764  	}
765  
766  	MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM);
767  	MLX5_SET(destroy_umem_in, in, umem_id, umem->id);
768  	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
769  		return;
770  
771  	umem_frag_buf_free(ndev, umem);
772  }
773  
774  static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
775  {
776  	int num;
777  	int err;
778  
779  	for (num = 1; num <= 3; num++) {
780  		err = create_umem(ndev, mvq, num);
781  		if (err)
782  			goto err_umem;
783  	}
784  	return 0;
785  
786  err_umem:
787  	for (num--; num > 0; num--)
788  		umem_destroy(ndev, mvq, num);
789  
790  	return err;
791  }
792  
793  static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
794  {
795  	int num;
796  
797  	for (num = 3; num > 0; num--)
798  		umem_destroy(ndev, mvq, num);
799  }
800  
801  static int get_queue_type(struct mlx5_vdpa_net *ndev)
802  {
803  	u32 type_mask;
804  
805  	type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type);
806  
807  	/* prefer split queue */
808  	if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)
809  		return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT;
810  
811  	WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED));
812  
813  	return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED;
814  }
815  
816  static bool vq_is_tx(u16 idx)
817  {
818  	return idx % 2;
819  }
820  
821  enum {
822  	MLX5_VIRTIO_NET_F_MRG_RXBUF = 2,
823  	MLX5_VIRTIO_NET_F_HOST_ECN = 4,
824  	MLX5_VIRTIO_NET_F_GUEST_ECN = 6,
825  	MLX5_VIRTIO_NET_F_GUEST_TSO6 = 7,
826  	MLX5_VIRTIO_NET_F_GUEST_TSO4 = 8,
827  	MLX5_VIRTIO_NET_F_GUEST_CSUM = 9,
828  	MLX5_VIRTIO_NET_F_CSUM = 10,
829  	MLX5_VIRTIO_NET_F_HOST_TSO6 = 11,
830  	MLX5_VIRTIO_NET_F_HOST_TSO4 = 12,
831  };
832  
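/* Translate the negotiated virtio-net feature bits into the bit layout the
 * device expects in the virtio_net_q object (see the MLX5_VIRTIO_NET_F_*
 * values above).
 */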
833  static u16 get_features(u64 features)
834  {
835  	return (!!(features & BIT_ULL(VIRTIO_NET_F_MRG_RXBUF)) << MLX5_VIRTIO_NET_F_MRG_RXBUF) |
836  	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_ECN)) << MLX5_VIRTIO_NET_F_HOST_ECN) |
837  	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_ECN)) << MLX5_VIRTIO_NET_F_GUEST_ECN) |
838  	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO6)) << MLX5_VIRTIO_NET_F_GUEST_TSO6) |
839  	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO4)) << MLX5_VIRTIO_NET_F_GUEST_TSO4) |
840  	       (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << MLX5_VIRTIO_NET_F_CSUM) |
841  	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << MLX5_VIRTIO_NET_F_HOST_TSO6) |
842  	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << MLX5_VIRTIO_NET_F_HOST_TSO4);
843  }
844  
845  static bool counters_supported(const struct mlx5_vdpa_dev *mvdev)
846  {
847  	return MLX5_CAP_GEN_64(mvdev->mdev, general_obj_types) &
848  	       BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
849  }
850  
851  static bool msix_mode_supported(struct mlx5_vdpa_dev *mvdev)
852  {
853  	return MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, event_mode) &
854  		(1 << MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE) &&
855  		pci_msix_can_alloc_dyn(mvdev->mdev->pdev);
856  }
857  
858  static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
859  {
860  	int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
861  	u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
862  	void *obj_context;
863  	u16 mlx_features;
864  	void *cmd_hdr;
865  	void *vq_ctx;
866  	void *in;
867  	int err;
868  
869  	err = umems_create(ndev, mvq);
870  	if (err)
871  		return err;
872  
873  	in = kzalloc(inlen, GFP_KERNEL);
874  	if (!in) {
875  		err = -ENOMEM;
876  		goto err_alloc;
877  	}
878  
879  	mlx_features = get_features(ndev->mvdev.actual_features);
880  	cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);
881  
882  	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
883  	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
884  	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
885  
886  	obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);
887  	MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
888  	MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
889  	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
890  		 mlx_features >> 3);
891  	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0,
892  		 mlx_features & 7);
893  	vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
894  	MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));
895  
896  	if (vq_is_tx(mvq->index))
897  		MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);
898  
899  	if (mvq->map.virq) {
900  		MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE);
901  		MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->map.index);
902  	} else {
903  		MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
904  		MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
905  	}
906  
907  	MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
908  	MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
909  	MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
910  		 !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
911  	MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
912  	MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
913  	MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
914  	MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, ndev->mvdev.mr.mkey);
915  	MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
916  	MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
917  	MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
918  	MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size);
919  	MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
920  	MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
921  	MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
922  	if (counters_supported(&ndev->mvdev))
923  		MLX5_SET(virtio_q, vq_ctx, counter_set_id, mvq->counter_set_id);
924  
925  	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
926  	if (err)
927  		goto err_cmd;
928  
929  	mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
930  	kfree(in);
931  	mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
932  
933  	return 0;
934  
935  err_cmd:
936  	kfree(in);
937  err_alloc:
938  	umems_destroy(ndev, mvq);
939  	return err;
940  }
941  
942  static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
943  {
944  	u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {};
945  	u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {};
946  
947  	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode,
948  		 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
949  	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id);
950  	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid);
951  	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type,
952  		 MLX5_OBJ_TYPE_VIRTIO_NET_Q);
953  	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) {
954  		mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
955  		return;
956  	}
957  	mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
958  	umems_destroy(ndev, mvq);
959  }
960  
961  static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
962  {
963  	return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn;
964  }
965  
966  static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
967  {
968  	return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn;
969  }
970  
971  static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out,
972  			int *outlen, u32 qpn, u32 rqpn)
973  {
974  	void *qpc;
975  	void *pp;
976  
977  	switch (cmd) {
978  	case MLX5_CMD_OP_2RST_QP:
979  		*inlen = MLX5_ST_SZ_BYTES(qp_2rst_in);
980  		*outlen = MLX5_ST_SZ_BYTES(qp_2rst_out);
981  		*in = kzalloc(*inlen, GFP_KERNEL);
982  		*out = kzalloc(*outlen, GFP_KERNEL);
983  		if (!*in || !*out)
984  			goto outerr;
985  
986  		MLX5_SET(qp_2rst_in, *in, opcode, cmd);
987  		MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid);
988  		MLX5_SET(qp_2rst_in, *in, qpn, qpn);
989  		break;
990  	case MLX5_CMD_OP_RST2INIT_QP:
991  		*inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in);
992  		*outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out);
993  		*in = kzalloc(*inlen, GFP_KERNEL);
994  		*out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL);
995  		if (!*in || !*out)
996  			goto outerr;
997  
998  		MLX5_SET(rst2init_qp_in, *in, opcode, cmd);
999  		MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid);
1000  		MLX5_SET(rst2init_qp_in, *in, qpn, qpn);
1001  		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
1002  		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
1003  		MLX5_SET(qpc, qpc, rwe, 1);
1004  		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1005  		MLX5_SET(ads, pp, vhca_port_num, 1);
1006  		break;
1007  	case MLX5_CMD_OP_INIT2RTR_QP:
1008  		*inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in);
1009  		*outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out);
1010  		*in = kzalloc(*inlen, GFP_KERNEL);
1011  		*out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL);
1012  		if (!*in || !*out)
1013  			goto outerr;
1014  
1015  		MLX5_SET(init2rtr_qp_in, *in, opcode, cmd);
1016  		MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid);
1017  		MLX5_SET(init2rtr_qp_in, *in, qpn, qpn);
1018  		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
1019  		MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
1020  		MLX5_SET(qpc, qpc, log_msg_max, 30);
1021  		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
1022  		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1023  		MLX5_SET(ads, pp, fl, 1);
1024  		break;
1025  	case MLX5_CMD_OP_RTR2RTS_QP:
1026  		*inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in);
1027  		*outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out);
1028  		*in = kzalloc(*inlen, GFP_KERNEL);
1029  		*out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL);
1030  		if (!*in || !*out)
1031  			goto outerr;
1032  
1033  		MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd);
1034  		MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid);
1035  		MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn);
1036  		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
1037  		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1038  		MLX5_SET(ads, pp, ack_timeout, 14);
1039  		MLX5_SET(qpc, qpc, retry_count, 7);
1040  		MLX5_SET(qpc, qpc, rnr_retry, 7);
1041  		break;
1042  	default:
1043  		goto outerr_nullify;
1044  	}
1045  
1046  	return;
1047  
1048  outerr:
1049  	kfree(*in);
1050  	kfree(*out);
1051  outerr_nullify:
1052  	*in = NULL;
1053  	*out = NULL;
1054  }
1055  
1056  static void free_inout(void *in, void *out)
1057  {
1058  	kfree(in);
1059  	kfree(out);
1060  }
1061  
1062  /* Two QPs are used by each virtqueue. One is used by the driver and one by
1063   * firmware. The fw argument indicates whether the subject QP is the one used
1064   * by firmware.
1065   */
1066  static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd)
1067  {
1068  	int outlen;
1069  	int inlen;
1070  	void *out;
1071  	void *in;
1072  	int err;
1073  
1074  	alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw));
1075  	if (!in || !out)
1076  		return -ENOMEM;
1077  
1078  	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen);
1079  	free_inout(in, out);
1080  	return err;
1081  }
1082  
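/* Bring both ends of the RC connection through the QP state machine:
 * RESET -> INIT -> RTR for the firmware and driver QPs, and finally RTS for
 * the firmware QP so it can send notifications to the driver QP.
 */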
1083  static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1084  {
1085  	int err;
1086  
1087  	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP);
1088  	if (err)
1089  		return err;
1090  
1091  	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP);
1092  	if (err)
1093  		return err;
1094  
1095  	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP);
1096  	if (err)
1097  		return err;
1098  
1099  	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP);
1100  	if (err)
1101  		return err;
1102  
1103  	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP);
1104  	if (err)
1105  		return err;
1106  
1107  	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP);
1108  	if (err)
1109  		return err;
1110  
1111  	return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP);
1112  }
1113  
1114  struct mlx5_virtq_attr {
1115  	u8 state;
1116  	u16 available_index;
1117  	u16 used_index;
1118  };
1119  
1120  static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
1121  			   struct mlx5_virtq_attr *attr)
1122  {
1123  	int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out);
1124  	u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {};
1125  	void *out;
1126  	void *obj_context;
1127  	void *cmd_hdr;
1128  	int err;
1129  
1130  	out = kzalloc(outlen, GFP_KERNEL);
1131  	if (!out)
1132  		return -ENOMEM;
1133  
1134  	cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1135  
1136  	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
1137  	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1138  	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1139  	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1140  	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen);
1141  	if (err)
1142  		goto err_cmd;
1143  
1144  	obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context);
1145  	memset(attr, 0, sizeof(*attr));
1146  	attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
1147  	attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);
1148  	attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index);
1149  	kfree(out);
1150  	return 0;
1151  
1152  err_cmd:
1153  	kfree(out);
1154  	return err;
1155  }
1156  
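/* Only two virtqueue state transitions are driven from here: INIT -> RDY and
 * RDY -> SUSPEND. Everything else is rejected.
 */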
1157  static bool is_valid_state_change(int oldstate, int newstate)
1158  {
1159  	switch (oldstate) {
1160  	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
1161  		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
1162  	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
1163  		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
1164  	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
1165  	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
1166  	default:
1167  		return false;
1168  	}
1169  }
1170  
1171  static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
1172  {
1173  	int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
1174  	u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {};
1175  	void *obj_context;
1176  	void *cmd_hdr;
1177  	void *in;
1178  	int err;
1179  
1180  	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
1181  		return 0;
1182  
1183  	if (!is_valid_state_change(mvq->fw_state, state))
1184  		return -EINVAL;
1185  
1186  	in = kzalloc(inlen, GFP_KERNEL);
1187  	if (!in)
1188  		return -ENOMEM;
1189  
1190  	cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1191  
1192  	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
1193  	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1194  	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1195  	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1196  
1197  	obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context);
1198  	MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select,
1199  		   MLX5_VIRTQ_MODIFY_MASK_STATE);
1200  	MLX5_SET(virtio_net_q_object, obj_context, state, state);
1201  	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
1202  	kfree(in);
1203  	if (!err)
1204  		mvq->fw_state = state;
1205  
1206  	return err;
1207  }
1208  
1209  static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1210  {
1211  	u32 in[MLX5_ST_SZ_DW(create_virtio_q_counters_in)] = {};
1212  	u32 out[MLX5_ST_SZ_DW(create_virtio_q_counters_out)] = {};
1213  	void *cmd_hdr;
1214  	int err;
1215  
1216  	if (!counters_supported(&ndev->mvdev))
1217  		return 0;
1218  
1219  	cmd_hdr = MLX5_ADDR_OF(create_virtio_q_counters_in, in, hdr);
1220  
1221  	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
1222  	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
1223  	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1224  
1225  	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
1226  	if (err)
1227  		return err;
1228  
1229  	mvq->counter_set_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
1230  
1231  	return 0;
1232  }
1233  
1234  static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1235  {
1236  	u32 in[MLX5_ST_SZ_DW(destroy_virtio_q_counters_in)] = {};
1237  	u32 out[MLX5_ST_SZ_DW(destroy_virtio_q_counters_out)] = {};
1238  
1239  	if (!counters_supported(&ndev->mvdev))
1240  		return;
1241  
1242  	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
1243  	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_id, mvq->counter_set_id);
1244  	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.uid, ndev->mvdev.res.uid);
1245  	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
1246  	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
1247  		mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id);
1248  }
1249  
1250  static irqreturn_t mlx5_vdpa_int_handler(int irq, void *priv)
1251  {
1252  	struct vdpa_callback *cb = priv;
1253  
1254  	if (cb->callback)
1255  		return cb->callback(cb->private);
1256  
1257  	return IRQ_HANDLED;
1258  }
1259  
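/* Try to take a free entry from the MSI-X irq pool for this virtqueue. If no
 * entry is free or request_irq() fails, the map is left unset and the
 * virtqueue falls back to QP-based notification.
 */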
1260  static void alloc_vector(struct mlx5_vdpa_net *ndev,
1261  			 struct mlx5_vdpa_virtqueue *mvq)
1262  {
1263  	struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
1264  	struct mlx5_vdpa_irq_pool_entry *ent;
1265  	int err;
1266  	int i;
1267  
1268  	for (i = 0; i < irqp->num_ent; i++) {
1269  		ent = &irqp->entries[i];
1270  		if (!ent->used) {
1271  			snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
1272  				 dev_name(&ndev->mvdev.vdev.dev), mvq->index);
1273  			ent->dev_id = &ndev->event_cbs[mvq->index];
1274  			err = request_irq(ent->map.virq, mlx5_vdpa_int_handler, 0,
1275  					  ent->name, ent->dev_id);
1276  			if (err)
1277  				return;
1278  
1279  			ent->used = true;
1280  			mvq->map = ent->map;
1281  			return;
1282  		}
1283  	}
1284  }
1285  
1286  static void dealloc_vector(struct mlx5_vdpa_net *ndev,
1287  			   struct mlx5_vdpa_virtqueue *mvq)
1288  {
1289  	struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
1290  	int i;
1291  
1292  	for (i = 0; i < irqp->num_ent; i++)
1293  		if (mvq->map.virq == irqp->entries[i].map.virq) {
1294  			free_irq(mvq->map.virq, irqp->entries[i].dev_id);
1295  			irqp->entries[i].used = false;
1296  			return;
1297  		}
1298  }
1299  
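/* Create all per-virtqueue resources: completion CQ, the firmware/driver QP
 * pair and their connection, the counter set, an MSI-X vector if available,
 * and finally the virtio_net_q object itself, unwinding on any failure.
 */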
1300  static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1301  {
1302  	u16 idx = mvq->index;
1303  	int err;
1304  
1305  	if (!mvq->num_ent)
1306  		return 0;
1307  
1308  	if (mvq->initialized)
1309  		return 0;
1310  
1311  	err = cq_create(ndev, idx, mvq->num_ent);
1312  	if (err)
1313  		return err;
1314  
1315  	err = qp_create(ndev, mvq, &mvq->fwqp);
1316  	if (err)
1317  		goto err_fwqp;
1318  
1319  	err = qp_create(ndev, mvq, &mvq->vqqp);
1320  	if (err)
1321  		goto err_vqqp;
1322  
1323  	err = connect_qps(ndev, mvq);
1324  	if (err)
1325  		goto err_connect;
1326  
1327  	err = counter_set_alloc(ndev, mvq);
1328  	if (err)
1329  		goto err_connect;
1330  
1331  	alloc_vector(ndev, mvq);
1332  	err = create_virtqueue(ndev, mvq);
1333  	if (err)
1334  		goto err_vq;
1335  
1336  	if (mvq->ready) {
1337  		err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
1338  		if (err) {
1339  			mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n",
1340  				       idx, err);
1341  			goto err_modify;
1342  		}
1343  	}
1344  
1345  	mvq->initialized = true;
1346  	return 0;
1347  
1348  err_modify:
1349  	destroy_virtqueue(ndev, mvq);
1350  err_vq:
1351  	dealloc_vector(ndev, mvq);
1352  	counter_set_dealloc(ndev, mvq);
1353  err_connect:
1354  	qp_destroy(ndev, &mvq->vqqp);
1355  err_vqqp:
1356  	qp_destroy(ndev, &mvq->fwqp);
1357  err_fwqp:
1358  	cq_destroy(ndev, idx);
1359  	return err;
1360  }
1361  
1362  static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1363  {
1364  	struct mlx5_virtq_attr attr;
1365  
1366  	if (!mvq->initialized)
1367  		return;
1368  
1369  	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
1370  		return;
1371  
1372  	if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND))
1373  		mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n");
1374  
1375  	if (query_virtqueue(ndev, mvq, &attr)) {
1376  		mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n");
1377  		return;
1378  	}
1379  	mvq->avail_idx = attr.available_index;
1380  	mvq->used_idx = attr.used_index;
1381  }
1382  
1383  static void suspend_vqs(struct mlx5_vdpa_net *ndev)
1384  {
1385  	int i;
1386  
1387  	for (i = 0; i < ndev->mvdev.max_vqs; i++)
1388  		suspend_vq(ndev, &ndev->vqs[i]);
1389  }
1390  
1391  static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1392  {
1393  	if (!mvq->initialized)
1394  		return;
1395  
1396  	suspend_vq(ndev, mvq);
1397  	destroy_virtqueue(ndev, mvq);
1398  	dealloc_vector(ndev, mvq);
1399  	counter_set_dealloc(ndev, mvq);
1400  	qp_destroy(ndev, &mvq->vqqp);
1401  	qp_destroy(ndev, &mvq->fwqp);
1402  	cq_destroy(ndev, mvq->index);
1403  	mvq->initialized = false;
1404  }
1405  
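/* Build the RQ table used for receive steering. Only receive (even-index)
 * virtqueues are listed; the maximum and actual sizes are rounded up to a
 * power of two.
 */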
1406  static int create_rqt(struct mlx5_vdpa_net *ndev)
1407  {
1408  	int rqt_table_size = roundup_pow_of_two(ndev->rqt_size);
1409  	int act_sz = roundup_pow_of_two(ndev->cur_num_vqs / 2);
1410  	__be32 *list;
1411  	void *rqtc;
1412  	int inlen;
1413  	void *in;
1414  	int i, j;
1415  	int err;
1416  
1417  	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + rqt_table_size * MLX5_ST_SZ_BYTES(rq_num);
1418  	in = kzalloc(inlen, GFP_KERNEL);
1419  	if (!in)
1420  		return -ENOMEM;
1421  
1422  	MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid);
1423  	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
1424  
1425  	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1426  	MLX5_SET(rqtc, rqtc, rqt_max_size, rqt_table_size);
1427  	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1428  	for (i = 0, j = 0; i < act_sz; i++, j += 2)
1429  		list[i] = cpu_to_be32(ndev->vqs[j % ndev->cur_num_vqs].virtq_id);
1430  
1431  	MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
1432  	err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
1433  	kfree(in);
1434  	if (err)
1435  		return err;
1436  
1437  	return 0;
1438  }
1439  
1440  #define MLX5_MODIFY_RQT_NUM_RQS ((u64)1)
1441  
1442  static int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
1443  {
1444  	int act_sz = roundup_pow_of_two(num / 2);
1445  	__be32 *list;
1446  	void *rqtc;
1447  	int inlen;
1448  	void *in;
1449  	int i, j;
1450  	int err;
1451  
1452  	inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + act_sz * MLX5_ST_SZ_BYTES(rq_num);
1453  	in = kzalloc(inlen, GFP_KERNEL);
1454  	if (!in)
1455  		return -ENOMEM;
1456  
1457  	MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid);
1458  	MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS);
1459  	rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
1460  	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1461  
1462  	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1463  	for (i = 0, j = 0; i < act_sz; i++, j = j + 2)
1464  		list[i] = cpu_to_be32(ndev->vqs[j % num].virtq_id);
1465  
1466  	MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
1467  	err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn);
1468  	kfree(in);
1469  	if (err)
1470  		return err;
1471  
1472  	return 0;
1473  }
1474  
1475  static void destroy_rqt(struct mlx5_vdpa_net *ndev)
1476  {
1477  	mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
1478  }
1479  
1480  static int create_tir(struct mlx5_vdpa_net *ndev)
1481  {
1482  #define HASH_IP_L4PORTS                                                                            \
1483  	(MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT |  \
1484  	 MLX5_HASH_FIELD_SEL_L4_DPORT)
1485  	static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
1486  						   0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
1487  						   0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
1488  						   0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
1489  						   0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a };
1490  	void *rss_key;
1491  	void *outer;
1492  	void *tirc;
1493  	void *in;
1494  	int err;
1495  
1496  	in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL);
1497  	if (!in)
1498  		return -ENOMEM;
1499  
1500  	MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid);
1501  	tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
1502  	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
1503  
1504  	MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
1505  	MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
1506  	rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
1507  	memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key));
1508  
1509  	outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
1510  	MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4);
1511  	MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP);
1512  	MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS);
1513  
1514  	MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn);
1515  	MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn);
1516  
1517  	err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn);
1518  	kfree(in);
1519  	if (err)
1520  		return err;
1521  
1522  	mlx5_vdpa_add_tirn(ndev);
1523  	return err;
1524  }
1525  
1526  static void destroy_tir(struct mlx5_vdpa_net *ndev)
1527  {
1528  	mlx5_vdpa_remove_tirn(ndev);
1529  	mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn);
1530  }
1531  
1532  #define MAX_STEERING_ENT 0x8000
1533  #define MAX_STEERING_GROUPS 2
1534  
1535  #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1536         #define NUM_DESTS 2
1537  #else
1538         #define NUM_DESTS 1
1539  #endif
1540  
1541  static int add_steering_counters(struct mlx5_vdpa_net *ndev,
1542  				 struct macvlan_node *node,
1543  				 struct mlx5_flow_act *flow_act,
1544  				 struct mlx5_flow_destination *dests)
1545  {
1546  #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1547  	int err;
1548  
1549  	node->ucast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
1550  	if (IS_ERR(node->ucast_counter.counter))
1551  		return PTR_ERR(node->ucast_counter.counter);
1552  
1553  	node->mcast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
1554  	if (IS_ERR(node->mcast_counter.counter)) {
1555  		err = PTR_ERR(node->mcast_counter.counter);
1556  		goto err_mcast_counter;
1557  	}
1558  
1559  	dests[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1560  	flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
1561  	return 0;
1562  
1563  err_mcast_counter:
1564  	mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
1565  	return err;
1566  #else
1567  	return 0;
1568  #endif
1569  }
1570  
1571  static void remove_steering_counters(struct mlx5_vdpa_net *ndev,
1572  				     struct macvlan_node *node)
1573  {
1574  #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1575  	mlx5_fc_destroy(ndev->mvdev.mdev, node->mcast_counter.counter);
1576  	mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
1577  #endif
1578  }
1579  
1580  static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac,
1581  					struct macvlan_node *node)
1582  {
1583  	struct mlx5_flow_destination dests[NUM_DESTS] = {};
1584  	struct mlx5_flow_act flow_act = {};
1585  	struct mlx5_flow_spec *spec;
1586  	void *headers_c;
1587  	void *headers_v;
1588  	u8 *dmac_c;
1589  	u8 *dmac_v;
1590  	int err;
1591  	u16 vid;
1592  
1593  	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1594  	if (!spec)
1595  		return -ENOMEM;
1596  
1597  	vid = key2vid(node->macvlan);
1598  	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
1599  	headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
1600  	headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
1601  	dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16);
1602  	dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16);
1603  	eth_broadcast_addr(dmac_c);
1604  	ether_addr_copy(dmac_v, mac);
1605  	if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)) {
1606  		MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
1607  		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid);
1608  	}
1609  	if (node->tagged) {
1610  		MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);
1611  		MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, vid);
1612  	}
1613  	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1614  	dests[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1615  	dests[0].tir_num = ndev->res.tirn;
1616  	err = add_steering_counters(ndev, node, &flow_act, dests);
1617  	if (err)
1618  		goto out_free;
1619  
1620  #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1621  	dests[1].counter_id = mlx5_fc_id(node->ucast_counter.counter);
1622  #endif
1623  	node->ucast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
1624  	if (IS_ERR(node->ucast_rule)) {
1625  		err = PTR_ERR(node->ucast_rule);
1626  		goto err_ucast;
1627  	}
1628  
1629  #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1630  	dests[1].counter_id = mlx5_fc_id(node->mcast_counter.counter);
1631  #endif
1632  
1633  	memset(dmac_c, 0, ETH_ALEN);
1634  	memset(dmac_v, 0, ETH_ALEN);
1635  	dmac_c[0] = 1;
1636  	dmac_v[0] = 1;
1637  	node->mcast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
1638  	if (IS_ERR(node->mcast_rule)) {
1639  		err = PTR_ERR(node->mcast_rule);
1640  		goto err_mcast;
1641  	}
1642  	kvfree(spec);
1643  	mlx5_vdpa_add_rx_counters(ndev, node);
1644  	return 0;
1645  
1646  err_mcast:
1647  	mlx5_del_flow_rules(node->ucast_rule);
1648  err_ucast:
1649  	remove_steering_counters(ndev, node);
1650  out_free:
1651  	kvfree(spec);
1652  	return err;
1653  }
1654  
1655  static void mlx5_vdpa_del_mac_vlan_rules(struct mlx5_vdpa_net *ndev,
1656  					 struct macvlan_node *node)
1657  {
1658  	mlx5_vdpa_remove_rx_counters(ndev, node);
1659  	mlx5_del_flow_rules(node->ucast_rule);
1660  	mlx5_del_flow_rules(node->mcast_rule);
1661  }
1662  
1663  static u64 search_val(u8 *mac, u16 vlan, bool tagged)
1664  {
1665  	u64 val;
1666  
1667  	if (!tagged)
1668  		vlan = MLX5V_UNTAGGED;
1669  
1670  	val = (u64)vlan << 48 |
1671  	      (u64)mac[0] << 40 |
1672  	      (u64)mac[1] << 32 |
1673  	      (u64)mac[2] << 24 |
1674  	      (u64)mac[3] << 16 |
1675  	      (u64)mac[4] << 8 |
1676  	      (u64)mac[5];
1677  
1678  	return val;
1679  }
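/* Key layout produced by search_val(), for reference:
 *   bits 63..48  VLAN ID, or MLX5V_UNTAGGED (0x1000) for untagged entries
 *   bits 47..0   destination MAC, mac[0] in the most significant byte
 * e.g. MAC 00:11:22:33:44:55 on VLAN 10 yields 0x000a001122334455.
 */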
1680  
1681  static struct macvlan_node *mac_vlan_lookup(struct mlx5_vdpa_net *ndev, u64 value)
1682  {
1683  	struct macvlan_node *pos;
1684  	u32 idx;
1685  
1686  	idx = hash_64(value, 8); // tbd 8
1687  	hlist_for_each_entry(pos, &ndev->macvlan_hash[idx], hlist) {
1688  		if (pos->macvlan == value)
1689  			return pos;
1690  	}
1691  	return NULL;
1692  }
1693  
1694  static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vid, bool tagged)
1695  {
1696  	struct macvlan_node *ptr;
1697  	u64 val;
1698  	u32 idx;
1699  	int err;
1700  
1701  	val = search_val(mac, vid, tagged);
1702  	if (mac_vlan_lookup(ndev, val))
1703  		return -EEXIST;
1704  
1705  	ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);
1706  	if (!ptr)
1707  		return -ENOMEM;
1708  
1709  	ptr->tagged = tagged;
1710  	ptr->macvlan = val;
1711  	ptr->ndev = ndev;
1712  	err = mlx5_vdpa_add_mac_vlan_rules(ndev, ndev->config.mac, ptr);
1713  	if (err)
1714  		goto err_add;
1715  
1716  	idx = hash_64(val, 8);
1717  	hlist_add_head(&ptr->hlist, &ndev->macvlan_hash[idx]);
1718  	return 0;
1719  
1720  err_add:
1721  	kfree(ptr);
1722  	return err;
1723  }
1724  
1725  static void mac_vlan_del(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged)
1726  {
1727  	struct macvlan_node *ptr;
1728  
1729  	ptr = mac_vlan_lookup(ndev, search_val(mac, vlan, tagged));
1730  	if (!ptr)
1731  		return;
1732  
1733  	hlist_del(&ptr->hlist);
1734  	mlx5_vdpa_del_mac_vlan_rules(ndev, ptr);
1735  	remove_steering_counters(ndev, ptr);
1736  	kfree(ptr);
1737  }
1738  
1739  static void clear_mac_vlan_table(struct mlx5_vdpa_net *ndev)
1740  {
1741  	struct macvlan_node *pos;
1742  	struct hlist_node *n;
1743  	int i;
1744  
1745  	for (i = 0; i < MLX5V_MACVLAN_SIZE; i++) {
1746  		hlist_for_each_entry_safe(pos, n, &ndev->macvlan_hash[i], hlist) {
1747  			hlist_del(&pos->hlist);
1748  			mlx5_vdpa_del_mac_vlan_rules(ndev, pos);
1749  			remove_steering_counters(ndev, pos);
1750  			kfree(pos);
1751  		}
1752  	}
1753  }
1754  
1755  static int setup_steering(struct mlx5_vdpa_net *ndev)
1756  {
1757  	struct mlx5_flow_table_attr ft_attr = {};
1758  	struct mlx5_flow_namespace *ns;
1759  	int err;
1760  
1761  	ft_attr.max_fte = MAX_STEERING_ENT;
1762  	ft_attr.autogroup.max_num_groups = MAX_STEERING_GROUPS;
1763  
1764  	ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS);
1765  	if (!ns) {
1766  		mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n");
1767  		return -EOPNOTSUPP;
1768  	}
1769  
1770  	ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
1771  	if (IS_ERR(ndev->rxft)) {
1772  		mlx5_vdpa_warn(&ndev->mvdev, "failed to create flow table\n");
1773  		return PTR_ERR(ndev->rxft);
1774  	}
1775  	mlx5_vdpa_add_rx_flow_table(ndev);
1776  
1777  	err = mac_vlan_add(ndev, ndev->config.mac, 0, false);
1778  	if (err)
1779  		goto err_add;
1780  
1781  	return 0;
1782  
1783  err_add:
1784  	mlx5_vdpa_remove_rx_flow_table(ndev);
1785  	mlx5_destroy_flow_table(ndev->rxft);
1786  	return err;
1787  }
1788  
1789  static void teardown_steering(struct mlx5_vdpa_net *ndev)
1790  {
1791  	clear_mac_vlan_table(ndev);
1792  	mlx5_vdpa_remove_rx_flow_table(ndev);
1793  	mlx5_destroy_flow_table(ndev->rxft);
1794  }
1795  
1796  static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1797  {
1798  	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1799  	struct mlx5_control_vq *cvq = &mvdev->cvq;
1800  	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1801  	struct mlx5_core_dev *pfmdev;
1802  	size_t read;
1803  	u8 mac[ETH_ALEN], mac_back[ETH_ALEN];
1804  
1805  	pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
1806  	switch (cmd) {
1807  	case VIRTIO_NET_CTRL_MAC_ADDR_SET:
1808  		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN);
1809  		if (read != ETH_ALEN)
1810  			break;
1811  
1812  		if (!memcmp(ndev->config.mac, mac, ETH_ALEN)) {
1813  			status = VIRTIO_NET_OK;
1814  			break;
1815  		}
1816  
1817  		if (is_zero_ether_addr(mac))
1818  			break;
1819  
1820  		if (!is_zero_ether_addr(ndev->config.mac)) {
1821  			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
1822  				mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
1823  					       ndev->config.mac);
1824  				break;
1825  			}
1826  		}
1827  
1828  		if (mlx5_mpfs_add_mac(pfmdev, mac)) {
1829  			mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
1830  				       mac);
1831  			break;
1832  		}
1833  
1834  		/* Back up the original MAC address so that it can be restored if
1835  		 * adding the forward rules fails.
1836  		 */
1837  		memcpy(mac_back, ndev->config.mac, ETH_ALEN);
1838  
1839  		memcpy(ndev->config.mac, mac, ETH_ALEN);
1840  
1841  		/* Recreate the flow table entry so that traffic to the new MAC is still forwarded
1842  		 */
1843  		mac_vlan_del(ndev, mac_back, 0, false);
1844  
1845  		if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) {
1846  			mlx5_vdpa_warn(mvdev, "failed to insert forward rules, try to restore\n");
1847  
1848  			/* This path is rarely taken, but double check anyway */
1849  			if (is_zero_ether_addr(mac_back)) {
1850  				mlx5_vdpa_warn(mvdev, "restore mac failed: Original MAC is zero\n");
1851  				break;
1852  			}
1853  
1854  			/* Try to restore the original MAC address to the MPFS table, and
1855  			 * try to restore the forward rule entry.
1856  			 */
1857  			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
1858  				mlx5_vdpa_warn(mvdev, "restore mac failed: delete MAC %pM from MPFS table failed\n",
1859  					       ndev->config.mac);
1860  			}
1861  
1862  			if (mlx5_mpfs_add_mac(pfmdev, mac_back)) {
1863  				mlx5_vdpa_warn(mvdev, "restore mac failed: insert old MAC %pM into MPFS table failed\n",
1864  					       mac_back);
1865  			}
1866  
1867  			memcpy(ndev->config.mac, mac_back, ETH_ALEN);
1868  
1869  			if (mac_vlan_add(ndev, ndev->config.mac, 0, false))
1870  				mlx5_vdpa_warn(mvdev, "restore forward rules failed: insert forward rules failed\n");
1871  
1872  			break;
1873  		}
1874  
1875  		status = VIRTIO_NET_OK;
1876  		break;
1877  
1878  	default:
1879  		break;
1880  	}
1881  
1882  	return status;
1883  }
1884  
1885  static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps)
1886  {
1887  	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1888  	int cur_qps = ndev->cur_num_vqs / 2;
1889  	int err;
1890  	int i;
1891  
1892  	if (cur_qps > newqps) {
1893  		err = modify_rqt(ndev, 2 * newqps);
1894  		if (err)
1895  			return err;
1896  
1897  		for (i = ndev->cur_num_vqs - 1; i >= 2 * newqps; i--)
1898  			teardown_vq(ndev, &ndev->vqs[i]);
1899  
1900  		ndev->cur_num_vqs = 2 * newqps;
1901  	} else {
1902  		ndev->cur_num_vqs = 2 * newqps;
1903  		for (i = cur_qps * 2; i < 2 * newqps; i++) {
1904  			err = setup_vq(ndev, &ndev->vqs[i]);
1905  			if (err)
1906  				goto clean_added;
1907  		}
1908  		err = modify_rqt(ndev, 2 * newqps);
1909  		if (err)
1910  			goto clean_added;
1911  	}
1912  	return 0;
1913  
1914  clean_added:
1915  	for (--i; i >= 2 * cur_qps; --i)
1916  		teardown_vq(ndev, &ndev->vqs[i]);
1917  
1918  	ndev->cur_num_vqs = 2 * cur_qps;
1919  
1920  	return err;
1921  }
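/* Ordering note, derived from change_num_qps(): when shrinking, the RQT is
 * resized first so that no RQT entry points at a VQ that is about to be torn
 * down; when growing, the new VQs are created first and the RQT is resized
 * only after they exist. A failure while growing unwinds the newly created
 * VQs and leaves cur_num_vqs at its previous value.
 */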
1922  
1923  static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1924  {
1925  	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1926  	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1927  	struct mlx5_control_vq *cvq = &mvdev->cvq;
1928  	struct virtio_net_ctrl_mq mq;
1929  	size_t read;
1930  	u16 newqps;
1931  
1932  	switch (cmd) {
1933  	case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
1934  		/* This mq feature check aligns with the pre-existing userspace
1935  		 * implementation.
1936  		 *
1937  		 * Without it, an untrusted driver could fake a multiqueue config
1938  		 * request down to a non-mq device, which may cause the kernel to
1939  		 * panic due to uninitialized resources for the extra vqs. Even with
1940  		 * a well-behaving guest driver, changing the number of vqs on a
1941  		 * non-mq device is not expected to be allowed.
1942  		 */
1943  		if (!MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ))
1944  			break;
1945  
1946  		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq));
1947  		if (read != sizeof(mq))
1948  			break;
1949  
1950  		newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs);
1951  		if (newqps < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1952  		    newqps > ndev->rqt_size)
1953  			break;
1954  
1955  		if (ndev->cur_num_vqs == 2 * newqps) {
1956  			status = VIRTIO_NET_OK;
1957  			break;
1958  		}
1959  
1960  		if (!change_num_qps(mvdev, newqps))
1961  			status = VIRTIO_NET_OK;
1962  
1963  		break;
1964  	default:
1965  		break;
1966  	}
1967  
1968  	return status;
1969  }
1970  
1971  static virtio_net_ctrl_ack handle_ctrl_vlan(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1972  {
1973  	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1974  	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1975  	struct mlx5_control_vq *cvq = &mvdev->cvq;
1976  	__virtio16 vlan;
1977  	size_t read;
1978  	u16 id;
1979  
1980  	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)))
1981  		return status;
1982  
1983  	switch (cmd) {
1984  	case VIRTIO_NET_CTRL_VLAN_ADD:
1985  		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
1986  		if (read != sizeof(vlan))
1987  			break;
1988  
1989  		id = mlx5vdpa16_to_cpu(mvdev, vlan);
1990  		if (mac_vlan_add(ndev, ndev->config.mac, id, true))
1991  			break;
1992  
1993  		status = VIRTIO_NET_OK;
1994  		break;
1995  	case VIRTIO_NET_CTRL_VLAN_DEL:
1996  		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
1997  		if (read != sizeof(vlan))
1998  			break;
1999  
2000  		id = mlx5vdpa16_to_cpu(mvdev, vlan);
2001  		mac_vlan_del(ndev, ndev->config.mac, id, true);
2002  		status = VIRTIO_NET_OK;
2003  		break;
2004  	default:
2005  		break;
2006  	}
2007  
2008  	return status;
2009  }
2010  
2011  static void mlx5_cvq_kick_handler(struct work_struct *work)
2012  {
2013  	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
2014  	struct virtio_net_ctrl_hdr ctrl;
2015  	struct mlx5_vdpa_wq_ent *wqent;
2016  	struct mlx5_vdpa_dev *mvdev;
2017  	struct mlx5_control_vq *cvq;
2018  	struct mlx5_vdpa_net *ndev;
2019  	size_t read, write;
2020  	int err;
2021  
2022  	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
2023  	mvdev = wqent->mvdev;
2024  	ndev = to_mlx5_vdpa_ndev(mvdev);
2025  	cvq = &mvdev->cvq;
2026  
2027  	down_write(&ndev->reslock);
2028  
2029  	if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
2030  		goto out;
2031  
2032  	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
2033  		goto out;
2034  
2035  	if (!cvq->ready)
2036  		goto out;
2037  
2038  	while (true) {
2039  		err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head,
2040  					   GFP_ATOMIC);
2041  		if (err <= 0)
2042  			break;
2043  
2044  		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl));
2045  		if (read != sizeof(ctrl))
2046  			break;
2047  
2048  		cvq->received_desc++;
2049  		switch (ctrl.class) {
2050  		case VIRTIO_NET_CTRL_MAC:
2051  			status = handle_ctrl_mac(mvdev, ctrl.cmd);
2052  			break;
2053  		case VIRTIO_NET_CTRL_MQ:
2054  			status = handle_ctrl_mq(mvdev, ctrl.cmd);
2055  			break;
2056  		case VIRTIO_NET_CTRL_VLAN:
2057  			status = handle_ctrl_vlan(mvdev, ctrl.cmd);
2058  			break;
2059  		default:
2060  			break;
2061  		}
2062  
2063  		/* Make sure data is written before advancing index */
2064  		smp_wmb();
2065  
2066  		write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status));
2067  		vringh_complete_iotlb(&cvq->vring, cvq->head, write);
2068  		vringh_kiov_cleanup(&cvq->riov);
2069  		vringh_kiov_cleanup(&cvq->wiov);
2070  
2071  		if (vringh_need_notify_iotlb(&cvq->vring))
2072  			vringh_notify(&cvq->vring);
2073  
2074  		cvq->completed_desc++;
2075  		queue_work(mvdev->wq, &wqent->work);
2076  		break;
2077  	}
2078  
2079  out:
2080  	up_write(&ndev->reslock);
2081  }
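/* The control VQ is not offloaded to the device: the handler above runs in a
 * workqueue, pulls one command per invocation through vringh over the IOTLB,
 * pushes the status byte back, and re-queues itself so that any further
 * pending commands are processed on the next pass.
 */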
2082  
2083  static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
2084  {
2085  	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2086  	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2087  	struct mlx5_vdpa_virtqueue *mvq;
2088  
2089  	if (!is_index_valid(mvdev, idx))
2090  		return;
2091  
2092  	if (unlikely(is_ctrl_vq_idx(mvdev, idx))) {
2093  		if (!mvdev->wq || !mvdev->cvq.ready)
2094  			return;
2095  
2096  		queue_work(mvdev->wq, &ndev->cvq_ent.work);
2097  		return;
2098  	}
2099  
2100  	mvq = &ndev->vqs[idx];
2101  	if (unlikely(!mvq->ready))
2102  		return;
2103  
2104  	iowrite16(idx, ndev->mvdev.res.kick_addr);
2105  }
2106  
2107  static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area,
2108  				    u64 driver_area, u64 device_area)
2109  {
2110  	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2111  	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2112  	struct mlx5_vdpa_virtqueue *mvq;
2113  
2114  	if (!is_index_valid(mvdev, idx))
2115  		return -EINVAL;
2116  
2117  	if (is_ctrl_vq_idx(mvdev, idx)) {
2118  		mvdev->cvq.desc_addr = desc_area;
2119  		mvdev->cvq.device_addr = device_area;
2120  		mvdev->cvq.driver_addr = driver_area;
2121  		return 0;
2122  	}
2123  
2124  	mvq = &ndev->vqs[idx];
2125  	mvq->desc_addr = desc_area;
2126  	mvq->device_addr = device_area;
2127  	mvq->driver_addr = driver_area;
2128  	return 0;
2129  }
2130  
2131  static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
2132  {
2133  	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2134  	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2135  	struct mlx5_vdpa_virtqueue *mvq;
2136  
2137  	if (!is_index_valid(mvdev, idx))
2138  		return;
2139  
2140  	if (is_ctrl_vq_idx(mvdev, idx)) {
2141  		struct mlx5_control_vq *cvq = &mvdev->cvq;
2142  
2143  		cvq->vring.vring.num = num;
2144  		return;
2145  	}
2146  
2147  	mvq = &ndev->vqs[idx];
2148  	mvq->num_ent = num;
2149  }
2150  
2151  static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb)
2152  {
2153  	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2154  	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2155  
2156  	ndev->event_cbs[idx] = *cb;
2157  	if (is_ctrl_vq_idx(mvdev, idx))
2158  		mvdev->cvq.event_cb = *cb;
2159  }
2160  
2161  static void mlx5_cvq_notify(struct vringh *vring)
2162  {
2163  	struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring);
2164  
2165  	if (!cvq->event_cb.callback)
2166  		return;
2167  
2168  	cvq->event_cb.callback(cvq->event_cb.private);
2169  }
2170  
2171  static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready)
2172  {
2173  	struct mlx5_control_vq *cvq = &mvdev->cvq;
2174  
2175  	cvq->ready = ready;
2176  	if (!ready)
2177  		return;
2178  
2179  	cvq->vring.notify = mlx5_cvq_notify;
2180  }
2181  
2182  static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
2183  {
2184  	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2185  	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2186  	struct mlx5_vdpa_virtqueue *mvq;
2187  	int err;
2188  
2189  	if (!mvdev->actual_features)
2190  		return;
2191  
2192  	if (!is_index_valid(mvdev, idx))
2193  		return;
2194  
2195  	if (is_ctrl_vq_idx(mvdev, idx)) {
2196  		set_cvq_ready(mvdev, ready);
2197  		return;
2198  	}
2199  
2200  	mvq = &ndev->vqs[idx];
2201  	if (!ready) {
2202  		suspend_vq(ndev, mvq);
2203  	} else {
2204  		err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
2205  		if (err) {
2206  			mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
2207  			ready = false;
2208  		}
2209  	}
2210  
2211  
2212  	mvq->ready = ready;
2213  }
2214  
2215  static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
2216  {
2217  	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2218  	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2219  
2220  	if (!is_index_valid(mvdev, idx))
2221  		return false;
2222  
2223  	if (is_ctrl_vq_idx(mvdev, idx))
2224  		return mvdev->cvq.ready;
2225  
2226  	return ndev->vqs[idx].ready;
2227  }
2228  
2229  static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
2230  				  const struct vdpa_vq_state *state)
2231  {
2232  	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2233  	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2234  	struct mlx5_vdpa_virtqueue *mvq;
2235  
2236  	if (!is_index_valid(mvdev, idx))
2237  		return -EINVAL;
2238  
2239  	if (is_ctrl_vq_idx(mvdev, idx)) {
2240  		mvdev->cvq.vring.last_avail_idx = state->split.avail_index;
2241  		return 0;
2242  	}
2243  
2244  	mvq = &ndev->vqs[idx];
2245  	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
2246  		mlx5_vdpa_warn(mvdev, "can't modify available index\n");
2247  		return -EINVAL;
2248  	}
2249  
2250  	mvq->used_idx = state->split.avail_index;
2251  	mvq->avail_idx = state->split.avail_index;
2252  	return 0;
2253  }
2254  
2255  static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state)
2256  {
2257  	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2258  	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2259  	struct mlx5_vdpa_virtqueue *mvq;
2260  	struct mlx5_virtq_attr attr;
2261  	int err;
2262  
2263  	if (!is_index_valid(mvdev, idx))
2264  		return -EINVAL;
2265  
2266  	if (is_ctrl_vq_idx(mvdev, idx)) {
2267  		state->split.avail_index = mvdev->cvq.vring.last_avail_idx;
2268  		return 0;
2269  	}
2270  
2271  	mvq = &ndev->vqs[idx];
2272  	/* If the virtq object was destroyed, use the value saved at
2273  	 * the last minute of suspend_vq. This caters for userspace
2274  	 * that cares about emulating the index after vq is stopped.
2275  	 */
2276  	if (!mvq->initialized) {
2277  		/* Firmware returns a wrong value for the available index.
2278  		 * Since both values should be identical, we take the value of
2279  		 * used_idx which is reported correctly.
2280  		 */
2281  		state->split.avail_index = mvq->used_idx;
2282  		return 0;
2283  	}
2284  
2285  	err = query_virtqueue(ndev, mvq, &attr);
2286  	if (err) {
2287  		mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n");
2288  		return err;
2289  	}
2290  	state->split.avail_index = attr.used_index;
2291  	return 0;
2292  }
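/* Note on the split-ring state returned above: for a stopped VQ the driver
 * reports the used index saved at suspend time, which is expected to equal
 * the available index once the queue has drained; for a live VQ the value
 * comes from a firmware query of the virtqueue object.
 */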
2293  
2294  static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
2295  {
2296  	return PAGE_SIZE;
2297  }
2298  
2299  static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx)
2300  {
2301  	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2302  
2303  	if (is_ctrl_vq_idx(mvdev, idx))
2304  		return MLX5_VDPA_CVQ_GROUP;
2305  
2306  	return MLX5_VDPA_DATAVQ_GROUP;
2307  }
2308  
2309  static u64 mlx_to_vritio_features(u16 dev_features)
2310  {
2311  	u64 result = 0;
2312  
2313  	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_MRG_RXBUF))
2314  		result |= BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
2315  	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_ECN))
2316  		result |= BIT_ULL(VIRTIO_NET_F_HOST_ECN);
2317  	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_ECN))
2318  		result |= BIT_ULL(VIRTIO_NET_F_GUEST_ECN);
2319  	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO6))
2320  		result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO6);
2321  	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO4))
2322  		result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO4);
2323  	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_CSUM))
2324  		result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM);
2325  	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_CSUM))
2326  		result |= BIT_ULL(VIRTIO_NET_F_CSUM);
2327  	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO6))
2328  		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6);
2329  	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO4))
2330  		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4);
2331  
2332  	return result;
2333  }
2334  
2335  static u64 get_supported_features(struct mlx5_core_dev *mdev)
2336  {
2337  	u64 mlx_vdpa_features = 0;
2338  	u16 dev_features;
2339  
2340  	dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mdev, device_features_bits_mask);
2341  	mlx_vdpa_features |= mlx_to_vritio_features(dev_features);
2342  	if (MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_version_1_0))
2343  		mlx_vdpa_features |= BIT_ULL(VIRTIO_F_VERSION_1);
2344  	mlx_vdpa_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
2345  	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
2346  	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
2347  	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ);
2348  	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS);
2349  	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU);
2350  	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VLAN);
2351  	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MAC);
2352  
2353  	return mlx_vdpa_features;
2354  }
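/* The advertised feature set is therefore the union of the data-path
 * offloads reported by the device capability mask (checksum, TSO, ECN,
 * mergeable RX buffers) and the bits the driver adds unconditionally:
 * ACCESS_PLATFORM, CTRL_VQ, CTRL_MAC_ADDR, CTRL_VLAN, MQ, MAC, MTU and
 * STATUS, plus VERSION_1 when the device reports virtio 1.0 support.
 */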
2355  
2356  static u64 mlx5_vdpa_get_device_features(struct vdpa_device *vdev)
2357  {
2358  	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2359  	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2360  
2361  	print_features(mvdev, ndev->mvdev.mlx_features, false);
2362  	return ndev->mvdev.mlx_features;
2363  }
2364  
2365  static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features)
2366  {
2367  	/* Minimum features to expect */
2368  	if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
2369  		return -EOPNOTSUPP;
2370  
2371  	/* Double check features combination sent down by the driver.
2372  	 * Fail invalid features due to absence of the depended feature.
2373  	 *
2374  	 * Per VIRTIO v1.1 specification, section 5.1.3.1 Feature bit
2375  	 * requirements: "VIRTIO_NET_F_MQ Requires VIRTIO_NET_F_CTRL_VQ".
2376  	 * By failing the invalid features sent down by untrusted drivers,
2377  	 * we're assured the assumption made upon is_index_valid() and
2378  	 * is_ctrl_vq_idx() will not be compromised.
2379  	 */
2380  	if ((features & (BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) ==
2381              BIT_ULL(VIRTIO_NET_F_MQ))
2382  		return -EINVAL;
2383  
2384  	return 0;
2385  }
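/* Examples of the check above: a feature word with VIRTIO_NET_F_MQ set but
 * VIRTIO_NET_F_CTRL_VQ clear is rejected with -EINVAL, and any word missing
 * VIRTIO_F_ACCESS_PLATFORM is rejected with -EOPNOTSUPP; MQ together with
 * CTRL_VQ, or neither of them, is accepted.
 */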
2386  
2387  static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
2388  {
2389  	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2390  	int err;
2391  	int i;
2392  
2393  	for (i = 0; i < mvdev->max_vqs; i++) {
2394  		err = setup_vq(ndev, &ndev->vqs[i]);
2395  		if (err)
2396  			goto err_vq;
2397  	}
2398  
2399  	return 0;
2400  
2401  err_vq:
2402  	for (--i; i >= 0; i--)
2403  		teardown_vq(ndev, &ndev->vqs[i]);
2404  
2405  	return err;
2406  }
2407  
2408  static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
2409  {
2410  	struct mlx5_vdpa_virtqueue *mvq;
2411  	int i;
2412  
2413  	for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) {
2414  		mvq = &ndev->vqs[i];
2415  		if (!mvq->initialized)
2416  			continue;
2417  
2418  		teardown_vq(ndev, mvq);
2419  	}
2420  }
2421  
2422  static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
2423  {
2424  	if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) {
2425  		if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) {
2426  			/* MQ supported. CVQ index is right above the last data virtqueue's */
2427  			mvdev->max_idx = mvdev->max_vqs;
2428  		} else {
2429  			/* Only CVQ is supported. Data virtqueues occupy indices 0 and 1;
2430  			 * the CVQ gets index 2.
2431  			 */
2432  			mvdev->max_idx = 2;
2433  		}
2434  	} else {
2435  		/* Two data virtqueues only: one for rx and one for tx */
2436  		mvdev->max_idx = 1;
2437  	}
2438  }
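/* Resulting index map (max_idx is the highest valid virtqueue index):
 *   CTRL_VQ + MQ   -> max_idx = max_vqs (CVQ right after the data VQs)
 *   CTRL_VQ, no MQ -> max_idx = 2       (data VQs 0/1, CVQ at index 2)
 *   no CTRL_VQ     -> max_idx = 1       (data VQs 0/1 only)
 */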
2439  
2440  static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
2441  {
2442  	u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
2443  	u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
2444  	int err;
2445  
2446  	MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE);
2447  	MLX5_SET(query_vport_state_in, in, op_mod, opmod);
2448  	MLX5_SET(query_vport_state_in, in, vport_number, vport);
2449  	if (vport)
2450  		MLX5_SET(query_vport_state_in, in, other_vport, 1);
2451  
2452  	err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
2453  	if (err)
2454  		return 0;
2455  
2456  	return MLX5_GET(query_vport_state_out, out, state);
2457  }
2458  
2459  static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
2460  {
2461  	if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
2462  	    VPORT_STATE_UP)
2463  		return true;
2464  
2465  	return false;
2466  }
2467  
2468  static void update_carrier(struct work_struct *work)
2469  {
2470  	struct mlx5_vdpa_wq_ent *wqent;
2471  	struct mlx5_vdpa_dev *mvdev;
2472  	struct mlx5_vdpa_net *ndev;
2473  
2474  	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
2475  	mvdev = wqent->mvdev;
2476  	ndev = to_mlx5_vdpa_ndev(mvdev);
2477  	if (get_link_state(mvdev))
2478  		ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
2479  	else
2480  		ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
2481  
2482  	if (ndev->config_cb.callback)
2483  		ndev->config_cb.callback(ndev->config_cb.private);
2484  
2485  	kfree(wqent);
2486  }
2487  
2488  static int queue_link_work(struct mlx5_vdpa_net *ndev)
2489  {
2490  	struct mlx5_vdpa_wq_ent *wqent;
2491  
2492  	wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
2493  	if (!wqent)
2494  		return -ENOMEM;
2495  
2496  	wqent->mvdev = &ndev->mvdev;
2497  	INIT_WORK(&wqent->work, update_carrier);
2498  	queue_work(ndev->mvdev.wq, &wqent->work);
2499  	return 0;
2500  }
2501  
2502  static int event_handler(struct notifier_block *nb, unsigned long event, void *param)
2503  {
2504  	struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb);
2505  	struct mlx5_eqe *eqe = param;
2506  	int ret = NOTIFY_DONE;
2507  
2508  	if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
2509  		switch (eqe->sub_type) {
2510  		case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
2511  		case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
2512  			if (queue_link_work(ndev))
2513  				return NOTIFY_DONE;
2514  
2515  			ret = NOTIFY_OK;
2516  			break;
2517  		default:
2518  			return NOTIFY_DONE;
2519  		}
2520  		return ret;
2521  	}
2522  	return ret;
2523  }
2524  
2525  static void register_link_notifier(struct mlx5_vdpa_net *ndev)
2526  {
2527  	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_STATUS)))
2528  		return;
2529  
2530  	ndev->nb.notifier_call = event_handler;
2531  	mlx5_notifier_register(ndev->mvdev.mdev, &ndev->nb);
2532  	ndev->nb_registered = true;
2533  	queue_link_work(ndev);
2534  }
2535  
2536  static void unregister_link_notifier(struct mlx5_vdpa_net *ndev)
2537  {
2538  	if (!ndev->nb_registered)
2539  		return;
2540  
2541  	ndev->nb_registered = false;
2542  	mlx5_notifier_unregister(ndev->mvdev.mdev, &ndev->nb);
2543  	if (ndev->mvdev.wq)
2544  		flush_workqueue(ndev->mvdev.wq);
2545  }
2546  
2547  static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features)
2548  {
2549  	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2550  	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2551  	int err;
2552  
2553  	print_features(mvdev, features, true);
2554  
2555  	err = verify_driver_features(mvdev, features);
2556  	if (err)
2557  		return err;
2558  
2559  	ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
2560  	if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ))
2561  		ndev->rqt_size = mlx5vdpa16_to_cpu(mvdev, ndev->config.max_virtqueue_pairs);
2562  	else
2563  		ndev->rqt_size = 1;
2564  
2565  	/* Device must start with 1 queue pair, as per VIRTIO v1.2 spec, section
2566  	 * 5.1.6.5.5 "Device operation in multiqueue mode":
2567  	 *
2568  	 * Multiqueue is disabled by default.
2569  	 * The driver enables multiqueue by sending a command using class
2570  	 * VIRTIO_NET_CTRL_MQ. The command selects the mode of multiqueue
2571  	 * operation, as follows: ...
2572  	 */
2573  	ndev->cur_num_vqs = 2;
2574  
2575  	update_cvq_info(mvdev);
2576  	return err;
2577  }
2578  
2579  static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb)
2580  {
2581  	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2582  	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2583  
2584  	ndev->config_cb = *cb;
2585  }
2586  
2587  #define MLX5_VDPA_MAX_VQ_ENTRIES 256
2588  static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev)
2589  {
2590  	return MLX5_VDPA_MAX_VQ_ENTRIES;
2591  }
2592  
2593  static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev)
2594  {
2595  	return VIRTIO_ID_NET;
2596  }
2597  
2598  static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev)
2599  {
2600  	return PCI_VENDOR_ID_MELLANOX;
2601  }
2602  
2603  static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev)
2604  {
2605  	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2606  	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2607  
2608  	print_status(mvdev, ndev->mvdev.status, false);
2609  	return ndev->mvdev.status;
2610  }
2611  
2612  static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
2613  {
2614  	struct mlx5_vq_restore_info *ri = &mvq->ri;
2615  	struct mlx5_virtq_attr attr = {};
2616  	int err;
2617  
2618  	if (mvq->initialized) {
2619  		err = query_virtqueue(ndev, mvq, &attr);
2620  		if (err)
2621  			return err;
2622  	}
2623  
2624  	ri->avail_index = attr.available_index;
2625  	ri->used_index = attr.used_index;
2626  	ri->ready = mvq->ready;
2627  	ri->num_ent = mvq->num_ent;
2628  	ri->desc_addr = mvq->desc_addr;
2629  	ri->device_addr = mvq->device_addr;
2630  	ri->driver_addr = mvq->driver_addr;
2631  	ri->map = mvq->map;
2632  	ri->restore = true;
2633  	return 0;
2634  }
2635  
2636  static int save_channels_info(struct mlx5_vdpa_net *ndev)
2637  {
2638  	int i;
2639  
2640  	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
2641  		memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri));
2642  		save_channel_info(ndev, &ndev->vqs[i]);
2643  	}
2644  	return 0;
2645  }
2646  
2647  static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev)
2648  {
2649  	int i;
2650  
2651  	for (i = 0; i < ndev->mvdev.max_vqs; i++)
2652  		memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
2653  }
2654  
2655  static void restore_channels_info(struct mlx5_vdpa_net *ndev)
2656  {
2657  	struct mlx5_vdpa_virtqueue *mvq;
2658  	struct mlx5_vq_restore_info *ri;
2659  	int i;
2660  
2661  	mlx5_clear_vqs(ndev);
2662  	init_mvqs(ndev);
2663  	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
2664  		mvq = &ndev->vqs[i];
2665  		ri = &mvq->ri;
2666  		if (!ri->restore)
2667  			continue;
2668  
2669  		mvq->avail_idx = ri->avail_index;
2670  		mvq->used_idx = ri->used_index;
2671  		mvq->ready = ri->ready;
2672  		mvq->num_ent = ri->num_ent;
2673  		mvq->desc_addr = ri->desc_addr;
2674  		mvq->device_addr = ri->device_addr;
2675  		mvq->driver_addr = ri->driver_addr;
2676  		mvq->map = ri->map;
2677  	}
2678  }
2679  
2680  static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev,
2681  				struct vhost_iotlb *iotlb, unsigned int asid)
2682  {
2683  	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2684  	int err;
2685  
2686  	suspend_vqs(ndev);
2687  	err = save_channels_info(ndev);
2688  	if (err)
2689  		goto err_mr;
2690  
2691  	teardown_driver(ndev);
2692  	mlx5_vdpa_destroy_mr_asid(mvdev, asid);
2693  	err = mlx5_vdpa_create_mr(mvdev, iotlb, asid);
2694  	if (err)
2695  		goto err_mr;
2696  
2697  	if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) || mvdev->suspended)
2698  		goto err_mr;
2699  
2700  	restore_channels_info(ndev);
2701  	err = setup_driver(mvdev);
2702  	if (err)
2703  		goto err_setup;
2704  
2705  	return 0;
2706  
2707  err_setup:
2708  	mlx5_vdpa_destroy_mr_asid(mvdev, asid);
2709  err_mr:
2710  	return err;
2711  }
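/* Map-change sequence implemented above: suspend the data VQs, snapshot
 * their state (save_channels_info), tear down the driver objects, rebuild
 * the memory key for the new IOTLB, and, only if the device is DRIVER_OK
 * and not suspended, restore the saved VQ state and set the driver objects
 * up again on top of the new mapping.
 */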
2712  
2713  /* reslock must be held for this function */
2714  static int setup_driver(struct mlx5_vdpa_dev *mvdev)
2715  {
2716  	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2717  	int err;
2718  
2719  	WARN_ON(!rwsem_is_locked(&ndev->reslock));
2720  
2721  	if (ndev->setup) {
2722  		mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n");
2723  		err = 0;
2724  		goto out;
2725  	}
2726  	mlx5_vdpa_add_debugfs(ndev);
2727  
2728  	err = read_umem_params(ndev);
2729  	if (err)
2730  		goto err_setup;
2731  
2732  	err = setup_virtqueues(mvdev);
2733  	if (err) {
2734  		mlx5_vdpa_warn(mvdev, "setup_virtqueues\n");
2735  		goto err_setup;
2736  	}
2737  
2738  	err = create_rqt(ndev);
2739  	if (err) {
2740  		mlx5_vdpa_warn(mvdev, "create_rqt\n");
2741  		goto err_rqt;
2742  	}
2743  
2744  	err = create_tir(ndev);
2745  	if (err) {
2746  		mlx5_vdpa_warn(mvdev, "create_tir\n");
2747  		goto err_tir;
2748  	}
2749  
2750  	err = setup_steering(ndev);
2751  	if (err) {
2752  		mlx5_vdpa_warn(mvdev, "setup_steering\n");
2753  		goto err_fwd;
2754  	}
2755  	ndev->setup = true;
2756  
2757  	return 0;
2758  
2759  err_fwd:
2760  	destroy_tir(ndev);
2761  err_tir:
2762  	destroy_rqt(ndev);
2763  err_rqt:
2764  	teardown_virtqueues(ndev);
2765  err_setup:
2766  	mlx5_vdpa_remove_debugfs(ndev);
2767  out:
2768  	return err;
2769  }
2770  
2771  /* reslock must be held for this function */
2772  static void teardown_driver(struct mlx5_vdpa_net *ndev)
2773  {
2774  
2775  	WARN_ON(!rwsem_is_locked(&ndev->reslock));
2776  
2777  	if (!ndev->setup)
2778  		return;
2779  
2780  	mlx5_vdpa_remove_debugfs(ndev);
2781  	teardown_steering(ndev);
2782  	destroy_tir(ndev);
2783  	destroy_rqt(ndev);
2784  	teardown_virtqueues(ndev);
2785  	ndev->setup = false;
2786  }
2787  
2788  static void clear_vqs_ready(struct mlx5_vdpa_net *ndev)
2789  {
2790  	int i;
2791  
2792  	for (i = 0; i < ndev->mvdev.max_vqs; i++)
2793  		ndev->vqs[i].ready = false;
2794  
2795  	ndev->mvdev.cvq.ready = false;
2796  }
2797  
2798  static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
2799  {
2800  	struct mlx5_control_vq *cvq = &mvdev->cvq;
2801  	int err = 0;
2802  
2803  	if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) {
2804  		u16 idx = cvq->vring.last_avail_idx;
2805  
2806  		err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
2807  					cvq->vring.vring.num, false,
2808  					(struct vring_desc *)(uintptr_t)cvq->desc_addr,
2809  					(struct vring_avail *)(uintptr_t)cvq->driver_addr,
2810  					(struct vring_used *)(uintptr_t)cvq->device_addr);
2811  
2812  		if (!err)
2813  			cvq->vring.last_avail_idx = cvq->vring.last_used_idx = idx;
2814  	}
2815  	return err;
2816  }
2817  
2818  static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
2819  {
2820  	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2821  	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2822  	int err;
2823  
2824  	print_status(mvdev, status, true);
2825  
2826  	down_write(&ndev->reslock);
2827  
2828  	if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
2829  		if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
2830  			err = setup_cvq_vring(mvdev);
2831  			if (err) {
2832  				mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n");
2833  				goto err_setup;
2834  			}
2835  			register_link_notifier(ndev);
2836  			err = setup_driver(mvdev);
2837  			if (err) {
2838  				mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
2839  				goto err_driver;
2840  			}
2841  		} else {
2842  			mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
2843  			goto err_clear;
2844  		}
2845  	}
2846  
2847  	ndev->mvdev.status = status;
2848  	up_write(&ndev->reslock);
2849  	return;
2850  
2851  err_driver:
2852  	unregister_link_notifier(ndev);
2853  err_setup:
2854  	mlx5_vdpa_destroy_mr(&ndev->mvdev);
2855  	ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
2856  err_clear:
2857  	up_write(&ndev->reslock);
2858  }
2859  
2860  static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev)
2861  {
2862  	int i;
2863  
2864  	/* By default, all groups are mapped to ASID 0 */
2865  	for (i = 0; i < MLX5_VDPA_NUMVQ_GROUPS; i++)
2866  		mvdev->group2asid[i] = 0;
2867  }
2868  
2869  static int mlx5_vdpa_reset(struct vdpa_device *vdev)
2870  {
2871  	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2872  	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2873  
2874  	print_status(mvdev, 0, true);
2875  	mlx5_vdpa_info(mvdev, "performing device reset\n");
2876  
2877  	down_write(&ndev->reslock);
2878  	unregister_link_notifier(ndev);
2879  	teardown_driver(ndev);
2880  	clear_vqs_ready(ndev);
2881  	mlx5_vdpa_destroy_mr(&ndev->mvdev);
2882  	ndev->mvdev.status = 0;
2883  	ndev->mvdev.suspended = false;
2884  	ndev->cur_num_vqs = 0;
2885  	ndev->mvdev.cvq.received_desc = 0;
2886  	ndev->mvdev.cvq.completed_desc = 0;
2887  	memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1));
2888  	ndev->mvdev.actual_features = 0;
2889  	init_group_to_asid_map(mvdev);
2890  	++mvdev->generation;
2891  
2892  	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
2893  		if (mlx5_vdpa_create_mr(mvdev, NULL, 0))
2894  			mlx5_vdpa_warn(mvdev, "create MR failed\n");
2895  	}
2896  	up_write(&ndev->reslock);
2897  
2898  	return 0;
2899  }
2900  
2901  static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev)
2902  {
2903  	return sizeof(struct virtio_net_config);
2904  }
2905  
2906  static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf,
2907  				 unsigned int len)
2908  {
2909  	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2910  	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2911  
2912  	if (offset + len <= sizeof(struct virtio_net_config))
2913  		memcpy(buf, (u8 *)&ndev->config + offset, len);
2914  }
2915  
2916  static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf,
2917  				 unsigned int len)
2918  {
2919  	/* not supported */
2920  }
2921  
2922  static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
2923  {
2924  	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2925  
2926  	return mvdev->generation;
2927  }
2928  
2929  static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
2930  			unsigned int asid)
2931  {
2932  	bool change_map;
2933  	int err;
2934  
2935  	err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map, asid);
2936  	if (err) {
2937  		mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err);
2938  		return err;
2939  	}
2940  
2941  	if (change_map)
2942  		err = mlx5_vdpa_change_map(mvdev, iotlb, asid);
2943  
2944  	return err;
2945  }
2946  
2947  static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
2948  			     struct vhost_iotlb *iotlb)
2949  {
2950  	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2951  	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2952  	int err = -EINVAL;
2953  
2954  	down_write(&ndev->reslock);
2955  	err = set_map_data(mvdev, iotlb, asid);
2956  	up_write(&ndev->reslock);
2957  	return err;
2958  }
2959  
2960  static struct device *mlx5_get_vq_dma_dev(struct vdpa_device *vdev, u16 idx)
2961  {
2962  	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2963  
2964  	if (is_ctrl_vq_idx(mvdev, idx))
2965  		return &vdev->dev;
2966  
2967  	return mvdev->vdev.dma_dev;
2968  }
2969  
2970  static void free_irqs(struct mlx5_vdpa_net *ndev)
2971  {
2972  	struct mlx5_vdpa_irq_pool_entry *ent;
2973  	int i;
2974  
2975  	if (!msix_mode_supported(&ndev->mvdev))
2976  		return;
2977  
2978  	if (!ndev->irqp.entries)
2979  		return;
2980  
2981  	for (i = ndev->irqp.num_ent - 1; i >= 0; i--) {
2982  		ent = ndev->irqp.entries + i;
2983  		if (ent->map.virq)
2984  			pci_msix_free_irq(ndev->mvdev.mdev->pdev, ent->map);
2985  	}
2986  	kfree(ndev->irqp.entries);
2987  }
2988  
2989  static void mlx5_vdpa_free(struct vdpa_device *vdev)
2990  {
2991  	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2992  	struct mlx5_core_dev *pfmdev;
2993  	struct mlx5_vdpa_net *ndev;
2994  
2995  	ndev = to_mlx5_vdpa_ndev(mvdev);
2996  
2997  	free_resources(ndev);
2998  	mlx5_vdpa_destroy_mr(mvdev);
2999  	if (!is_zero_ether_addr(ndev->config.mac)) {
3000  		pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
3001  		mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
3002  	}
3003  	mlx5_vdpa_free_resources(&ndev->mvdev);
3004  	free_irqs(ndev);
3005  	kfree(ndev->event_cbs);
3006  	kfree(ndev->vqs);
3007  }
3008  
3009  static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
3010  {
3011  	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3012  	struct vdpa_notification_area ret = {};
3013  	struct mlx5_vdpa_net *ndev;
3014  	phys_addr_t addr;
3015  
3016  	if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
3017  		return ret;
3018  
3019  	/* If the SF BAR size is smaller than PAGE_SIZE, do not use direct
3020  	 * notification to avoid the risk of mapping pages that contain the BARs
3021  	 * of more than one SF.
3022  	 */
3023  	if (MLX5_CAP_GEN(mvdev->mdev, log_min_sf_size) + 12 < PAGE_SHIFT)
3024  		return ret;
3025  
3026  	ndev = to_mlx5_vdpa_ndev(mvdev);
3027  	addr = (phys_addr_t)ndev->mvdev.res.phys_kick_addr;
3028  	ret.addr = addr;
3029  	ret.size = PAGE_SIZE;
3030  	return ret;
3031  }
3032  
3033  static int mlx5_get_vq_irq(struct vdpa_device *vdev, u16 idx)
3034  {
3035  	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3036  	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3037  	struct mlx5_vdpa_virtqueue *mvq;
3038  
3039  	if (!is_index_valid(mvdev, idx))
3040  		return -EINVAL;
3041  
3042  	if (is_ctrl_vq_idx(mvdev, idx))
3043  		return -EOPNOTSUPP;
3044  
3045  	mvq = &ndev->vqs[idx];
3046  	if (!mvq->map.virq)
3047  		return -EOPNOTSUPP;
3048  
3049  	return mvq->map.virq;
3050  }
3051  
3052  static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev)
3053  {
3054  	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3055  
3056  	return mvdev->actual_features;
3057  }
3058  
3059  static int counter_set_query(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
3060  			     u64 *received_desc, u64 *completed_desc)
3061  {
3062  	u32 in[MLX5_ST_SZ_DW(query_virtio_q_counters_in)] = {};
3063  	u32 out[MLX5_ST_SZ_DW(query_virtio_q_counters_out)] = {};
3064  	void *cmd_hdr;
3065  	void *ctx;
3066  	int err;
3067  
3068  	if (!counters_supported(&ndev->mvdev))
3069  		return -EOPNOTSUPP;
3070  
3071  	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
3072  		return -EAGAIN;
3073  
3074  	cmd_hdr = MLX5_ADDR_OF(query_virtio_q_counters_in, in, hdr);
3075  
3076  	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
3077  	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
3078  	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
3079  	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->counter_set_id);
3080  
3081  	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
3082  	if (err)
3083  		return err;
3084  
3085  	ctx = MLX5_ADDR_OF(query_virtio_q_counters_out, out, counters);
3086  	*received_desc = MLX5_GET64(virtio_q_counters, ctx, received_desc);
3087  	*completed_desc = MLX5_GET64(virtio_q_counters, ctx, completed_desc);
3088  	return 0;
3089  }
3090  
3091  static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
3092  					 struct sk_buff *msg,
3093  					 struct netlink_ext_ack *extack)
3094  {
3095  	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3096  	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3097  	struct mlx5_vdpa_virtqueue *mvq;
3098  	struct mlx5_control_vq *cvq;
3099  	u64 received_desc;
3100  	u64 completed_desc;
3101  	int err = 0;
3102  
3103  	down_read(&ndev->reslock);
3104  	if (!is_index_valid(mvdev, idx)) {
3105  		NL_SET_ERR_MSG_MOD(extack, "virtqueue index is not valid");
3106  		err = -EINVAL;
3107  		goto out_err;
3108  	}
3109  
3110  	if (idx == ctrl_vq_idx(mvdev)) {
3111  		cvq = &mvdev->cvq;
3112  		received_desc = cvq->received_desc;
3113  		completed_desc = cvq->completed_desc;
3114  		goto out;
3115  	}
3116  
3117  	mvq = &ndev->vqs[idx];
3118  	err = counter_set_query(ndev, mvq, &received_desc, &completed_desc);
3119  	if (err) {
3120  		NL_SET_ERR_MSG_MOD(extack, "failed to query hardware");
3121  		goto out_err;
3122  	}
3123  
3124  out:
3125  	err = -EMSGSIZE;
3126  	if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "received_desc"))
3127  		goto out_err;
3128  
3129  	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, received_desc,
3130  			      VDPA_ATTR_PAD))
3131  		goto out_err;
3132  
3133  	if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "completed_desc"))
3134  		goto out_err;
3135  
3136  	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, completed_desc,
3137  			      VDPA_ATTR_PAD))
3138  		goto out_err;
3139  
3140  	err = 0;
3141  out_err:
3142  	up_read(&ndev->reslock);
3143  	return err;
3144  }
3145  
3146  static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev)
3147  {
3148  	struct mlx5_control_vq *cvq;
3149  
3150  	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
3151  		return;
3152  
3153  	cvq = &mvdev->cvq;
3154  	cvq->ready = false;
3155  }
3156  
3157  static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
3158  {
3159  	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3160  	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3161  	struct mlx5_vdpa_virtqueue *mvq;
3162  	int i;
3163  
3164  	mlx5_vdpa_info(mvdev, "suspending device\n");
3165  
3166  	down_write(&ndev->reslock);
3167  	unregister_link_notifier(ndev);
3168  	for (i = 0; i < ndev->cur_num_vqs; i++) {
3169  		mvq = &ndev->vqs[i];
3170  		suspend_vq(ndev, mvq);
3171  	}
3172  	mlx5_vdpa_cvq_suspend(mvdev);
3173  	mvdev->suspended = true;
3174  	up_write(&ndev->reslock);
3175  	return 0;
3176  }
3177  
3178  static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group,
3179  			       unsigned int asid)
3180  {
3181  	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3182  
3183  	if (group >= MLX5_VDPA_NUMVQ_GROUPS)
3184  		return -EINVAL;
3185  
3186  	mvdev->group2asid[group] = asid;
3187  	return 0;
3188  }
3189  
3190  static const struct vdpa_config_ops mlx5_vdpa_ops = {
3191  	.set_vq_address = mlx5_vdpa_set_vq_address,
3192  	.set_vq_num = mlx5_vdpa_set_vq_num,
3193  	.kick_vq = mlx5_vdpa_kick_vq,
3194  	.set_vq_cb = mlx5_vdpa_set_vq_cb,
3195  	.set_vq_ready = mlx5_vdpa_set_vq_ready,
3196  	.get_vq_ready = mlx5_vdpa_get_vq_ready,
3197  	.set_vq_state = mlx5_vdpa_set_vq_state,
3198  	.get_vq_state = mlx5_vdpa_get_vq_state,
3199  	.get_vendor_vq_stats = mlx5_vdpa_get_vendor_vq_stats,
3200  	.get_vq_notification = mlx5_get_vq_notification,
3201  	.get_vq_irq = mlx5_get_vq_irq,
3202  	.get_vq_align = mlx5_vdpa_get_vq_align,
3203  	.get_vq_group = mlx5_vdpa_get_vq_group,
3204  	.get_device_features = mlx5_vdpa_get_device_features,
3205  	.set_driver_features = mlx5_vdpa_set_driver_features,
3206  	.get_driver_features = mlx5_vdpa_get_driver_features,
3207  	.set_config_cb = mlx5_vdpa_set_config_cb,
3208  	.get_vq_num_max = mlx5_vdpa_get_vq_num_max,
3209  	.get_device_id = mlx5_vdpa_get_device_id,
3210  	.get_vendor_id = mlx5_vdpa_get_vendor_id,
3211  	.get_status = mlx5_vdpa_get_status,
3212  	.set_status = mlx5_vdpa_set_status,
3213  	.reset = mlx5_vdpa_reset,
3214  	.get_config_size = mlx5_vdpa_get_config_size,
3215  	.get_config = mlx5_vdpa_get_config,
3216  	.set_config = mlx5_vdpa_set_config,
3217  	.get_generation = mlx5_vdpa_get_generation,
3218  	.set_map = mlx5_vdpa_set_map,
3219  	.set_group_asid = mlx5_set_group_asid,
3220  	.get_vq_dma_dev = mlx5_get_vq_dma_dev,
3221  	.free = mlx5_vdpa_free,
3222  	.suspend = mlx5_vdpa_suspend,
3223  };
3224  
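/*
 * Query the NIC vport MTU and convert it to a virtio-net MTU by
 * subtracting the Ethernet hard-header overhead (MLX5V_ETH_HARD_MTU).
 */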
3225  static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
3226  {
3227  	u16 hw_mtu;
3228  	int err;
3229  
3230  	err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu);
3231  	if (err)
3232  		return err;
3233  
3234  	*mtu = hw_mtu - MLX5V_ETH_HARD_MTU;
3235  	return 0;
3236  }
3237  
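/*
 * Allocate the transport domain and TIS backing the net device; fails
 * with -EEXIST if the resources are already valid.
 */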
3238  static int alloc_resources(struct mlx5_vdpa_net *ndev)
3239  {
3240  	struct mlx5_vdpa_net_resources *res = &ndev->res;
3241  	int err;
3242  
3243  	if (res->valid) {
3244  		mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n");
3245  		return -EEXIST;
3246  	}
3247  
3248  	err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn);
3249  	if (err)
3250  		return err;
3251  
3252  	err = create_tis(ndev);
3253  	if (err)
3254  		goto err_tis;
3255  
3256  	res->valid = true;
3257  
3258  	return 0;
3259  
3260  err_tis:
3261  	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
3262  	return err;
3263  }
3264  
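/* Undo alloc_resources(): destroy the TIS and free the transport domain. */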
3265  static void free_resources(struct mlx5_vdpa_net *ndev)
3266  {
3267  	struct mlx5_vdpa_net_resources *res = &ndev->res;
3268  
3269  	if (!res->valid)
3270  		return;
3271  
3272  	destroy_tis(ndev);
3273  	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
3274  	res->valid = false;
3275  }
3276  
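/*
 * Reset per-virtqueue state up to (but not including) the restore info,
 * then set the index, owning ndev, firmware QP flag and initial firmware
 * state for each virtqueue.
 */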
3277  static void init_mvqs(struct mlx5_vdpa_net *ndev)
3278  {
3279  	struct mlx5_vdpa_virtqueue *mvq;
3280  	int i;
3281  
3282  	for (i = 0; i < ndev->mvdev.max_vqs; ++i) {
3283  		mvq = &ndev->vqs[i];
3284  		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
3285  		mvq->index = i;
3286  		mvq->ndev = ndev;
3287  		mvq->fwqp.fw = true;
3288  		mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
3289  	}
3290  	for (; i < ndev->mvdev.max_vqs; i++) {
3291  		mvq = &ndev->vqs[i];
3292  		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
3293  		mvq->index = i;
3294  		mvq->ndev = ndev;
3295  	}
3296  }
3297  
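/* Management device state; at most one vdpa net device exists per management device. */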
3298  struct mlx5_vdpa_mgmtdev {
3299  	struct vdpa_mgmt_dev mgtdev;
3300  	struct mlx5_adev *madev;
3301  	struct mlx5_vdpa_net *ndev;
3302  };
3303  
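/*
 * Program the requested MTU into the NIC vport context, adding back the
 * Ethernet hard-header overhead that query_mtu() subtracts.
 */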
3304  static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu)
3305  {
3306  	int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
3307  	void *in;
3308  	int err;
3309  
3310  	in = kvzalloc(inlen, GFP_KERNEL);
3311  	if (!in)
3312  		return -ENOMEM;
3313  
3314  	MLX5_SET(modify_nic_vport_context_in, in, field_select.mtu, 1);
3315  	MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.mtu,
3316  		 mtu + MLX5V_ETH_HARD_MTU);
3317  	MLX5_SET(modify_nic_vport_context_in, in, opcode,
3318  		 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
3319  
3320  	err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);
3321  
3322  	kvfree(in);
3323  	return err;
3324  }
3325  
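/*
 * Best-effort pre-allocation of one dynamic MSI-X interrupt per virtqueue.
 * If a vector cannot be allocated, the pool simply ends up with fewer
 * entries; this is not treated as an error.
 */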
3326  static void allocate_irqs(struct mlx5_vdpa_net *ndev)
3327  {
3328  	struct mlx5_vdpa_irq_pool_entry *ent;
3329  	int i;
3330  
3331  	if (!msix_mode_supported(&ndev->mvdev))
3332  		return;
3333  
3334  	if (!ndev->mvdev.mdev->pdev)
3335  		return;
3336  
3337  	ndev->irqp.entries = kcalloc(ndev->mvdev.max_vqs, sizeof(*ndev->irqp.entries), GFP_KERNEL);
3338  	if (!ndev->irqp.entries)
3339  		return;
3340  
3341  
3342  	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
3343  		ent = ndev->irqp.entries + i;
3344  		snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
3345  			 dev_name(&ndev->mvdev.vdev.dev), i);
3346  		ent->map = pci_msix_alloc_irq_at(ndev->mvdev.mdev->pdev, MSI_ANY_INDEX, NULL);
3347  		if (!ent->map.virq)
3348  			return;
3349  
3350  		ndev->irqp.num_ent++;
3351  	}
3352  }
3353  
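/*
 * Management device .dev_add callback: validate the provisioned features,
 * max VQ pairs, MTU and MAC address, allocate and initialize the vdpa net
 * device, set up device resources and the control-VQ workqueue, and
 * register the device with the vdpa core.
 */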
3354  static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
3355  			     const struct vdpa_dev_set_config *add_config)
3356  {
3357  	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
3358  	struct virtio_net_config *config;
3359  	struct mlx5_core_dev *pfmdev;
3360  	struct mlx5_vdpa_dev *mvdev;
3361  	struct mlx5_vdpa_net *ndev;
3362  	struct mlx5_core_dev *mdev;
3363  	u64 device_features;
3364  	u32 max_vqs;
3365  	u16 mtu;
3366  	int err;
3367  
3368  	if (mgtdev->ndev)
3369  		return -ENOSPC;
3370  
3371  	mdev = mgtdev->madev->mdev;
3372  	device_features = mgtdev->mgtdev.supported_features;
3373  	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) {
3374  		if (add_config->device_features & ~device_features) {
3375  			dev_warn(mdev->device,
3376  				 "The provisioned features 0x%llx are not supported by this device with features 0x%llx\n",
3377  				 add_config->device_features, device_features);
3378  			return -EINVAL;
3379  		}
3380  		device_features &= add_config->device_features;
3381  	} else {
3382  		device_features &= ~BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
3383  	}
3384  	if (!(device_features & BIT_ULL(VIRTIO_F_VERSION_1) &&
3385  	      device_features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) {
3386  		dev_warn(mdev->device,
3387  			 "Must provision minimum features 0x%llx for this device",
3388  			 BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM));
3389  		return -EOPNOTSUPP;
3390  	}
3391  
3392  	if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) &
3393  	    MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) {
3394  		dev_warn(mdev->device, "missing support for split virtqueues\n");
3395  		return -EOPNOTSUPP;
3396  	}
3397  
3398  	max_vqs = min_t(int, MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues),
3399  			1 << MLX5_CAP_GEN(mdev, log_max_rqt_size));
3400  	if (max_vqs < 2) {
3401  		dev_warn(mdev->device,
3402  			 "%d virtqueues are supported. At least 2 are required\n",
3403  			 max_vqs);
3404  		return -EAGAIN;
3405  	}
3406  
3407  	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) {
3408  		if (add_config->net.max_vq_pairs > max_vqs / 2)
3409  			return -EINVAL;
3410  		max_vqs = min_t(u32, max_vqs, 2 * add_config->net.max_vq_pairs);
3411  	} else {
3412  		max_vqs = 2;
3413  	}
3414  
3415  	ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
3416  				 MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false);
3417  	if (IS_ERR(ndev))
3418  		return PTR_ERR(ndev);
3419  
3420  	ndev->mvdev.max_vqs = max_vqs;
3421  	mvdev = &ndev->mvdev;
3422  	mvdev->mdev = mdev;
3423  
3424  	ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
3425  	ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), GFP_KERNEL);
3426  	if (!ndev->vqs || !ndev->event_cbs) {
3427  		err = -ENOMEM;
3428  		goto err_alloc;
3429  	}
3430  
3431  	init_mvqs(ndev);
3432  	allocate_irqs(ndev);
3433  	init_rwsem(&ndev->reslock);
3434  	config = &ndev->config;
3435  
3436  	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU)) {
3437  		err = config_func_mtu(mdev, add_config->net.mtu);
3438  		if (err)
3439  			goto err_alloc;
3440  	}
3441  
3442  	if (device_features & BIT_ULL(VIRTIO_NET_F_MTU)) {
3443  		err = query_mtu(mdev, &mtu);
3444  		if (err)
3445  			goto err_alloc;
3446  
3447  		ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu);
3448  	}
3449  
3450  	if (device_features & BIT_ULL(VIRTIO_NET_F_STATUS)) {
3451  		if (get_link_state(mvdev))
3452  			ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
3453  		else
3454  			ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
3455  	}
3456  
3457  	if (add_config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
3458  		memcpy(ndev->config.mac, add_config->net.mac, ETH_ALEN);
3459  	/* Don't bother setting the mac address in config if we are not going to provision _F_MAC */
3460  	} else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0 ||
3461  		   device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
3462  		err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
3463  		if (err)
3464  			goto err_alloc;
3465  	}
3466  
3467  	if (!is_zero_ether_addr(config->mac)) {
3468  		pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
3469  		err = mlx5_mpfs_add_mac(pfmdev, config->mac);
3470  		if (err)
3471  			goto err_alloc;
3472  	} else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0) {
3473  		/*
3474  		 * We used to clear _F_MAC feature bit if seeing
3475  		 * We used to clear the _F_MAC feature bit when we saw a zero
3476  		 * mac address and device features were not explicitly
3477  		 * provisioned. Keep that behaviour so old scripts do not
3478  		 * break.
3479  		device_features &= ~BIT_ULL(VIRTIO_NET_F_MAC);
3480  	} else if (device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
3481  		/* Don't provision zero mac address for _F_MAC */
3482  		mlx5_vdpa_warn(&ndev->mvdev,
3483  			       "No mac address provisioned?\n");
3484  		err = -EINVAL;
3485  		goto err_alloc;
3486  	}
3487  
3488  	if (device_features & BIT_ULL(VIRTIO_NET_F_MQ))
3489  		config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, max_vqs / 2);
3490  
3491  	ndev->mvdev.mlx_features = device_features;
3492  	mvdev->vdev.dma_dev = &mdev->pdev->dev;
3493  	err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
3494  	if (err)
3495  		goto err_mpfs;
3496  
3497  	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
3498  		err = mlx5_vdpa_create_mr(mvdev, NULL, 0);
3499  		if (err)
3500  			goto err_res;
3501  	}
3502  
3503  	err = alloc_resources(ndev);
3504  	if (err)
3505  		goto err_mr;
3506  
3507  	ndev->cvq_ent.mvdev = mvdev;
3508  	INIT_WORK(&ndev->cvq_ent.work, mlx5_cvq_kick_handler);
3509  	mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq");
3510  	if (!mvdev->wq) {
3511  		err = -ENOMEM;
3512  		goto err_res2;
3513  	}
3514  
3515  	mvdev->vdev.mdev = &mgtdev->mgtdev;
3516  	err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1);
3517  	if (err)
3518  		goto err_reg;
3519  
3520  	mgtdev->ndev = ndev;
3521  	return 0;
3522  
3523  err_reg:
3524  	destroy_workqueue(mvdev->wq);
3525  err_res2:
3526  	free_resources(ndev);
3527  err_mr:
3528  	mlx5_vdpa_destroy_mr(mvdev);
3529  err_res:
3530  	mlx5_vdpa_free_resources(&ndev->mvdev);
3531  err_mpfs:
3532  	if (!is_zero_ether_addr(config->mac))
3533  		mlx5_mpfs_del_mac(pfmdev, config->mac);
3534  err_alloc:
3535  	put_device(&mvdev->vdev.dev);
3536  	return err;
3537  }
3538  
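/*
 * Management device .dev_del callback: stop link-state notifications,
 * unregister the vdpa device and destroy its control-VQ workqueue.
 */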
3539  static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
3540  {
3541  	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
3542  	struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
3543  	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3544  	struct workqueue_struct *wq;
3545  
3546  	unregister_link_notifier(ndev);
3547  	_vdpa_unregister_device(dev);
3548  	wq = mvdev->wq;
3549  	mvdev->wq = NULL;
3550  	destroy_workqueue(wq);
3551  	mgtdev->ndev = NULL;
3552  }
3553  
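/* Callbacks used by the vdpa management interface to add and delete devices. */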
3554  static const struct vdpa_mgmtdev_ops mdev_ops = {
3555  	.dev_add = mlx5_vdpa_dev_add,
3556  	.dev_del = mlx5_vdpa_dev_del,
3557  };
3558  
3559  static struct virtio_device_id id_table[] = {
3560  	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
3561  	{ 0 },
3562  };
3563  
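/*
 * Auxiliary bus probe: allocate the management device, advertise the
 * supported config attributes, features and maximum virtqueue count, and
 * register it with the vdpa subsystem.
 */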
3564  static int mlx5v_probe(struct auxiliary_device *adev,
3565  		       const struct auxiliary_device_id *id)
3566  
3567  {
3568  	struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
3569  	struct mlx5_core_dev *mdev = madev->mdev;
3570  	struct mlx5_vdpa_mgmtdev *mgtdev;
3571  	int err;
3572  
3573  	mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL);
3574  	if (!mgtdev)
3575  		return -ENOMEM;
3576  
3577  	mgtdev->mgtdev.ops = &mdev_ops;
3578  	mgtdev->mgtdev.device = mdev->device;
3579  	mgtdev->mgtdev.id_table = id_table;
3580  	mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
3581  					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP) |
3582  					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU) |
3583  					  BIT_ULL(VDPA_ATTR_DEV_FEATURES);
3584  	mgtdev->mgtdev.max_supported_vqs =
3585  		MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues) + 1;
3586  	mgtdev->mgtdev.supported_features = get_supported_features(mdev);
3587  	mgtdev->madev = madev;
3588  
3589  	err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
3590  	if (err)
3591  		goto reg_err;
3592  
3593  	auxiliary_set_drvdata(adev, mgtdev);
3594  
3595  	return 0;
3596  
3597  reg_err:
3598  	kfree(mgtdev);
3599  	return err;
3600  }
3601  
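/* Auxiliary bus remove: unregister the management device and free it. */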
3602  static void mlx5v_remove(struct auxiliary_device *adev)
3603  {
3604  	struct mlx5_vdpa_mgmtdev *mgtdev;
3605  
3606  	mgtdev = auxiliary_get_drvdata(adev);
3607  	vdpa_mgmtdev_unregister(&mgtdev->mgtdev);
3608  	kfree(mgtdev);
3609  }
3610  
3611  static const struct auxiliary_device_id mlx5v_id_table[] = {
3612  	{ .name = MLX5_ADEV_NAME ".vnet", },
3613  	{},
3614  };
3615  
3616  MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table);
3617  
3618  static struct auxiliary_driver mlx5v_driver = {
3619  	.name = "vnet",
3620  	.probe = mlx5v_probe,
3621  	.remove = mlx5v_remove,
3622  	.id_table = mlx5v_id_table,
3623  };
3624  
3625  module_auxiliary_driver(mlx5v_driver);
3626