// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies Ltd. */

#include <linux/module.h>
#include <linux/vdpa.h>
#include <linux/vringh.h>
#include <uapi/linux/virtio_net.h>
#include <uapi/linux/virtio_ids.h>
#include <uapi/linux/vdpa.h>
#include <linux/virtio_config.h>
#include <linux/auxiliary_bus.h>
#include <linux/mlx5/cq.h>
#include <linux/mlx5/qp.h>
#include <linux/mlx5/device.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/mlx5_ifc_vdpa.h>
#include <linux/mlx5/mpfs.h>
#include "mlx5_vdpa.h"
#include "mlx5_vnet.h"

MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox VDPA driver");
MODULE_LICENSE("Dual BSD/GPL");

#define VALID_FEATURES_MASK                                                                        \
	(BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |                                   \
	 BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) |   \
	 BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |                             \
	 BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \
	 BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) |   \
	 BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |      \
	 BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) |                                 \
	 BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) |                      \
	 BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) |  \
	 BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) |           \
	 BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) |                          \
	 BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) |      \
	 BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV))

#define VALID_STATUS_MASK                                                                          \
	(VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK |        \
	 VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)

#define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature)))

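/* Sentinel "VLAN id" used when hashing untagged traffic. Real VLAN ids are
 * 12 bits wide, so 0x1000 cannot collide with a tagged entry.
 */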
#define MLX5V_UNTAGGED 0x1000

struct mlx5_vdpa_cq_buf {
	struct mlx5_frag_buf_ctrl fbc;
	struct mlx5_frag_buf frag_buf;
	int cqe_size;
	int nent;
};

struct mlx5_vdpa_cq {
	struct mlx5_core_cq mcq;
	struct mlx5_vdpa_cq_buf buf;
	struct mlx5_db db;
	int cqe;
};

struct mlx5_vdpa_umem {
	struct mlx5_frag_buf_ctrl fbc;
	struct mlx5_frag_buf frag_buf;
	int size;
	u32 id;
};

struct mlx5_vdpa_qp {
	struct mlx5_core_qp mqp;
	struct mlx5_frag_buf frag_buf;
	struct mlx5_db db;
	u16 head;
	bool fw;
};

struct mlx5_vq_restore_info {
	u32 num_ent;
	u64 desc_addr;
	u64 device_addr;
	u64 driver_addr;
	u16 avail_index;
	u16 used_index;
	bool ready;
	bool restore;
};

struct mlx5_vdpa_virtqueue {
	bool ready;
	u64 desc_addr;
	u64 device_addr;
	u64 driver_addr;
	u32 num_ent;

	/* Resources for implementing the notification channel from the device
	 * to the driver. fwqp is the firmware end of an RC connection; the
	 * other end is vqqp used by the driver. cq is where completions are
	 * reported.
	 */
	struct mlx5_vdpa_cq cq;
	struct mlx5_vdpa_qp fwqp;
	struct mlx5_vdpa_qp vqqp;

	/* umem resources are required for the virtqueue operation. Their use
	 * is internal and they must be provided by the driver.
	 */
	struct mlx5_vdpa_umem umem1;
	struct mlx5_vdpa_umem umem2;
	struct mlx5_vdpa_umem umem3;

	u32 counter_set_id;
	bool initialized;
	int index;
	u32 virtq_id;
	struct mlx5_vdpa_net *ndev;
	u16 avail_idx;
	u16 used_idx;
	int fw_state;

	/* keep last in the struct */
	struct mlx5_vq_restore_info ri;
};

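/* Valid virtqueue indexes depend on the negotiated features: without
 * VIRTIO_NET_F_MQ there is a single RX/TX pair (indexes 0 and 1), plus
 * index 2 for the control VQ when VIRTIO_NET_F_CTRL_VQ is negotiated.
 */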
static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
{
	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) {
		if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
			return idx < 2;
		else
			return idx < 3;
	}

	return idx <= mvdev->max_idx;
}

static void free_resources(struct mlx5_vdpa_net *ndev);
static void init_mvqs(struct mlx5_vdpa_net *ndev);
static int setup_driver(struct mlx5_vdpa_dev *mvdev);
static void teardown_driver(struct mlx5_vdpa_net *ndev);

static bool mlx5_vdpa_debug;

#define MLX5_CVQ_MAX_ENT 16

#define MLX5_LOG_VIO_FLAG(_feature)                                                                \
	do {                                                                                       \
		if (features & BIT_ULL(_feature))                                                  \
			mlx5_vdpa_info(mvdev, "%s\n", #_feature);                                  \
	} while (0)

#define MLX5_LOG_VIO_STAT(_status)                                                                 \
	do {                                                                                       \
		if (status & (_status))                                                            \
			mlx5_vdpa_info(mvdev, "%s\n", #_status);                                   \
	} while (0)

/* TODO: cross-endian support */
static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
{
	return virtio_legacy_is_little_endian() ||
		(mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
}

static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val)
{
	return __virtio16_to_cpu(mlx5_vdpa_is_little_endian(mvdev), val);
}

static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
{
	return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
}

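/* Per the virtio spec the control VQ is the last queue: index 2 when
 * multiqueue is off, index max_vqs (2N for N data queue pairs) when
 * VIRTIO_NET_F_MQ is negotiated.
 */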
static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
{
	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
		return 2;

	return mvdev->max_vqs;
}

static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
{
	return idx == ctrl_vq_idx(mvdev);
}

static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
{
	if (status & ~VALID_STATUS_MASK)
		mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n",
			       status & ~VALID_STATUS_MASK);

	if (!mlx5_vdpa_debug)
		return;

	mlx5_vdpa_info(mvdev, "driver status %s\n", set ? "set" : "get");
	if (set && !status) {
		mlx5_vdpa_info(mvdev, "driver resets the device\n");
		return;
	}

	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED);
}

static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set)
{
	if (features & ~VALID_FEATURES_MASK)
		mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n",
			       features & ~VALID_FEATURES_MASK);

	if (!mlx5_vdpa_debug)
		return;

	mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads");
	if (!features)
		mlx5_vdpa_info(mvdev, "all feature bits are cleared\n");

	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV);
}

static int create_tis(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
	void *tisc;
	int err;

	tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
	MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn);
	err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn);
	if (err)
		mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err);

	return err;
}

static void destroy_tis(struct mlx5_vdpa_net *ndev)
{
	mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn);
}

#define MLX5_VDPA_CQE_SIZE 64
#define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE)

static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent)
{
	struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
	u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE;
	u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE;
	int err;

	err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf,
				       ndev->mvdev.mdev->priv.numa_node);
	if (err)
		return err;

	mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);

	buf->cqe_size = MLX5_VDPA_CQE_SIZE;
	buf->nent = nent;

	return 0;
}

static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size)
{
	struct mlx5_frag_buf *frag_buf = &umem->frag_buf;

	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf,
					ndev->mvdev.mdev->priv.numa_node);
}

static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf)
{
	mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf);
}

static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n)
{
	return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n);
}

static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf)
{
	struct mlx5_cqe64 *cqe64;
	void *cqe;
	int i;

	for (i = 0; i < buf->nent; i++) {
		cqe = get_cqe(vcq, i);
		cqe64 = cqe;
		cqe64->op_own = MLX5_CQE_INVALID << 4;
	}
}

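/* Return the CQE at index n only if it is owned by software: the opcode must
 * not be MLX5_CQE_INVALID and the ownership bit must match the current pass
 * over the (power-of-two sized) CQ ring.
 */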
static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n)
{
	struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1));

	if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe)))
		return cqe64;

	return NULL;
}

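/* Make n more receive WQEs available to the hardware by advancing the head
 * counter in the doorbell record.
 */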
static void rx_post(struct mlx5_vdpa_qp *vqp, int n)
{
	vqp->head += n;
	vqp->db.db[0] = cpu_to_be32(vqp->head);
}

static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in,
		       struct mlx5_vdpa_virtqueue *mvq, u32 num_ent)
{
	struct mlx5_vdpa_qp *vqp;
	__be64 *pas;
	void *qpc;

	vqp = fw ? &mvq->fwqp : &mvq->vqqp;
	MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid);
	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
	if (vqp->fw) {
		/* The firmware QP is allocated by the driver on behalf of the
		 * firmware, so we can skip some of the parameters; the
		 * firmware will choose them.
		 */
		MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
		MLX5_SET(qpc, qpc, no_sq, 1);
		return;
	}

	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
	MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index);
	MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET(qpc, qpc, no_sq, 1);
	MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn);
	MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent));
	MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
	pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas);
	mlx5_fill_page_frag_array(&vqp->frag_buf, pas);
}

static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent)
{
	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev,
					num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf,
					ndev->mvdev.mdev->priv.numa_node);
}

static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
{
	mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf);
}

static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
		     struct mlx5_vdpa_qp *vqp)
{
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
	void *qpc;
	void *in;
	int err;

	if (!vqp->fw) {
		vqp = &mvq->vqqp;
		err = rq_buf_alloc(ndev, vqp, mvq->num_ent);
		if (err)
			return err;

		err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db);
		if (err)
			goto err_db;
		inlen += vqp->frag_buf.npages * sizeof(__be64);
	}

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_kzalloc;
	}

	qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent);
	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
	if (!vqp->fw)
		MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma);
	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
	kfree(in);
	if (err)
		goto err_kzalloc;

	vqp->mqp.uid = ndev->mvdev.res.uid;
	vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);

	if (!vqp->fw)
		rx_post(vqp, mvq->num_ent);

	return 0;

err_kzalloc:
	if (!vqp->fw)
		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
err_db:
	if (!vqp->fw)
		rq_buf_free(ndev, vqp);

	return err;
}

static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
{
	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};

	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
	MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
	MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
	if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
		mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn);
	if (!vqp->fw) {
		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
		rq_buf_free(ndev, vqp);
	}
}

static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
{
	return get_sw_cqe(cq, cq->mcq.cons_index);
}

static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
{
	struct mlx5_cqe64 *cqe64;

	cqe64 = next_cqe_sw(vcq);
	if (!cqe64)
		return -EAGAIN;

	vcq->mcq.cons_index++;
	return 0;
}

static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
{
	struct mlx5_vdpa_net *ndev = mvq->ndev;
	struct vdpa_callback *event_cb;

	event_cb = &ndev->event_cbs[mvq->index];
	mlx5_cq_set_ci(&mvq->cq.mcq);

	/* make sure the CQ consumer update is visible to the hardware before
	 * updating the RX doorbell record.
	 */
	dma_wmb();
	rx_post(&mvq->vqqp, num);
	if (event_cb->callback)
		event_cb->callback(event_cb->private);
}

static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
{
	struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
	struct mlx5_vdpa_net *ndev = mvq->ndev;
	void __iomem *uar_page = ndev->mvdev.res.uar->map;
	int num = 0;

	while (!mlx5_vdpa_poll_one(&mvq->cq)) {
		num++;
		if (num > mvq->num_ent / 2) {
			/* If completions keep coming while we poll, we want to
			 * let the hardware know that we consumed them by
			 * updating the doorbell record.  We also let the vdpa
			 * core know about this so it passes it on to the
			 * virtio driver in the guest.
			 */
			mlx5_vdpa_handle_completions(mvq, num);
			num = 0;
		}
	}

	if (num)
		mlx5_vdpa_handle_completions(mvq, num);

	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
}

static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
{
	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	void __iomem *uar_page = ndev->mvdev.res.uar->map;
	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
	struct mlx5_vdpa_cq *vcq = &mvq->cq;
	__be64 *pas;
	int inlen;
	void *cqc;
	void *in;
	int err;
	int eqn;

	err = mlx5_db_alloc(mdev, &vcq->db);
	if (err)
		return err;

	vcq->mcq.set_ci_db = vcq->db.db;
	vcq->mcq.arm_db = vcq->db.db + 1;
	vcq->mcq.cqe_sz = 64;

	err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
	if (err)
		goto err_db;

	cq_frag_buf_init(vcq, &vcq->buf);

	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
		MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages;
	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_vzalloc;
	}

	MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid);
	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
	mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas);

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
	MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);

	/* Use vector 0 by default. Consider adding code to choose the least
	 * used vector.
	 */
	err = mlx5_vector2eqn(mdev, 0, &eqn);
	if (err)
		goto err_vec;

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
	MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
	MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
	MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
	MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);

	err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
	if (err)
		goto err_vec;

	vcq->mcq.comp = mlx5_vdpa_cq_comp;
	vcq->cqe = num_ent;
	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
	kfree(in);
	return 0;

err_vec:
	kfree(in);
err_vzalloc:
	cq_frag_buf_free(ndev, &vcq->buf);
err_db:
	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
	return err;
}

static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
{
	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	struct mlx5_vdpa_cq *vcq = &mvq->cq;

	if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) {
		mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn);
		return;
	}
	cq_frag_buf_free(ndev, &vcq->buf);
	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
}

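/* Each virtqueue needs three umems whose sizes are dictated by device
 * capabilities: size = param_a * num_ent + param_b, with the parameters
 * reported per umem in the VDPA emulation caps.
 */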
static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
			  struct mlx5_vdpa_umem **umemp)
{
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	int p_a;
	int p_b;

	switch (num) {
	case 1:
		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_a);
		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_b);
		*umemp = &mvq->umem1;
		break;
	case 2:
		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_a);
		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_b);
		*umemp = &mvq->umem2;
		break;
	case 3:
		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_a);
		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_b);
		*umemp = &mvq->umem3;
		break;
	}
	(*umemp)->size = p_a * mvq->num_ent + p_b;
}

static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem)
{
	mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf);
}

static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
{
	int inlen;
	u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {};
	void *um;
	void *in;
	int err;
	__be64 *pas;
	struct mlx5_vdpa_umem *umem;

	set_umem_size(ndev, mvq, num, &umem);
	err = umem_frag_buf_alloc(ndev, umem, umem->size);
	if (err)
		return err;

	inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_in;
	}

	MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM);
	MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid);
	um = MLX5_ADDR_OF(create_umem_in, in, umem);
	MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages);

	pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]);
	mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW);

	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
	if (err) {
		mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err);
		goto err_cmd;
	}

	kfree(in);
	umem->id = MLX5_GET(create_umem_out, out, umem_id);

	return 0;

err_cmd:
	kfree(in);
err_in:
	umem_frag_buf_free(ndev, umem);
	return err;
}

static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
{
	u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {};
	u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {};
	struct mlx5_vdpa_umem *umem;

	switch (num) {
	case 1:
		umem = &mvq->umem1;
		break;
	case 2:
		umem = &mvq->umem2;
		break;
	case 3:
		umem = &mvq->umem3;
		break;
	}

	MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM);
	MLX5_SET(destroy_umem_in, in, umem_id, umem->id);
	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
		return;

	umem_frag_buf_free(ndev, umem);
}

static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int num;
	int err;

	for (num = 1; num <= 3; num++) {
		err = create_umem(ndev, mvq, num);
		if (err)
			goto err_umem;
	}
	return 0;

err_umem:
	for (num--; num > 0; num--)
		umem_destroy(ndev, mvq, num);

	return err;
}

static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int num;

	for (num = 3; num > 0; num--)
		umem_destroy(ndev, mvq, num);
}

static int get_queue_type(struct mlx5_vdpa_net *ndev)
{
	u32 type_mask;

	type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type);

	/* prefer split queue */
	if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)
		return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT;

	WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED));

	return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED;
}

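/* Even virtqueue indexes are RX queues, odd indexes are TX queues, following
 * the virtio-net queue layout.
 */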
static bool vq_is_tx(u16 idx)
{
	return idx % 2;
}

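/* Pack the checksum and TSO related feature bits into the layout expected by
 * the device's queue_feature_bit_mask_12_3 field.
 */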
static u16 get_features_12_3(u64 features)
{
	return (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << 9) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << 8) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << 7) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_CSUM)) << 6);
}

static bool counters_supported(const struct mlx5_vdpa_dev *mvdev)
{
	return MLX5_CAP_GEN_64(mvdev->mdev, general_obj_types) &
	       BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
}

static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
	u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
	void *obj_context;
	void *cmd_hdr;
	void *vq_ctx;
	void *in;
	int err;

	err = umems_create(ndev, mvq);
	if (err)
		return err;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_alloc;
	}

	cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);

	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);

	obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);
	MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
	MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
		 get_features_12_3(ndev->mvdev.actual_features));
	vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
	MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));

	if (vq_is_tx(mvq->index))
		MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);

	MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
	MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
	MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
	MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
	MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
		 !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
	MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
	MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
	MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
	MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, ndev->mvdev.mr.mkey);
	MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
	MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
	MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
	MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size);
	MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
	MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
	MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
	if (counters_supported(&ndev->mvdev))
		MLX5_SET(virtio_q, vq_ctx, counter_set_id, mvq->counter_set_id);

	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
	if (err)
		goto err_cmd;

	mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
	kfree(in);
	mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);

	return 0;

err_cmd:
	kfree(in);
err_alloc:
	umems_destroy(ndev, mvq);
	return err;
}

static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {};
	u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {};

	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode,
		 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id);
	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid);
	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type,
		 MLX5_OBJ_TYPE_VIRTIO_NET_Q);
	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) {
		mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
		return;
	}
	mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
	umems_destroy(ndev, mvq);
}

static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
{
	return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn;
}

static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
{
	return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn;
}

static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out,
			int *outlen, u32 qpn, u32 rqpn)
{
	void *qpc;
	void *pp;

	switch (cmd) {
	case MLX5_CMD_OP_2RST_QP:
		*inlen = MLX5_ST_SZ_BYTES(qp_2rst_in);
		*outlen = MLX5_ST_SZ_BYTES(qp_2rst_out);
		*in = kzalloc(*inlen, GFP_KERNEL);
		*out = kzalloc(*outlen, GFP_KERNEL);
		if (!*in || !*out)
			goto outerr;

		MLX5_SET(qp_2rst_in, *in, opcode, cmd);
		MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid);
		MLX5_SET(qp_2rst_in, *in, qpn, qpn);
		break;
	case MLX5_CMD_OP_RST2INIT_QP:
		*inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in);
		*outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out);
		*in = kzalloc(*inlen, GFP_KERNEL);
		*out = kzalloc(*outlen, GFP_KERNEL);
		if (!*in || !*out)
			goto outerr;

		MLX5_SET(rst2init_qp_in, *in, opcode, cmd);
		MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid);
		MLX5_SET(rst2init_qp_in, *in, qpn, qpn);
		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
		MLX5_SET(qpc, qpc, rwe, 1);
		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
		MLX5_SET(ads, pp, vhca_port_num, 1);
		break;
	case MLX5_CMD_OP_INIT2RTR_QP:
		*inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in);
		*outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out);
		*in = kzalloc(*inlen, GFP_KERNEL);
		*out = kzalloc(*outlen, GFP_KERNEL);
		if (!*in || !*out)
			goto outerr;

		MLX5_SET(init2rtr_qp_in, *in, opcode, cmd);
		MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid);
		MLX5_SET(init2rtr_qp_in, *in, qpn, qpn);
		qpc = MLX5_ADDR_OF(init2rtr_qp_in, *in, qpc);
		MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
		MLX5_SET(qpc, qpc, log_msg_max, 30);
		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
		MLX5_SET(ads, pp, fl, 1);
		break;
	case MLX5_CMD_OP_RTR2RTS_QP:
		*inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in);
		*outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out);
		*in = kzalloc(*inlen, GFP_KERNEL);
		*out = kzalloc(*outlen, GFP_KERNEL);
		if (!*in || !*out)
			goto outerr;

		MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd);
		MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid);
		MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn);
		qpc = MLX5_ADDR_OF(rtr2rts_qp_in, *in, qpc);
		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
		MLX5_SET(ads, pp, ack_timeout, 14);
		MLX5_SET(qpc, qpc, retry_count, 7);
		MLX5_SET(qpc, qpc, rnr_retry, 7);
		break;
	default:
		goto outerr_nullify;
	}

	return;

outerr:
	kfree(*in);
	kfree(*out);
outerr_nullify:
	*in = NULL;
	*out = NULL;
}

static void free_inout(void *in, void *out)
{
	kfree(in);
	kfree(out);
}

/* Two QPs are used by each virtqueue. One is used by the driver and one by
 * the firmware. The fw argument indicates whether the QP being modified is
 * the one used by the firmware.
 */
static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd)
{
	int outlen;
	int inlen;
	void *out;
	void *in;
	int err;

	alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw));
	if (!in || !out)
		return -ENOMEM;

	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen);
	free_inout(in, out);
	return err;
}

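/* Bring both ends of the RC connection through the usual QP state machine:
 * reset both QPs, take them through INIT and RTR, and finally move the
 * firmware QP to RTS since it is the side that posts sends.
 */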
static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int err;

	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP);
	if (err)
		return err;

	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP);
	if (err)
		return err;

	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP);
	if (err)
		return err;

	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP);
	if (err)
		return err;

	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP);
	if (err)
		return err;

	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP);
	if (err)
		return err;

	return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP);
}

struct mlx5_virtq_attr {
	u8 state;
	u16 available_index;
	u16 used_index;
};

static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
			   struct mlx5_virtq_attr *attr)
{
	int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out);
	u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {};
	void *out;
	void *obj_context;
	void *cmd_hdr;
	int err;

	out = kzalloc(outlen, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr);

	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen);
	if (err)
		goto err_cmd;

	obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context);
	memset(attr, 0, sizeof(*attr));
	attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
	attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);
	attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index);
	kfree(out);
	return 0;

err_cmd:
	kfree(out);
	return err;
}

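/* Only the INIT->RDY and RDY->SUSPEND transitions are accepted; SUSPEND and
 * ERR are treated as terminal states.
 */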
static bool is_valid_state_change(int oldstate, int newstate)
{
	switch (oldstate) {
	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
	default:
		return false;
	}
}

static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
{
	int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
	u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {};
	void *obj_context;
	void *cmd_hdr;
	void *in;
	int err;

	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
		return 0;

	if (!is_valid_state_change(mvq->fw_state, state))
		return -EINVAL;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr);

	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);

	obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context);
	MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select,
		   MLX5_VIRTQ_MODIFY_MASK_STATE);
	MLX5_SET(virtio_net_q_object, obj_context, state, state);
	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
	kfree(in);
	if (!err)
		mvq->fw_state = state;

	return err;
}

static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	u32 in[MLX5_ST_SZ_DW(create_virtio_q_counters_in)] = {};
	u32 out[MLX5_ST_SZ_DW(create_virtio_q_counters_out)] = {};
	void *cmd_hdr;
	int err;

	if (!counters_supported(&ndev->mvdev))
		return 0;

	cmd_hdr = MLX5_ADDR_OF(create_virtio_q_counters_in, in, hdr);

	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);

	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
	if (err)
		return err;

	mvq->counter_set_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);

	return 0;
}

static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	u32 in[MLX5_ST_SZ_DW(destroy_virtio_q_counters_in)] = {};
	u32 out[MLX5_ST_SZ_DW(destroy_virtio_q_counters_out)] = {};

	if (!counters_supported(&ndev->mvdev))
		return;

	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_id, mvq->counter_set_id);
	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.uid, ndev->mvdev.res.uid);
	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
		mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id);
}

static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	u16 idx = mvq->index;
	int err;

	if (!mvq->num_ent)
		return 0;

	if (mvq->initialized)
		return 0;

	err = cq_create(ndev, idx, mvq->num_ent);
	if (err)
		return err;

	err = qp_create(ndev, mvq, &mvq->fwqp);
	if (err)
		goto err_fwqp;

	err = qp_create(ndev, mvq, &mvq->vqqp);
	if (err)
		goto err_vqqp;

	err = connect_qps(ndev, mvq);
	if (err)
		goto err_connect;

	err = counter_set_alloc(ndev, mvq);
	if (err)
		goto err_connect;

	err = create_virtqueue(ndev, mvq);
	if (err)
		goto err_vq;

	if (mvq->ready) {
		err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
		if (err) {
			mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n",
				       idx, err);
			goto err_modify;
		}
	}

	mvq->initialized = true;
	return 0;

err_modify:
	destroy_virtqueue(ndev, mvq);
err_vq:
	counter_set_dealloc(ndev, mvq);
err_connect:
	qp_destroy(ndev, &mvq->vqqp);
err_vqqp:
	qp_destroy(ndev, &mvq->fwqp);
err_fwqp:
	cq_destroy(ndev, idx);
	return err;
}

static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	struct mlx5_virtq_attr attr;

	if (!mvq->initialized)
		return;

	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
		return;

	if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND))
		mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n");

	if (query_virtqueue(ndev, mvq, &attr)) {
		mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n");
		return;
	}
	mvq->avail_idx = attr.available_index;
	mvq->used_idx = attr.used_index;
}

static void suspend_vqs(struct mlx5_vdpa_net *ndev)
{
	int i;

	for (i = 0; i < ndev->mvdev.max_vqs; i++)
		suspend_vq(ndev, &ndev->vqs[i]);
}

static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	if (!mvq->initialized)
		return;

	suspend_vq(ndev, mvq);
	destroy_virtqueue(ndev, mvq);
	counter_set_dealloc(ndev, mvq);
	qp_destroy(ndev, &mvq->vqqp);
	qp_destroy(ndev, &mvq->fwqp);
	cq_destroy(ndev, mvq->index);
	mvq->initialized = false;
}

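/* The RQ table lists only even (RX) virtqueue ids. Its size must be a power
 * of two, so the actual size is the number of queue pairs rounded up.
 */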
static int create_rqt(struct mlx5_vdpa_net *ndev)
{
	int rqt_table_size = roundup_pow_of_two(ndev->rqt_size);
	int act_sz = roundup_pow_of_two(ndev->cur_num_vqs / 2);
	__be32 *list;
	void *rqtc;
	int inlen;
	void *in;
	int i, j;
	int err;

	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + rqt_table_size * MLX5_ST_SZ_BYTES(rq_num);
	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid);
	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);

	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
	MLX5_SET(rqtc, rqtc, rqt_max_size, rqt_table_size);
	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
	for (i = 0, j = 0; i < act_sz; i++, j += 2)
		list[i] = cpu_to_be32(ndev->vqs[j % ndev->cur_num_vqs].virtq_id);

	MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
	err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
	kfree(in);
	if (err)
		return err;

	return 0;
}

#define MLX5_MODIFY_RQT_NUM_RQS ((u64)1)

static int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
{
	int act_sz = roundup_pow_of_two(num / 2);
	__be32 *list;
	void *rqtc;
	int inlen;
	void *in;
	int i, j;
	int err;

	inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + act_sz * MLX5_ST_SZ_BYTES(rq_num);
	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid);
	MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS);
	rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);

	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
	for (i = 0, j = 0; i < act_sz; i++, j += 2)
		list[i] = cpu_to_be32(ndev->vqs[j % num].virtq_id);

	MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
	err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn);
	kfree(in);
	if (err)
		return err;

	return 0;
}

static void destroy_rqt(struct mlx5_vdpa_net *ndev)
{
	mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
}

static int create_tir(struct mlx5_vdpa_net *ndev)
{
#define HASH_IP_L4PORTS                                                                            \
	(MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT |  \
	 MLX5_HASH_FIELD_SEL_L4_DPORT)
	static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
						   0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
						   0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
						   0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
						   0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a };
	void *rss_key;
	void *outer;
	void *tirc;
	void *in;
	int err;

	in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid);
	tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);

	MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
	MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
	rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
	memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key));

	outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
	MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4);
	MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP);
	MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS);

	MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn);
	MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn);

	err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn);
	kfree(in);
	if (err)
		return err;

	mlx5_vdpa_add_tirn(ndev);
	return err;
}

static void destroy_tir(struct mlx5_vdpa_net *ndev)
{
	mlx5_vdpa_remove_tirn(ndev);
	mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn);
}

#define MAX_STEERING_ENT 0x8000
#define MAX_STEERING_GROUPS 2

#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
       #define NUM_DESTS 2
#else
       #define NUM_DESTS 1
#endif

static int add_steering_counters(struct mlx5_vdpa_net *ndev,
				 struct macvlan_node *node,
				 struct mlx5_flow_act *flow_act,
				 struct mlx5_flow_destination *dests)
{
#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
	int err;

	node->ucast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
	if (IS_ERR(node->ucast_counter.counter))
		return PTR_ERR(node->ucast_counter.counter);

	node->mcast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
	if (IS_ERR(node->mcast_counter.counter)) {
		err = PTR_ERR(node->mcast_counter.counter);
		goto err_mcast_counter;
	}

	dests[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
	flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
	return 0;

err_mcast_counter:
	mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
	return err;
#else
	return 0;
#endif
}

static void remove_steering_counters(struct mlx5_vdpa_net *ndev,
				     struct macvlan_node *node)
{
#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
	mlx5_fc_destroy(ndev->mvdev.mdev, node->mcast_counter.counter);
	mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
#endif
}

static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac,
					struct macvlan_node *node)
{
	struct mlx5_flow_destination dests[NUM_DESTS] = {};
	struct mlx5_flow_act flow_act = {};
	struct mlx5_flow_spec *spec;
	void *headers_c;
	void *headers_v;
	u8 *dmac_c;
	u8 *dmac_v;
	int err;
	u16 vid;

	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
	if (!spec)
		return -ENOMEM;

	vid = key2vid(node->macvlan);
	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
	headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
	headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
	dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16);
	dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16);
	eth_broadcast_addr(dmac_c);
	ether_addr_copy(dmac_v, mac);
	if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)) {
		MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid);
	}
	if (node->tagged) {
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, vid);
	}
	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
	dests[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
	dests[0].tir_num = ndev->res.tirn;
	err = add_steering_counters(ndev, node, &flow_act, dests);
	if (err)
		goto out_free;

#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
	dests[1].counter_id = mlx5_fc_id(node->ucast_counter.counter);
#endif
	node->ucast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
	if (IS_ERR(node->ucast_rule)) {
		err = PTR_ERR(node->ucast_rule);
		goto err_ucast;
	}

#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
	dests[1].counter_id = mlx5_fc_id(node->mcast_counter.counter);
#endif

	memset(dmac_c, 0, ETH_ALEN);
	memset(dmac_v, 0, ETH_ALEN);
	dmac_c[0] = 1;
	dmac_v[0] = 1;
	node->mcast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
	if (IS_ERR(node->mcast_rule)) {
		err = PTR_ERR(node->mcast_rule);
		goto err_mcast;
	}
	kvfree(spec);
	mlx5_vdpa_add_rx_counters(ndev, node);
	return 0;

err_mcast:
	mlx5_del_flow_rules(node->ucast_rule);
err_ucast:
	remove_steering_counters(ndev, node);
out_free:
	kvfree(spec);
	return err;
}

static void mlx5_vdpa_del_mac_vlan_rules(struct mlx5_vdpa_net *ndev,
					 struct macvlan_node *node)
{
	mlx5_vdpa_remove_rx_counters(ndev, node);
	mlx5_del_flow_rules(node->ucast_rule);
	mlx5_del_flow_rules(node->mcast_rule);
}

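/* Pack the VLAN id and MAC address into a single 64-bit hash key: the VLAN
 * (or MLX5V_UNTAGGED) occupies bits 63..48 and the six MAC bytes sit below.
 */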
1535 static u64 search_val(u8 *mac, u16 vlan, bool tagged)
1536 {
1537 	u64 val;
1538 
1539 	if (!tagged)
1540 		vlan = MLX5V_UNTAGGED;
1541 
1542 	val = (u64)vlan << 48 |
1543 	      (u64)mac[0] << 40 |
1544 	      (u64)mac[1] << 32 |
1545 	      (u64)mac[2] << 24 |
1546 	      (u64)mac[3] << 16 |
1547 	      (u64)mac[4] << 8 |
1548 	      (u64)mac[5];
1549 
1550 	return val;
1551 }
1552 
1553 static struct macvlan_node *mac_vlan_lookup(struct mlx5_vdpa_net *ndev, u64 value)
1554 {
1555 	struct macvlan_node *pos;
1556 	u32 idx;
1557 
1558 	idx = hash_64(value, 8); // tbd 8
1559 	hlist_for_each_entry(pos, &ndev->macvlan_hash[idx], hlist) {
1560 		if (pos->macvlan == value)
1561 			return pos;
1562 	}
1563 	return NULL;
1564 }
1565 
1566 static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vid, bool tagged)
1567 {
1568 	struct macvlan_node *ptr;
1569 	u64 val;
1570 	u32 idx;
1571 	int err;
1572 
1573 	val = search_val(mac, vid, tagged);
1574 	if (mac_vlan_lookup(ndev, val))
1575 		return -EEXIST;
1576 
1577 	ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);
1578 	if (!ptr)
1579 		return -ENOMEM;
1580 
1581 	ptr->tagged = tagged;
1582 	ptr->macvlan = val;
1583 	ptr->ndev = ndev;
1584 	err = mlx5_vdpa_add_mac_vlan_rules(ndev, ndev->config.mac, ptr);
1585 	if (err)
1586 		goto err_add;
1587 
1588 	idx = hash_64(val, 8);
1589 	hlist_add_head(&ptr->hlist, &ndev->macvlan_hash[idx]);
1590 	return 0;
1591 
1592 err_add:
1593 	kfree(ptr);
1594 	return err;
1595 }
1596 
1597 static void mac_vlan_del(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged)
1598 {
1599 	struct macvlan_node *ptr;
1600 
1601 	ptr = mac_vlan_lookup(ndev, search_val(mac, vlan, tagged));
1602 	if (!ptr)
1603 		return;
1604 
1605 	hlist_del(&ptr->hlist);
1606 	mlx5_vdpa_del_mac_vlan_rules(ndev, ptr);
1607 	remove_steering_counters(ndev, ptr);
1608 	kfree(ptr);
1609 }
1610 
1611 static void clear_mac_vlan_table(struct mlx5_vdpa_net *ndev)
1612 {
1613 	struct macvlan_node *pos;
1614 	struct hlist_node *n;
1615 	int i;
1616 
1617 	for (i = 0; i < MLX5V_MACVLAN_SIZE; i++) {
1618 		hlist_for_each_entry_safe(pos, n, &ndev->macvlan_hash[i], hlist) {
1619 			hlist_del(&pos->hlist);
1620 			mlx5_vdpa_del_mac_vlan_rules(ndev, pos);
1621 			remove_steering_counters(ndev, pos);
1622 			kfree(pos);
1623 		}
1624 	}
1625 }
1626 
1627 static int setup_steering(struct mlx5_vdpa_net *ndev)
1628 {
1629 	struct mlx5_flow_table_attr ft_attr = {};
1630 	struct mlx5_flow_namespace *ns;
1631 	int err;
1632 
1633 	ft_attr.max_fte = MAX_STEERING_ENT;
1634 	ft_attr.autogroup.max_num_groups = MAX_STEERING_GROUPS;
1635 
1636 	ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS);
1637 	if (!ns) {
1638 		mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n");
1639 		return -EOPNOTSUPP;
1640 	}
1641 
1642 	ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
1643 	if (IS_ERR(ndev->rxft)) {
1644 		mlx5_vdpa_warn(&ndev->mvdev, "failed to create flow table\n");
1645 		return PTR_ERR(ndev->rxft);
1646 	}
1647 	mlx5_vdpa_add_rx_flow_table(ndev);
1648 
1649 	err = mac_vlan_add(ndev, ndev->config.mac, 0, false);
1650 	if (err)
1651 		goto err_add;
1652 
1653 	return 0;
1654 
1655 err_add:
1656 	mlx5_vdpa_remove_rx_flow_table(ndev);
1657 	mlx5_destroy_flow_table(ndev->rxft);
1658 	return err;
1659 }
1660 
1661 static void teardown_steering(struct mlx5_vdpa_net *ndev)
1662 {
1663 	clear_mac_vlan_table(ndev);
1664 	mlx5_vdpa_remove_rx_flow_table(ndev);
1665 	mlx5_destroy_flow_table(ndev->rxft);
1666 }
1667 
1668 static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1669 {
1670 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1671 	struct mlx5_control_vq *cvq = &mvdev->cvq;
1672 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1673 	struct mlx5_core_dev *pfmdev;
1674 	size_t read;
1675 	u8 mac[ETH_ALEN], mac_back[ETH_ALEN];
1676 
1677 	pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
1678 	switch (cmd) {
1679 	case VIRTIO_NET_CTRL_MAC_ADDR_SET:
1680 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN);
1681 		if (read != ETH_ALEN)
1682 			break;
1683 
1684 		if (!memcmp(ndev->config.mac, mac, 6)) {
1685 			status = VIRTIO_NET_OK;
1686 			break;
1687 		}
1688 
1689 		if (is_zero_ether_addr(mac))
1690 			break;
1691 
1692 		if (!is_zero_ether_addr(ndev->config.mac)) {
1693 			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
1694 				mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
1695 					       ndev->config.mac);
1696 				break;
1697 			}
1698 		}
1699 
1700 		if (mlx5_mpfs_add_mac(pfmdev, mac)) {
1701 			mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
1702 				       mac);
1703 			break;
1704 		}
1705 
1706 		/* backup the original mac address so that if failed to add the forward rules
1707 		 * we could restore it
1708 		 */
1709 		memcpy(mac_back, ndev->config.mac, ETH_ALEN);
1710 
1711 		memcpy(ndev->config.mac, mac, ETH_ALEN);
1712 
1713 		/* Need recreate the flow table entry, so that the packet could forward back
1714 		 */
1715 		mac_vlan_del(ndev, mac_back, 0, false);
1716 
1717 		if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) {
1718 			mlx5_vdpa_warn(mvdev, "failed to insert forward rules, try to restore\n");
1719 
1720 			/* Although it hardly run here, we still need double check */
1721 			if (is_zero_ether_addr(mac_back)) {
1722 				mlx5_vdpa_warn(mvdev, "restore mac failed: Original MAC is zero\n");
1723 				break;
1724 			}
1725 
1726 			/* Try to restore the original MAC address to the MPFS table, and
1727 			 * try to restore the forward rule entry.
1728 			 */
1729 			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
1730 				mlx5_vdpa_warn(mvdev, "restore mac failed: delete MAC %pM from MPFS table failed\n",
1731 					       ndev->config.mac);
1732 			}
1733 
1734 			if (mlx5_mpfs_add_mac(pfmdev, mac_back)) {
1735 				mlx5_vdpa_warn(mvdev, "restore mac failed: insert old MAC %pM into MPFS table failed\n",
1736 					       mac_back);
1737 			}
1738 
1739 			memcpy(ndev->config.mac, mac_back, ETH_ALEN);
1740 
1741 			if (mac_vlan_add(ndev, ndev->config.mac, 0, false))
1742 				mlx5_vdpa_warn(mvdev, "restore forward rules failed: insert forward rules failed\n");
1743 
1744 			break;
1745 		}
1746 
1747 		status = VIRTIO_NET_OK;
1748 		break;
1749 
1750 	default:
1751 		break;
1752 	}
1753 
1754 	return status;
1755 }
1756 
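/* Change the number of active queue pairs. newqps counts virtqueue
 * pairs, so 2 * newqps data virtqueues remain active on return. When
 * shrinking, the RQT is modified before the excess VQs are torn down;
 * when growing, the new VQs are set up before the RQT is modified.
 */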
1757 static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps)
1758 {
1759 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1760 	int cur_qps = ndev->cur_num_vqs / 2;
1761 	int err;
1762 	int i;
1763 
1764 	if (cur_qps > newqps) {
1765 		err = modify_rqt(ndev, 2 * newqps);
1766 		if (err)
1767 			return err;
1768 
1769 		for (i = ndev->cur_num_vqs - 1; i >= 2 * newqps; i--)
1770 			teardown_vq(ndev, &ndev->vqs[i]);
1771 
1772 		ndev->cur_num_vqs = 2 * newqps;
1773 	} else {
1774 		ndev->cur_num_vqs = 2 * newqps;
1775 		for (i = cur_qps * 2; i < 2 * newqps; i++) {
1776 			err = setup_vq(ndev, &ndev->vqs[i]);
1777 			if (err)
1778 				goto clean_added;
1779 		}
1780 		err = modify_rqt(ndev, 2 * newqps);
1781 		if (err)
1782 			goto clean_added;
1783 	}
1784 	return 0;
1785 
1786 clean_added:
1787 	for (--i; i >= 2 * cur_qps; --i)
1788 		teardown_vq(ndev, &ndev->vqs[i]);
1789 
1790 	ndev->cur_num_vqs = 2 * cur_qps;
1791 
1792 	return err;
1793 }
1794 
1795 static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1796 {
1797 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1798 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1799 	struct mlx5_control_vq *cvq = &mvdev->cvq;
1800 	struct virtio_net_ctrl_mq mq;
1801 	size_t read;
1802 	u16 newqps;
1803 
1804 	switch (cmd) {
1805 	case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
1806 		/* This mq feature check aligns with pre-existing userspace
1807 		 * implementation.
1808 		 *
1809 		 * Without it, an untrusted driver could fake a multiqueue config
1810 		 * request down to a non-mq device, which may cause the kernel to
1811 		 * panic due to uninitialized resources for extra vqs. Even with
1812 		 * a well-behaved guest driver, changing the number of vqs on a
1813 		 * non-mq device is not expected to be allowed.
1814 		 */
1815 		if (!MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ))
1816 			break;
1817 
1818 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq));
1819 		if (read != sizeof(mq))
1820 			break;
1821 
1822 		newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs);
1823 		if (newqps < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1824 		    newqps > ndev->rqt_size)
1825 			break;
1826 
1827 		if (ndev->cur_num_vqs == 2 * newqps) {
1828 			status = VIRTIO_NET_OK;
1829 			break;
1830 		}
1831 
1832 		if (!change_num_qps(mvdev, newqps))
1833 			status = VIRTIO_NET_OK;
1834 
1835 		break;
1836 	default:
1837 		break;
1838 	}
1839 
1840 	return status;
1841 }
1842 
1843 static virtio_net_ctrl_ack handle_ctrl_vlan(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1844 {
1845 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1846 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1847 	struct mlx5_control_vq *cvq = &mvdev->cvq;
1848 	__virtio16 vlan;
1849 	size_t read;
1850 	u16 id;
1851 
1852 	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)))
1853 		return status;
1854 
1855 	switch (cmd) {
1856 	case VIRTIO_NET_CTRL_VLAN_ADD:
1857 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
1858 		if (read != sizeof(vlan))
1859 			break;
1860 
1861 		id = mlx5vdpa16_to_cpu(mvdev, vlan);
1862 		if (mac_vlan_add(ndev, ndev->config.mac, id, true))
1863 			break;
1864 
1865 		status = VIRTIO_NET_OK;
1866 		break;
1867 	case VIRTIO_NET_CTRL_VLAN_DEL:
1868 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
1869 		if (read != sizeof(vlan))
1870 			break;
1871 
1872 		id = mlx5vdpa16_to_cpu(mvdev, vlan);
1873 		mac_vlan_del(ndev, ndev->config.mac, id, true);
1874 		status = VIRTIO_NET_OK;
1875 		break;
1876 	default:
1877 		break;
1878 	}
1879 
1880 	return status;
1881 }
1882 
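/* Work handler for control VQ kicks. Note that at most one command
 * descriptor is handled per invocation: after completing a command the
 * work item is requeued and the loop exits, so a stream of control
 * commands cannot monopolize the workqueue.
 */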
1883 static void mlx5_cvq_kick_handler(struct work_struct *work)
1884 {
1885 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1886 	struct virtio_net_ctrl_hdr ctrl;
1887 	struct mlx5_vdpa_wq_ent *wqent;
1888 	struct mlx5_vdpa_dev *mvdev;
1889 	struct mlx5_control_vq *cvq;
1890 	struct mlx5_vdpa_net *ndev;
1891 	size_t read, write;
1892 	int err;
1893 
1894 	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
1895 	mvdev = wqent->mvdev;
1896 	ndev = to_mlx5_vdpa_ndev(mvdev);
1897 	cvq = &mvdev->cvq;
1898 
1899 	down_write(&ndev->reslock);
1900 
1901 	if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
1902 		goto out;
1903 
1904 	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
1905 		goto out;
1906 
1907 	if (!cvq->ready)
1908 		goto out;
1909 
1910 	while (true) {
1911 		err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head,
1912 					   GFP_ATOMIC);
1913 		if (err <= 0)
1914 			break;
1915 
1916 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl));
1917 		if (read != sizeof(ctrl))
1918 			break;
1919 
1920 		cvq->received_desc++;
1921 		switch (ctrl.class) {
1922 		case VIRTIO_NET_CTRL_MAC:
1923 			status = handle_ctrl_mac(mvdev, ctrl.cmd);
1924 			break;
1925 		case VIRTIO_NET_CTRL_MQ:
1926 			status = handle_ctrl_mq(mvdev, ctrl.cmd);
1927 			break;
1928 		case VIRTIO_NET_CTRL_VLAN:
1929 			status = handle_ctrl_vlan(mvdev, ctrl.cmd);
1930 			break;
1931 		default:
1932 			break;
1933 		}
1934 
1935 		/* Make sure data is written before advancing index */
1936 		smp_wmb();
1937 
1938 		write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status));
1939 		vringh_complete_iotlb(&cvq->vring, cvq->head, write);
1940 		vringh_kiov_cleanup(&cvq->riov);
1941 		vringh_kiov_cleanup(&cvq->wiov);
1942 
1943 		if (vringh_need_notify_iotlb(&cvq->vring))
1944 			vringh_notify(&cvq->vring);
1945 
1946 		cvq->completed_desc++;
1947 		queue_work(mvdev->wq, &wqent->work);
1948 		break;
1949 	}
1950 
1951 out:
1952 	up_write(&ndev->reslock);
1953 }
1954 
1955 static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
1956 {
1957 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1958 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1959 	struct mlx5_vdpa_virtqueue *mvq;
1960 
1961 	if (!is_index_valid(mvdev, idx))
1962 		return;
1963 
1964 	if (unlikely(is_ctrl_vq_idx(mvdev, idx))) {
1965 		if (!mvdev->wq || !mvdev->cvq.ready)
1966 			return;
1967 
1968 		queue_work(mvdev->wq, &ndev->cvq_ent.work);
1969 		return;
1970 	}
1971 
1972 	mvq = &ndev->vqs[idx];
1973 	if (unlikely(!mvq->ready))
1974 		return;
1975 
1976 	iowrite16(idx, ndev->mvdev.res.kick_addr);
1977 }
1978 
1979 static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area,
1980 				    u64 driver_area, u64 device_area)
1981 {
1982 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1983 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1984 	struct mlx5_vdpa_virtqueue *mvq;
1985 
1986 	if (!is_index_valid(mvdev, idx))
1987 		return -EINVAL;
1988 
1989 	if (is_ctrl_vq_idx(mvdev, idx)) {
1990 		mvdev->cvq.desc_addr = desc_area;
1991 		mvdev->cvq.device_addr = device_area;
1992 		mvdev->cvq.driver_addr = driver_area;
1993 		return 0;
1994 	}
1995 
1996 	mvq = &ndev->vqs[idx];
1997 	mvq->desc_addr = desc_area;
1998 	mvq->device_addr = device_area;
1999 	mvq->driver_addr = driver_area;
2000 	return 0;
2001 }
2002 
2003 static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
2004 {
2005 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2006 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2007 	struct mlx5_vdpa_virtqueue *mvq;
2008 
2009 	if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
2010 		return;
2011 
2012 	mvq = &ndev->vqs[idx];
2013 	mvq->num_ent = num;
2014 }
2015 
2016 static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb)
2017 {
2018 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2019 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2020 
2021 	ndev->event_cbs[idx] = *cb;
2022 	if (is_ctrl_vq_idx(mvdev, idx))
2023 		mvdev->cvq.event_cb = *cb;
2024 }
2025 
2026 static void mlx5_cvq_notify(struct vringh *vring)
2027 {
2028 	struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring);
2029 
2030 	if (!cvq->event_cb.callback)
2031 		return;
2032 
2033 	cvq->event_cb.callback(cvq->event_cb.private);
2034 }
2035 
2036 static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready)
2037 {
2038 	struct mlx5_control_vq *cvq = &mvdev->cvq;
2039 
2040 	cvq->ready = ready;
2041 	if (!ready)
2042 		return;
2043 
2044 	cvq->vring.notify = mlx5_cvq_notify;
2045 }
2046 
2047 static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
2048 {
2049 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2050 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2051 	struct mlx5_vdpa_virtqueue *mvq;
2052 	int err;
2053 
2054 	if (!mvdev->actual_features)
2055 		return;
2056 
2057 	if (!is_index_valid(mvdev, idx))
2058 		return;
2059 
2060 	if (is_ctrl_vq_idx(mvdev, idx)) {
2061 		set_cvq_ready(mvdev, ready);
2062 		return;
2063 	}
2064 
2065 	mvq = &ndev->vqs[idx];
2066 	if (!ready) {
2067 		suspend_vq(ndev, mvq);
2068 	} else {
2069 		err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
2070 		if (err) {
2071 			mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
2072 			ready = false;
2073 		}
2074 	}
2075 
2077 	mvq->ready = ready;
2078 }
2079 
2080 static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
2081 {
2082 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2083 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2084 
2085 	if (!is_index_valid(mvdev, idx))
2086 		return false;
2087 
2088 	if (is_ctrl_vq_idx(mvdev, idx))
2089 		return mvdev->cvq.ready;
2090 
2091 	return ndev->vqs[idx].ready;
2092 }
2093 
2094 static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
2095 				  const struct vdpa_vq_state *state)
2096 {
2097 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2098 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2099 	struct mlx5_vdpa_virtqueue *mvq;
2100 
2101 	if (!is_index_valid(mvdev, idx))
2102 		return -EINVAL;
2103 
2104 	if (is_ctrl_vq_idx(mvdev, idx)) {
2105 		mvdev->cvq.vring.last_avail_idx = state->split.avail_index;
2106 		return 0;
2107 	}
2108 
2109 	mvq = &ndev->vqs[idx];
2110 	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
2111 		mlx5_vdpa_warn(mvdev, "can't modify available index\n");
2112 		return -EINVAL;
2113 	}
2114 
2115 	mvq->used_idx = state->split.avail_index;
2116 	mvq->avail_idx = state->split.avail_index;
2117 	return 0;
2118 }
2119 
2120 static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state)
2121 {
2122 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2123 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2124 	struct mlx5_vdpa_virtqueue *mvq;
2125 	struct mlx5_virtq_attr attr;
2126 	int err;
2127 
2128 	if (!is_index_valid(mvdev, idx))
2129 		return -EINVAL;
2130 
2131 	if (is_ctrl_vq_idx(mvdev, idx)) {
2132 		state->split.avail_index = mvdev->cvq.vring.last_avail_idx;
2133 		return 0;
2134 	}
2135 
2136 	mvq = &ndev->vqs[idx];
2137 	/* If the virtq object was destroyed, use the value saved at
2138 	 * the last minute of suspend_vq. This caters for userspace
2139 	 * that cares about emulating the index after the vq is stopped.
2140 	 */
2141 	if (!mvq->initialized) {
2142 		/* Firmware returns a wrong value for the available index.
2143 		 * Since both values should be identical, we take the value of
2144 		 * used_idx which is reported correctly.
2145 		 */
2146 		state->split.avail_index = mvq->used_idx;
2147 		return 0;
2148 	}
2149 
2150 	err = query_virtqueue(ndev, mvq, &attr);
2151 	if (err) {
2152 		mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n");
2153 		return err;
2154 	}
2155 	state->split.avail_index = attr.used_index;
2156 	return 0;
2157 }
2158 
2159 static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
2160 {
2161 	return PAGE_SIZE;
2162 }
2163 
2164 static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx)
2165 {
2166 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2167 
2168 	if (is_ctrl_vq_idx(mvdev, idx))
2169 		return MLX5_VDPA_CVQ_GROUP;
2170 
2171 	return MLX5_VDPA_DATAVQ_GROUP;
2172 }
2173 
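/* Device feature bits as exposed in the device_features_bits_mask field
 * of the VDPA emulation capability; they are assumed to match the layout
 * in mlx5_ifc_vdpa.h. mlx_to_virtio_features() below translates them to
 * VIRTIO_NET_F_* bits.
 */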
2174 enum { MLX5_VIRTIO_NET_F_GUEST_CSUM = 1 << 9,
2175 	MLX5_VIRTIO_NET_F_CSUM = 1 << 10,
2176 	MLX5_VIRTIO_NET_F_HOST_TSO6 = 1 << 11,
2177 	MLX5_VIRTIO_NET_F_HOST_TSO4 = 1 << 12,
2178 };
2179 
2180 static u64 mlx_to_virtio_features(u16 dev_features)
2181 {
2182 	u64 result = 0;
2183 
2184 	if (dev_features & MLX5_VIRTIO_NET_F_GUEST_CSUM)
2185 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM);
2186 	if (dev_features & MLX5_VIRTIO_NET_F_CSUM)
2187 		result |= BIT_ULL(VIRTIO_NET_F_CSUM);
2188 	if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO6)
2189 		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6);
2190 	if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO4)
2191 		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4);
2192 
2193 	return result;
2194 }
2195 
2196 static u64 get_supported_features(struct mlx5_core_dev *mdev)
2197 {
2198 	u64 mlx_vdpa_features = 0;
2199 	u16 dev_features;
2200 
2201 	dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mdev, device_features_bits_mask);
2202 	mlx_vdpa_features |= mlx_to_virtio_features(dev_features);
2203 	if (MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_version_1_0))
2204 		mlx_vdpa_features |= BIT_ULL(VIRTIO_F_VERSION_1);
2205 	mlx_vdpa_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
2206 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
2207 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
2208 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ);
2209 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS);
2210 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU);
2211 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VLAN);
2212 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MAC);
2213 
2214 	return mlx_vdpa_features;
2215 }
2216 
2217 static u64 mlx5_vdpa_get_device_features(struct vdpa_device *vdev)
2218 {
2219 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2220 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2221 
2222 	print_features(mvdev, ndev->mvdev.mlx_features, false);
2223 	return ndev->mvdev.mlx_features;
2224 }
2225 
2226 static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features)
2227 {
2228 	/* Minimum features to expect */
2229 	if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
2230 		return -EOPNOTSUPP;
2231 
2232 	/* Double check the feature combination sent down by the driver.
2233 	 * Fail invalid features due to the absence of a dependent feature.
2234 	 *
2235 	 * Per the VIRTIO v1.1 specification, section 5.1.3.1 Feature bit
2236 	 * requirements: "VIRTIO_NET_F_MQ Requires VIRTIO_NET_F_CTRL_VQ".
2237 	 * By failing invalid features sent down by untrusted drivers,
2238 	 * we're assured the assumptions made in is_index_valid() and
2239 	 * is_ctrl_vq_idx() will not be compromised.
2240 	 */
2241 	if ((features & (BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) ==
2242 	    BIT_ULL(VIRTIO_NET_F_MQ))
2243 		return -EINVAL;
2244 
2245 	return 0;
2246 }
2247 
2248 static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
2249 {
2250 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2251 	int err;
2252 	int i;
2253 
2254 	for (i = 0; i < mvdev->max_vqs; i++) {
2255 		err = setup_vq(ndev, &ndev->vqs[i]);
2256 		if (err)
2257 			goto err_vq;
2258 	}
2259 
2260 	return 0;
2261 
2262 err_vq:
2263 	for (--i; i >= 0; i--)
2264 		teardown_vq(ndev, &ndev->vqs[i]);
2265 
2266 	return err;
2267 }
2268 
2269 static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
2270 {
2271 	struct mlx5_vdpa_virtqueue *mvq;
2272 	int i;
2273 
2274 	for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) {
2275 		mvq = &ndev->vqs[i];
2276 		if (!mvq->initialized)
2277 			continue;
2278 
2279 		teardown_vq(ndev, mvq);
2280 	}
2281 }
2282 
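/* Recompute max_idx, the highest valid virtqueue index for the
 * negotiated feature set. is_index_valid() and is_ctrl_vq_idx() rely on
 * this value.
 */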
2283 static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
2284 {
2285 	if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) {
2286 		if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) {
2287 			/* MQ supported. CVQ index is right above the last data virtqueue's */
2288 			mvdev->max_idx = mvdev->max_vqs;
2289 		} else {
2290 			/* Only CVQ supported. Data virtqueues occupy indices 0 and 1;
2291 			 * CVQ gets index 2.
2292 			 */
2293 			mvdev->max_idx = 2;
2294 		}
2295 	} else {
2296 		/* Two data virtqueues only: one for rx and one for tx */
2297 		mvdev->max_idx = 1;
2298 	}
2299 }
2300 
2301 static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features)
2302 {
2303 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2304 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2305 	int err;
2306 
2307 	print_features(mvdev, features, true);
2308 
2309 	err = verify_driver_features(mvdev, features);
2310 	if (err)
2311 		return err;
2312 
2313 	ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
2314 	if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ))
2315 		ndev->rqt_size = mlx5vdpa16_to_cpu(mvdev, ndev->config.max_virtqueue_pairs);
2316 	else
2317 		ndev->rqt_size = 1;
2318 
2319 	ndev->cur_num_vqs = 2 * ndev->rqt_size;
2320 
2321 	update_cvq_info(mvdev);
2322 	return err;
2323 }
2324 
2325 static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb)
2326 {
2327 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2328 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2329 
2330 	ndev->config_cb = *cb;
2331 }
2332 
2333 #define MLX5_VDPA_MAX_VQ_ENTRIES 256
2334 static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev)
2335 {
2336 	return MLX5_VDPA_MAX_VQ_ENTRIES;
2337 }
2338 
2339 static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev)
2340 {
2341 	return VIRTIO_ID_NET;
2342 }
2343 
2344 static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev)
2345 {
2346 	return PCI_VENDOR_ID_MELLANOX;
2347 }
2348 
2349 static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev)
2350 {
2351 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2352 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2353 
2354 	print_status(mvdev, ndev->mvdev.status, false);
2355 	return ndev->mvdev.status;
2356 }
2357 
2358 static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
2359 {
2360 	struct mlx5_vq_restore_info *ri = &mvq->ri;
2361 	struct mlx5_virtq_attr attr = {};
2362 	int err;
2363 
2364 	if (mvq->initialized) {
2365 		err = query_virtqueue(ndev, mvq, &attr);
2366 		if (err)
2367 			return err;
2368 	}
2369 
2370 	ri->avail_index = attr.available_index;
2371 	ri->used_index = attr.used_index;
2372 	ri->ready = mvq->ready;
2373 	ri->num_ent = mvq->num_ent;
2374 	ri->desc_addr = mvq->desc_addr;
2375 	ri->device_addr = mvq->device_addr;
2376 	ri->driver_addr = mvq->driver_addr;
2377 	ri->restore = true;
2378 	return 0;
2379 }
2380 
2381 static int save_channels_info(struct mlx5_vdpa_net *ndev)
2382 {
2383 	int i;
2384 
2385 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
2386 		memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri));
2387 		save_channel_info(ndev, &ndev->vqs[i]);
2388 	}
2389 	return 0;
2390 }
2391 
2392 static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev)
2393 {
2394 	int i;
2395 
2396 	for (i = 0; i < ndev->mvdev.max_vqs; i++)
2397 		memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
2398 }
2399 
2400 static void restore_channels_info(struct mlx5_vdpa_net *ndev)
2401 {
2402 	struct mlx5_vdpa_virtqueue *mvq;
2403 	struct mlx5_vq_restore_info *ri;
2404 	int i;
2405 
2406 	mlx5_clear_vqs(ndev);
2407 	init_mvqs(ndev);
2408 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
2409 		mvq = &ndev->vqs[i];
2410 		ri = &mvq->ri;
2411 		if (!ri->restore)
2412 			continue;
2413 
2414 		mvq->avail_idx = ri->avail_index;
2415 		mvq->used_idx = ri->used_index;
2416 		mvq->ready = ri->ready;
2417 		mvq->num_ent = ri->num_ent;
2418 		mvq->desc_addr = ri->desc_addr;
2419 		mvq->device_addr = ri->device_addr;
2420 		mvq->driver_addr = ri->driver_addr;
2421 	}
2422 }
2423 
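/* Replace the memory mapping for @asid: suspend the virtqueues and save
 * their state, destroy the old MR and create a new one from @iotlb. If
 * the device is in DRIVER_OK state and not suspended, the saved state is
 * then restored on top of the new mapping.
 */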
2424 static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev,
2425 				struct vhost_iotlb *iotlb, unsigned int asid)
2426 {
2427 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2428 	int err;
2429 
2430 	suspend_vqs(ndev);
2431 	err = save_channels_info(ndev);
2432 	if (err)
2433 		goto err_mr;
2434 
2435 	teardown_driver(ndev);
2436 	mlx5_vdpa_destroy_mr(mvdev);
2437 	err = mlx5_vdpa_create_mr(mvdev, iotlb, asid);
2438 	if (err)
2439 		goto err_mr;
2440 
2441 	if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) || mvdev->suspended)
2442 		goto err_mr;
2443 
2444 	restore_channels_info(ndev);
2445 	err = setup_driver(mvdev);
2446 	if (err)
2447 		goto err_setup;
2448 
2449 	return 0;
2450 
2451 err_setup:
2452 	mlx5_vdpa_destroy_mr(mvdev);
2453 err_mr:
2454 	return err;
2455 }
2456 
2457 /* reslock must be held for this function */
2458 static int setup_driver(struct mlx5_vdpa_dev *mvdev)
2459 {
2460 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2461 	int err;
2462 
2463 	WARN_ON(!rwsem_is_locked(&ndev->reslock));
2464 
2465 	if (ndev->setup) {
2466 		mlx5_vdpa_warn(mvdev, "setup_driver called but the driver is already set up\n");
2467 		err = 0;
2468 		goto out;
2469 	}
2470 	mlx5_vdpa_add_debugfs(ndev);
2471 	err = setup_virtqueues(mvdev);
2472 	if (err) {
2473 		mlx5_vdpa_warn(mvdev, "setup_virtqueues failed\n");
2474 		goto err_setup;
2475 	}
2476 
2477 	err = create_rqt(ndev);
2478 	if (err) {
2479 		mlx5_vdpa_warn(mvdev, "create_rqt failed\n");
2480 		goto err_rqt;
2481 	}
2482 
2483 	err = create_tir(ndev);
2484 	if (err) {
2485 		mlx5_vdpa_warn(mvdev, "create_tir failed\n");
2486 		goto err_tir;
2487 	}
2488 
2489 	err = setup_steering(ndev);
2490 	if (err) {
2491 		mlx5_vdpa_warn(mvdev, "setup_steering failed\n");
2492 		goto err_fwd;
2493 	}
2494 	ndev->setup = true;
2495 
2496 	return 0;
2497 
2498 err_fwd:
2499 	destroy_tir(ndev);
2500 err_tir:
2501 	destroy_rqt(ndev);
2502 err_rqt:
2503 	teardown_virtqueues(ndev);
2504 err_setup:
2505 	mlx5_vdpa_remove_debugfs(ndev->debugfs);
2506 out:
2507 	return err;
2508 }
2509 
2510 /* reslock must be held for this function */
2511 static void teardown_driver(struct mlx5_vdpa_net *ndev)
2512 {
2514 	WARN_ON(!rwsem_is_locked(&ndev->reslock));
2515 
2516 	if (!ndev->setup)
2517 		return;
2518 
2519 	mlx5_vdpa_remove_debugfs(ndev->debugfs);
2520 	ndev->debugfs = NULL;
2521 	teardown_steering(ndev);
2522 	destroy_tir(ndev);
2523 	destroy_rqt(ndev);
2524 	teardown_virtqueues(ndev);
2525 	ndev->setup = false;
2526 }
2527 
2528 static void clear_vqs_ready(struct mlx5_vdpa_net *ndev)
2529 {
2530 	int i;
2531 
2532 	for (i = 0; i < ndev->mvdev.max_vqs; i++)
2533 		ndev->vqs[i].ready = false;
2534 
2535 	ndev->mvdev.cvq.ready = false;
2536 }
2537 
2538 static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
2539 {
2540 	struct mlx5_control_vq *cvq = &mvdev->cvq;
2541 	int err = 0;
2542 
2543 	if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))
2544 		err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
2545 					MLX5_CVQ_MAX_ENT, false,
2546 					(struct vring_desc *)(uintptr_t)cvq->desc_addr,
2547 					(struct vring_avail *)(uintptr_t)cvq->driver_addr,
2548 					(struct vring_used *)(uintptr_t)cvq->device_addr);
2549 
2550 	return err;
2551 }
2552 
2553 static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
2554 {
2555 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2556 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2557 	int err;
2558 
2559 	print_status(mvdev, status, true);
2560 
2561 	down_write(&ndev->reslock);
2562 
2563 	if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
2564 		if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
2565 			err = setup_cvq_vring(mvdev);
2566 			if (err) {
2567 				mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n");
2568 				goto err_setup;
2569 			}
2570 			err = setup_driver(mvdev);
2571 			if (err) {
2572 				mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
2573 				goto err_setup;
2574 			}
2575 		} else {
2576 			mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
2577 			goto err_clear;
2578 		}
2579 	}
2580 
2581 	ndev->mvdev.status = status;
2582 	up_write(&ndev->reslock);
2583 	return;
2584 
2585 err_setup:
2586 	mlx5_vdpa_destroy_mr(&ndev->mvdev);
2587 	ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
2588 err_clear:
2589 	up_write(&ndev->reslock);
2590 }
2591 
2592 static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev)
2593 {
2594 	int i;
2595 
2596 	/* By default, all groups are mapped to ASID 0 */
2597 	for (i = 0; i < MLX5_VDPA_NUMVQ_GROUPS; i++)
2598 		mvdev->group2asid[i] = 0;
2599 }
2600 
2601 static int mlx5_vdpa_reset(struct vdpa_device *vdev)
2602 {
2603 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2604 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2605 
2606 	print_status(mvdev, 0, true);
2607 	mlx5_vdpa_info(mvdev, "performing device reset\n");
2608 
2609 	down_write(&ndev->reslock);
2610 	teardown_driver(ndev);
2611 	clear_vqs_ready(ndev);
2612 	mlx5_vdpa_destroy_mr(&ndev->mvdev);
2613 	ndev->mvdev.status = 0;
2614 	ndev->mvdev.suspended = false;
2615 	ndev->cur_num_vqs = 0;
2616 	ndev->mvdev.cvq.received_desc = 0;
2617 	ndev->mvdev.cvq.completed_desc = 0;
2618 	memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1));
2619 	ndev->mvdev.actual_features = 0;
2620 	init_group_to_asid_map(mvdev);
2621 	++mvdev->generation;
2622 
2623 	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
2624 		if (mlx5_vdpa_create_mr(mvdev, NULL, 0))
2625 			mlx5_vdpa_warn(mvdev, "create MR failed\n");
2626 	}
2627 	up_write(&ndev->reslock);
2628 
2629 	return 0;
2630 }
2631 
2632 static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev)
2633 {
2634 	return sizeof(struct virtio_net_config);
2635 }
2636 
2637 static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf,
2638 				 unsigned int len)
2639 {
2640 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2641 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2642 
2643 	if (offset + len <= sizeof(struct virtio_net_config))
2644 		memcpy(buf, (u8 *)&ndev->config + offset, len);
2645 }
2646 
2647 static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf,
2648 				 unsigned int len)
2649 {
2650 	/* not supported */
2651 }
2652 
2653 static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
2654 {
2655 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2656 
2657 	return mvdev->generation;
2658 }
2659 
2660 static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
2661 			unsigned int asid)
2662 {
2663 	bool change_map;
2664 	int err;
2665 
2666 	err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map, asid);
2667 	if (err) {
2668 		mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err);
2669 		return err;
2670 	}
2671 
2672 	if (change_map)
2673 		err = mlx5_vdpa_change_map(mvdev, iotlb, asid);
2674 
2675 	return err;
2676 }
2677 
2678 static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
2679 			     struct vhost_iotlb *iotlb)
2680 {
2681 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2682 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2683 	int err;
2684 
2685 	down_write(&ndev->reslock);
2686 	err = set_map_data(mvdev, iotlb, asid);
2687 	up_write(&ndev->reslock);
2688 	return err;
2689 }
2690 
2691 static struct device *mlx5_get_vq_dma_dev(struct vdpa_device *vdev, u16 idx)
2692 {
2693 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2694 
2695 	if (is_ctrl_vq_idx(mvdev, idx))
2696 		return &vdev->dev;
2697 
2698 	return mvdev->vdev.dma_dev;
2699 }
2700 
2701 static void mlx5_vdpa_free(struct vdpa_device *vdev)
2702 {
2703 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2704 	struct mlx5_core_dev *pfmdev;
2705 	struct mlx5_vdpa_net *ndev;
2706 
2707 	ndev = to_mlx5_vdpa_ndev(mvdev);
2708 
2709 	free_resources(ndev);
2710 	mlx5_vdpa_destroy_mr(mvdev);
2711 	if (!is_zero_ether_addr(ndev->config.mac)) {
2712 		pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
2713 		mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
2714 	}
2715 	mlx5_vdpa_free_resources(&ndev->mvdev);
2716 	kfree(ndev->event_cbs);
2717 	kfree(ndev->vqs);
2718 }
2719 
2720 static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
2721 {
2722 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2723 	struct vdpa_notification_area ret = {};
2724 	struct mlx5_vdpa_net *ndev;
2725 	phys_addr_t addr;
2726 
2727 	if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
2728 		return ret;
2729 
2730 	/* If SF BAR size is smaller than PAGE_SIZE, do not use direct
2731 	 * notification to avoid the risk of mapping pages that contain the BARs
2732 	 * of more than one SF.
2733 	 */
2734 	if (MLX5_CAP_GEN(mvdev->mdev, log_min_sf_size) + 12 < PAGE_SHIFT)
2735 		return ret;
2736 
2737 	ndev = to_mlx5_vdpa_ndev(mvdev);
2738 	addr = (phys_addr_t)ndev->mvdev.res.phys_kick_addr;
2739 	ret.addr = addr;
2740 	ret.size = PAGE_SIZE;
2741 	return ret;
2742 }
2743 
2744 static int mlx5_get_vq_irq(struct vdpa_device *vdv, u16 idx)
2745 {
2746 	return -EOPNOTSUPP;
2747 }
2748 
2749 static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev)
2750 {
2751 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2752 
2753 	return mvdev->actual_features;
2754 }
2755 
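/* Query the hardware descriptor counters of a virtqueue through its
 * virtio_q counters object. The queue must be in the RDY state,
 * otherwise -EAGAIN is returned.
 */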
2756 static int counter_set_query(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
2757 			     u64 *received_desc, u64 *completed_desc)
2758 {
2759 	u32 in[MLX5_ST_SZ_DW(query_virtio_q_counters_in)] = {};
2760 	u32 out[MLX5_ST_SZ_DW(query_virtio_q_counters_out)] = {};
2761 	void *cmd_hdr;
2762 	void *ctx;
2763 	int err;
2764 
2765 	if (!counters_supported(&ndev->mvdev))
2766 		return -EOPNOTSUPP;
2767 
2768 	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
2769 		return -EAGAIN;
2770 
2771 	cmd_hdr = MLX5_ADDR_OF(query_virtio_q_counters_in, in, hdr);
2772 
2773 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
2774 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
2775 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
2776 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->counter_set_id);
2777 
2778 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
2779 	if (err)
2780 		return err;
2781 
2782 	ctx = MLX5_ADDR_OF(query_virtio_q_counters_out, out, counters);
2783 	*received_desc = MLX5_GET64(virtio_q_counters, ctx, received_desc);
2784 	*completed_desc = MLX5_GET64(virtio_q_counters, ctx, completed_desc);
2785 	return 0;
2786 }
2787 
2788 static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
2789 					 struct sk_buff *msg,
2790 					 struct netlink_ext_ack *extack)
2791 {
2792 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2793 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2794 	struct mlx5_vdpa_virtqueue *mvq;
2795 	struct mlx5_control_vq *cvq;
2796 	u64 received_desc;
2797 	u64 completed_desc;
2798 	int err = 0;
2799 
2800 	down_read(&ndev->reslock);
2801 	if (!is_index_valid(mvdev, idx)) {
2802 		NL_SET_ERR_MSG_MOD(extack, "virtqueue index is not valid");
2803 		err = -EINVAL;
2804 		goto out_err;
2805 	}
2806 
2807 	if (idx == ctrl_vq_idx(mvdev)) {
2808 		cvq = &mvdev->cvq;
2809 		received_desc = cvq->received_desc;
2810 		completed_desc = cvq->completed_desc;
2811 		goto out;
2812 	}
2813 
2814 	mvq = &ndev->vqs[idx];
2815 	err = counter_set_query(ndev, mvq, &received_desc, &completed_desc);
2816 	if (err) {
2817 		NL_SET_ERR_MSG_MOD(extack, "failed to query hardware");
2818 		goto out_err;
2819 	}
2820 
2821 out:
2822 	err = -EMSGSIZE;
2823 	if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "received_desc"))
2824 		goto out_err;
2825 
2826 	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, received_desc,
2827 			      VDPA_ATTR_PAD))
2828 		goto out_err;
2829 
2830 	if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "completed_desc"))
2831 		goto out_err;
2832 
2833 	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, completed_desc,
2834 			      VDPA_ATTR_PAD))
2835 		goto out_err;
2836 
2837 	err = 0;
2838 out_err:
2839 	up_read(&ndev->reslock);
2840 	return err;
2841 }
2842 
2843 static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev)
2844 {
2845 	struct mlx5_control_vq *cvq;
2846 
2847 	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
2848 		return;
2849 
2850 	cvq = &mvdev->cvq;
2851 	cvq->ready = false;
2852 }
2853 
2854 static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
2855 {
2856 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2857 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2858 	struct mlx5_vdpa_virtqueue *mvq;
2859 	int i;
2860 
2861 	mlx5_vdpa_info(mvdev, "suspending device\n");
2862 
2863 	down_write(&ndev->reslock);
2864 	ndev->nb_registered = false;
2865 	mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
2866 	flush_workqueue(ndev->mvdev.wq);
2867 	for (i = 0; i < ndev->cur_num_vqs; i++) {
2868 		mvq = &ndev->vqs[i];
2869 		suspend_vq(ndev, mvq);
2870 	}
2871 	mlx5_vdpa_cvq_suspend(mvdev);
2872 	mvdev->suspended = true;
2873 	up_write(&ndev->reslock);
2874 	return 0;
2875 }
2876 
2877 static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group,
2878 			       unsigned int asid)
2879 {
2880 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2881 
2882 	if (group >= MLX5_VDPA_NUMVQ_GROUPS)
2883 		return -EINVAL;
2884 
2885 	mvdev->group2asid[group] = asid;
2886 	return 0;
2887 }
2888 
2889 static const struct vdpa_config_ops mlx5_vdpa_ops = {
2890 	.set_vq_address = mlx5_vdpa_set_vq_address,
2891 	.set_vq_num = mlx5_vdpa_set_vq_num,
2892 	.kick_vq = mlx5_vdpa_kick_vq,
2893 	.set_vq_cb = mlx5_vdpa_set_vq_cb,
2894 	.set_vq_ready = mlx5_vdpa_set_vq_ready,
2895 	.get_vq_ready = mlx5_vdpa_get_vq_ready,
2896 	.set_vq_state = mlx5_vdpa_set_vq_state,
2897 	.get_vq_state = mlx5_vdpa_get_vq_state,
2898 	.get_vendor_vq_stats = mlx5_vdpa_get_vendor_vq_stats,
2899 	.get_vq_notification = mlx5_get_vq_notification,
2900 	.get_vq_irq = mlx5_get_vq_irq,
2901 	.get_vq_align = mlx5_vdpa_get_vq_align,
2902 	.get_vq_group = mlx5_vdpa_get_vq_group,
2903 	.get_device_features = mlx5_vdpa_get_device_features,
2904 	.set_driver_features = mlx5_vdpa_set_driver_features,
2905 	.get_driver_features = mlx5_vdpa_get_driver_features,
2906 	.set_config_cb = mlx5_vdpa_set_config_cb,
2907 	.get_vq_num_max = mlx5_vdpa_get_vq_num_max,
2908 	.get_device_id = mlx5_vdpa_get_device_id,
2909 	.get_vendor_id = mlx5_vdpa_get_vendor_id,
2910 	.get_status = mlx5_vdpa_get_status,
2911 	.set_status = mlx5_vdpa_set_status,
2912 	.reset = mlx5_vdpa_reset,
2913 	.get_config_size = mlx5_vdpa_get_config_size,
2914 	.get_config = mlx5_vdpa_get_config,
2915 	.set_config = mlx5_vdpa_set_config,
2916 	.get_generation = mlx5_vdpa_get_generation,
2917 	.set_map = mlx5_vdpa_set_map,
2918 	.set_group_asid = mlx5_set_group_asid,
2919 	.get_vq_dma_dev = mlx5_get_vq_dma_dev,
2920 	.free = mlx5_vdpa_free,
2921 	.suspend = mlx5_vdpa_suspend,
2922 };
2923 
2924 static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
2925 {
2926 	u16 hw_mtu;
2927 	int err;
2928 
2929 	err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu);
2930 	if (err)
2931 		return err;
2932 
2933 	*mtu = hw_mtu - MLX5V_ETH_HARD_MTU;
2934 	return 0;
2935 }
2936 
2937 static int alloc_resources(struct mlx5_vdpa_net *ndev)
2938 {
2939 	struct mlx5_vdpa_net_resources *res = &ndev->res;
2940 	int err;
2941 
2942 	if (res->valid) {
2943 		mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n");
2944 		return -EEXIST;
2945 	}
2946 
2947 	err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn);
2948 	if (err)
2949 		return err;
2950 
2951 	err = create_tis(ndev);
2952 	if (err)
2953 		goto err_tis;
2954 
2955 	res->valid = true;
2956 
2957 	return 0;
2958 
2959 err_tis:
2960 	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
2961 	return err;
2962 }
2963 
2964 static void free_resources(struct mlx5_vdpa_net *ndev)
2965 {
2966 	struct mlx5_vdpa_net_resources *res = &ndev->res;
2967 
2968 	if (!res->valid)
2969 		return;
2970 
2971 	destroy_tis(ndev);
2972 	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
2973 	res->valid = false;
2974 }
2975 
2976 static void init_mvqs(struct mlx5_vdpa_net *ndev)
2977 {
2978 	struct mlx5_vdpa_virtqueue *mvq;
2979 	int i;
2980 
2981 	for (i = 0; i < ndev->mvdev.max_vqs; ++i) {
2982 		mvq = &ndev->vqs[i];
2983 		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
2984 		mvq->index = i;
2985 		mvq->ndev = ndev;
2986 		mvq->fwqp.fw = true;
2987 		mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
2988 	}
2995 }
2996 
2997 struct mlx5_vdpa_mgmtdev {
2998 	struct vdpa_mgmt_dev mgtdev;
2999 	struct mlx5_adev *madev;
3000 	struct mlx5_vdpa_net *ndev;
3001 };
3002 
3003 static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
3004 {
3005 	u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
3006 	u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
3007 	int err;
3008 
3009 	MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE);
3010 	MLX5_SET(query_vport_state_in, in, op_mod, opmod);
3011 	MLX5_SET(query_vport_state_in, in, vport_number, vport);
3012 	if (vport)
3013 		MLX5_SET(query_vport_state_in, in, other_vport, 1);
3014 
3015 	err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
3016 	if (err)
3017 		return 0;
3018 
3019 	return MLX5_GET(query_vport_state_out, out, state);
3020 }
3021 
3022 static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
3023 {
3024 	if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
3025 	    VPORT_STATE_UP)
3026 		return true;
3027 
3028 	return false;
3029 }
3030 
3031 static void update_carrier(struct work_struct *work)
3032 {
3033 	struct mlx5_vdpa_wq_ent *wqent;
3034 	struct mlx5_vdpa_dev *mvdev;
3035 	struct mlx5_vdpa_net *ndev;
3036 
3037 	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
3038 	mvdev = wqent->mvdev;
3039 	ndev = to_mlx5_vdpa_ndev(mvdev);
3040 	if (get_link_state(mvdev))
3041 		ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
3042 	else
3043 		ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
3044 
3045 	if (ndev->nb_registered && ndev->config_cb.callback)
3046 		ndev->config_cb.callback(ndev->config_cb.private);
3047 
3048 	kfree(wqent);
3049 }
3050 
3051 static int event_handler(struct notifier_block *nb, unsigned long event, void *param)
3052 {
3053 	struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb);
3054 	struct mlx5_eqe *eqe = param;
3055 	int ret = NOTIFY_DONE;
3056 	struct mlx5_vdpa_wq_ent *wqent;
3057 
3058 	if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
3059 		if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_STATUS)))
3060 			return NOTIFY_DONE;
3061 		switch (eqe->sub_type) {
3062 		case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
3063 		case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
3064 			wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
3065 			if (!wqent)
3066 				return NOTIFY_DONE;
3067 
3068 			wqent->mvdev = &ndev->mvdev;
3069 			INIT_WORK(&wqent->work, update_carrier);
3070 			queue_work(ndev->mvdev.wq, &wqent->work);
3071 			ret = NOTIFY_OK;
3072 			break;
3073 		default:
3074 			return NOTIFY_DONE;
3075 		}
3076 		return ret;
3077 	}
3078 	return ret;
3079 }
3080 
3081 static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu)
3082 {
3083 	int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
3084 	void *in;
3085 	int err;
3086 
3087 	in = kvzalloc(inlen, GFP_KERNEL);
3088 	if (!in)
3089 		return -ENOMEM;
3090 
3091 	MLX5_SET(modify_nic_vport_context_in, in, field_select.mtu, 1);
3092 	MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.mtu,
3093 		 mtu + MLX5V_ETH_HARD_MTU);
3094 	MLX5_SET(modify_nic_vport_context_in, in, opcode,
3095 		 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
3096 
3097 	err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);
3098 
3099 	kvfree(in);
3100 	return err;
3101 }
3102 
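/* Management device callback for creating a vdpa net device: validate
 * the provisioned features against the device capabilities, size the
 * virtqueue array, program the MAC/MTU related configuration and
 * register the device with the vdpa core.
 */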
3103 static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
3104 			     const struct vdpa_dev_set_config *add_config)
3105 {
3106 	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
3107 	struct virtio_net_config *config;
3108 	struct mlx5_core_dev *pfmdev;
3109 	struct mlx5_vdpa_dev *mvdev;
3110 	struct mlx5_vdpa_net *ndev;
3111 	struct mlx5_core_dev *mdev;
3112 	u64 device_features;
3113 	u32 max_vqs;
3114 	u16 mtu;
3115 	int err;
3116 
3117 	if (mgtdev->ndev)
3118 		return -ENOSPC;
3119 
3120 	mdev = mgtdev->madev->mdev;
3121 	device_features = mgtdev->mgtdev.supported_features;
3122 	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) {
3123 		if (add_config->device_features & ~device_features) {
3124 			dev_warn(mdev->device,
3125 				 "The provisioned features 0x%llx are not supported by this device with features 0x%llx\n",
3126 				 add_config->device_features, device_features);
3127 			return -EINVAL;
3128 		}
3129 		device_features &= add_config->device_features;
3130 	}
3131 	if (!(device_features & BIT_ULL(VIRTIO_F_VERSION_1) &&
3132 	      device_features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) {
3133 		dev_warn(mdev->device,
3134 			 "Must provision minimum features 0x%llx for this device\n",
3135 			 BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM));
3136 		return -EOPNOTSUPP;
3137 	}
3138 
3139 	if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) &
3140 	    MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) {
3141 		dev_warn(mdev->device, "missing support for split virtqueues\n");
3142 		return -EOPNOTSUPP;
3143 	}
3144 
3145 	max_vqs = min_t(int, MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues),
3146 			1 << MLX5_CAP_GEN(mdev, log_max_rqt_size));
3147 	if (max_vqs < 2) {
3148 		dev_warn(mdev->device,
3149 			 "%d virtqueues are supported. At least 2 are required\n",
3150 			 max_vqs);
3151 		return -EAGAIN;
3152 	}
3153 
3154 	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) {
3155 		if (add_config->net.max_vq_pairs > max_vqs / 2)
3156 			return -EINVAL;
3157 		max_vqs = min_t(u32, max_vqs, 2 * add_config->net.max_vq_pairs);
3158 	} else {
3159 		max_vqs = 2;
3160 	}
3161 
3162 	ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
3163 				 MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false);
3164 	if (IS_ERR(ndev))
3165 		return PTR_ERR(ndev);
3166 
3167 	ndev->mvdev.max_vqs = max_vqs;
3168 	mvdev = &ndev->mvdev;
3169 	mvdev->mdev = mdev;
3170 
3171 	ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
3172 	ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), GFP_KERNEL);
3173 	if (!ndev->vqs || !ndev->event_cbs) {
3174 		err = -ENOMEM;
3175 		goto err_alloc;
3176 	}
3177 
3178 	init_mvqs(ndev);
3179 	init_rwsem(&ndev->reslock);
3180 	config = &ndev->config;
3181 
3182 	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU)) {
3183 		err = config_func_mtu(mdev, add_config->net.mtu);
3184 		if (err)
3185 			goto err_alloc;
3186 	}
3187 
3188 	if (device_features & BIT_ULL(VIRTIO_NET_F_MTU)) {
3189 		err = query_mtu(mdev, &mtu);
3190 		if (err)
3191 			goto err_alloc;
3192 
3193 		ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu);
3194 	}
3195 
3196 	if (device_features & BIT_ULL(VIRTIO_NET_F_STATUS)) {
3197 		if (get_link_state(mvdev))
3198 			ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
3199 		else
3200 			ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
3201 	}
3202 
3203 	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
3204 		memcpy(ndev->config.mac, add_config->net.mac, ETH_ALEN);
3205 	/* No point in setting the mac address in config if _F_MAC won't be provisioned */
3206 	} else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0 ||
3207 		   device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
3208 		err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
3209 		if (err)
3210 			goto err_alloc;
3211 	}
3212 
3213 	if (!is_zero_ether_addr(config->mac)) {
3214 		pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
3215 		err = mlx5_mpfs_add_mac(pfmdev, config->mac);
3216 		if (err)
3217 			goto err_alloc;
3218 	} else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0) {
3219 		/*
3220 		 * We used to clear the _F_MAC feature bit when seeing a
3221 		 * zero mac address and device features were not
3222 		 * explicitly provisioned. Keep that behaviour
3223 		 * so old scripts do not break.
3224 		 */
3225 		device_features &= ~BIT_ULL(VIRTIO_NET_F_MAC);
3226 	} else if (device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
3227 		/* Don't provision zero mac address for _F_MAC */
3228 		mlx5_vdpa_warn(&ndev->mvdev,
3229 			       "No mac address provisioned?\n");
3230 		err = -EINVAL;
3231 		goto err_alloc;
3232 	}
3233 
3234 	if (device_features & BIT_ULL(VIRTIO_NET_F_MQ))
3235 		config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, max_vqs / 2);
3236 
3237 	ndev->mvdev.mlx_features = device_features;
3238 	mvdev->vdev.dma_dev = &mdev->pdev->dev;
3239 	err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
3240 	if (err)
3241 		goto err_mpfs;
3242 
3243 	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
3244 		err = mlx5_vdpa_create_mr(mvdev, NULL, 0);
3245 		if (err)
3246 			goto err_res;
3247 	}
3248 
3249 	err = alloc_resources(ndev);
3250 	if (err)
3251 		goto err_mr;
3252 
3253 	ndev->cvq_ent.mvdev = mvdev;
3254 	INIT_WORK(&ndev->cvq_ent.work, mlx5_cvq_kick_handler);
3255 	mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq");
3256 	if (!mvdev->wq) {
3257 		err = -ENOMEM;
3258 		goto err_res2;
3259 	}
3260 
3261 	ndev->nb.notifier_call = event_handler;
3262 	mlx5_notifier_register(mdev, &ndev->nb);
3263 	ndev->nb_registered = true;
3264 	mvdev->vdev.mdev = &mgtdev->mgtdev;
3265 	err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1);
3266 	if (err)
3267 		goto err_reg;
3268 
3269 	mgtdev->ndev = ndev;
3270 	return 0;
3271 
3272 err_reg:
3273 	destroy_workqueue(mvdev->wq);
3274 err_res2:
3275 	free_resources(ndev);
3276 err_mr:
3277 	mlx5_vdpa_destroy_mr(mvdev);
3278 err_res:
3279 	mlx5_vdpa_free_resources(&ndev->mvdev);
3280 err_mpfs:
3281 	if (!is_zero_ether_addr(config->mac))
3282 		mlx5_mpfs_del_mac(pfmdev, config->mac);
3283 err_alloc:
3284 	put_device(&mvdev->vdev.dev);
3285 	return err;
3286 }
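
/* Illustrative example (not part of this file): with iproute2's "vdpa"
 * tool, a device is typically instantiated through this management
 * device with something like:
 *
 *   vdpa dev add name vdpa0 mgmtdev pci/0000:3b:00.2 \
 *           mac 00:11:22:33:44:55 max_vqp 4 mtu 1500
 *
 * where mac, max_vqp and mtu correspond to the
 * VDPA_ATTR_DEV_NET_CFG_MACADDR, VDPA_ATTR_DEV_NET_CFG_MAX_VQP and
 * VDPA_ATTR_DEV_NET_CFG_MTU attributes handled in mlx5_vdpa_dev_add().
 * The exact syntax depends on the iproute2 version, and the device
 * names/addresses here are hypothetical.
 */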
3287 
3288 static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
3289 {
3290 	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
3291 	struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
3292 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3293 	struct workqueue_struct *wq;
3294 
3295 	mlx5_vdpa_remove_debugfs(ndev->debugfs);
3296 	ndev->debugfs = NULL;
3297 	if (ndev->nb_registered) {
3298 		ndev->nb_registered = false;
3299 		mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
3300 	}
3301 	wq = mvdev->wq;
3302 	mvdev->wq = NULL;
3303 	destroy_workqueue(wq);
3304 	_vdpa_unregister_device(dev);
3305 	mgtdev->ndev = NULL;
3306 }
3307 
3308 static const struct vdpa_mgmtdev_ops mdev_ops = {
3309 	.dev_add = mlx5_vdpa_dev_add,
3310 	.dev_del = mlx5_vdpa_dev_del,
3311 };
3312 
3313 static struct virtio_device_id id_table[] = {
3314 	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
3315 	{ 0 },
3316 };
3317 
3318 static int mlx5v_probe(struct auxiliary_device *adev,
3319 		       const struct auxiliary_device_id *id)
3321 {
3322 	struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
3323 	struct mlx5_core_dev *mdev = madev->mdev;
3324 	struct mlx5_vdpa_mgmtdev *mgtdev;
3325 	int err;
3326 
3327 	mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL);
3328 	if (!mgtdev)
3329 		return -ENOMEM;
3330 
3331 	mgtdev->mgtdev.ops = &mdev_ops;
3332 	mgtdev->mgtdev.device = mdev->device;
3333 	mgtdev->mgtdev.id_table = id_table;
3334 	mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
3335 					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP) |
3336 					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU) |
3337 					  BIT_ULL(VDPA_ATTR_DEV_FEATURES);
3338 	mgtdev->mgtdev.max_supported_vqs =
3339 		MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues) + 1;
3340 	mgtdev->mgtdev.supported_features = get_supported_features(mdev);
3341 	mgtdev->madev = madev;
3342 
3343 	err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
3344 	if (err)
3345 		goto reg_err;
3346 
3347 	auxiliary_set_drvdata(adev, mgtdev);
3348 
3349 	return 0;
3350 
3351 reg_err:
3352 	kfree(mgtdev);
3353 	return err;
3354 }
3355 
3356 static void mlx5v_remove(struct auxiliary_device *adev)
3357 {
3358 	struct mlx5_vdpa_mgmtdev *mgtdev;
3359 
3360 	mgtdev = auxiliary_get_drvdata(adev);
3361 	vdpa_mgmtdev_unregister(&mgtdev->mgtdev);
3362 	kfree(mgtdev);
3363 }
3364 
3365 static const struct auxiliary_device_id mlx5v_id_table[] = {
3366 	{ .name = MLX5_ADEV_NAME ".vnet", },
3367 	{},
3368 };
3369 
3370 MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table);
3371 
3372 static struct auxiliary_driver mlx5v_driver = {
3373 	.name = "vnet",
3374 	.probe = mlx5v_probe,
3375 	.remove = mlx5v_remove,
3376 	.id_table = mlx5v_id_table,
3377 };
3378 
3379 module_auxiliary_driver(mlx5v_driver);
3380