xref: /openbmc/linux/drivers/vdpa/mlx5/net/mlx5_vnet.c (revision 9008a676)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2020 Mellanox Technologies Ltd. */
3 
4 #include <linux/module.h>
5 #include <linux/vdpa.h>
6 #include <linux/vringh.h>
7 #include <uapi/linux/virtio_net.h>
8 #include <uapi/linux/virtio_ids.h>
9 #include <uapi/linux/vdpa.h>
10 #include <linux/virtio_config.h>
11 #include <linux/auxiliary_bus.h>
12 #include <linux/mlx5/cq.h>
13 #include <linux/mlx5/qp.h>
14 #include <linux/mlx5/device.h>
15 #include <linux/mlx5/driver.h>
16 #include <linux/mlx5/vport.h>
17 #include <linux/mlx5/fs.h>
18 #include <linux/mlx5/mlx5_ifc_vdpa.h>
19 #include <linux/mlx5/mpfs.h>
20 #include "mlx5_vdpa.h"
21 
/* Module metadata: author, description and license strings. */
MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox VDPA driver");
MODULE_LICENSE("Dual BSD/GPL");
25 
/* Recover the enclosing struct mlx5_vdpa_net from a pointer to its mvdev member. */
#define to_mlx5_vdpa_ndev(__mvdev)                                             \
	container_of(__mvdev, struct mlx5_vdpa_net, mvdev)
/* Recover the enclosing struct mlx5_vdpa_dev from a pointer to its vdev member. */
#define to_mvdev(__vdev) container_of((__vdev), struct mlx5_vdpa_dev, vdev)
29 
/*
 * Set of virtio feature bits this file recognizes. print_features() warns
 * about any feature bit that falls outside this mask.
 */
#define VALID_FEATURES_MASK                                                                        \
	(BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |                                   \
	 BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) |   \
	 BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |                             \
	 BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \
	 BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) |   \
	 BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |      \
	 BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) |                                 \
	 BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) |                      \
	 BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) |  \
	 BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) |           \
	 BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) |                          \
	 BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) |      \
	 BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV))
44 
/*
 * Set of virtio device status bits this file recognizes. print_status()
 * warns about any status bit that falls outside this mask.
 */
#define VALID_STATUS_MASK                                                                          \
	(VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK |        \
	 VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)
48 
/* Evaluates to 1 when _feature is set in (_mvdev)->actual_features, 0 otherwise. */
#define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature)))

/*
 * NOTE(review): presumably a marker value for untagged traffic chosen above
 * the 12-bit VLAN id range; no user is visible in this part of the file, so
 * confirm against the callers.
 */
#define MLX5V_UNTAGGED 0x1000
52 
/* Per-net-device object numbers and a flag describing whether they are set up. */
struct mlx5_vdpa_net_resources {
	u32 tisn;	/* TIS number; written by create_tis(), programmed into TX virtqueues */
	u32 tdn;	/* transport domain number used when creating the TIS */
	u32 tirn;	/* presumably the TIR number - set outside the code visible here */
	u32 rqtn;	/* presumably the RQ table number - set outside the code visible here */
	bool valid;	/* presumably true once the resources above exist - TODO confirm */
};
60 
/* Backing buffer of a completion queue, filled in by cq_frag_buf_alloc(). */
struct mlx5_vdpa_cq_buf {
	struct mlx5_frag_buf_ctrl fbc;	/* accessor state initialized via mlx5_init_fbc() */
	struct mlx5_frag_buf frag_buf;	/* fragmented buffer holding the CQEs */
	int cqe_size;			/* set to MLX5_VDPA_CQE_SIZE */
	int nent;			/* number of CQEs the buffer was sized for */
};
67 
/* Completion queue used for virtqueue notifications; see cq_create(). */
struct mlx5_vdpa_cq {
	struct mlx5_core_cq mcq;	/* core CQ object; mlx5_vdpa_cq_comp() is its completion handler */
	struct mlx5_vdpa_cq_buf buf;	/* CQE storage */
	struct mlx5_db db;		/* doorbell record: db[0] consumer index, db[1] arm */
	int cqe;			/* number of entries; cq_create() sets it to num_ent */
};
74 
/* One of the three umem buffers handed to firmware for a virtqueue. */
struct mlx5_vdpa_umem {
	struct mlx5_frag_buf_ctrl fbc;	/* not touched by the code visible here */
	struct mlx5_frag_buf frag_buf;	/* memory backing the umem */
	int size;			/* byte size computed by set_umem_size() */
	u32 id;				/* umem_id returned by the CREATE_UMEM command */
};
81 
/* One end of the QP pair used as a notification channel for a virtqueue. */
struct mlx5_vdpa_qp {
	struct mlx5_core_qp mqp;	/* holds the qpn and uid assigned in qp_create() */
	struct mlx5_frag_buf frag_buf;	/* receive queue buffer; only allocated when !fw */
	struct mlx5_db db;		/* doorbell record; only allocated when !fw */
	u16 head;			/* running receive counter written to db by rx_post() */
	bool fw;			/* true for the firmware-side QP, which owns no driver buffers */
};
89 
/*
 * NOTE(review): presumably a snapshot of virtqueue parameters kept so the
 * queue can be re-created later; the users are outside the code visible
 * here - confirm. The fields mirror those of struct mlx5_vdpa_virtqueue.
 */
struct mlx5_vq_restore_info {
	u32 num_ent;
	u64 desc_addr;
	u64 device_addr;
	u64 driver_addr;
	u16 avail_index;
	u16 used_index;
	bool ready;
	bool restore;
};
100 
/* Driver state for a single hardware-offloaded virtqueue. */
struct mlx5_vdpa_virtqueue {
	bool ready;
	u64 desc_addr;		/* programmed as desc_addr of the virtio_q context */
	u64 device_addr;	/* programmed as used_addr of the virtio_q context */
	u64 driver_addr;	/* programmed as available_addr of the virtio_q context */
	u32 num_ent;		/* queue size; also sizes the CQ, RQ and umems */

	/* Resources for implementing the notification channel from the device
	 * to the driver. fwqp is the firmware end of an RC connection; the
	 * other end is vqqp used by the driver. cq is where completions are
	 * reported.
	 */
	struct mlx5_vdpa_cq cq;
	struct mlx5_vdpa_qp fwqp;
	struct mlx5_vdpa_qp vqqp;

	/* umem resources are required for the virtqueue operation. Their use
	 * is internal and they must be provided by the driver.
	 */
	struct mlx5_vdpa_umem umem1;
	struct mlx5_vdpa_umem umem2;
	struct mlx5_vdpa_umem umem3;

	u32 counter_set_id;	/* programmed into the queue when counters are supported */
	bool initialized;
	int index;		/* queue index; also indexes ndev->event_cbs */
	u32 virtq_id;		/* object id returned when the queue object is created */
	struct mlx5_vdpa_net *ndev;
	u16 avail_idx;		/* initial hw_available_index for create_virtqueue() */
	u16 used_idx;		/* initial hw_used_index for create_virtqueue() */
	int fw_state;

	/* keep last in the struct */
	struct mlx5_vq_restore_info ri;
};
136 
137 static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
138 {
139 	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) {
140 		if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
141 			return idx < 2;
142 		else
143 			return idx < 3;
144 	}
145 
146 	return idx <= mvdev->max_idx;
147 }
148 
/* Number of buckets in the macvlan_hash table of struct mlx5_vdpa_net. */
#define MLX5V_MACVLAN_SIZE 256
150 
/* Net-device level state; embeds the generic mlx5 vdpa device as mvdev. */
struct mlx5_vdpa_net {
	struct mlx5_vdpa_dev mvdev;	/* container_of() anchor for to_mlx5_vdpa_ndev() */
	struct mlx5_vdpa_net_resources res;
	struct virtio_net_config config;
	struct mlx5_vdpa_virtqueue *vqs;	/* array indexed by virtqueue index */
	struct vdpa_callback *event_cbs;	/* per-virtqueue callbacks, indexed like vqs */

	/* Serialize vq resources creation and destruction. This is required
	 * since memory map might change and we need to destroy and create
	 * resources while the driver is operational.
	 */
	struct rw_semaphore reslock;
	struct mlx5_flow_table *rxft;
	bool setup;
	u32 cur_num_vqs;
	u32 rqt_size;
	struct notifier_block nb;
	struct vdpa_callback config_cb;
	struct mlx5_vdpa_wq_ent cvq_ent;
	struct hlist_head macvlan_hash[MLX5V_MACVLAN_SIZE];
};
172 
/*
 * Hash list entry carrying a pair of flow rule handles.
 * NOTE(review): presumably linked into mlx5_vdpa_net::macvlan_hash and keyed
 * by @macvlan - the users are outside the code visible here; confirm.
 */
struct macvlan_node {
	struct hlist_node hlist;
	struct mlx5_flow_handle *ucast_rule;
	struct mlx5_flow_handle *mcast_rule;
	u64 macvlan;
};
179 
/* Forward declarations of helpers defined later in this file. */
static void free_resources(struct mlx5_vdpa_net *ndev);
static void init_mvqs(struct mlx5_vdpa_net *ndev);
static int setup_driver(struct mlx5_vdpa_dev *mvdev);
static void teardown_driver(struct mlx5_vdpa_net *ndev);
184 
/*
 * Gates the verbose dumps in print_status() and print_features(). It has
 * static storage and no initializer, so it starts out false.
 */
static bool mlx5_vdpa_debug;

/* NOTE(review): presumably the maximum number of control VQ entries - confirm against users. */
#define MLX5_CVQ_MAX_ENT 16
188 
/*
 * Log the name of _feature if that bit is set. Relies on variables named
 * `features` and `mvdev` being in scope at the expansion site.
 */
#define MLX5_LOG_VIO_FLAG(_feature)                                                                \
	do {                                                                                       \
		if (features & BIT_ULL(_feature))                                                  \
			mlx5_vdpa_info(mvdev, "%s\n", #_feature);                                  \
	} while (0)

/*
 * Log the name of _status if that bit is set. Relies on variables named
 * `status` and `mvdev` being in scope at the expansion site.
 */
#define MLX5_LOG_VIO_STAT(_status)                                                                 \
	do {                                                                                       \
		if (status & (_status))                                                            \
			mlx5_vdpa_info(mvdev, "%s\n", #_status);                                   \
	} while (0)
200 
201 /* TODO: cross-endian support */
202 static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
203 {
204 	return virtio_legacy_is_little_endian() ||
205 		(mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
206 }
207 
208 static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val)
209 {
210 	return __virtio16_to_cpu(mlx5_vdpa_is_little_endian(mvdev), val);
211 }
212 
213 static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
214 {
215 	return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
216 }
217 
218 static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
219 {
220 	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
221 		return 2;
222 
223 	return mvdev->max_vqs;
224 }
225 
226 static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
227 {
228 	return idx == ctrl_vq_idx(mvdev);
229 }
230 
231 static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
232 {
233 	if (status & ~VALID_STATUS_MASK)
234 		mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n",
235 			       status & ~VALID_STATUS_MASK);
236 
237 	if (!mlx5_vdpa_debug)
238 		return;
239 
240 	mlx5_vdpa_info(mvdev, "driver status %s", set ? "set" : "get");
241 	if (set && !status) {
242 		mlx5_vdpa_info(mvdev, "driver resets the device\n");
243 		return;
244 	}
245 
246 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE);
247 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER);
248 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK);
249 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK);
250 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET);
251 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED);
252 }
253 
254 static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set)
255 {
256 	if (features & ~VALID_FEATURES_MASK)
257 		mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n",
258 			       features & ~VALID_FEATURES_MASK);
259 
260 	if (!mlx5_vdpa_debug)
261 		return;
262 
263 	mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads");
264 	if (!features)
265 		mlx5_vdpa_info(mvdev, "all feature bits are cleared\n");
266 
267 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM);
268 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM);
269 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
270 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU);
271 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC);
272 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4);
273 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6);
274 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN);
275 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO);
276 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4);
277 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6);
278 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN);
279 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO);
280 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF);
281 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS);
282 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ);
283 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX);
284 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN);
285 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA);
286 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE);
287 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ);
288 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR);
289 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT);
290 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS);
291 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT);
292 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY);
293 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX);
294 	MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY);
295 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT);
296 	MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1);
297 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM);
298 	MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED);
299 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM);
300 	MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV);
301 }
302 
303 static int create_tis(struct mlx5_vdpa_net *ndev)
304 {
305 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
306 	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
307 	void *tisc;
308 	int err;
309 
310 	tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
311 	MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn);
312 	err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn);
313 	if (err)
314 		mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err);
315 
316 	return err;
317 }
318 
319 static void destroy_tis(struct mlx5_vdpa_net *ndev)
320 {
321 	mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn);
322 }
323 
/* Size in bytes of one completion queue entry, and its base-2 logarithm. */
#define MLX5_VDPA_CQE_SIZE 64
#define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE)
326 
327 static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent)
328 {
329 	struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
330 	u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE;
331 	u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE;
332 	int err;
333 
334 	err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf,
335 				       ndev->mvdev.mdev->priv.numa_node);
336 	if (err)
337 		return err;
338 
339 	mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);
340 
341 	buf->cqe_size = MLX5_VDPA_CQE_SIZE;
342 	buf->nent = nent;
343 
344 	return 0;
345 }
346 
347 static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size)
348 {
349 	struct mlx5_frag_buf *frag_buf = &umem->frag_buf;
350 
351 	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf,
352 					ndev->mvdev.mdev->priv.numa_node);
353 }
354 
355 static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf)
356 {
357 	mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf);
358 }
359 
360 static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n)
361 {
362 	return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n);
363 }
364 
365 static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf)
366 {
367 	struct mlx5_cqe64 *cqe64;
368 	void *cqe;
369 	int i;
370 
371 	for (i = 0; i < buf->nent; i++) {
372 		cqe = get_cqe(vcq, i);
373 		cqe64 = cqe;
374 		cqe64->op_own = MLX5_CQE_INVALID << 4;
375 	}
376 }
377 
378 static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n)
379 {
380 	struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1));
381 
382 	if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
383 	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe)))
384 		return cqe64;
385 
386 	return NULL;
387 }
388 
389 static void rx_post(struct mlx5_vdpa_qp *vqp, int n)
390 {
391 	vqp->head += n;
392 	vqp->db.db[0] = cpu_to_be32(vqp->head);
393 }
394 
395 static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in,
396 		       struct mlx5_vdpa_virtqueue *mvq, u32 num_ent)
397 {
398 	struct mlx5_vdpa_qp *vqp;
399 	__be64 *pas;
400 	void *qpc;
401 
402 	vqp = fw ? &mvq->fwqp : &mvq->vqqp;
403 	MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid);
404 	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
405 	if (vqp->fw) {
406 		/* Firmware QP is allocated by the driver for the firmware's
407 		 * use so we can skip part of the params as they will be chosen by firmware
408 		 */
409 		qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
410 		MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
411 		MLX5_SET(qpc, qpc, no_sq, 1);
412 		return;
413 	}
414 
415 	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
416 	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
417 	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
418 	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
419 	MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index);
420 	MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
421 	MLX5_SET(qpc, qpc, no_sq, 1);
422 	MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn);
423 	MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent));
424 	MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
425 	pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas);
426 	mlx5_fill_page_frag_array(&vqp->frag_buf, pas);
427 }
428 
429 static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent)
430 {
431 	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev,
432 					num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf,
433 					ndev->mvdev.mdev->priv.numa_node);
434 }
435 
436 static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
437 {
438 	mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf);
439 }
440 
441 static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
442 		     struct mlx5_vdpa_qp *vqp)
443 {
444 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
445 	int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
446 	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
447 	void *qpc;
448 	void *in;
449 	int err;
450 
451 	if (!vqp->fw) {
452 		vqp = &mvq->vqqp;
453 		err = rq_buf_alloc(ndev, vqp, mvq->num_ent);
454 		if (err)
455 			return err;
456 
457 		err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db);
458 		if (err)
459 			goto err_db;
460 		inlen += vqp->frag_buf.npages * sizeof(__be64);
461 	}
462 
463 	in = kzalloc(inlen, GFP_KERNEL);
464 	if (!in) {
465 		err = -ENOMEM;
466 		goto err_kzalloc;
467 	}
468 
469 	qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent);
470 	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
471 	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
472 	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
473 	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
474 	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
475 	if (!vqp->fw)
476 		MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma);
477 	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
478 	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
479 	kfree(in);
480 	if (err)
481 		goto err_kzalloc;
482 
483 	vqp->mqp.uid = ndev->mvdev.res.uid;
484 	vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);
485 
486 	if (!vqp->fw)
487 		rx_post(vqp, mvq->num_ent);
488 
489 	return 0;
490 
491 err_kzalloc:
492 	if (!vqp->fw)
493 		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
494 err_db:
495 	if (!vqp->fw)
496 		rq_buf_free(ndev, vqp);
497 
498 	return err;
499 }
500 
501 static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
502 {
503 	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
504 
505 	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
506 	MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
507 	MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
508 	if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
509 		mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn);
510 	if (!vqp->fw) {
511 		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
512 		rq_buf_free(ndev, vqp);
513 	}
514 }
515 
516 static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
517 {
518 	return get_sw_cqe(cq, cq->mcq.cons_index);
519 }
520 
521 static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
522 {
523 	struct mlx5_cqe64 *cqe64;
524 
525 	cqe64 = next_cqe_sw(vcq);
526 	if (!cqe64)
527 		return -EAGAIN;
528 
529 	vcq->mcq.cons_index++;
530 	return 0;
531 }
532 
533 static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
534 {
535 	struct mlx5_vdpa_net *ndev = mvq->ndev;
536 	struct vdpa_callback *event_cb;
537 
538 	event_cb = &ndev->event_cbs[mvq->index];
539 	mlx5_cq_set_ci(&mvq->cq.mcq);
540 
541 	/* make sure CQ cosumer update is visible to the hardware before updating
542 	 * RX doorbell record.
543 	 */
544 	dma_wmb();
545 	rx_post(&mvq->vqqp, num);
546 	if (event_cb->callback)
547 		event_cb->callback(event_cb->private);
548 }
549 
550 static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
551 {
552 	struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
553 	struct mlx5_vdpa_net *ndev = mvq->ndev;
554 	void __iomem *uar_page = ndev->mvdev.res.uar->map;
555 	int num = 0;
556 
557 	while (!mlx5_vdpa_poll_one(&mvq->cq)) {
558 		num++;
559 		if (num > mvq->num_ent / 2) {
560 			/* If completions keep coming while we poll, we want to
561 			 * let the hardware know that we consumed them by
562 			 * updating the doorbell record.  We also let vdpa core
563 			 * know about this so it passes it on the virtio driver
564 			 * on the guest.
565 			 */
566 			mlx5_vdpa_handle_completions(mvq, num);
567 			num = 0;
568 		}
569 	}
570 
571 	if (num)
572 		mlx5_vdpa_handle_completions(mvq, num);
573 
574 	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
575 }
576 
577 static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
578 {
579 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
580 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
581 	void __iomem *uar_page = ndev->mvdev.res.uar->map;
582 	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
583 	struct mlx5_vdpa_cq *vcq = &mvq->cq;
584 	__be64 *pas;
585 	int inlen;
586 	void *cqc;
587 	void *in;
588 	int err;
589 	int eqn;
590 
591 	err = mlx5_db_alloc(mdev, &vcq->db);
592 	if (err)
593 		return err;
594 
595 	vcq->mcq.set_ci_db = vcq->db.db;
596 	vcq->mcq.arm_db = vcq->db.db + 1;
597 	vcq->mcq.cqe_sz = 64;
598 
599 	err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
600 	if (err)
601 		goto err_db;
602 
603 	cq_frag_buf_init(vcq, &vcq->buf);
604 
605 	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
606 		MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages;
607 	in = kzalloc(inlen, GFP_KERNEL);
608 	if (!in) {
609 		err = -ENOMEM;
610 		goto err_vzalloc;
611 	}
612 
613 	MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid);
614 	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
615 	mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas);
616 
617 	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
618 	MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
619 
620 	/* Use vector 0 by default. Consider adding code to choose least used
621 	 * vector.
622 	 */
623 	err = mlx5_vector2eqn(mdev, 0, &eqn);
624 	if (err)
625 		goto err_vec;
626 
627 	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
628 	MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
629 	MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
630 	MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
631 	MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);
632 
633 	err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
634 	if (err)
635 		goto err_vec;
636 
637 	vcq->mcq.comp = mlx5_vdpa_cq_comp;
638 	vcq->cqe = num_ent;
639 	vcq->mcq.set_ci_db = vcq->db.db;
640 	vcq->mcq.arm_db = vcq->db.db + 1;
641 	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
642 	kfree(in);
643 	return 0;
644 
645 err_vec:
646 	kfree(in);
647 err_vzalloc:
648 	cq_frag_buf_free(ndev, &vcq->buf);
649 err_db:
650 	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
651 	return err;
652 }
653 
654 static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
655 {
656 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
657 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
658 	struct mlx5_vdpa_cq *vcq = &mvq->cq;
659 
660 	if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) {
661 		mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn);
662 		return;
663 	}
664 	cq_frag_buf_free(ndev, &vcq->buf);
665 	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
666 }
667 
668 static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
669 			  struct mlx5_vdpa_umem **umemp)
670 {
671 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
672 	int p_a;
673 	int p_b;
674 
675 	switch (num) {
676 	case 1:
677 		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_a);
678 		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_b);
679 		*umemp = &mvq->umem1;
680 		break;
681 	case 2:
682 		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_a);
683 		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_b);
684 		*umemp = &mvq->umem2;
685 		break;
686 	case 3:
687 		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_a);
688 		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_b);
689 		*umemp = &mvq->umem3;
690 		break;
691 	}
692 	(*umemp)->size = p_a * mvq->num_ent + p_b;
693 }
694 
695 static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem)
696 {
697 	mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf);
698 }
699 
700 static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
701 {
702 	int inlen;
703 	u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {};
704 	void *um;
705 	void *in;
706 	int err;
707 	__be64 *pas;
708 	struct mlx5_vdpa_umem *umem;
709 
710 	set_umem_size(ndev, mvq, num, &umem);
711 	err = umem_frag_buf_alloc(ndev, umem, umem->size);
712 	if (err)
713 		return err;
714 
715 	inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages;
716 
717 	in = kzalloc(inlen, GFP_KERNEL);
718 	if (!in) {
719 		err = -ENOMEM;
720 		goto err_in;
721 	}
722 
723 	MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM);
724 	MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid);
725 	um = MLX5_ADDR_OF(create_umem_in, in, umem);
726 	MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
727 	MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages);
728 
729 	pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]);
730 	mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW);
731 
732 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
733 	if (err) {
734 		mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err);
735 		goto err_cmd;
736 	}
737 
738 	kfree(in);
739 	umem->id = MLX5_GET(create_umem_out, out, umem_id);
740 
741 	return 0;
742 
743 err_cmd:
744 	kfree(in);
745 err_in:
746 	umem_frag_buf_free(ndev, umem);
747 	return err;
748 }
749 
750 static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
751 {
752 	u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {};
753 	u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {};
754 	struct mlx5_vdpa_umem *umem;
755 
756 	switch (num) {
757 	case 1:
758 		umem = &mvq->umem1;
759 		break;
760 	case 2:
761 		umem = &mvq->umem2;
762 		break;
763 	case 3:
764 		umem = &mvq->umem3;
765 		break;
766 	}
767 
768 	MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM);
769 	MLX5_SET(destroy_umem_in, in, umem_id, umem->id);
770 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
771 		return;
772 
773 	umem_frag_buf_free(ndev, umem);
774 }
775 
/*
 * Create umems 1..3 of @mvq. If one of them fails, the umems created before
 * it are destroyed in reverse order and the error is returned.
 */
static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int num;
	int err;

	for (num = 1; num <= 3; num++) {
		err = create_umem(ndev, mvq, num);
		if (!err)
			continue;

		/* Unwind only the umems that were successfully created. */
		while (--num > 0)
			umem_destroy(ndev, mvq, num);

		return err;
	}

	return 0;
}
794 
/* Destroy umems 3, 2 and 1 of @mvq, in that order. */
static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int num = 3;

	while (num > 0) {
		umem_destroy(ndev, mvq, num);
		num--;
	}
}
802 
803 static int get_queue_type(struct mlx5_vdpa_net *ndev)
804 {
805 	u32 type_mask;
806 
807 	type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type);
808 
809 	/* prefer split queue */
810 	if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)
811 		return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT;
812 
813 	WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED));
814 
815 	return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED;
816 }
817 
818 static bool vq_is_tx(u16 idx)
819 {
820 	return idx % 2;
821 }
822 
823 static u16 get_features_12_3(u64 features)
824 {
825 	return (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << 9) |
826 	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << 8) |
827 	       (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << 7) |
828 	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_CSUM)) << 6);
829 }
830 
831 static bool counters_supported(const struct mlx5_vdpa_dev *mvdev)
832 {
833 	return MLX5_CAP_GEN_64(mvdev->mdev, general_obj_types) &
834 	       BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
835 }
836 
837 static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
838 {
839 	int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
840 	u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
841 	void *obj_context;
842 	void *cmd_hdr;
843 	void *vq_ctx;
844 	void *in;
845 	int err;
846 
847 	err = umems_create(ndev, mvq);
848 	if (err)
849 		return err;
850 
851 	in = kzalloc(inlen, GFP_KERNEL);
852 	if (!in) {
853 		err = -ENOMEM;
854 		goto err_alloc;
855 	}
856 
857 	cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);
858 
859 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
860 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
861 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
862 
863 	obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);
864 	MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
865 	MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
866 	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
867 		 get_features_12_3(ndev->mvdev.actual_features));
868 	vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
869 	MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));
870 
871 	if (vq_is_tx(mvq->index))
872 		MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);
873 
874 	MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
875 	MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
876 	MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
877 	MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
878 	MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
879 		 !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
880 	MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
881 	MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
882 	MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
883 	MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, ndev->mvdev.mr.mkey);
884 	MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
885 	MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
886 	MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
887 	MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size);
888 	MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
889 	MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
890 	MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
891 	if (counters_supported(&ndev->mvdev))
892 		MLX5_SET(virtio_q, vq_ctx, counter_set_id, mvq->counter_set_id);
893 
894 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
895 	if (err)
896 		goto err_cmd;
897 
898 	kfree(in);
899 	mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
900 
901 	return 0;
902 
903 err_cmd:
904 	kfree(in);
905 err_alloc:
906 	umems_destroy(ndev, mvq);
907 	return err;
908 }
909 
910 static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
911 {
912 	u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {};
913 	u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {};
914 
915 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode,
916 		 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
917 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id);
918 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid);
919 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type,
920 		 MLX5_OBJ_TYPE_VIRTIO_NET_Q);
921 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) {
922 		mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
923 		return;
924 	}
925 	umems_destroy(ndev, mvq);
926 }
927 
928 static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
929 {
930 	return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn;
931 }
932 
933 static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
934 {
935 	return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn;
936 }
937 
938 static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out,
939 			int *outlen, u32 qpn, u32 rqpn)
940 {
941 	void *qpc;
942 	void *pp;
943 
944 	switch (cmd) {
945 	case MLX5_CMD_OP_2RST_QP:
946 		*inlen = MLX5_ST_SZ_BYTES(qp_2rst_in);
947 		*outlen = MLX5_ST_SZ_BYTES(qp_2rst_out);
948 		*in = kzalloc(*inlen, GFP_KERNEL);
949 		*out = kzalloc(*outlen, GFP_KERNEL);
950 		if (!*in || !*out)
951 			goto outerr;
952 
953 		MLX5_SET(qp_2rst_in, *in, opcode, cmd);
954 		MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid);
955 		MLX5_SET(qp_2rst_in, *in, qpn, qpn);
956 		break;
957 	case MLX5_CMD_OP_RST2INIT_QP:
958 		*inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in);
959 		*outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out);
960 		*in = kzalloc(*inlen, GFP_KERNEL);
961 		*out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL);
962 		if (!*in || !*out)
963 			goto outerr;
964 
965 		MLX5_SET(rst2init_qp_in, *in, opcode, cmd);
966 		MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid);
967 		MLX5_SET(rst2init_qp_in, *in, qpn, qpn);
968 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
969 		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
970 		MLX5_SET(qpc, qpc, rwe, 1);
971 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
972 		MLX5_SET(ads, pp, vhca_port_num, 1);
973 		break;
974 	case MLX5_CMD_OP_INIT2RTR_QP:
975 		*inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in);
976 		*outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out);
977 		*in = kzalloc(*inlen, GFP_KERNEL);
978 		*out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL);
979 		if (!*in || !*out)
980 			goto outerr;
981 
982 		MLX5_SET(init2rtr_qp_in, *in, opcode, cmd);
983 		MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid);
984 		MLX5_SET(init2rtr_qp_in, *in, qpn, qpn);
985 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
986 		MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
987 		MLX5_SET(qpc, qpc, log_msg_max, 30);
988 		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
989 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
990 		MLX5_SET(ads, pp, fl, 1);
991 		break;
992 	case MLX5_CMD_OP_RTR2RTS_QP:
993 		*inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in);
994 		*outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out);
995 		*in = kzalloc(*inlen, GFP_KERNEL);
996 		*out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL);
997 		if (!*in || !*out)
998 			goto outerr;
999 
1000 		MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd);
1001 		MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid);
1002 		MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn);
1003 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
1004 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1005 		MLX5_SET(ads, pp, ack_timeout, 14);
1006 		MLX5_SET(qpc, qpc, retry_count, 7);
1007 		MLX5_SET(qpc, qpc, rnr_retry, 7);
1008 		break;
1009 	default:
1010 		goto outerr_nullify;
1011 	}
1012 
1013 	return;
1014 
1015 outerr:
1016 	kfree(*in);
1017 	kfree(*out);
1018 outerr_nullify:
1019 	*in = NULL;
1020 	*out = NULL;
1021 }
1022 
/* Release a pair of command buffers; either pointer may be NULL. */
static void free_inout(void *in, void *out)
{
	kfree(out);
	kfree(in);
}
1028 
1029 /* Two QPs are used by each virtqueue. One is used by the driver and one by
1030  * firmware. The fw argument indicates whether the subjected QP is the one used
1031  * by firmware.
1032  */
1033 static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd)
1034 {
1035 	int outlen;
1036 	int inlen;
1037 	void *out;
1038 	void *in;
1039 	int err;
1040 
1041 	alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw));
1042 	if (!in || !out)
1043 		return -ENOMEM;
1044 
1045 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen);
1046 	free_inout(in, out);
1047 	return err;
1048 }
1049 
1050 static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1051 {
1052 	int err;
1053 
1054 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP);
1055 	if (err)
1056 		return err;
1057 
1058 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP);
1059 	if (err)
1060 		return err;
1061 
1062 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP);
1063 	if (err)
1064 		return err;
1065 
1066 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP);
1067 	if (err)
1068 		return err;
1069 
1070 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP);
1071 	if (err)
1072 		return err;
1073 
1074 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP);
1075 	if (err)
1076 		return err;
1077 
1078 	return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP);
1079 }
1080 
/* Snapshot of firmware virtqueue object fields, as filled in by
 * query_virtqueue().
 */
struct mlx5_virtq_attr {
	u8 state;		/* "state" field of the object */
	u16 available_index;	/* "hw_available_index" field of the object */
	u16 used_index;		/* "hw_used_index" field of the object */
};
1086 
1087 static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
1088 			   struct mlx5_virtq_attr *attr)
1089 {
1090 	int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out);
1091 	u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {};
1092 	void *out;
1093 	void *obj_context;
1094 	void *cmd_hdr;
1095 	int err;
1096 
1097 	out = kzalloc(outlen, GFP_KERNEL);
1098 	if (!out)
1099 		return -ENOMEM;
1100 
1101 	cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1102 
1103 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
1104 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1105 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1106 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1107 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen);
1108 	if (err)
1109 		goto err_cmd;
1110 
1111 	obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context);
1112 	memset(attr, 0, sizeof(*attr));
1113 	attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
1114 	attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);
1115 	attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index);
1116 	kfree(out);
1117 	return 0;
1118 
1119 err_cmd:
1120 	kfree(out);
1121 	return err;
1122 }
1123 
1124 static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
1125 {
1126 	int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
1127 	u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {};
1128 	void *obj_context;
1129 	void *cmd_hdr;
1130 	void *in;
1131 	int err;
1132 
1133 	in = kzalloc(inlen, GFP_KERNEL);
1134 	if (!in)
1135 		return -ENOMEM;
1136 
1137 	cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1138 
1139 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
1140 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1141 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1142 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1143 
1144 	obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context);
1145 	MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select,
1146 		   MLX5_VIRTQ_MODIFY_MASK_STATE);
1147 	MLX5_SET(virtio_net_q_object, obj_context, state, state);
1148 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
1149 	kfree(in);
1150 	if (!err)
1151 		mvq->fw_state = state;
1152 
1153 	return err;
1154 }
1155 
1156 static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1157 {
1158 	u32 in[MLX5_ST_SZ_DW(create_virtio_q_counters_in)] = {};
1159 	u32 out[MLX5_ST_SZ_DW(create_virtio_q_counters_out)] = {};
1160 	void *cmd_hdr;
1161 	int err;
1162 
1163 	if (!counters_supported(&ndev->mvdev))
1164 		return 0;
1165 
1166 	cmd_hdr = MLX5_ADDR_OF(create_virtio_q_counters_in, in, hdr);
1167 
1168 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
1169 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
1170 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1171 
1172 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
1173 	if (err)
1174 		return err;
1175 
1176 	mvq->counter_set_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
1177 
1178 	return 0;
1179 }
1180 
1181 static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1182 {
1183 	u32 in[MLX5_ST_SZ_DW(destroy_virtio_q_counters_in)] = {};
1184 	u32 out[MLX5_ST_SZ_DW(destroy_virtio_q_counters_out)] = {};
1185 
1186 	if (!counters_supported(&ndev->mvdev))
1187 		return;
1188 
1189 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
1190 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_id, mvq->counter_set_id);
1191 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.uid, ndev->mvdev.res.uid);
1192 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
1193 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
1194 		mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id);
1195 }
1196 
1197 static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1198 {
1199 	u16 idx = mvq->index;
1200 	int err;
1201 
1202 	if (!mvq->num_ent)
1203 		return 0;
1204 
1205 	if (mvq->initialized)
1206 		return 0;
1207 
1208 	err = cq_create(ndev, idx, mvq->num_ent);
1209 	if (err)
1210 		return err;
1211 
1212 	err = qp_create(ndev, mvq, &mvq->fwqp);
1213 	if (err)
1214 		goto err_fwqp;
1215 
1216 	err = qp_create(ndev, mvq, &mvq->vqqp);
1217 	if (err)
1218 		goto err_vqqp;
1219 
1220 	err = connect_qps(ndev, mvq);
1221 	if (err)
1222 		goto err_connect;
1223 
1224 	err = counter_set_alloc(ndev, mvq);
1225 	if (err)
1226 		goto err_counter;
1227 
1228 	err = create_virtqueue(ndev, mvq);
1229 	if (err)
1230 		goto err_connect;
1231 
1232 	if (mvq->ready) {
1233 		err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
1234 		if (err) {
1235 			mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n",
1236 				       idx, err);
1237 			goto err_connect;
1238 		}
1239 	}
1240 
1241 	mvq->initialized = true;
1242 	return 0;
1243 
1244 err_connect:
1245 	counter_set_dealloc(ndev, mvq);
1246 err_counter:
1247 	qp_destroy(ndev, &mvq->vqqp);
1248 err_vqqp:
1249 	qp_destroy(ndev, &mvq->fwqp);
1250 err_fwqp:
1251 	cq_destroy(ndev, idx);
1252 	return err;
1253 }
1254 
1255 static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1256 {
1257 	struct mlx5_virtq_attr attr;
1258 
1259 	if (!mvq->initialized)
1260 		return;
1261 
1262 	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
1263 		return;
1264 
1265 	if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND))
1266 		mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n");
1267 
1268 	if (query_virtqueue(ndev, mvq, &attr)) {
1269 		mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n");
1270 		return;
1271 	}
1272 	mvq->avail_idx = attr.available_index;
1273 	mvq->used_idx = attr.used_index;
1274 }
1275 
1276 static void suspend_vqs(struct mlx5_vdpa_net *ndev)
1277 {
1278 	int i;
1279 
1280 	for (i = 0; i < ndev->mvdev.max_vqs; i++)
1281 		suspend_vq(ndev, &ndev->vqs[i]);
1282 }
1283 
1284 static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1285 {
1286 	if (!mvq->initialized)
1287 		return;
1288 
1289 	suspend_vq(ndev, mvq);
1290 	destroy_virtqueue(ndev, mvq);
1291 	counter_set_dealloc(ndev, mvq);
1292 	qp_destroy(ndev, &mvq->vqqp);
1293 	qp_destroy(ndev, &mvq->fwqp);
1294 	cq_destroy(ndev, mvq->index);
1295 	mvq->initialized = false;
1296 }
1297 
1298 static int create_rqt(struct mlx5_vdpa_net *ndev)
1299 {
1300 	__be32 *list;
1301 	void *rqtc;
1302 	int inlen;
1303 	void *in;
1304 	int i, j;
1305 	int err;
1306 
1307 	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + ndev->rqt_size * MLX5_ST_SZ_BYTES(rq_num);
1308 	in = kzalloc(inlen, GFP_KERNEL);
1309 	if (!in)
1310 		return -ENOMEM;
1311 
1312 	MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid);
1313 	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
1314 
1315 	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1316 	MLX5_SET(rqtc, rqtc, rqt_max_size, ndev->rqt_size);
1317 	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1318 	for (i = 0, j = 0; i < ndev->rqt_size; i++, j += 2)
1319 		list[i] = cpu_to_be32(ndev->vqs[j % ndev->cur_num_vqs].virtq_id);
1320 
1321 	MLX5_SET(rqtc, rqtc, rqt_actual_size, ndev->rqt_size);
1322 	err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
1323 	kfree(in);
1324 	if (err)
1325 		return err;
1326 
1327 	return 0;
1328 }
1329 
1330 #define MLX5_MODIFY_RQT_NUM_RQS ((u64)1)
1331 
1332 static int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
1333 {
1334 	__be32 *list;
1335 	void *rqtc;
1336 	int inlen;
1337 	void *in;
1338 	int i, j;
1339 	int err;
1340 
1341 	inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + ndev->rqt_size * MLX5_ST_SZ_BYTES(rq_num);
1342 	in = kzalloc(inlen, GFP_KERNEL);
1343 	if (!in)
1344 		return -ENOMEM;
1345 
1346 	MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid);
1347 	MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS);
1348 	rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
1349 	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1350 
1351 	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1352 	for (i = 0, j = 0; i < ndev->rqt_size; i++, j += 2)
1353 		list[i] = cpu_to_be32(ndev->vqs[j % num].virtq_id);
1354 
1355 	MLX5_SET(rqtc, rqtc, rqt_actual_size, ndev->rqt_size);
1356 	err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn);
1357 	kfree(in);
1358 	if (err)
1359 		return err;
1360 
1361 	return 0;
1362 }
1363 
1364 static void destroy_rqt(struct mlx5_vdpa_net *ndev)
1365 {
1366 	mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
1367 }
1368 
1369 static int create_tir(struct mlx5_vdpa_net *ndev)
1370 {
1371 #define HASH_IP_L4PORTS                                                                            \
1372 	(MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT |  \
1373 	 MLX5_HASH_FIELD_SEL_L4_DPORT)
1374 	static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
1375 						   0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
1376 						   0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
1377 						   0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
1378 						   0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a };
1379 	void *rss_key;
1380 	void *outer;
1381 	void *tirc;
1382 	void *in;
1383 	int err;
1384 
1385 	in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL);
1386 	if (!in)
1387 		return -ENOMEM;
1388 
1389 	MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid);
1390 	tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
1391 	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
1392 
1393 	MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
1394 	MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
1395 	rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
1396 	memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key));
1397 
1398 	outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
1399 	MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4);
1400 	MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP);
1401 	MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS);
1402 
1403 	MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn);
1404 	MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn);
1405 
1406 	err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn);
1407 	kfree(in);
1408 	return err;
1409 }
1410 
1411 static void destroy_tir(struct mlx5_vdpa_net *ndev)
1412 {
1413 	mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn);
1414 }
1415 
/* Sizing of the RX flow table created by setup_steering(): the first is used
 * as the table's max_fte, the second as its autogroup max_num_groups.
 */
#define MAX_STEERING_ENT 0x8000
#define MAX_STEERING_GROUPS 2
1418 
1419 static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac,
1420 					u16 vid, bool tagged,
1421 					struct mlx5_flow_handle **ucast,
1422 					struct mlx5_flow_handle **mcast)
1423 {
1424 	struct mlx5_flow_destination dest = {};
1425 	struct mlx5_flow_act flow_act = {};
1426 	struct mlx5_flow_handle *rule;
1427 	struct mlx5_flow_spec *spec;
1428 	void *headers_c;
1429 	void *headers_v;
1430 	u8 *dmac_c;
1431 	u8 *dmac_v;
1432 	int err;
1433 
1434 	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1435 	if (!spec)
1436 		return -ENOMEM;
1437 
1438 	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
1439 	headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
1440 	headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
1441 	dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16);
1442 	dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16);
1443 	memset(dmac_c, 0xff, ETH_ALEN);
1444 	ether_addr_copy(dmac_v, mac);
1445 	MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
1446 	if (tagged) {
1447 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);
1448 		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid);
1449 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, vid);
1450 	}
1451 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1452 	dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1453 	dest.tir_num = ndev->res.tirn;
1454 	rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, &dest, 1);
1455 	if (IS_ERR(rule))
1456 		return PTR_ERR(rule);
1457 
1458 	*ucast = rule;
1459 
1460 	memset(dmac_c, 0, ETH_ALEN);
1461 	memset(dmac_v, 0, ETH_ALEN);
1462 	dmac_c[0] = 1;
1463 	dmac_v[0] = 1;
1464 	rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, &dest, 1);
1465 	kvfree(spec);
1466 	if (IS_ERR(rule)) {
1467 		err = PTR_ERR(rule);
1468 		goto err_mcast;
1469 	}
1470 
1471 	*mcast = rule;
1472 	return 0;
1473 
1474 err_mcast:
1475 	mlx5_del_flow_rules(*ucast);
1476 	return err;
1477 }
1478 
/* Remove a unicast/multicast rule pair, unicast rule first.
 * @ndev is not used.
 */
static void mlx5_vdpa_del_mac_vlan_rules(struct mlx5_vdpa_net *ndev,
					 struct mlx5_flow_handle *ucast,
					 struct mlx5_flow_handle *mcast)
{
	mlx5_del_flow_rules(ucast);

	mlx5_del_flow_rules(mcast);
}
1486 
1487 static u64 search_val(u8 *mac, u16 vlan, bool tagged)
1488 {
1489 	u64 val;
1490 
1491 	if (!tagged)
1492 		vlan = MLX5V_UNTAGGED;
1493 
1494 	val = (u64)vlan << 48 |
1495 	      (u64)mac[0] << 40 |
1496 	      (u64)mac[1] << 32 |
1497 	      (u64)mac[2] << 24 |
1498 	      (u64)mac[3] << 16 |
1499 	      (u64)mac[4] << 8 |
1500 	      (u64)mac[5];
1501 
1502 	return val;
1503 }
1504 
1505 static struct macvlan_node *mac_vlan_lookup(struct mlx5_vdpa_net *ndev, u64 value)
1506 {
1507 	struct macvlan_node *pos;
1508 	u32 idx;
1509 
1510 	idx = hash_64(value, 8); // tbd 8
1511 	hlist_for_each_entry(pos, &ndev->macvlan_hash[idx], hlist) {
1512 		if (pos->macvlan == value)
1513 			return pos;
1514 	}
1515 	return NULL;
1516 }
1517 
1518 static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged) // vlan -> vid
1519 {
1520 	struct macvlan_node *ptr;
1521 	u64 val;
1522 	u32 idx;
1523 	int err;
1524 
1525 	val = search_val(mac, vlan, tagged);
1526 	if (mac_vlan_lookup(ndev, val))
1527 		return -EEXIST;
1528 
1529 	ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);
1530 	if (!ptr)
1531 		return -ENOMEM;
1532 
1533 	err = mlx5_vdpa_add_mac_vlan_rules(ndev, ndev->config.mac, vlan, tagged,
1534 					   &ptr->ucast_rule, &ptr->mcast_rule);
1535 	if (err)
1536 		goto err_add;
1537 
1538 	ptr->macvlan = val;
1539 	idx = hash_64(val, 8);
1540 	hlist_add_head(&ptr->hlist, &ndev->macvlan_hash[idx]);
1541 	return 0;
1542 
1543 err_add:
1544 	kfree(ptr);
1545 	return err;
1546 }
1547 
1548 static void mac_vlan_del(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged)
1549 {
1550 	struct macvlan_node *ptr;
1551 
1552 	ptr = mac_vlan_lookup(ndev, search_val(mac, vlan, tagged));
1553 	if (!ptr)
1554 		return;
1555 
1556 	hlist_del(&ptr->hlist);
1557 	mlx5_vdpa_del_mac_vlan_rules(ndev, ptr->ucast_rule, ptr->mcast_rule);
1558 	kfree(ptr);
1559 }
1560 
1561 static void clear_mac_vlan_table(struct mlx5_vdpa_net *ndev)
1562 {
1563 	struct macvlan_node *pos;
1564 	struct hlist_node *n;
1565 	int i;
1566 
1567 	for (i = 0; i < MLX5V_MACVLAN_SIZE; i++) {
1568 		hlist_for_each_entry_safe(pos, n, &ndev->macvlan_hash[i], hlist) {
1569 			hlist_del(&pos->hlist);
1570 			mlx5_vdpa_del_mac_vlan_rules(ndev, pos->ucast_rule, pos->mcast_rule);
1571 			kfree(pos);
1572 		}
1573 	}
1574 }
1575 
1576 static int setup_steering(struct mlx5_vdpa_net *ndev)
1577 {
1578 	struct mlx5_flow_table_attr ft_attr = {};
1579 	struct mlx5_flow_namespace *ns;
1580 	int err;
1581 
1582 	ft_attr.max_fte = MAX_STEERING_ENT;
1583 	ft_attr.autogroup.max_num_groups = MAX_STEERING_GROUPS;
1584 
1585 	ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS);
1586 	if (!ns) {
1587 		mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n");
1588 		return -EOPNOTSUPP;
1589 	}
1590 
1591 	ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
1592 	if (IS_ERR(ndev->rxft)) {
1593 		mlx5_vdpa_warn(&ndev->mvdev, "failed to create flow table\n");
1594 		return PTR_ERR(ndev->rxft);
1595 	}
1596 
1597 	err = mac_vlan_add(ndev, ndev->config.mac, 0, false);
1598 	if (err)
1599 		goto err_add;
1600 
1601 	return 0;
1602 
1603 err_add:
1604 	mlx5_destroy_flow_table(ndev->rxft);
1605 	return err;
1606 }
1607 
1608 static void teardown_steering(struct mlx5_vdpa_net *ndev)
1609 {
1610 	clear_mac_vlan_table(ndev);
1611 	mlx5_destroy_flow_table(ndev->rxft);
1612 }
1613 
1614 static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1615 {
1616 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1617 	struct mlx5_control_vq *cvq = &mvdev->cvq;
1618 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1619 	struct mlx5_core_dev *pfmdev;
1620 	size_t read;
1621 	u8 mac[ETH_ALEN], mac_back[ETH_ALEN];
1622 
1623 	pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
1624 	switch (cmd) {
1625 	case VIRTIO_NET_CTRL_MAC_ADDR_SET:
1626 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN);
1627 		if (read != ETH_ALEN)
1628 			break;
1629 
1630 		if (!memcmp(ndev->config.mac, mac, 6)) {
1631 			status = VIRTIO_NET_OK;
1632 			break;
1633 		}
1634 
1635 		if (is_zero_ether_addr(mac))
1636 			break;
1637 
1638 		if (!is_zero_ether_addr(ndev->config.mac)) {
1639 			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
1640 				mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
1641 					       ndev->config.mac);
1642 				break;
1643 			}
1644 		}
1645 
1646 		if (mlx5_mpfs_add_mac(pfmdev, mac)) {
1647 			mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
1648 				       mac);
1649 			break;
1650 		}
1651 
1652 		/* backup the original mac address so that if failed to add the forward rules
1653 		 * we could restore it
1654 		 */
1655 		memcpy(mac_back, ndev->config.mac, ETH_ALEN);
1656 
1657 		memcpy(ndev->config.mac, mac, ETH_ALEN);
1658 
1659 		/* Need recreate the flow table entry, so that the packet could forward back
1660 		 */
1661 		mac_vlan_del(ndev, ndev->config.mac, 0, false);
1662 
1663 		if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) {
1664 			mlx5_vdpa_warn(mvdev, "failed to insert forward rules, try to restore\n");
1665 
1666 			/* Although it hardly run here, we still need double check */
1667 			if (is_zero_ether_addr(mac_back)) {
1668 				mlx5_vdpa_warn(mvdev, "restore mac failed: Original MAC is zero\n");
1669 				break;
1670 			}
1671 
1672 			/* Try to restore original mac address to MFPS table, and try to restore
1673 			 * the forward rule entry.
1674 			 */
1675 			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
1676 				mlx5_vdpa_warn(mvdev, "restore mac failed: delete MAC %pM from MPFS table failed\n",
1677 					       ndev->config.mac);
1678 			}
1679 
1680 			if (mlx5_mpfs_add_mac(pfmdev, mac_back)) {
1681 				mlx5_vdpa_warn(mvdev, "restore mac failed: insert old MAC %pM into MPFS table failed\n",
1682 					       mac_back);
1683 			}
1684 
1685 			memcpy(ndev->config.mac, mac_back, ETH_ALEN);
1686 
1687 			if (mac_vlan_add(ndev, ndev->config.mac, 0, false))
1688 				mlx5_vdpa_warn(mvdev, "restore forward rules failed: insert forward rules failed\n");
1689 
1690 			break;
1691 		}
1692 
1693 		status = VIRTIO_NET_OK;
1694 		break;
1695 
1696 	default:
1697 		break;
1698 	}
1699 
1700 	return status;
1701 }
1702 
1703 static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps)
1704 {
1705 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1706 	int cur_qps = ndev->cur_num_vqs / 2;
1707 	int err;
1708 	int i;
1709 
1710 	if (cur_qps > newqps) {
1711 		err = modify_rqt(ndev, 2 * newqps);
1712 		if (err)
1713 			return err;
1714 
1715 		for (i = ndev->cur_num_vqs - 1; i >= 2 * newqps; i--)
1716 			teardown_vq(ndev, &ndev->vqs[i]);
1717 
1718 		ndev->cur_num_vqs = 2 * newqps;
1719 	} else {
1720 		ndev->cur_num_vqs = 2 * newqps;
1721 		for (i = cur_qps * 2; i < 2 * newqps; i++) {
1722 			err = setup_vq(ndev, &ndev->vqs[i]);
1723 			if (err)
1724 				goto clean_added;
1725 		}
1726 		err = modify_rqt(ndev, 2 * newqps);
1727 		if (err)
1728 			goto clean_added;
1729 	}
1730 	return 0;
1731 
1732 clean_added:
1733 	for (--i; i >= 2 * cur_qps; --i)
1734 		teardown_vq(ndev, &ndev->vqs[i]);
1735 
1736 	ndev->cur_num_vqs = 2 * cur_qps;
1737 
1738 	return err;
1739 }
1740 
1741 static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1742 {
1743 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1744 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1745 	struct mlx5_control_vq *cvq = &mvdev->cvq;
1746 	struct virtio_net_ctrl_mq mq;
1747 	size_t read;
1748 	u16 newqps;
1749 
1750 	switch (cmd) {
1751 	case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
1752 		/* This mq feature check aligns with pre-existing userspace
1753 		 * implementation.
1754 		 *
1755 		 * Without it, an untrusted driver could fake a multiqueue config
1756 		 * request down to a non-mq device that may cause kernel to
1757 		 * panic due to uninitialized resources for extra vqs. Even with
1758 		 * a well behaving guest driver, it is not expected to allow
1759 		 * changing the number of vqs on a non-mq device.
1760 		 */
1761 		if (!MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ))
1762 			break;
1763 
1764 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq));
1765 		if (read != sizeof(mq))
1766 			break;
1767 
1768 		newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs);
1769 		if (newqps < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1770 		    newqps > ndev->rqt_size)
1771 			break;
1772 
1773 		if (ndev->cur_num_vqs == 2 * newqps) {
1774 			status = VIRTIO_NET_OK;
1775 			break;
1776 		}
1777 
1778 		if (!change_num_qps(mvdev, newqps))
1779 			status = VIRTIO_NET_OK;
1780 
1781 		break;
1782 	default:
1783 		break;
1784 	}
1785 
1786 	return status;
1787 }
1788 
1789 static virtio_net_ctrl_ack handle_ctrl_vlan(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1790 {
1791 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1792 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1793 	struct mlx5_control_vq *cvq = &mvdev->cvq;
1794 	__virtio16 vlan;
1795 	size_t read;
1796 	u16 id;
1797 
1798 	switch (cmd) {
1799 	case VIRTIO_NET_CTRL_VLAN_ADD:
1800 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
1801 		if (read != sizeof(vlan))
1802 			break;
1803 
1804 		id = mlx5vdpa16_to_cpu(mvdev, vlan);
1805 		if (mac_vlan_add(ndev, ndev->config.mac, id, true))
1806 			break;
1807 
1808 		status = VIRTIO_NET_OK;
1809 		break;
1810 	case VIRTIO_NET_CTRL_VLAN_DEL:
1811 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
1812 		if (read != sizeof(vlan))
1813 			break;
1814 
1815 		id = mlx5vdpa16_to_cpu(mvdev, vlan);
1816 		mac_vlan_del(ndev, ndev->config.mac, id, true);
1817 		status = VIRTIO_NET_OK;
1818 		break;
1819 	default:
1820 		break;
1821 	}
1822 
1823 	return status;
1824 }
1825 
1826 static void mlx5_cvq_kick_handler(struct work_struct *work)
1827 {
1828 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1829 	struct virtio_net_ctrl_hdr ctrl;
1830 	struct mlx5_vdpa_wq_ent *wqent;
1831 	struct mlx5_vdpa_dev *mvdev;
1832 	struct mlx5_control_vq *cvq;
1833 	struct mlx5_vdpa_net *ndev;
1834 	size_t read, write;
1835 	int err;
1836 
1837 	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
1838 	mvdev = wqent->mvdev;
1839 	ndev = to_mlx5_vdpa_ndev(mvdev);
1840 	cvq = &mvdev->cvq;
1841 
1842 	down_write(&ndev->reslock);
1843 
1844 	if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
1845 		goto out;
1846 
1847 	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
1848 		goto out;
1849 
1850 	if (!cvq->ready)
1851 		goto out;
1852 
1853 	while (true) {
1854 		err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head,
1855 					   GFP_ATOMIC);
1856 		if (err <= 0)
1857 			break;
1858 
1859 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl));
1860 		if (read != sizeof(ctrl))
1861 			break;
1862 
1863 		cvq->received_desc++;
1864 		switch (ctrl.class) {
1865 		case VIRTIO_NET_CTRL_MAC:
1866 			status = handle_ctrl_mac(mvdev, ctrl.cmd);
1867 			break;
1868 		case VIRTIO_NET_CTRL_MQ:
1869 			status = handle_ctrl_mq(mvdev, ctrl.cmd);
1870 			break;
1871 		case VIRTIO_NET_CTRL_VLAN:
1872 			status = handle_ctrl_vlan(mvdev, ctrl.cmd);
1873 			break;
1874 		default:
1875 			break;
1876 		}
1877 
1878 		/* Make sure data is written before advancing index */
1879 		smp_wmb();
1880 
1881 		write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status));
1882 		vringh_complete_iotlb(&cvq->vring, cvq->head, write);
1883 		vringh_kiov_cleanup(&cvq->riov);
1884 		vringh_kiov_cleanup(&cvq->wiov);
1885 
1886 		if (vringh_need_notify_iotlb(&cvq->vring))
1887 			vringh_notify(&cvq->vring);
1888 
1889 		cvq->completed_desc++;
1890 		queue_work(mvdev->wq, &wqent->work);
1891 		break;
1892 	}
1893 
1894 out:
1895 	up_write(&ndev->reslock);
1896 }
1897 
1898 static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
1899 {
1900 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1901 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1902 	struct mlx5_vdpa_virtqueue *mvq;
1903 
1904 	if (!is_index_valid(mvdev, idx))
1905 		return;
1906 
1907 	if (unlikely(is_ctrl_vq_idx(mvdev, idx))) {
1908 		if (!mvdev->wq || !mvdev->cvq.ready)
1909 			return;
1910 
1911 		queue_work(mvdev->wq, &ndev->cvq_ent.work);
1912 		return;
1913 	}
1914 
1915 	mvq = &ndev->vqs[idx];
1916 	if (unlikely(!mvq->ready))
1917 		return;
1918 
1919 	iowrite16(idx, ndev->mvdev.res.kick_addr);
1920 }
1921 
1922 static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area,
1923 				    u64 driver_area, u64 device_area)
1924 {
1925 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1926 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1927 	struct mlx5_vdpa_virtqueue *mvq;
1928 
1929 	if (!is_index_valid(mvdev, idx))
1930 		return -EINVAL;
1931 
1932 	if (is_ctrl_vq_idx(mvdev, idx)) {
1933 		mvdev->cvq.desc_addr = desc_area;
1934 		mvdev->cvq.device_addr = device_area;
1935 		mvdev->cvq.driver_addr = driver_area;
1936 		return 0;
1937 	}
1938 
1939 	mvq = &ndev->vqs[idx];
1940 	mvq->desc_addr = desc_area;
1941 	mvq->device_addr = device_area;
1942 	mvq->driver_addr = driver_area;
1943 	return 0;
1944 }
1945 
1946 static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
1947 {
1948 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1949 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1950 	struct mlx5_vdpa_virtqueue *mvq;
1951 
1952 	if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
1953 		return;
1954 
1955 	mvq = &ndev->vqs[idx];
1956 	mvq->num_ent = num;
1957 }
1958 
1959 static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb)
1960 {
1961 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1962 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1963 
1964 	ndev->event_cbs[idx] = *cb;
1965 	if (is_ctrl_vq_idx(mvdev, idx))
1966 		mvdev->cvq.event_cb = *cb;
1967 }
1968 
1969 static void mlx5_cvq_notify(struct vringh *vring)
1970 {
1971 	struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring);
1972 
1973 	if (!cvq->event_cb.callback)
1974 		return;
1975 
1976 	cvq->event_cb.callback(cvq->event_cb.private);
1977 }
1978 
1979 static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready)
1980 {
1981 	struct mlx5_control_vq *cvq = &mvdev->cvq;
1982 
1983 	cvq->ready = ready;
1984 	if (!ready)
1985 		return;
1986 
1987 	cvq->vring.notify = mlx5_cvq_notify;
1988 }
1989 
1990 static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
1991 {
1992 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1993 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1994 	struct mlx5_vdpa_virtqueue *mvq;
1995 
1996 	if (!mvdev->actual_features)
1997 		return;
1998 
1999 	if (!is_index_valid(mvdev, idx))
2000 		return;
2001 
2002 	if (is_ctrl_vq_idx(mvdev, idx)) {
2003 		set_cvq_ready(mvdev, ready);
2004 		return;
2005 	}
2006 
2007 	mvq = &ndev->vqs[idx];
2008 	if (!ready)
2009 		suspend_vq(ndev, mvq);
2010 
2011 	mvq->ready = ready;
2012 }
2013 
2014 static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
2015 {
2016 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2017 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2018 
2019 	if (!is_index_valid(mvdev, idx))
2020 		return false;
2021 
2022 	if (is_ctrl_vq_idx(mvdev, idx))
2023 		return mvdev->cvq.ready;
2024 
2025 	return ndev->vqs[idx].ready;
2026 }
2027 
2028 static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
2029 				  const struct vdpa_vq_state *state)
2030 {
2031 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2032 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2033 	struct mlx5_vdpa_virtqueue *mvq;
2034 
2035 	if (!is_index_valid(mvdev, idx))
2036 		return -EINVAL;
2037 
2038 	if (is_ctrl_vq_idx(mvdev, idx)) {
2039 		mvdev->cvq.vring.last_avail_idx = state->split.avail_index;
2040 		return 0;
2041 	}
2042 
2043 	mvq = &ndev->vqs[idx];
2044 	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
2045 		mlx5_vdpa_warn(mvdev, "can't modify available index\n");
2046 		return -EINVAL;
2047 	}
2048 
2049 	mvq->used_idx = state->split.avail_index;
2050 	mvq->avail_idx = state->split.avail_index;
2051 	return 0;
2052 }
2053 
2054 static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state)
2055 {
2056 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2057 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2058 	struct mlx5_vdpa_virtqueue *mvq;
2059 	struct mlx5_virtq_attr attr;
2060 	int err;
2061 
2062 	if (!is_index_valid(mvdev, idx))
2063 		return -EINVAL;
2064 
2065 	if (is_ctrl_vq_idx(mvdev, idx)) {
2066 		state->split.avail_index = mvdev->cvq.vring.last_avail_idx;
2067 		return 0;
2068 	}
2069 
2070 	mvq = &ndev->vqs[idx];
2071 	/* If the virtq object was destroyed, use the value saved at
2072 	 * the last minute of suspend_vq. This caters for userspace
2073 	 * that cares about emulating the index after vq is stopped.
2074 	 */
2075 	if (!mvq->initialized) {
2076 		/* Firmware returns a wrong value for the available index.
2077 		 * Since both values should be identical, we take the value of
2078 		 * used_idx which is reported correctly.
2079 		 */
2080 		state->split.avail_index = mvq->used_idx;
2081 		return 0;
2082 	}
2083 
2084 	err = query_virtqueue(ndev, mvq, &attr);
2085 	if (err) {
2086 		mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n");
2087 		return err;
2088 	}
2089 	state->split.avail_index = attr.used_index;
2090 	return 0;
2091 }
2092 
2093 static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
2094 {
2095 	return PAGE_SIZE;
2096 }
2097 
2098 static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdpa, u16 idx)
2099 {
2100 	return 0;
2101 }
2102 
/*
 * Feature bits as they appear in the 16-bit device feature mask that
 * mlx_to_vritio_features() translates into virtio-net feature bits.
 */
enum {
	MLX5_VIRTIO_NET_F_GUEST_CSUM	= 1 << 9,
	MLX5_VIRTIO_NET_F_CSUM		= 1 << 10,
	MLX5_VIRTIO_NET_F_HOST_TSO6	= 1 << 11,
	MLX5_VIRTIO_NET_F_HOST_TSO4	= 1 << 12,
};
2108 
2109 static u64 mlx_to_vritio_features(u16 dev_features)
2110 {
2111 	u64 result = 0;
2112 
2113 	if (dev_features & MLX5_VIRTIO_NET_F_GUEST_CSUM)
2114 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM);
2115 	if (dev_features & MLX5_VIRTIO_NET_F_CSUM)
2116 		result |= BIT_ULL(VIRTIO_NET_F_CSUM);
2117 	if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO6)
2118 		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6);
2119 	if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO4)
2120 		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4);
2121 
2122 	return result;
2123 }
2124 
2125 static u64 get_supported_features(struct mlx5_core_dev *mdev)
2126 {
2127 	u64 mlx_vdpa_features = 0;
2128 	u16 dev_features;
2129 
2130 	dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mdev, device_features_bits_mask);
2131 	mlx_vdpa_features |= mlx_to_vritio_features(dev_features);
2132 	if (MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_version_1_0))
2133 		mlx_vdpa_features |= BIT_ULL(VIRTIO_F_VERSION_1);
2134 	mlx_vdpa_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
2135 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
2136 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
2137 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ);
2138 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS);
2139 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU);
2140 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VLAN);
2141 
2142 	return mlx_vdpa_features;
2143 }
2144 
2145 static u64 mlx5_vdpa_get_device_features(struct vdpa_device *vdev)
2146 {
2147 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2148 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2149 
2150 	print_features(mvdev, ndev->mvdev.mlx_features, false);
2151 	return ndev->mvdev.mlx_features;
2152 }
2153 
2154 static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features)
2155 {
2156 	/* Minimum features to expect */
2157 	if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
2158 		return -EOPNOTSUPP;
2159 
2160 	/* Double check features combination sent down by the driver.
2161 	 * Fail invalid features due to absence of the depended feature.
2162 	 *
2163 	 * Per VIRTIO v1.1 specification, section 5.1.3.1 Feature bit
2164 	 * requirements: "VIRTIO_NET_F_MQ Requires VIRTIO_NET_F_CTRL_VQ".
2165 	 * By failing the invalid features sent down by untrusted drivers,
2166 	 * we're assured the assumption made upon is_index_valid() and
2167 	 * is_ctrl_vq_idx() will not be compromised.
2168 	 */
2169 	if ((features & (BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) ==
2170             BIT_ULL(VIRTIO_NET_F_MQ))
2171 		return -EINVAL;
2172 
2173 	return 0;
2174 }
2175 
2176 static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
2177 {
2178 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2179 	int err;
2180 	int i;
2181 
2182 	for (i = 0; i < mvdev->max_vqs; i++) {
2183 		err = setup_vq(ndev, &ndev->vqs[i]);
2184 		if (err)
2185 			goto err_vq;
2186 	}
2187 
2188 	return 0;
2189 
2190 err_vq:
2191 	for (--i; i >= 0; i--)
2192 		teardown_vq(ndev, &ndev->vqs[i]);
2193 
2194 	return err;
2195 }
2196 
2197 static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
2198 {
2199 	struct mlx5_vdpa_virtqueue *mvq;
2200 	int i;
2201 
2202 	for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) {
2203 		mvq = &ndev->vqs[i];
2204 		if (!mvq->initialized)
2205 			continue;
2206 
2207 		teardown_vq(ndev, mvq);
2208 	}
2209 }
2210 
2211 static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
2212 {
2213 	if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) {
2214 		if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) {
2215 			/* MQ supported. CVQ index is right above the last data virtqueue's */
2216 			mvdev->max_idx = mvdev->max_vqs;
2217 		} else {
2218 			/* Only CVQ supportted. data virtqueues occupy indices 0 and 1.
2219 			 * CVQ gets index 2
2220 			 */
2221 			mvdev->max_idx = 2;
2222 		}
2223 	} else {
2224 		/* Two data virtqueues only: one for rx and one for tx */
2225 		mvdev->max_idx = 1;
2226 	}
2227 }
2228 
2229 static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features)
2230 {
2231 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2232 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2233 	int err;
2234 
2235 	print_features(mvdev, features, true);
2236 
2237 	err = verify_driver_features(mvdev, features);
2238 	if (err)
2239 		return err;
2240 
2241 	ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
2242 	if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ))
2243 		ndev->rqt_size = mlx5vdpa16_to_cpu(mvdev, ndev->config.max_virtqueue_pairs);
2244 	else
2245 		ndev->rqt_size = 1;
2246 
2247 	ndev->cur_num_vqs = 2 * ndev->rqt_size;
2248 
2249 	update_cvq_info(mvdev);
2250 	return err;
2251 }
2252 
2253 static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb)
2254 {
2255 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2256 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2257 
2258 	ndev->config_cb = *cb;
2259 }
2260 
2261 #define MLX5_VDPA_MAX_VQ_ENTRIES 256
2262 static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev)
2263 {
2264 	return MLX5_VDPA_MAX_VQ_ENTRIES;
2265 }
2266 
2267 static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev)
2268 {
2269 	return VIRTIO_ID_NET;
2270 }
2271 
2272 static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev)
2273 {
2274 	return PCI_VENDOR_ID_MELLANOX;
2275 }
2276 
2277 static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev)
2278 {
2279 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2280 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2281 
2282 	print_status(mvdev, ndev->mvdev.status, false);
2283 	return ndev->mvdev.status;
2284 }
2285 
2286 static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
2287 {
2288 	struct mlx5_vq_restore_info *ri = &mvq->ri;
2289 	struct mlx5_virtq_attr attr = {};
2290 	int err;
2291 
2292 	if (mvq->initialized) {
2293 		err = query_virtqueue(ndev, mvq, &attr);
2294 		if (err)
2295 			return err;
2296 	}
2297 
2298 	ri->avail_index = attr.available_index;
2299 	ri->used_index = attr.used_index;
2300 	ri->ready = mvq->ready;
2301 	ri->num_ent = mvq->num_ent;
2302 	ri->desc_addr = mvq->desc_addr;
2303 	ri->device_addr = mvq->device_addr;
2304 	ri->driver_addr = mvq->driver_addr;
2305 	ri->restore = true;
2306 	return 0;
2307 }
2308 
2309 static int save_channels_info(struct mlx5_vdpa_net *ndev)
2310 {
2311 	int i;
2312 
2313 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
2314 		memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri));
2315 		save_channel_info(ndev, &ndev->vqs[i]);
2316 	}
2317 	return 0;
2318 }
2319 
2320 static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev)
2321 {
2322 	int i;
2323 
2324 	for (i = 0; i < ndev->mvdev.max_vqs; i++)
2325 		memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
2326 }
2327 
2328 static void restore_channels_info(struct mlx5_vdpa_net *ndev)
2329 {
2330 	struct mlx5_vdpa_virtqueue *mvq;
2331 	struct mlx5_vq_restore_info *ri;
2332 	int i;
2333 
2334 	mlx5_clear_vqs(ndev);
2335 	init_mvqs(ndev);
2336 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
2337 		mvq = &ndev->vqs[i];
2338 		ri = &mvq->ri;
2339 		if (!ri->restore)
2340 			continue;
2341 
2342 		mvq->avail_idx = ri->avail_index;
2343 		mvq->used_idx = ri->used_index;
2344 		mvq->ready = ri->ready;
2345 		mvq->num_ent = ri->num_ent;
2346 		mvq->desc_addr = ri->desc_addr;
2347 		mvq->device_addr = ri->device_addr;
2348 		mvq->driver_addr = ri->driver_addr;
2349 	}
2350 }
2351 
2352 static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
2353 {
2354 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2355 	int err;
2356 
2357 	suspend_vqs(ndev);
2358 	err = save_channels_info(ndev);
2359 	if (err)
2360 		goto err_mr;
2361 
2362 	teardown_driver(ndev);
2363 	mlx5_vdpa_destroy_mr(mvdev);
2364 	err = mlx5_vdpa_create_mr(mvdev, iotlb);
2365 	if (err)
2366 		goto err_mr;
2367 
2368 	if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
2369 		goto err_mr;
2370 
2371 	restore_channels_info(ndev);
2372 	err = setup_driver(mvdev);
2373 	if (err)
2374 		goto err_setup;
2375 
2376 	return 0;
2377 
2378 err_setup:
2379 	mlx5_vdpa_destroy_mr(mvdev);
2380 err_mr:
2381 	return err;
2382 }
2383 
2384 /* reslock must be held for this function */
2385 static int setup_driver(struct mlx5_vdpa_dev *mvdev)
2386 {
2387 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2388 	int err;
2389 
2390 	WARN_ON(!rwsem_is_locked(&ndev->reslock));
2391 
2392 	if (ndev->setup) {
2393 		mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n");
2394 		err = 0;
2395 		goto out;
2396 	}
2397 	err = setup_virtqueues(mvdev);
2398 	if (err) {
2399 		mlx5_vdpa_warn(mvdev, "setup_virtqueues\n");
2400 		goto out;
2401 	}
2402 
2403 	err = create_rqt(ndev);
2404 	if (err) {
2405 		mlx5_vdpa_warn(mvdev, "create_rqt\n");
2406 		goto err_rqt;
2407 	}
2408 
2409 	err = create_tir(ndev);
2410 	if (err) {
2411 		mlx5_vdpa_warn(mvdev, "create_tir\n");
2412 		goto err_tir;
2413 	}
2414 
2415 	err = setup_steering(ndev);
2416 	if (err) {
2417 		mlx5_vdpa_warn(mvdev, "setup_steering\n");
2418 		goto err_fwd;
2419 	}
2420 	ndev->setup = true;
2421 
2422 	return 0;
2423 
2424 err_fwd:
2425 	destroy_tir(ndev);
2426 err_tir:
2427 	destroy_rqt(ndev);
2428 err_rqt:
2429 	teardown_virtqueues(ndev);
2430 out:
2431 	return err;
2432 }
2433 
2434 /* reslock must be held for this function */
2435 static void teardown_driver(struct mlx5_vdpa_net *ndev)
2436 {
2437 
2438 	WARN_ON(!rwsem_is_locked(&ndev->reslock));
2439 
2440 	if (!ndev->setup)
2441 		return;
2442 
2443 	teardown_steering(ndev);
2444 	destroy_tir(ndev);
2445 	destroy_rqt(ndev);
2446 	teardown_virtqueues(ndev);
2447 	ndev->setup = false;
2448 }
2449 
2450 static void clear_vqs_ready(struct mlx5_vdpa_net *ndev)
2451 {
2452 	int i;
2453 
2454 	for (i = 0; i < ndev->mvdev.max_vqs; i++)
2455 		ndev->vqs[i].ready = false;
2456 
2457 	ndev->mvdev.cvq.ready = false;
2458 }
2459 
2460 static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
2461 {
2462 	struct mlx5_control_vq *cvq = &mvdev->cvq;
2463 	int err = 0;
2464 
2465 	if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))
2466 		err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
2467 					MLX5_CVQ_MAX_ENT, false,
2468 					(struct vring_desc *)(uintptr_t)cvq->desc_addr,
2469 					(struct vring_avail *)(uintptr_t)cvq->driver_addr,
2470 					(struct vring_used *)(uintptr_t)cvq->device_addr);
2471 
2472 	return err;
2473 }
2474 
2475 static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
2476 {
2477 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2478 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2479 	int err;
2480 
2481 	print_status(mvdev, status, true);
2482 
2483 	down_write(&ndev->reslock);
2484 
2485 	if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
2486 		if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
2487 			err = setup_cvq_vring(mvdev);
2488 			if (err) {
2489 				mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n");
2490 				goto err_setup;
2491 			}
2492 			err = setup_driver(mvdev);
2493 			if (err) {
2494 				mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
2495 				goto err_setup;
2496 			}
2497 		} else {
2498 			mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
2499 			goto err_clear;
2500 		}
2501 	}
2502 
2503 	ndev->mvdev.status = status;
2504 	up_write(&ndev->reslock);
2505 	return;
2506 
2507 err_setup:
2508 	mlx5_vdpa_destroy_mr(&ndev->mvdev);
2509 	ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
2510 err_clear:
2511 	up_write(&ndev->reslock);
2512 }
2513 
2514 static int mlx5_vdpa_reset(struct vdpa_device *vdev)
2515 {
2516 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2517 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2518 
2519 	print_status(mvdev, 0, true);
2520 	mlx5_vdpa_info(mvdev, "performing device reset\n");
2521 
2522 	down_write(&ndev->reslock);
2523 	teardown_driver(ndev);
2524 	clear_vqs_ready(ndev);
2525 	mlx5_vdpa_destroy_mr(&ndev->mvdev);
2526 	ndev->mvdev.status = 0;
2527 	ndev->cur_num_vqs = 0;
2528 	ndev->mvdev.cvq.received_desc = 0;
2529 	ndev->mvdev.cvq.completed_desc = 0;
2530 	memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1));
2531 	ndev->mvdev.actual_features = 0;
2532 	++mvdev->generation;
2533 	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
2534 		if (mlx5_vdpa_create_mr(mvdev, NULL))
2535 			mlx5_vdpa_warn(mvdev, "create MR failed\n");
2536 	}
2537 	up_write(&ndev->reslock);
2538 
2539 	return 0;
2540 }
2541 
2542 static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev)
2543 {
2544 	return sizeof(struct virtio_net_config);
2545 }
2546 
2547 static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf,
2548 				 unsigned int len)
2549 {
2550 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2551 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2552 
2553 	if (offset + len <= sizeof(struct virtio_net_config))
2554 		memcpy(buf, (u8 *)&ndev->config + offset, len);
2555 }
2556 
/*
 * vdpa op: writing the config space is not supported; the request is
 * silently ignored.
 */
static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf,
				 unsigned int len)
{
	/* not supported */
}
2562 
2563 static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
2564 {
2565 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2566 
2567 	return mvdev->generation;
2568 }
2569 
2570 static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
2571 			     struct vhost_iotlb *iotlb)
2572 {
2573 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2574 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2575 	bool change_map;
2576 	int err;
2577 
2578 	down_write(&ndev->reslock);
2579 
2580 	err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map);
2581 	if (err) {
2582 		mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err);
2583 		goto err;
2584 	}
2585 
2586 	if (change_map)
2587 		err = mlx5_vdpa_change_map(mvdev, iotlb);
2588 
2589 err:
2590 	up_write(&ndev->reslock);
2591 	return err;
2592 }
2593 
2594 static void mlx5_vdpa_free(struct vdpa_device *vdev)
2595 {
2596 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2597 	struct mlx5_core_dev *pfmdev;
2598 	struct mlx5_vdpa_net *ndev;
2599 
2600 	ndev = to_mlx5_vdpa_ndev(mvdev);
2601 
2602 	free_resources(ndev);
2603 	mlx5_vdpa_destroy_mr(mvdev);
2604 	if (!is_zero_ether_addr(ndev->config.mac)) {
2605 		pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
2606 		mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
2607 	}
2608 	mlx5_vdpa_free_resources(&ndev->mvdev);
2609 	kfree(ndev->event_cbs);
2610 	kfree(ndev->vqs);
2611 }
2612 
2613 static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
2614 {
2615 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2616 	struct vdpa_notification_area ret = {};
2617 	struct mlx5_vdpa_net *ndev;
2618 	phys_addr_t addr;
2619 
2620 	if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
2621 		return ret;
2622 
2623 	/* If SF BAR size is smaller than PAGE_SIZE, do not use direct
2624 	 * notification to avoid the risk of mapping pages that contain BAR of more
2625 	 * than one SF
2626 	 */
2627 	if (MLX5_CAP_GEN(mvdev->mdev, log_min_sf_size) + 12 < PAGE_SHIFT)
2628 		return ret;
2629 
2630 	ndev = to_mlx5_vdpa_ndev(mvdev);
2631 	addr = (phys_addr_t)ndev->mvdev.res.phys_kick_addr;
2632 	ret.addr = addr;
2633 	ret.size = PAGE_SIZE;
2634 	return ret;
2635 }
2636 
2637 static int mlx5_get_vq_irq(struct vdpa_device *vdv, u16 idx)
2638 {
2639 	return -EOPNOTSUPP;
2640 }
2641 
2642 static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev)
2643 {
2644 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2645 
2646 	return mvdev->actual_features;
2647 }
2648 
2649 static int counter_set_query(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
2650 			     u64 *received_desc, u64 *completed_desc)
2651 {
2652 	u32 in[MLX5_ST_SZ_DW(query_virtio_q_counters_in)] = {};
2653 	u32 out[MLX5_ST_SZ_DW(query_virtio_q_counters_out)] = {};
2654 	void *cmd_hdr;
2655 	void *ctx;
2656 	int err;
2657 
2658 	if (!counters_supported(&ndev->mvdev))
2659 		return -EOPNOTSUPP;
2660 
2661 	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
2662 		return -EAGAIN;
2663 
2664 	cmd_hdr = MLX5_ADDR_OF(query_virtio_q_counters_in, in, hdr);
2665 
2666 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
2667 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
2668 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
2669 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->counter_set_id);
2670 
2671 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
2672 	if (err)
2673 		return err;
2674 
2675 	ctx = MLX5_ADDR_OF(query_virtio_q_counters_out, out, counters);
2676 	*received_desc = MLX5_GET64(virtio_q_counters, ctx, received_desc);
2677 	*completed_desc = MLX5_GET64(virtio_q_counters, ctx, completed_desc);
2678 	return 0;
2679 }
2680 
2681 static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
2682 					 struct sk_buff *msg,
2683 					 struct netlink_ext_ack *extack)
2684 {
2685 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2686 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2687 	struct mlx5_vdpa_virtqueue *mvq;
2688 	struct mlx5_control_vq *cvq;
2689 	u64 received_desc;
2690 	u64 completed_desc;
2691 	int err = 0;
2692 
2693 	down_read(&ndev->reslock);
2694 	if (!is_index_valid(mvdev, idx)) {
2695 		NL_SET_ERR_MSG_MOD(extack, "virtqueue index is not valid");
2696 		err = -EINVAL;
2697 		goto out_err;
2698 	}
2699 
2700 	if (idx == ctrl_vq_idx(mvdev)) {
2701 		cvq = &mvdev->cvq;
2702 		received_desc = cvq->received_desc;
2703 		completed_desc = cvq->completed_desc;
2704 		goto out;
2705 	}
2706 
2707 	mvq = &ndev->vqs[idx];
2708 	err = counter_set_query(ndev, mvq, &received_desc, &completed_desc);
2709 	if (err) {
2710 		NL_SET_ERR_MSG_MOD(extack, "failed to query hardware");
2711 		goto out_err;
2712 	}
2713 
2714 out:
2715 	err = -EMSGSIZE;
2716 	if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "received_desc"))
2717 		goto out_err;
2718 
2719 	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, received_desc,
2720 			      VDPA_ATTR_PAD))
2721 		goto out_err;
2722 
2723 	if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "completed_desc"))
2724 		goto out_err;
2725 
2726 	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, completed_desc,
2727 			      VDPA_ATTR_PAD))
2728 		goto out_err;
2729 
2730 	err = 0;
2731 out_err:
2732 	up_read(&ndev->reslock);
2733 	return err;
2734 }
2735 
2736 static const struct vdpa_config_ops mlx5_vdpa_ops = {
2737 	.set_vq_address = mlx5_vdpa_set_vq_address,
2738 	.set_vq_num = mlx5_vdpa_set_vq_num,
2739 	.kick_vq = mlx5_vdpa_kick_vq,
2740 	.set_vq_cb = mlx5_vdpa_set_vq_cb,
2741 	.set_vq_ready = mlx5_vdpa_set_vq_ready,
2742 	.get_vq_ready = mlx5_vdpa_get_vq_ready,
2743 	.set_vq_state = mlx5_vdpa_set_vq_state,
2744 	.get_vq_state = mlx5_vdpa_get_vq_state,
2745 	.get_vendor_vq_stats = mlx5_vdpa_get_vendor_vq_stats,
2746 	.get_vq_notification = mlx5_get_vq_notification,
2747 	.get_vq_irq = mlx5_get_vq_irq,
2748 	.get_vq_align = mlx5_vdpa_get_vq_align,
2749 	.get_vq_group = mlx5_vdpa_get_vq_group,
2750 	.get_device_features = mlx5_vdpa_get_device_features,
2751 	.set_driver_features = mlx5_vdpa_set_driver_features,
2752 	.get_driver_features = mlx5_vdpa_get_driver_features,
2753 	.set_config_cb = mlx5_vdpa_set_config_cb,
2754 	.get_vq_num_max = mlx5_vdpa_get_vq_num_max,
2755 	.get_device_id = mlx5_vdpa_get_device_id,
2756 	.get_vendor_id = mlx5_vdpa_get_vendor_id,
2757 	.get_status = mlx5_vdpa_get_status,
2758 	.set_status = mlx5_vdpa_set_status,
2759 	.reset = mlx5_vdpa_reset,
2760 	.get_config_size = mlx5_vdpa_get_config_size,
2761 	.get_config = mlx5_vdpa_get_config,
2762 	.set_config = mlx5_vdpa_set_config,
2763 	.get_generation = mlx5_vdpa_get_generation,
2764 	.set_map = mlx5_vdpa_set_map,
2765 	.free = mlx5_vdpa_free,
2766 };
2767 
2768 static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
2769 {
2770 	u16 hw_mtu;
2771 	int err;
2772 
2773 	err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu);
2774 	if (err)
2775 		return err;
2776 
2777 	*mtu = hw_mtu - MLX5V_ETH_HARD_MTU;
2778 	return 0;
2779 }
2780 
2781 static int alloc_resources(struct mlx5_vdpa_net *ndev)
2782 {
2783 	struct mlx5_vdpa_net_resources *res = &ndev->res;
2784 	int err;
2785 
2786 	if (res->valid) {
2787 		mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n");
2788 		return -EEXIST;
2789 	}
2790 
2791 	err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn);
2792 	if (err)
2793 		return err;
2794 
2795 	err = create_tis(ndev);
2796 	if (err)
2797 		goto err_tis;
2798 
2799 	res->valid = true;
2800 
2801 	return 0;
2802 
2803 err_tis:
2804 	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
2805 	return err;
2806 }
2807 
2808 static void free_resources(struct mlx5_vdpa_net *ndev)
2809 {
2810 	struct mlx5_vdpa_net_resources *res = &ndev->res;
2811 
2812 	if (!res->valid)
2813 		return;
2814 
2815 	destroy_tis(ndev);
2816 	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
2817 	res->valid = false;
2818 }
2819 
2820 static void init_mvqs(struct mlx5_vdpa_net *ndev)
2821 {
2822 	struct mlx5_vdpa_virtqueue *mvq;
2823 	int i;
2824 
2825 	for (i = 0; i < ndev->mvdev.max_vqs; ++i) {
2826 		mvq = &ndev->vqs[i];
2827 		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
2828 		mvq->index = i;
2829 		mvq->ndev = ndev;
2830 		mvq->fwqp.fw = true;
2831 	}
2832 	for (; i < ndev->mvdev.max_vqs; i++) {
2833 		mvq = &ndev->vqs[i];
2834 		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
2835 		mvq->index = i;
2836 		mvq->ndev = ndev;
2837 	}
2838 }
2839 
2840 struct mlx5_vdpa_mgmtdev {
2841 	struct vdpa_mgmt_dev mgtdev;
2842 	struct mlx5_adev *madev;
2843 	struct mlx5_vdpa_net *ndev;
2844 };
2845 
2846 static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
2847 {
2848 	u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
2849 	u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
2850 	int err;
2851 
2852 	MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE);
2853 	MLX5_SET(query_vport_state_in, in, op_mod, opmod);
2854 	MLX5_SET(query_vport_state_in, in, vport_number, vport);
2855 	if (vport)
2856 		MLX5_SET(query_vport_state_in, in, other_vport, 1);
2857 
2858 	err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
2859 	if (err)
2860 		return 0;
2861 
2862 	return MLX5_GET(query_vport_state_out, out, state);
2863 }
2864 
2865 static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
2866 {
2867 	if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
2868 	    VPORT_STATE_UP)
2869 		return true;
2870 
2871 	return false;
2872 }
2873 
2874 static void update_carrier(struct work_struct *work)
2875 {
2876 	struct mlx5_vdpa_wq_ent *wqent;
2877 	struct mlx5_vdpa_dev *mvdev;
2878 	struct mlx5_vdpa_net *ndev;
2879 
2880 	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
2881 	mvdev = wqent->mvdev;
2882 	ndev = to_mlx5_vdpa_ndev(mvdev);
2883 	if (get_link_state(mvdev))
2884 		ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
2885 	else
2886 		ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
2887 
2888 	if (ndev->config_cb.callback)
2889 		ndev->config_cb.callback(ndev->config_cb.private);
2890 
2891 	kfree(wqent);
2892 }
2893 
2894 static int event_handler(struct notifier_block *nb, unsigned long event, void *param)
2895 {
2896 	struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb);
2897 	struct mlx5_eqe *eqe = param;
2898 	int ret = NOTIFY_DONE;
2899 	struct mlx5_vdpa_wq_ent *wqent;
2900 
2901 	if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
2902 		switch (eqe->sub_type) {
2903 		case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
2904 		case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
2905 			wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
2906 			if (!wqent)
2907 				return NOTIFY_DONE;
2908 
2909 			wqent->mvdev = &ndev->mvdev;
2910 			INIT_WORK(&wqent->work, update_carrier);
2911 			queue_work(ndev->mvdev.wq, &wqent->work);
2912 			ret = NOTIFY_OK;
2913 			break;
2914 		default:
2915 			return NOTIFY_DONE;
2916 		}
2917 		return ret;
2918 	}
2919 	return ret;
2920 }
2921 
2922 static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu)
2923 {
2924 	int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
2925 	void *in;
2926 	int err;
2927 
2928 	in = kvzalloc(inlen, GFP_KERNEL);
2929 	if (!in)
2930 		return -ENOMEM;
2931 
2932 	MLX5_SET(modify_nic_vport_context_in, in, field_select.mtu, 1);
2933 	MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.mtu,
2934 		 mtu + MLX5V_ETH_HARD_MTU);
2935 	MLX5_SET(modify_nic_vport_context_in, in, opcode,
2936 		 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
2937 
2938 	err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);
2939 
2940 	kvfree(in);
2941 	return err;
2942 }
2943 
2944 static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
2945 			     const struct vdpa_dev_set_config *add_config)
2946 {
2947 	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
2948 	struct virtio_net_config *config;
2949 	struct mlx5_core_dev *pfmdev;
2950 	struct mlx5_vdpa_dev *mvdev;
2951 	struct mlx5_vdpa_net *ndev;
2952 	struct mlx5_core_dev *mdev;
2953 	u32 max_vqs;
2954 	u16 mtu;
2955 	int err;
2956 
2957 	if (mgtdev->ndev)
2958 		return -ENOSPC;
2959 
2960 	mdev = mgtdev->madev->mdev;
2961 	if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) &
2962 	    MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) {
2963 		dev_warn(mdev->device, "missing support for split virtqueues\n");
2964 		return -EOPNOTSUPP;
2965 	}
2966 
2967 	max_vqs = min_t(int, MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues),
2968 			1 << MLX5_CAP_GEN(mdev, log_max_rqt_size));
2969 	if (max_vqs < 2) {
2970 		dev_warn(mdev->device,
2971 			 "%d virtqueues are supported. At least 2 are required\n",
2972 			 max_vqs);
2973 		return -EAGAIN;
2974 	}
2975 
2976 	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) {
2977 		if (add_config->net.max_vq_pairs > max_vqs / 2)
2978 			return -EINVAL;
2979 		max_vqs = min_t(u32, max_vqs, 2 * add_config->net.max_vq_pairs);
2980 	} else {
2981 		max_vqs = 2;
2982 	}
2983 
2984 	ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
2985 				 1, 1, name, false);
2986 	if (IS_ERR(ndev))
2987 		return PTR_ERR(ndev);
2988 
2989 	ndev->mvdev.mlx_features = mgtdev->mgtdev.supported_features;
2990 	ndev->mvdev.max_vqs = max_vqs;
2991 	mvdev = &ndev->mvdev;
2992 	mvdev->mdev = mdev;
2993 
2994 	ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
2995 	ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), GFP_KERNEL);
2996 	if (!ndev->vqs || !ndev->event_cbs) {
2997 		err = -ENOMEM;
2998 		goto err_alloc;
2999 	}
3000 
3001 	init_mvqs(ndev);
3002 	init_rwsem(&ndev->reslock);
3003 	config = &ndev->config;
3004 
3005 	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU)) {
3006 		err = config_func_mtu(mdev, add_config->net.mtu);
3007 		if (err)
3008 			goto err_alloc;
3009 	}
3010 
3011 	err = query_mtu(mdev, &mtu);
3012 	if (err)
3013 		goto err_alloc;
3014 
3015 	ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu);
3016 
3017 	if (get_link_state(mvdev))
3018 		ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
3019 	else
3020 		ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
3021 
3022 	if (add_config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
3023 		memcpy(ndev->config.mac, add_config->net.mac, ETH_ALEN);
3024 	} else {
3025 		err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
3026 		if (err)
3027 			goto err_alloc;
3028 	}
3029 
3030 	if (!is_zero_ether_addr(config->mac)) {
3031 		pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
3032 		err = mlx5_mpfs_add_mac(pfmdev, config->mac);
3033 		if (err)
3034 			goto err_alloc;
3035 
3036 		ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MAC);
3037 	}
3038 
3039 	config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, max_vqs / 2);
3040 	mvdev->vdev.dma_dev = &mdev->pdev->dev;
3041 	err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
3042 	if (err)
3043 		goto err_mpfs;
3044 
3045 	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
3046 		err = mlx5_vdpa_create_mr(mvdev, NULL);
3047 		if (err)
3048 			goto err_res;
3049 	}
3050 
3051 	err = alloc_resources(ndev);
3052 	if (err)
3053 		goto err_mr;
3054 
3055 	ndev->cvq_ent.mvdev = mvdev;
3056 	INIT_WORK(&ndev->cvq_ent.work, mlx5_cvq_kick_handler);
3057 	mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq");
3058 	if (!mvdev->wq) {
3059 		err = -ENOMEM;
3060 		goto err_res2;
3061 	}
3062 
3063 	ndev->nb.notifier_call = event_handler;
3064 	mlx5_notifier_register(mdev, &ndev->nb);
3065 	mvdev->vdev.mdev = &mgtdev->mgtdev;
3066 	err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1);
3067 	if (err)
3068 		goto err_reg;
3069 
3070 	mgtdev->ndev = ndev;
3071 	return 0;
3072 
3073 err_reg:
3074 	destroy_workqueue(mvdev->wq);
3075 err_res2:
3076 	free_resources(ndev);
3077 err_mr:
3078 	mlx5_vdpa_destroy_mr(mvdev);
3079 err_res:
3080 	mlx5_vdpa_free_resources(&ndev->mvdev);
3081 err_mpfs:
3082 	if (!is_zero_ether_addr(config->mac))
3083 		mlx5_mpfs_del_mac(pfmdev, config->mac);
3084 err_alloc:
3085 	put_device(&mvdev->vdev.dev);
3086 	return err;
3087 }
3088 
3089 static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
3090 {
3091 	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
3092 	struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
3093 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3094 	struct workqueue_struct *wq;
3095 
3096 	mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
3097 	wq = mvdev->wq;
3098 	mvdev->wq = NULL;
3099 	destroy_workqueue(wq);
3100 	_vdpa_unregister_device(dev);
3101 	mgtdev->ndev = NULL;
3102 }
3103 
3104 static const struct vdpa_mgmtdev_ops mdev_ops = {
3105 	.dev_add = mlx5_vdpa_dev_add,
3106 	.dev_del = mlx5_vdpa_dev_del,
3107 };
3108 
3109 static struct virtio_device_id id_table[] = {
3110 	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
3111 	{ 0 },
3112 };
3113 
3114 static int mlx5v_probe(struct auxiliary_device *adev,
3115 		       const struct auxiliary_device_id *id)
3116 
3117 {
3118 	struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
3119 	struct mlx5_core_dev *mdev = madev->mdev;
3120 	struct mlx5_vdpa_mgmtdev *mgtdev;
3121 	int err;
3122 
3123 	mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL);
3124 	if (!mgtdev)
3125 		return -ENOMEM;
3126 
3127 	mgtdev->mgtdev.ops = &mdev_ops;
3128 	mgtdev->mgtdev.device = mdev->device;
3129 	mgtdev->mgtdev.id_table = id_table;
3130 	mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
3131 					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP) |
3132 					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU);
3133 	mgtdev->mgtdev.max_supported_vqs =
3134 		MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues) + 1;
3135 	mgtdev->mgtdev.supported_features = get_supported_features(mdev);
3136 	mgtdev->madev = madev;
3137 
3138 	err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
3139 	if (err)
3140 		goto reg_err;
3141 
3142 	auxiliary_set_drvdata(adev, mgtdev);
3143 
3144 	return 0;
3145 
3146 reg_err:
3147 	kfree(mgtdev);
3148 	return err;
3149 }
3150 
3151 static void mlx5v_remove(struct auxiliary_device *adev)
3152 {
3153 	struct mlx5_vdpa_mgmtdev *mgtdev;
3154 
3155 	mgtdev = auxiliary_get_drvdata(adev);
3156 	vdpa_mgmtdev_unregister(&mgtdev->mgtdev);
3157 	kfree(mgtdev);
3158 }
3159 
3160 static const struct auxiliary_device_id mlx5v_id_table[] = {
3161 	{ .name = MLX5_ADEV_NAME ".vnet", },
3162 	{},
3163 };
3164 
3165 MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table);
3166 
3167 static struct auxiliary_driver mlx5v_driver = {
3168 	.name = "vnet",
3169 	.probe = mlx5v_probe,
3170 	.remove = mlx5v_remove,
3171 	.id_table = mlx5v_id_table,
3172 };
3173 
3174 module_auxiliary_driver(mlx5v_driver);
3175