xref: /openbmc/linux/drivers/vdpa/mlx5/net/mlx5_vnet.c (revision 1f1517fafda598839a02e39968c5063ddcfa51fc)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2020 Mellanox Technologies Ltd. */
3 
4 #include <linux/module.h>
5 #include <linux/vdpa.h>
6 #include <linux/vringh.h>
7 #include <uapi/linux/virtio_net.h>
8 #include <uapi/linux/virtio_ids.h>
9 #include <uapi/linux/vdpa.h>
10 #include <linux/virtio_config.h>
11 #include <linux/auxiliary_bus.h>
12 #include <linux/mlx5/cq.h>
13 #include <linux/mlx5/qp.h>
14 #include <linux/mlx5/device.h>
15 #include <linux/mlx5/driver.h>
16 #include <linux/mlx5/vport.h>
17 #include <linux/mlx5/fs.h>
18 #include <linux/mlx5/mlx5_ifc_vdpa.h>
19 #include <linux/mlx5/mpfs.h>
20 #include "mlx5_vdpa.h"
21 
22 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
23 MODULE_DESCRIPTION("Mellanox VDPA driver");
24 MODULE_LICENSE("Dual BSD/GPL");
25 
/* Recover the struct mlx5_vdpa_dev whose ->vdev member is __vdev. */
#define to_mvdev(__vdev) container_of((__vdev), struct mlx5_vdpa_dev, vdev)

/* Recover the struct mlx5_vdpa_net whose ->mvdev member is __mvdev. */
#define to_mlx5_vdpa_ndev(__mvdev) container_of(__mvdev, struct mlx5_vdpa_net, mvdev)
29 
/* All virtio feature bits that print_features() knows by name. Any bit
 * outside of this mask is reported there as invalid.
 */
#define VALID_FEATURES_MASK                                                    \
	(BIT_ULL(VIRTIO_NET_F_CSUM) |                                          \
	 BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |                                    \
	 BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) |                           \
	 BIT_ULL(VIRTIO_NET_F_MTU) |                                           \
	 BIT_ULL(VIRTIO_NET_F_MAC) |                                           \
	 BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) |                                    \
	 BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |                                    \
	 BIT_ULL(VIRTIO_NET_F_GUEST_ECN) |                                     \
	 BIT_ULL(VIRTIO_NET_F_GUEST_UFO) |                                     \
	 BIT_ULL(VIRTIO_NET_F_HOST_TSO4) |                                     \
	 BIT_ULL(VIRTIO_NET_F_HOST_TSO6) |                                     \
	 BIT_ULL(VIRTIO_NET_F_HOST_ECN) |                                      \
	 BIT_ULL(VIRTIO_NET_F_HOST_UFO) |                                      \
	 BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) |                                     \
	 BIT_ULL(VIRTIO_NET_F_STATUS) |                                        \
	 BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |                                       \
	 BIT_ULL(VIRTIO_NET_F_CTRL_RX) |                                       \
	 BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) |                                     \
	 BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) |                                 \
	 BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) |                                \
	 BIT_ULL(VIRTIO_NET_F_MQ) |                                            \
	 BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) |                                 \
	 BIT_ULL(VIRTIO_NET_F_HASH_REPORT) |                                   \
	 BIT_ULL(VIRTIO_NET_F_RSS) |                                           \
	 BIT_ULL(VIRTIO_NET_F_RSC_EXT) |                                       \
	 BIT_ULL(VIRTIO_NET_F_STANDBY) |                                       \
	 BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) |                                  \
	 BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) |                                   \
	 BIT_ULL(VIRTIO_F_ANY_LAYOUT) |                                        \
	 BIT_ULL(VIRTIO_F_VERSION_1) |                                         \
	 BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) |                                   \
	 BIT_ULL(VIRTIO_F_RING_PACKED) |                                       \
	 BIT_ULL(VIRTIO_F_ORDER_PLATFORM) |                                    \
	 BIT_ULL(VIRTIO_F_SR_IOV))
44 
/* All virtio status bits that print_status() knows by name. Any bit outside
 * of this mask is reported there as invalid.
 */
#define VALID_STATUS_MASK                                                      \
	(VIRTIO_CONFIG_S_ACKNOWLEDGE |                                         \
	 VIRTIO_CONFIG_S_DRIVER |                                              \
	 VIRTIO_CONFIG_S_DRIVER_OK |                                           \
	 VIRTIO_CONFIG_S_FEATURES_OK |                                         \
	 VIRTIO_CONFIG_S_NEEDS_RESET |                                         \
	 VIRTIO_CONFIG_S_FAILED)
48 
/* Evaluates to 1 when bit _feature is set in _mvdev->actual_features, else 0. */
#define MLX5_FEATURE(_mvdev, _feature)                                         \
	(((_mvdev)->actual_features & BIT_ULL(_feature)) != 0)
50 
51 struct mlx5_vdpa_net_resources {
52 	u32 tisn;
53 	u32 tdn;
54 	u32 tirn;
55 	u32 rqtn;
56 	bool valid;
57 };
58 
59 struct mlx5_vdpa_cq_buf {
60 	struct mlx5_frag_buf_ctrl fbc;
61 	struct mlx5_frag_buf frag_buf;
62 	int cqe_size;
63 	int nent;
64 };
65 
66 struct mlx5_vdpa_cq {
67 	struct mlx5_core_cq mcq;
68 	struct mlx5_vdpa_cq_buf buf;
69 	struct mlx5_db db;
70 	int cqe;
71 };
72 
73 struct mlx5_vdpa_umem {
74 	struct mlx5_frag_buf_ctrl fbc;
75 	struct mlx5_frag_buf frag_buf;
76 	int size;
77 	u32 id;
78 };
79 
80 struct mlx5_vdpa_qp {
81 	struct mlx5_core_qp mqp;
82 	struct mlx5_frag_buf frag_buf;
83 	struct mlx5_db db;
84 	u16 head;
85 	bool fw;
86 };
87 
88 struct mlx5_vq_restore_info {
89 	u32 num_ent;
90 	u64 desc_addr;
91 	u64 device_addr;
92 	u64 driver_addr;
93 	u16 avail_index;
94 	u16 used_index;
95 	bool ready;
96 	bool restore;
97 };
98 
99 struct mlx5_vdpa_virtqueue {
100 	bool ready;
101 	u64 desc_addr;
102 	u64 device_addr;
103 	u64 driver_addr;
104 	u32 num_ent;
105 
106 	/* Resources for implementing the notification channel from the device
107 	 * to the driver. fwqp is the firmware end of an RC connection; the
108 	 * other end is vqqp used by the driver. cq is is where completions are
109 	 * reported.
110 	 */
111 	struct mlx5_vdpa_cq cq;
112 	struct mlx5_vdpa_qp fwqp;
113 	struct mlx5_vdpa_qp vqqp;
114 
115 	/* umem resources are required for the virtqueue operation. They're use
116 	 * is internal and they must be provided by the driver.
117 	 */
118 	struct mlx5_vdpa_umem umem1;
119 	struct mlx5_vdpa_umem umem2;
120 	struct mlx5_vdpa_umem umem3;
121 
122 	bool initialized;
123 	int index;
124 	u32 virtq_id;
125 	struct mlx5_vdpa_net *ndev;
126 	u16 avail_idx;
127 	u16 used_idx;
128 	int fw_state;
129 
130 	/* keep last in the struct */
131 	struct mlx5_vq_restore_info ri;
132 };
133 
/* Upper bound on the number of data virtqueues; it sizes the vqs[] and
 * event_cbs[] arrays of struct mlx5_vdpa_net. This limitation will be removed
 * once mlx5_vdpa_alloc_resources() provides for driver space allocation.
 */
#define MLX5_MAX_SUPPORTED_VQS 16
138 
139 static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
140 {
141 	if (unlikely(idx > mvdev->max_idx))
142 		return false;
143 
144 	return true;
145 }
146 
147 struct mlx5_vdpa_net {
148 	struct mlx5_vdpa_dev mvdev;
149 	struct mlx5_vdpa_net_resources res;
150 	struct virtio_net_config config;
151 	struct mlx5_vdpa_virtqueue vqs[MLX5_MAX_SUPPORTED_VQS];
152 	struct vdpa_callback event_cbs[MLX5_MAX_SUPPORTED_VQS + 1];
153 
154 	/* Serialize vq resources creation and destruction. This is required
155 	 * since memory map might change and we need to destroy and create
156 	 * resources while driver in operational.
157 	 */
158 	struct mutex reslock;
159 	struct mlx5_flow_table *rxft;
160 	struct mlx5_fc *rx_counter;
161 	struct mlx5_flow_handle *rx_rule_ucast;
162 	struct mlx5_flow_handle *rx_rule_mcast;
163 	bool setup;
164 	u32 cur_num_vqs;
165 	struct notifier_block nb;
166 	struct vdpa_callback config_cb;
167 };
168 
/* Forward declarations of helpers that are defined further down in the file. */
static int setup_driver(struct mlx5_vdpa_dev *mvdev);
static void teardown_driver(struct mlx5_vdpa_net *ndev);
static void init_mvqs(struct mlx5_vdpa_net *ndev);
static void free_resources(struct mlx5_vdpa_net *ndev);
173 
174 static bool mlx5_vdpa_debug;
175 
176 #define MLX5_CVQ_MAX_ENT 16
177 
/* Log the name of _feature when its bit is set. Expects locals named
 * "features" and "mvdev" at the point of use.
 */
#define MLX5_LOG_VIO_FLAG(_feature)                                            \
	do {                                                                   \
		if (features & BIT_ULL(_feature))                              \
			mlx5_vdpa_info(mvdev, "%s\n", #_feature);              \
	} while (0)

/* Log the name of _status when it is set. Expects locals named "status" and
 * "mvdev" at the point of use.
 */
#define MLX5_LOG_VIO_STAT(_status)                                             \
	do {                                                                   \
		if (status & (_status))                                        \
			mlx5_vdpa_info(mvdev, "%s\n", #_status);               \
	} while (0)
189 
190 /* TODO: cross-endian support */
191 static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
192 {
193 	return virtio_legacy_is_little_endian() ||
194 		(mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
195 }
196 
197 static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val)
198 {
199 	return __virtio16_to_cpu(mlx5_vdpa_is_little_endian(mvdev), val);
200 }
201 
202 static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
203 {
204 	return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
205 }
206 
207 static inline u32 mlx5_vdpa_max_qps(int max_vqs)
208 {
209 	return max_vqs / 2;
210 }
211 
212 static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
213 {
214 	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
215 		return 2;
216 
217 	return 2 * mlx5_vdpa_max_qps(mvdev->max_vqs);
218 }
219 
220 static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
221 {
222 	return idx == ctrl_vq_idx(mvdev);
223 }
224 
225 static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
226 {
227 	if (status & ~VALID_STATUS_MASK)
228 		mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n",
229 			       status & ~VALID_STATUS_MASK);
230 
231 	if (!mlx5_vdpa_debug)
232 		return;
233 
234 	mlx5_vdpa_info(mvdev, "driver status %s", set ? "set" : "get");
235 	if (set && !status) {
236 		mlx5_vdpa_info(mvdev, "driver resets the device\n");
237 		return;
238 	}
239 
240 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE);
241 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER);
242 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK);
243 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK);
244 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET);
245 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED);
246 }
247 
248 static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set)
249 {
250 	if (features & ~VALID_FEATURES_MASK)
251 		mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n",
252 			       features & ~VALID_FEATURES_MASK);
253 
254 	if (!mlx5_vdpa_debug)
255 		return;
256 
257 	mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads");
258 	if (!features)
259 		mlx5_vdpa_info(mvdev, "all feature bits are cleared\n");
260 
261 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM);
262 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM);
263 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
264 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU);
265 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC);
266 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4);
267 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6);
268 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN);
269 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO);
270 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4);
271 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6);
272 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN);
273 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO);
274 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF);
275 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS);
276 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ);
277 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX);
278 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN);
279 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA);
280 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE);
281 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ);
282 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR);
283 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT);
284 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS);
285 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT);
286 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY);
287 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX);
288 	MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY);
289 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT);
290 	MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1);
291 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM);
292 	MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED);
293 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM);
294 	MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV);
295 }
296 
297 static int create_tis(struct mlx5_vdpa_net *ndev)
298 {
299 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
300 	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
301 	void *tisc;
302 	int err;
303 
304 	tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
305 	MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn);
306 	err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn);
307 	if (err)
308 		mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err);
309 
310 	return err;
311 }
312 
313 static void destroy_tis(struct mlx5_vdpa_net *ndev)
314 {
315 	mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn);
316 }
317 
/* Size of one completion queue entry in bytes, and its base 2 logarithm. */
#define MLX5_VDPA_CQE_SIZE 64
#define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE)
320 
321 static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent)
322 {
323 	struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
324 	u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE;
325 	u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE;
326 	int err;
327 
328 	err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf,
329 				       ndev->mvdev.mdev->priv.numa_node);
330 	if (err)
331 		return err;
332 
333 	mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);
334 
335 	buf->cqe_size = MLX5_VDPA_CQE_SIZE;
336 	buf->nent = nent;
337 
338 	return 0;
339 }
340 
341 static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size)
342 {
343 	struct mlx5_frag_buf *frag_buf = &umem->frag_buf;
344 
345 	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf,
346 					ndev->mvdev.mdev->priv.numa_node);
347 }
348 
349 static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf)
350 {
351 	mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf);
352 }
353 
354 static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n)
355 {
356 	return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n);
357 }
358 
359 static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf)
360 {
361 	struct mlx5_cqe64 *cqe64;
362 	void *cqe;
363 	int i;
364 
365 	for (i = 0; i < buf->nent; i++) {
366 		cqe = get_cqe(vcq, i);
367 		cqe64 = cqe;
368 		cqe64->op_own = MLX5_CQE_INVALID << 4;
369 	}
370 }
371 
372 static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n)
373 {
374 	struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1));
375 
376 	if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
377 	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe)))
378 		return cqe64;
379 
380 	return NULL;
381 }
382 
383 static void rx_post(struct mlx5_vdpa_qp *vqp, int n)
384 {
385 	vqp->head += n;
386 	vqp->db.db[0] = cpu_to_be32(vqp->head);
387 }
388 
389 static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in,
390 		       struct mlx5_vdpa_virtqueue *mvq, u32 num_ent)
391 {
392 	struct mlx5_vdpa_qp *vqp;
393 	__be64 *pas;
394 	void *qpc;
395 
396 	vqp = fw ? &mvq->fwqp : &mvq->vqqp;
397 	MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid);
398 	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
399 	if (vqp->fw) {
400 		/* Firmware QP is allocated by the driver for the firmware's
401 		 * use so we can skip part of the params as they will be chosen by firmware
402 		 */
403 		qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
404 		MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
405 		MLX5_SET(qpc, qpc, no_sq, 1);
406 		return;
407 	}
408 
409 	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
410 	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
411 	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
412 	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
413 	MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index);
414 	MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
415 	MLX5_SET(qpc, qpc, no_sq, 1);
416 	MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn);
417 	MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent));
418 	MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
419 	pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas);
420 	mlx5_fill_page_frag_array(&vqp->frag_buf, pas);
421 }
422 
423 static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent)
424 {
425 	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev,
426 					num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf,
427 					ndev->mvdev.mdev->priv.numa_node);
428 }
429 
430 static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
431 {
432 	mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf);
433 }
434 
435 static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
436 		     struct mlx5_vdpa_qp *vqp)
437 {
438 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
439 	int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
440 	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
441 	void *qpc;
442 	void *in;
443 	int err;
444 
445 	if (!vqp->fw) {
446 		vqp = &mvq->vqqp;
447 		err = rq_buf_alloc(ndev, vqp, mvq->num_ent);
448 		if (err)
449 			return err;
450 
451 		err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db);
452 		if (err)
453 			goto err_db;
454 		inlen += vqp->frag_buf.npages * sizeof(__be64);
455 	}
456 
457 	in = kzalloc(inlen, GFP_KERNEL);
458 	if (!in) {
459 		err = -ENOMEM;
460 		goto err_kzalloc;
461 	}
462 
463 	qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent);
464 	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
465 	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
466 	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
467 	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
468 	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
469 	if (!vqp->fw)
470 		MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma);
471 	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
472 	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
473 	kfree(in);
474 	if (err)
475 		goto err_kzalloc;
476 
477 	vqp->mqp.uid = ndev->mvdev.res.uid;
478 	vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);
479 
480 	if (!vqp->fw)
481 		rx_post(vqp, mvq->num_ent);
482 
483 	return 0;
484 
485 err_kzalloc:
486 	if (!vqp->fw)
487 		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
488 err_db:
489 	if (!vqp->fw)
490 		rq_buf_free(ndev, vqp);
491 
492 	return err;
493 }
494 
495 static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
496 {
497 	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
498 
499 	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
500 	MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
501 	MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
502 	if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
503 		mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn);
504 	if (!vqp->fw) {
505 		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
506 		rq_buf_free(ndev, vqp);
507 	}
508 }
509 
510 static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
511 {
512 	return get_sw_cqe(cq, cq->mcq.cons_index);
513 }
514 
515 static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
516 {
517 	struct mlx5_cqe64 *cqe64;
518 
519 	cqe64 = next_cqe_sw(vcq);
520 	if (!cqe64)
521 		return -EAGAIN;
522 
523 	vcq->mcq.cons_index++;
524 	return 0;
525 }
526 
527 static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
528 {
529 	struct mlx5_vdpa_net *ndev = mvq->ndev;
530 	struct vdpa_callback *event_cb;
531 
532 	event_cb = &ndev->event_cbs[mvq->index];
533 	mlx5_cq_set_ci(&mvq->cq.mcq);
534 
535 	/* make sure CQ cosumer update is visible to the hardware before updating
536 	 * RX doorbell record.
537 	 */
538 	dma_wmb();
539 	rx_post(&mvq->vqqp, num);
540 	if (event_cb->callback)
541 		event_cb->callback(event_cb->private);
542 }
543 
544 static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
545 {
546 	struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
547 	struct mlx5_vdpa_net *ndev = mvq->ndev;
548 	void __iomem *uar_page = ndev->mvdev.res.uar->map;
549 	int num = 0;
550 
551 	while (!mlx5_vdpa_poll_one(&mvq->cq)) {
552 		num++;
553 		if (num > mvq->num_ent / 2) {
554 			/* If completions keep coming while we poll, we want to
555 			 * let the hardware know that we consumed them by
556 			 * updating the doorbell record.  We also let vdpa core
557 			 * know about this so it passes it on the virtio driver
558 			 * on the guest.
559 			 */
560 			mlx5_vdpa_handle_completions(mvq, num);
561 			num = 0;
562 		}
563 	}
564 
565 	if (num)
566 		mlx5_vdpa_handle_completions(mvq, num);
567 
568 	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
569 }
570 
571 static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
572 {
573 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
574 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
575 	void __iomem *uar_page = ndev->mvdev.res.uar->map;
576 	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
577 	struct mlx5_vdpa_cq *vcq = &mvq->cq;
578 	__be64 *pas;
579 	int inlen;
580 	void *cqc;
581 	void *in;
582 	int err;
583 	int eqn;
584 
585 	err = mlx5_db_alloc(mdev, &vcq->db);
586 	if (err)
587 		return err;
588 
589 	vcq->mcq.set_ci_db = vcq->db.db;
590 	vcq->mcq.arm_db = vcq->db.db + 1;
591 	vcq->mcq.cqe_sz = 64;
592 
593 	err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
594 	if (err)
595 		goto err_db;
596 
597 	cq_frag_buf_init(vcq, &vcq->buf);
598 
599 	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
600 		MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages;
601 	in = kzalloc(inlen, GFP_KERNEL);
602 	if (!in) {
603 		err = -ENOMEM;
604 		goto err_vzalloc;
605 	}
606 
607 	MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid);
608 	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
609 	mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas);
610 
611 	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
612 	MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
613 
614 	/* Use vector 0 by default. Consider adding code to choose least used
615 	 * vector.
616 	 */
617 	err = mlx5_vector2eqn(mdev, 0, &eqn);
618 	if (err)
619 		goto err_vec;
620 
621 	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
622 	MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
623 	MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
624 	MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
625 	MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);
626 
627 	err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
628 	if (err)
629 		goto err_vec;
630 
631 	vcq->mcq.comp = mlx5_vdpa_cq_comp;
632 	vcq->cqe = num_ent;
633 	vcq->mcq.set_ci_db = vcq->db.db;
634 	vcq->mcq.arm_db = vcq->db.db + 1;
635 	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
636 	kfree(in);
637 	return 0;
638 
639 err_vec:
640 	kfree(in);
641 err_vzalloc:
642 	cq_frag_buf_free(ndev, &vcq->buf);
643 err_db:
644 	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
645 	return err;
646 }
647 
648 static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
649 {
650 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
651 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
652 	struct mlx5_vdpa_cq *vcq = &mvq->cq;
653 
654 	if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) {
655 		mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn);
656 		return;
657 	}
658 	cq_frag_buf_free(ndev, &vcq->buf);
659 	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
660 }
661 
662 static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
663 			  struct mlx5_vdpa_umem **umemp)
664 {
665 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
666 	int p_a;
667 	int p_b;
668 
669 	switch (num) {
670 	case 1:
671 		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_a);
672 		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_b);
673 		*umemp = &mvq->umem1;
674 		break;
675 	case 2:
676 		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_a);
677 		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_b);
678 		*umemp = &mvq->umem2;
679 		break;
680 	case 3:
681 		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_a);
682 		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_b);
683 		*umemp = &mvq->umem3;
684 		break;
685 	}
686 	(*umemp)->size = p_a * mvq->num_ent + p_b;
687 }
688 
689 static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem)
690 {
691 	mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf);
692 }
693 
694 static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
695 {
696 	int inlen;
697 	u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {};
698 	void *um;
699 	void *in;
700 	int err;
701 	__be64 *pas;
702 	struct mlx5_vdpa_umem *umem;
703 
704 	set_umem_size(ndev, mvq, num, &umem);
705 	err = umem_frag_buf_alloc(ndev, umem, umem->size);
706 	if (err)
707 		return err;
708 
709 	inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages;
710 
711 	in = kzalloc(inlen, GFP_KERNEL);
712 	if (!in) {
713 		err = -ENOMEM;
714 		goto err_in;
715 	}
716 
717 	MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM);
718 	MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid);
719 	um = MLX5_ADDR_OF(create_umem_in, in, umem);
720 	MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
721 	MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages);
722 
723 	pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]);
724 	mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW);
725 
726 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
727 	if (err) {
728 		mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err);
729 		goto err_cmd;
730 	}
731 
732 	kfree(in);
733 	umem->id = MLX5_GET(create_umem_out, out, umem_id);
734 
735 	return 0;
736 
737 err_cmd:
738 	kfree(in);
739 err_in:
740 	umem_frag_buf_free(ndev, umem);
741 	return err;
742 }
743 
744 static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
745 {
746 	u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {};
747 	u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {};
748 	struct mlx5_vdpa_umem *umem;
749 
750 	switch (num) {
751 	case 1:
752 		umem = &mvq->umem1;
753 		break;
754 	case 2:
755 		umem = &mvq->umem2;
756 		break;
757 	case 3:
758 		umem = &mvq->umem3;
759 		break;
760 	}
761 
762 	MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM);
763 	MLX5_SET(destroy_umem_in, in, umem_id, umem->id);
764 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
765 		return;
766 
767 	umem_frag_buf_free(ndev, umem);
768 }
769 
/* Create umems 1 to 3 of @mvq.
 *
 * Return: 0 on success; on failure the umems created so far are destroyed in
 * reverse order and the error of create_umem() is returned.
 */
static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int num;
	int err;

	for (num = 1; num <= 3; num++) {
		err = create_umem(ndev, mvq, num);
		if (!err)
			continue;

		/* unwind: num itself was not created */
		while (--num > 0)
			umem_destroy(ndev, mvq, num);

		return err;
	}

	return 0;
}
788 
/* Destroy umems 3, 2 and 1 of @mvq, in that order. */
static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int num = 3;

	while (num > 0) {
		umem_destroy(ndev, mvq, num);
		num--;
	}
}
796 
797 static int get_queue_type(struct mlx5_vdpa_net *ndev)
798 {
799 	u32 type_mask;
800 
801 	type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type);
802 
803 	/* prefer split queue */
804 	if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)
805 		return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT;
806 
807 	WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED));
808 
809 	return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED;
810 }
811 
812 static bool vq_is_tx(u16 idx)
813 {
814 	return idx % 2;
815 }
816 
817 static u16 get_features_12_3(u64 features)
818 {
819 	return (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << 9) |
820 	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << 8) |
821 	       (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << 7) |
822 	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_CSUM)) << 6);
823 }
824 
825 static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
826 {
827 	int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
828 	u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
829 	void *obj_context;
830 	void *cmd_hdr;
831 	void *vq_ctx;
832 	void *in;
833 	int err;
834 
835 	err = umems_create(ndev, mvq);
836 	if (err)
837 		return err;
838 
839 	in = kzalloc(inlen, GFP_KERNEL);
840 	if (!in) {
841 		err = -ENOMEM;
842 		goto err_alloc;
843 	}
844 
845 	cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);
846 
847 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
848 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
849 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
850 
851 	obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);
852 	MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
853 	MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
854 	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
855 		 get_features_12_3(ndev->mvdev.actual_features));
856 	vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
857 	MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));
858 
859 	if (vq_is_tx(mvq->index))
860 		MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);
861 
862 	MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
863 	MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
864 	MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
865 	MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
866 	MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
867 		 !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
868 	MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
869 	MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
870 	MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
871 	MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, ndev->mvdev.mr.mkey);
872 	MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
873 	MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
874 	MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
875 	MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size);
876 	MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
877 	MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
878 	MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
879 	if (MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, eth_frame_offload_type))
880 		MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0, 1);
881 
882 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
883 	if (err)
884 		goto err_cmd;
885 
886 	kfree(in);
887 	mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
888 
889 	return 0;
890 
891 err_cmd:
892 	kfree(in);
893 err_alloc:
894 	umems_destroy(ndev, mvq);
895 	return err;
896 }
897 
898 static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
899 {
900 	u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {};
901 	u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {};
902 
903 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode,
904 		 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
905 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id);
906 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid);
907 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type,
908 		 MLX5_OBJ_TYPE_VIRTIO_NET_Q);
909 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) {
910 		mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
911 		return;
912 	}
913 	umems_destroy(ndev, mvq);
914 }
915 
916 static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
917 {
918 	return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn;
919 }
920 
921 static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
922 {
923 	return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn;
924 }
925 
926 static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out,
927 			int *outlen, u32 qpn, u32 rqpn)
928 {
929 	void *qpc;
930 	void *pp;
931 
932 	switch (cmd) {
933 	case MLX5_CMD_OP_2RST_QP:
934 		*inlen = MLX5_ST_SZ_BYTES(qp_2rst_in);
935 		*outlen = MLX5_ST_SZ_BYTES(qp_2rst_out);
936 		*in = kzalloc(*inlen, GFP_KERNEL);
937 		*out = kzalloc(*outlen, GFP_KERNEL);
938 		if (!*in || !*out)
939 			goto outerr;
940 
941 		MLX5_SET(qp_2rst_in, *in, opcode, cmd);
942 		MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid);
943 		MLX5_SET(qp_2rst_in, *in, qpn, qpn);
944 		break;
945 	case MLX5_CMD_OP_RST2INIT_QP:
946 		*inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in);
947 		*outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out);
948 		*in = kzalloc(*inlen, GFP_KERNEL);
949 		*out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL);
950 		if (!*in || !*out)
951 			goto outerr;
952 
953 		MLX5_SET(rst2init_qp_in, *in, opcode, cmd);
954 		MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid);
955 		MLX5_SET(rst2init_qp_in, *in, qpn, qpn);
956 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
957 		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
958 		MLX5_SET(qpc, qpc, rwe, 1);
959 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
960 		MLX5_SET(ads, pp, vhca_port_num, 1);
961 		break;
962 	case MLX5_CMD_OP_INIT2RTR_QP:
963 		*inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in);
964 		*outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out);
965 		*in = kzalloc(*inlen, GFP_KERNEL);
966 		*out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL);
967 		if (!*in || !*out)
968 			goto outerr;
969 
970 		MLX5_SET(init2rtr_qp_in, *in, opcode, cmd);
971 		MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid);
972 		MLX5_SET(init2rtr_qp_in, *in, qpn, qpn);
973 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
974 		MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
975 		MLX5_SET(qpc, qpc, log_msg_max, 30);
976 		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
977 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
978 		MLX5_SET(ads, pp, fl, 1);
979 		break;
980 	case MLX5_CMD_OP_RTR2RTS_QP:
981 		*inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in);
982 		*outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out);
983 		*in = kzalloc(*inlen, GFP_KERNEL);
984 		*out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL);
985 		if (!*in || !*out)
986 			goto outerr;
987 
988 		MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd);
989 		MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid);
990 		MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn);
991 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
992 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
993 		MLX5_SET(ads, pp, ack_timeout, 14);
994 		MLX5_SET(qpc, qpc, retry_count, 7);
995 		MLX5_SET(qpc, qpc, rnr_retry, 7);
996 		break;
997 	default:
998 		goto outerr_nullify;
999 	}
1000 
1001 	return;
1002 
1003 outerr:
1004 	kfree(*in);
1005 	kfree(*out);
1006 outerr_nullify:
1007 	*in = NULL;
1008 	*out = NULL;
1009 }
1010 
/* Release the mailbox pair handed out by alloc_inout(). */
static void free_inout(void *in, void *out)
{
	kfree(out);
	kfree(in);
}
1016 
1017 /* Two QPs are used by each virtqueue. One is used by the driver and one by
1018  * firmware. The fw argument indicates whether the subjected QP is the one used
1019  * by firmware.
1020  */
1021 static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd)
1022 {
1023 	int outlen;
1024 	int inlen;
1025 	void *out;
1026 	void *in;
1027 	int err;
1028 
1029 	alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw));
1030 	if (!in || !out)
1031 		return -ENOMEM;
1032 
1033 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen);
1034 	free_inout(in, out);
1035 	return err;
1036 }
1037 
1038 static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1039 {
1040 	int err;
1041 
1042 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP);
1043 	if (err)
1044 		return err;
1045 
1046 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP);
1047 	if (err)
1048 		return err;
1049 
1050 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP);
1051 	if (err)
1052 		return err;
1053 
1054 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP);
1055 	if (err)
1056 		return err;
1057 
1058 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP);
1059 	if (err)
1060 		return err;
1061 
1062 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP);
1063 	if (err)
1064 		return err;
1065 
1066 	return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP);
1067 }
1068 
/* Snapshot of a virtqueue object as filled in by query_virtqueue(). */
struct mlx5_virtq_attr {
	u8 state;		/* "state" field of the queried object */
	u16 available_index;	/* "hw_available_index" field of the queried object */
	u16 used_index;		/* "hw_used_index" field of the queried object */
};
1074 
1075 static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
1076 			   struct mlx5_virtq_attr *attr)
1077 {
1078 	int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out);
1079 	u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {};
1080 	void *out;
1081 	void *obj_context;
1082 	void *cmd_hdr;
1083 	int err;
1084 
1085 	out = kzalloc(outlen, GFP_KERNEL);
1086 	if (!out)
1087 		return -ENOMEM;
1088 
1089 	cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1090 
1091 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
1092 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1093 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1094 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1095 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen);
1096 	if (err)
1097 		goto err_cmd;
1098 
1099 	obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context);
1100 	memset(attr, 0, sizeof(*attr));
1101 	attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
1102 	attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);
1103 	attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index);
1104 	kfree(out);
1105 	return 0;
1106 
1107 err_cmd:
1108 	kfree(out);
1109 	return err;
1110 }
1111 
1112 static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
1113 {
1114 	int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
1115 	u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {};
1116 	void *obj_context;
1117 	void *cmd_hdr;
1118 	void *in;
1119 	int err;
1120 
1121 	in = kzalloc(inlen, GFP_KERNEL);
1122 	if (!in)
1123 		return -ENOMEM;
1124 
1125 	cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1126 
1127 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
1128 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1129 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1130 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1131 
1132 	obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context);
1133 	MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select,
1134 		   MLX5_VIRTQ_MODIFY_MASK_STATE);
1135 	MLX5_SET(virtio_net_q_object, obj_context, state, state);
1136 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
1137 	kfree(in);
1138 	if (!err)
1139 		mvq->fw_state = state;
1140 
1141 	return err;
1142 }
1143 
1144 static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1145 {
1146 	u16 idx = mvq->index;
1147 	int err;
1148 
1149 	if (!mvq->num_ent)
1150 		return 0;
1151 
1152 	if (mvq->initialized)
1153 		return 0;
1154 
1155 	err = cq_create(ndev, idx, mvq->num_ent);
1156 	if (err)
1157 		return err;
1158 
1159 	err = qp_create(ndev, mvq, &mvq->fwqp);
1160 	if (err)
1161 		goto err_fwqp;
1162 
1163 	err = qp_create(ndev, mvq, &mvq->vqqp);
1164 	if (err)
1165 		goto err_vqqp;
1166 
1167 	err = connect_qps(ndev, mvq);
1168 	if (err)
1169 		goto err_connect;
1170 
1171 	err = create_virtqueue(ndev, mvq);
1172 	if (err)
1173 		goto err_connect;
1174 
1175 	if (mvq->ready) {
1176 		err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
1177 		if (err) {
1178 			mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n",
1179 				       idx, err);
1180 			goto err_connect;
1181 		}
1182 	}
1183 
1184 	mvq->initialized = true;
1185 	return 0;
1186 
1187 err_connect:
1188 	qp_destroy(ndev, &mvq->vqqp);
1189 err_vqqp:
1190 	qp_destroy(ndev, &mvq->fwqp);
1191 err_fwqp:
1192 	cq_destroy(ndev, idx);
1193 	return err;
1194 }
1195 
1196 static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1197 {
1198 	struct mlx5_virtq_attr attr;
1199 
1200 	if (!mvq->initialized)
1201 		return;
1202 
1203 	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
1204 		return;
1205 
1206 	if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND))
1207 		mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n");
1208 
1209 	if (query_virtqueue(ndev, mvq, &attr)) {
1210 		mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n");
1211 		return;
1212 	}
1213 	mvq->avail_idx = attr.available_index;
1214 	mvq->used_idx = attr.used_index;
1215 }
1216 
1217 static void suspend_vqs(struct mlx5_vdpa_net *ndev)
1218 {
1219 	int i;
1220 
1221 	for (i = 0; i < MLX5_MAX_SUPPORTED_VQS; i++)
1222 		suspend_vq(ndev, &ndev->vqs[i]);
1223 }
1224 
1225 static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1226 {
1227 	if (!mvq->initialized)
1228 		return;
1229 
1230 	suspend_vq(ndev, mvq);
1231 	destroy_virtqueue(ndev, mvq);
1232 	qp_destroy(ndev, &mvq->vqqp);
1233 	qp_destroy(ndev, &mvq->fwqp);
1234 	cq_destroy(ndev, mvq->index);
1235 	mvq->initialized = false;
1236 }
1237 
1238 static int create_rqt(struct mlx5_vdpa_net *ndev)
1239 {
1240 	__be32 *list;
1241 	int max_rqt;
1242 	void *rqtc;
1243 	int inlen;
1244 	void *in;
1245 	int i, j;
1246 	int err;
1247 
1248 	max_rqt = min_t(int, MLX5_MAX_SUPPORTED_VQS / 2,
1249 			1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
1250 	if (max_rqt < 1)
1251 		return -EOPNOTSUPP;
1252 
1253 	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + max_rqt * MLX5_ST_SZ_BYTES(rq_num);
1254 	in = kzalloc(inlen, GFP_KERNEL);
1255 	if (!in)
1256 		return -ENOMEM;
1257 
1258 	MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid);
1259 	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
1260 
1261 	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1262 	MLX5_SET(rqtc, rqtc, rqt_max_size, max_rqt);
1263 	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1264 	for (i = 0, j = 0; j < max_rqt; j++) {
1265 		if (!ndev->vqs[j].initialized)
1266 			continue;
1267 
1268 		if (!vq_is_tx(ndev->vqs[j].index)) {
1269 			list[i] = cpu_to_be32(ndev->vqs[j].virtq_id);
1270 			i++;
1271 		}
1272 	}
1273 	MLX5_SET(rqtc, rqtc, rqt_actual_size, i);
1274 
1275 	err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
1276 	kfree(in);
1277 	if (err)
1278 		return err;
1279 
1280 	return 0;
1281 }
1282 
1283 #define MLX5_MODIFY_RQT_NUM_RQS ((u64)1)
1284 
1285 static int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
1286 {
1287 	__be32 *list;
1288 	int max_rqt;
1289 	void *rqtc;
1290 	int inlen;
1291 	void *in;
1292 	int i, j;
1293 	int err;
1294 
1295 	max_rqt = min_t(int, ndev->cur_num_vqs / 2,
1296 			1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
1297 	if (max_rqt < 1)
1298 		return -EOPNOTSUPP;
1299 
1300 	inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + max_rqt * MLX5_ST_SZ_BYTES(rq_num);
1301 	in = kzalloc(inlen, GFP_KERNEL);
1302 	if (!in)
1303 		return -ENOMEM;
1304 
1305 	MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid);
1306 	MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS);
1307 	rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
1308 	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1309 
1310 	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1311 	for (i = 0, j = 0; j < num; j++) {
1312 		if (!ndev->vqs[j].initialized)
1313 			continue;
1314 
1315 		if (!vq_is_tx(ndev->vqs[j].index)) {
1316 			list[i] = cpu_to_be32(ndev->vqs[j].virtq_id);
1317 			i++;
1318 		}
1319 	}
1320 	MLX5_SET(rqtc, rqtc, rqt_actual_size, i);
1321 	err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn);
1322 	kfree(in);
1323 	if (err)
1324 		return err;
1325 
1326 	return 0;
1327 }
1328 
1329 static void destroy_rqt(struct mlx5_vdpa_net *ndev)
1330 {
1331 	mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
1332 }
1333 
1334 static int create_tir(struct mlx5_vdpa_net *ndev)
1335 {
1336 #define HASH_IP_L4PORTS                                                                            \
1337 	(MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT |  \
1338 	 MLX5_HASH_FIELD_SEL_L4_DPORT)
1339 	static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
1340 						   0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
1341 						   0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
1342 						   0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
1343 						   0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a };
1344 	void *rss_key;
1345 	void *outer;
1346 	void *tirc;
1347 	void *in;
1348 	int err;
1349 
1350 	in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL);
1351 	if (!in)
1352 		return -ENOMEM;
1353 
1354 	MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid);
1355 	tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
1356 	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
1357 
1358 	MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
1359 	MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
1360 	rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
1361 	memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key));
1362 
1363 	outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
1364 	MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4);
1365 	MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP);
1366 	MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS);
1367 
1368 	MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn);
1369 	MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn);
1370 
1371 	err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn);
1372 	kfree(in);
1373 	return err;
1374 }
1375 
1376 static void destroy_tir(struct mlx5_vdpa_net *ndev)
1377 {
1378 	mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn);
1379 }
1380 
1381 static int add_fwd_to_tir(struct mlx5_vdpa_net *ndev)
1382 {
1383 	struct mlx5_flow_destination dest[2] = {};
1384 	struct mlx5_flow_table_attr ft_attr = {};
1385 	struct mlx5_flow_act flow_act = {};
1386 	struct mlx5_flow_namespace *ns;
1387 	struct mlx5_flow_spec *spec;
1388 	void *headers_c;
1389 	void *headers_v;
1390 	u8 *dmac_c;
1391 	u8 *dmac_v;
1392 	int err;
1393 
1394 	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1395 	if (!spec)
1396 		return -ENOMEM;
1397 
1398 	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
1399 	ft_attr.max_fte = 2;
1400 	ft_attr.autogroup.max_num_groups = 2;
1401 
1402 	ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS);
1403 	if (!ns) {
1404 		mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n");
1405 		err = -EOPNOTSUPP;
1406 		goto err_ns;
1407 	}
1408 
1409 	ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
1410 	if (IS_ERR(ndev->rxft)) {
1411 		err = PTR_ERR(ndev->rxft);
1412 		goto err_ns;
1413 	}
1414 
1415 	ndev->rx_counter = mlx5_fc_create(ndev->mvdev.mdev, false);
1416 	if (IS_ERR(ndev->rx_counter)) {
1417 		err = PTR_ERR(ndev->rx_counter);
1418 		goto err_fc;
1419 	}
1420 
1421 	headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
1422 	dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16);
1423 	memset(dmac_c, 0xff, ETH_ALEN);
1424 	headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
1425 	dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16);
1426 	ether_addr_copy(dmac_v, ndev->config.mac);
1427 
1428 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT;
1429 	dest[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1430 	dest[0].tir_num = ndev->res.tirn;
1431 	dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1432 	dest[1].counter_id = mlx5_fc_id(ndev->rx_counter);
1433 	ndev->rx_rule_ucast = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dest, 2);
1434 
1435 	if (IS_ERR(ndev->rx_rule_ucast)) {
1436 		err = PTR_ERR(ndev->rx_rule_ucast);
1437 		ndev->rx_rule_ucast = NULL;
1438 		goto err_rule_ucast;
1439 	}
1440 
1441 	memset(dmac_c, 0, ETH_ALEN);
1442 	memset(dmac_v, 0, ETH_ALEN);
1443 	dmac_c[0] = 1;
1444 	dmac_v[0] = 1;
1445 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1446 	ndev->rx_rule_mcast = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dest, 1);
1447 	if (IS_ERR(ndev->rx_rule_mcast)) {
1448 		err = PTR_ERR(ndev->rx_rule_mcast);
1449 		ndev->rx_rule_mcast = NULL;
1450 		goto err_rule_mcast;
1451 	}
1452 
1453 	kvfree(spec);
1454 	return 0;
1455 
1456 err_rule_mcast:
1457 	mlx5_del_flow_rules(ndev->rx_rule_ucast);
1458 	ndev->rx_rule_ucast = NULL;
1459 err_rule_ucast:
1460 	mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter);
1461 err_fc:
1462 	mlx5_destroy_flow_table(ndev->rxft);
1463 err_ns:
1464 	kvfree(spec);
1465 	return err;
1466 }
1467 
1468 static void remove_fwd_to_tir(struct mlx5_vdpa_net *ndev)
1469 {
1470 	if (!ndev->rx_rule_ucast)
1471 		return;
1472 
1473 	mlx5_del_flow_rules(ndev->rx_rule_mcast);
1474 	ndev->rx_rule_mcast = NULL;
1475 	mlx5_del_flow_rules(ndev->rx_rule_ucast);
1476 	ndev->rx_rule_ucast = NULL;
1477 	mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter);
1478 	mlx5_destroy_flow_table(ndev->rxft);
1479 }
1480 
1481 static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1482 {
1483 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1484 	struct mlx5_control_vq *cvq = &mvdev->cvq;
1485 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1486 	struct mlx5_core_dev *pfmdev;
1487 	size_t read;
1488 	u8 mac[ETH_ALEN];
1489 
1490 	pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
1491 	switch (cmd) {
1492 	case VIRTIO_NET_CTRL_MAC_ADDR_SET:
1493 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN);
1494 		if (read != ETH_ALEN)
1495 			break;
1496 
1497 		if (!memcmp(ndev->config.mac, mac, 6)) {
1498 			status = VIRTIO_NET_OK;
1499 			break;
1500 		}
1501 
1502 		if (!is_zero_ether_addr(ndev->config.mac)) {
1503 			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
1504 				mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
1505 					       ndev->config.mac);
1506 				break;
1507 			}
1508 		}
1509 
1510 		if (mlx5_mpfs_add_mac(pfmdev, mac)) {
1511 			mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
1512 				       mac);
1513 			break;
1514 		}
1515 
1516 		memcpy(ndev->config.mac, mac, ETH_ALEN);
1517 		status = VIRTIO_NET_OK;
1518 		break;
1519 
1520 	default:
1521 		break;
1522 	}
1523 
1524 	return status;
1525 }
1526 
1527 static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps)
1528 {
1529 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1530 	int cur_qps = ndev->cur_num_vqs / 2;
1531 	int err;
1532 	int i;
1533 
1534 	if (cur_qps > newqps) {
1535 		err = modify_rqt(ndev, 2 * newqps);
1536 		if (err)
1537 			return err;
1538 
1539 		for (i = ndev->cur_num_vqs - 1; i >= 2 * newqps; i--)
1540 			teardown_vq(ndev, &ndev->vqs[i]);
1541 
1542 		ndev->cur_num_vqs = 2 * newqps;
1543 	} else {
1544 		ndev->cur_num_vqs = 2 * newqps;
1545 		for (i = cur_qps * 2; i < 2 * newqps; i++) {
1546 			err = setup_vq(ndev, &ndev->vqs[i]);
1547 			if (err)
1548 				goto clean_added;
1549 		}
1550 		err = modify_rqt(ndev, 2 * newqps);
1551 		if (err)
1552 			goto clean_added;
1553 	}
1554 	return 0;
1555 
1556 clean_added:
1557 	for (--i; i >= cur_qps; --i)
1558 		teardown_vq(ndev, &ndev->vqs[i]);
1559 
1560 	return err;
1561 }
1562 
1563 static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1564 {
1565 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1566 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1567 	struct mlx5_control_vq *cvq = &mvdev->cvq;
1568 	struct virtio_net_ctrl_mq mq;
1569 	size_t read;
1570 	u16 newqps;
1571 
1572 	switch (cmd) {
1573 	case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
1574 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq));
1575 		if (read != sizeof(mq))
1576 			break;
1577 
1578 		newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs);
1579 		if (ndev->cur_num_vqs == 2 * newqps) {
1580 			status = VIRTIO_NET_OK;
1581 			break;
1582 		}
1583 
1584 		if (newqps & (newqps - 1))
1585 			break;
1586 
1587 		if (!change_num_qps(mvdev, newqps))
1588 			status = VIRTIO_NET_OK;
1589 
1590 		break;
1591 	default:
1592 		break;
1593 	}
1594 
1595 	return status;
1596 }
1597 
1598 static void mlx5_cvq_kick_handler(struct work_struct *work)
1599 {
1600 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1601 	struct virtio_net_ctrl_hdr ctrl;
1602 	struct mlx5_vdpa_wq_ent *wqent;
1603 	struct mlx5_vdpa_dev *mvdev;
1604 	struct mlx5_control_vq *cvq;
1605 	struct mlx5_vdpa_net *ndev;
1606 	size_t read, write;
1607 	int err;
1608 
1609 	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
1610 	mvdev = wqent->mvdev;
1611 	ndev = to_mlx5_vdpa_ndev(mvdev);
1612 	cvq = &mvdev->cvq;
1613 	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
1614 		goto out;
1615 
1616 	if (!cvq->ready)
1617 		goto out;
1618 
1619 	while (true) {
1620 		err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head,
1621 					   GFP_ATOMIC);
1622 		if (err <= 0)
1623 			break;
1624 
1625 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl));
1626 		if (read != sizeof(ctrl))
1627 			break;
1628 
1629 		switch (ctrl.class) {
1630 		case VIRTIO_NET_CTRL_MAC:
1631 			status = handle_ctrl_mac(mvdev, ctrl.cmd);
1632 			break;
1633 		case VIRTIO_NET_CTRL_MQ:
1634 			status = handle_ctrl_mq(mvdev, ctrl.cmd);
1635 			break;
1636 
1637 		default:
1638 			break;
1639 		}
1640 
1641 		/* Make sure data is written before advancing index */
1642 		smp_wmb();
1643 
1644 		write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status));
1645 		vringh_complete_iotlb(&cvq->vring, cvq->head, write);
1646 		vringh_kiov_cleanup(&cvq->riov);
1647 		vringh_kiov_cleanup(&cvq->wiov);
1648 
1649 		if (vringh_need_notify_iotlb(&cvq->vring))
1650 			vringh_notify(&cvq->vring);
1651 	}
1652 out:
1653 	kfree(wqent);
1654 }
1655 
1656 static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
1657 {
1658 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1659 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1660 	struct mlx5_vdpa_virtqueue *mvq;
1661 	struct mlx5_vdpa_wq_ent *wqent;
1662 
1663 	if (!is_index_valid(mvdev, idx))
1664 		return;
1665 
1666 	if (unlikely(is_ctrl_vq_idx(mvdev, idx))) {
1667 		if (!mvdev->cvq.ready)
1668 			return;
1669 
1670 		wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
1671 		if (!wqent)
1672 			return;
1673 
1674 		wqent->mvdev = mvdev;
1675 		INIT_WORK(&wqent->work, mlx5_cvq_kick_handler);
1676 		queue_work(mvdev->wq, &wqent->work);
1677 		return;
1678 	}
1679 
1680 	mvq = &ndev->vqs[idx];
1681 	if (unlikely(!mvq->ready))
1682 		return;
1683 
1684 	iowrite16(idx, ndev->mvdev.res.kick_addr);
1685 }
1686 
1687 static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area,
1688 				    u64 driver_area, u64 device_area)
1689 {
1690 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1691 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1692 	struct mlx5_vdpa_virtqueue *mvq;
1693 
1694 	if (!is_index_valid(mvdev, idx))
1695 		return -EINVAL;
1696 
1697 	if (is_ctrl_vq_idx(mvdev, idx)) {
1698 		mvdev->cvq.desc_addr = desc_area;
1699 		mvdev->cvq.device_addr = device_area;
1700 		mvdev->cvq.driver_addr = driver_area;
1701 		return 0;
1702 	}
1703 
1704 	mvq = &ndev->vqs[idx];
1705 	mvq->desc_addr = desc_area;
1706 	mvq->device_addr = device_area;
1707 	mvq->driver_addr = driver_area;
1708 	return 0;
1709 }
1710 
1711 static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
1712 {
1713 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1714 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1715 	struct mlx5_vdpa_virtqueue *mvq;
1716 
1717 	if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
1718 		return;
1719 
1720 	mvq = &ndev->vqs[idx];
1721 	mvq->num_ent = num;
1722 }
1723 
1724 static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb)
1725 {
1726 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1727 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1728 
1729 	ndev->event_cbs[idx] = *cb;
1730 }
1731 
1732 static void mlx5_cvq_notify(struct vringh *vring)
1733 {
1734 	struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring);
1735 
1736 	if (!cvq->event_cb.callback)
1737 		return;
1738 
1739 	cvq->event_cb.callback(cvq->event_cb.private);
1740 }
1741 
1742 static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready)
1743 {
1744 	struct mlx5_control_vq *cvq = &mvdev->cvq;
1745 
1746 	cvq->ready = ready;
1747 	if (!ready)
1748 		return;
1749 
1750 	cvq->vring.notify = mlx5_cvq_notify;
1751 }
1752 
1753 static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
1754 {
1755 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1756 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1757 	struct mlx5_vdpa_virtqueue *mvq;
1758 
1759 	if (!mvdev->actual_features)
1760 		return;
1761 
1762 	if (!is_index_valid(mvdev, idx))
1763 		return;
1764 
1765 	if (is_ctrl_vq_idx(mvdev, idx)) {
1766 		set_cvq_ready(mvdev, ready);
1767 		return;
1768 	}
1769 
1770 	mvq = &ndev->vqs[idx];
1771 	if (!ready)
1772 		suspend_vq(ndev, mvq);
1773 
1774 	mvq->ready = ready;
1775 }
1776 
1777 static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
1778 {
1779 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1780 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1781 
1782 	if (!is_index_valid(mvdev, idx))
1783 		return false;
1784 
1785 	if (is_ctrl_vq_idx(mvdev, idx))
1786 		return mvdev->cvq.ready;
1787 
1788 	return ndev->vqs[idx].ready;
1789 }
1790 
1791 static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
1792 				  const struct vdpa_vq_state *state)
1793 {
1794 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1795 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1796 	struct mlx5_vdpa_virtqueue *mvq;
1797 
1798 	if (!is_index_valid(mvdev, idx))
1799 		return -EINVAL;
1800 
1801 	if (is_ctrl_vq_idx(mvdev, idx)) {
1802 		mvdev->cvq.vring.last_avail_idx = state->split.avail_index;
1803 		return 0;
1804 	}
1805 
1806 	mvq = &ndev->vqs[idx];
1807 	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
1808 		mlx5_vdpa_warn(mvdev, "can't modify available index\n");
1809 		return -EINVAL;
1810 	}
1811 
1812 	mvq->used_idx = state->split.avail_index;
1813 	mvq->avail_idx = state->split.avail_index;
1814 	return 0;
1815 }
1816 
1817 static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state)
1818 {
1819 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1820 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1821 	struct mlx5_vdpa_virtqueue *mvq;
1822 	struct mlx5_virtq_attr attr;
1823 	int err;
1824 
1825 	if (!is_index_valid(mvdev, idx))
1826 		return -EINVAL;
1827 
1828 	if (is_ctrl_vq_idx(mvdev, idx)) {
1829 		state->split.avail_index = mvdev->cvq.vring.last_avail_idx;
1830 		return 0;
1831 	}
1832 
1833 	mvq = &ndev->vqs[idx];
1834 	/* If the virtq object was destroyed, use the value saved at
1835 	 * the last minute of suspend_vq. This caters for userspace
1836 	 * that cares about emulating the index after vq is stopped.
1837 	 */
1838 	if (!mvq->initialized) {
1839 		/* Firmware returns a wrong value for the available index.
1840 		 * Since both values should be identical, we take the value of
1841 		 * used_idx which is reported correctly.
1842 		 */
1843 		state->split.avail_index = mvq->used_idx;
1844 		return 0;
1845 	}
1846 
1847 	err = query_virtqueue(ndev, mvq, &attr);
1848 	if (err) {
1849 		mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n");
1850 		return err;
1851 	}
1852 	state->split.avail_index = attr.used_index;
1853 	return 0;
1854 }
1855 
1856 static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
1857 {
1858 	return PAGE_SIZE;
1859 }
1860 
/* Feature flags as tested by mlx_to_vritio_features() against the device
 * feature mask.
 */
enum {
	MLX5_VIRTIO_NET_F_GUEST_CSUM = 1 << 9,
	MLX5_VIRTIO_NET_F_CSUM = 1 << 10,
	MLX5_VIRTIO_NET_F_HOST_TSO6 = 1 << 11,
	MLX5_VIRTIO_NET_F_HOST_TSO4 = 1 << 12,
};
1866 
1867 static u64 mlx_to_vritio_features(u16 dev_features)
1868 {
1869 	u64 result = 0;
1870 
1871 	if (dev_features & MLX5_VIRTIO_NET_F_GUEST_CSUM)
1872 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM);
1873 	if (dev_features & MLX5_VIRTIO_NET_F_CSUM)
1874 		result |= BIT_ULL(VIRTIO_NET_F_CSUM);
1875 	if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO6)
1876 		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6);
1877 	if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO4)
1878 		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4);
1879 
1880 	return result;
1881 }
1882 
1883 static u64 mlx5_vdpa_get_features(struct vdpa_device *vdev)
1884 {
1885 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1886 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1887 	u16 dev_features;
1888 
1889 	dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, device_features_bits_mask);
1890 	ndev->mvdev.mlx_features |= mlx_to_vritio_features(dev_features);
1891 	if (MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, virtio_version_1_0))
1892 		ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_VERSION_1);
1893 	ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
1894 	ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
1895 	ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
1896 	ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MQ);
1897 	ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_STATUS);
1898 
1899 	print_features(mvdev, ndev->mvdev.mlx_features, false);
1900 	return ndev->mvdev.mlx_features;
1901 }
1902 
1903 static int verify_min_features(struct mlx5_vdpa_dev *mvdev, u64 features)
1904 {
1905 	if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
1906 		return -EOPNOTSUPP;
1907 
1908 	return 0;
1909 }
1910 
1911 static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
1912 {
1913 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1914 	struct mlx5_control_vq *cvq = &mvdev->cvq;
1915 	int err;
1916 	int i;
1917 
1918 	for (i = 0; i < 2 * mlx5_vdpa_max_qps(mvdev->max_vqs); i++) {
1919 		err = setup_vq(ndev, &ndev->vqs[i]);
1920 		if (err)
1921 			goto err_vq;
1922 	}
1923 
1924 	if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) {
1925 		err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
1926 					MLX5_CVQ_MAX_ENT, false,
1927 					(struct vring_desc *)(uintptr_t)cvq->desc_addr,
1928 					(struct vring_avail *)(uintptr_t)cvq->driver_addr,
1929 					(struct vring_used *)(uintptr_t)cvq->device_addr);
1930 		if (err)
1931 			goto err_vq;
1932 	}
1933 
1934 	return 0;
1935 
1936 err_vq:
1937 	for (--i; i >= 0; i--)
1938 		teardown_vq(ndev, &ndev->vqs[i]);
1939 
1940 	return err;
1941 }
1942 
1943 static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
1944 {
1945 	struct mlx5_vdpa_virtqueue *mvq;
1946 	int i;
1947 
1948 	for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) {
1949 		mvq = &ndev->vqs[i];
1950 		if (!mvq->initialized)
1951 			continue;
1952 
1953 		teardown_vq(ndev, mvq);
1954 	}
1955 }
1956 
1957 static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
1958 {
1959 	if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) {
1960 		if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) {
1961 			/* MQ supported. CVQ index is right above the last data virtqueue's */
1962 			mvdev->max_idx = mvdev->max_vqs;
1963 		} else {
1964 			/* Only CVQ supportted. data virtqueues occupy indices 0 and 1.
1965 			 * CVQ gets index 2
1966 			 */
1967 			mvdev->max_idx = 2;
1968 		}
1969 	} else {
1970 		/* Two data virtqueues only: one for rx and one for tx */
1971 		mvdev->max_idx = 1;
1972 	}
1973 }
1974 
1975 static int mlx5_vdpa_set_features(struct vdpa_device *vdev, u64 features)
1976 {
1977 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1978 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1979 	int err;
1980 
1981 	print_features(mvdev, features, true);
1982 
1983 	err = verify_min_features(mvdev, features);
1984 	if (err)
1985 		return err;
1986 
1987 	ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
1988 	update_cvq_info(mvdev);
1989 	return err;
1990 }
1991 
1992 static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb)
1993 {
1994 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1995 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1996 
1997 	ndev->config_cb = *cb;
1998 }
1999 
2000 #define MLX5_VDPA_MAX_VQ_ENTRIES 256
2001 static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev)
2002 {
2003 	return MLX5_VDPA_MAX_VQ_ENTRIES;
2004 }
2005 
2006 static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev)
2007 {
2008 	return VIRTIO_ID_NET;
2009 }
2010 
2011 static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev)
2012 {
2013 	return PCI_VENDOR_ID_MELLANOX;
2014 }
2015 
2016 static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev)
2017 {
2018 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2019 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2020 
2021 	print_status(mvdev, ndev->mvdev.status, false);
2022 	return ndev->mvdev.status;
2023 }
2024 
2025 static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
2026 {
2027 	struct mlx5_vq_restore_info *ri = &mvq->ri;
2028 	struct mlx5_virtq_attr attr = {};
2029 	int err;
2030 
2031 	if (mvq->initialized) {
2032 		err = query_virtqueue(ndev, mvq, &attr);
2033 		if (err)
2034 			return err;
2035 	}
2036 
2037 	ri->avail_index = attr.available_index;
2038 	ri->used_index = attr.used_index;
2039 	ri->ready = mvq->ready;
2040 	ri->num_ent = mvq->num_ent;
2041 	ri->desc_addr = mvq->desc_addr;
2042 	ri->device_addr = mvq->device_addr;
2043 	ri->driver_addr = mvq->driver_addr;
2044 	ri->restore = true;
2045 	return 0;
2046 }
2047 
2048 static int save_channels_info(struct mlx5_vdpa_net *ndev)
2049 {
2050 	int i;
2051 
2052 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
2053 		memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri));
2054 		save_channel_info(ndev, &ndev->vqs[i]);
2055 	}
2056 	return 0;
2057 }
2058 
2059 static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev)
2060 {
2061 	int i;
2062 
2063 	for (i = 0; i < ndev->mvdev.max_vqs; i++)
2064 		memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
2065 }
2066 
2067 static void restore_channels_info(struct mlx5_vdpa_net *ndev)
2068 {
2069 	struct mlx5_vdpa_virtqueue *mvq;
2070 	struct mlx5_vq_restore_info *ri;
2071 	int i;
2072 
2073 	mlx5_clear_vqs(ndev);
2074 	init_mvqs(ndev);
2075 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
2076 		mvq = &ndev->vqs[i];
2077 		ri = &mvq->ri;
2078 		if (!ri->restore)
2079 			continue;
2080 
2081 		mvq->avail_idx = ri->avail_index;
2082 		mvq->used_idx = ri->used_index;
2083 		mvq->ready = ri->ready;
2084 		mvq->num_ent = ri->num_ent;
2085 		mvq->desc_addr = ri->desc_addr;
2086 		mvq->device_addr = ri->device_addr;
2087 		mvq->driver_addr = ri->driver_addr;
2088 	}
2089 }
2090 
2091 static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
2092 {
2093 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2094 	int err;
2095 
2096 	suspend_vqs(ndev);
2097 	err = save_channels_info(ndev);
2098 	if (err)
2099 		goto err_mr;
2100 
2101 	teardown_driver(ndev);
2102 	mlx5_vdpa_destroy_mr(mvdev);
2103 	err = mlx5_vdpa_create_mr(mvdev, iotlb);
2104 	if (err)
2105 		goto err_mr;
2106 
2107 	if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
2108 		return 0;
2109 
2110 	restore_channels_info(ndev);
2111 	err = setup_driver(mvdev);
2112 	if (err)
2113 		goto err_setup;
2114 
2115 	return 0;
2116 
2117 err_setup:
2118 	mlx5_vdpa_destroy_mr(mvdev);
2119 err_mr:
2120 	return err;
2121 }
2122 
2123 static int setup_driver(struct mlx5_vdpa_dev *mvdev)
2124 {
2125 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2126 	int err;
2127 
2128 	mutex_lock(&ndev->reslock);
2129 	if (ndev->setup) {
2130 		mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n");
2131 		err = 0;
2132 		goto out;
2133 	}
2134 	err = setup_virtqueues(mvdev);
2135 	if (err) {
2136 		mlx5_vdpa_warn(mvdev, "setup_virtqueues\n");
2137 		goto out;
2138 	}
2139 
2140 	err = create_rqt(ndev);
2141 	if (err) {
2142 		mlx5_vdpa_warn(mvdev, "create_rqt\n");
2143 		goto err_rqt;
2144 	}
2145 
2146 	err = create_tir(ndev);
2147 	if (err) {
2148 		mlx5_vdpa_warn(mvdev, "create_tir\n");
2149 		goto err_tir;
2150 	}
2151 
2152 	err = add_fwd_to_tir(ndev);
2153 	if (err) {
2154 		mlx5_vdpa_warn(mvdev, "add_fwd_to_tir\n");
2155 		goto err_fwd;
2156 	}
2157 	ndev->setup = true;
2158 	mutex_unlock(&ndev->reslock);
2159 
2160 	return 0;
2161 
2162 err_fwd:
2163 	destroy_tir(ndev);
2164 err_tir:
2165 	destroy_rqt(ndev);
2166 err_rqt:
2167 	teardown_virtqueues(ndev);
2168 out:
2169 	mutex_unlock(&ndev->reslock);
2170 	return err;
2171 }
2172 
2173 static void teardown_driver(struct mlx5_vdpa_net *ndev)
2174 {
2175 	mutex_lock(&ndev->reslock);
2176 	if (!ndev->setup)
2177 		goto out;
2178 
2179 	remove_fwd_to_tir(ndev);
2180 	destroy_tir(ndev);
2181 	destroy_rqt(ndev);
2182 	teardown_virtqueues(ndev);
2183 	ndev->setup = false;
2184 out:
2185 	mutex_unlock(&ndev->reslock);
2186 }
2187 
2188 static void clear_vqs_ready(struct mlx5_vdpa_net *ndev)
2189 {
2190 	int i;
2191 
2192 	for (i = 0; i < ndev->mvdev.max_vqs; i++)
2193 		ndev->vqs[i].ready = false;
2194 
2195 	ndev->mvdev.cvq.ready = false;
2196 }
2197 
2198 static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
2199 {
2200 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2201 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2202 	int err;
2203 
2204 	print_status(mvdev, status, true);
2205 
2206 	if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
2207 		if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
2208 			err = setup_driver(mvdev);
2209 			if (err) {
2210 				mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
2211 				goto err_setup;
2212 			}
2213 		} else {
2214 			mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
2215 			return;
2216 		}
2217 	}
2218 
2219 	ndev->mvdev.status = status;
2220 	return;
2221 
2222 err_setup:
2223 	mlx5_vdpa_destroy_mr(&ndev->mvdev);
2224 	ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
2225 }
2226 
2227 static int mlx5_vdpa_reset(struct vdpa_device *vdev)
2228 {
2229 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2230 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2231 
2232 	print_status(mvdev, 0, true);
2233 	mlx5_vdpa_info(mvdev, "performing device reset\n");
2234 	teardown_driver(ndev);
2235 	clear_vqs_ready(ndev);
2236 	mlx5_vdpa_destroy_mr(&ndev->mvdev);
2237 	ndev->mvdev.status = 0;
2238 	memset(ndev->event_cbs, 0, sizeof(ndev->event_cbs));
2239 	ndev->mvdev.actual_features = 0;
2240 	++mvdev->generation;
2241 	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
2242 		if (mlx5_vdpa_create_mr(mvdev, NULL))
2243 			mlx5_vdpa_warn(mvdev, "create MR failed\n");
2244 	}
2245 
2246 	return 0;
2247 }
2248 
2249 static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev)
2250 {
2251 	return sizeof(struct virtio_net_config);
2252 }
2253 
2254 static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf,
2255 				 unsigned int len)
2256 {
2257 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2258 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2259 
2260 	if (offset + len <= sizeof(struct virtio_net_config))
2261 		memcpy(buf, (u8 *)&ndev->config + offset, len);
2262 }
2263 
/*
 * vdpa_config_ops::set_config callback. Writing the config space is not
 * supported, so the request is silently ignored.
 */
static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf,
				 unsigned int len)
{
}
2269 
2270 static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
2271 {
2272 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2273 
2274 	return mvdev->generation;
2275 }
2276 
2277 static int mlx5_vdpa_set_map(struct vdpa_device *vdev, struct vhost_iotlb *iotlb)
2278 {
2279 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2280 	bool change_map;
2281 	int err;
2282 
2283 	err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map);
2284 	if (err) {
2285 		mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err);
2286 		return err;
2287 	}
2288 
2289 	if (change_map)
2290 		return mlx5_vdpa_change_map(mvdev, iotlb);
2291 
2292 	return 0;
2293 }
2294 
2295 static void mlx5_vdpa_free(struct vdpa_device *vdev)
2296 {
2297 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2298 	struct mlx5_core_dev *pfmdev;
2299 	struct mlx5_vdpa_net *ndev;
2300 
2301 	ndev = to_mlx5_vdpa_ndev(mvdev);
2302 
2303 	free_resources(ndev);
2304 	mlx5_vdpa_destroy_mr(mvdev);
2305 	if (!is_zero_ether_addr(ndev->config.mac)) {
2306 		pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
2307 		mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
2308 	}
2309 	mlx5_vdpa_free_resources(&ndev->mvdev);
2310 	mutex_destroy(&ndev->reslock);
2311 }
2312 
2313 static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
2314 {
2315 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2316 	struct vdpa_notification_area ret = {};
2317 	struct mlx5_vdpa_net *ndev;
2318 	phys_addr_t addr;
2319 
2320 	if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
2321 		return ret;
2322 
2323 	/* If SF BAR size is smaller than PAGE_SIZE, do not use direct
2324 	 * notification to avoid the risk of mapping pages that contain BAR of more
2325 	 * than one SF
2326 	 */
2327 	if (MLX5_CAP_GEN(mvdev->mdev, log_min_sf_size) + 12 < PAGE_SHIFT)
2328 		return ret;
2329 
2330 	ndev = to_mlx5_vdpa_ndev(mvdev);
2331 	addr = (phys_addr_t)ndev->mvdev.res.phys_kick_addr;
2332 	ret.addr = addr;
2333 	ret.size = PAGE_SIZE;
2334 	return ret;
2335 }
2336 
2337 static int mlx5_get_vq_irq(struct vdpa_device *vdv, u16 idx)
2338 {
2339 	return -EOPNOTSUPP;
2340 }
2341 
2342 static const struct vdpa_config_ops mlx5_vdpa_ops = {
2343 	.set_vq_address = mlx5_vdpa_set_vq_address,
2344 	.set_vq_num = mlx5_vdpa_set_vq_num,
2345 	.kick_vq = mlx5_vdpa_kick_vq,
2346 	.set_vq_cb = mlx5_vdpa_set_vq_cb,
2347 	.set_vq_ready = mlx5_vdpa_set_vq_ready,
2348 	.get_vq_ready = mlx5_vdpa_get_vq_ready,
2349 	.set_vq_state = mlx5_vdpa_set_vq_state,
2350 	.get_vq_state = mlx5_vdpa_get_vq_state,
2351 	.get_vq_notification = mlx5_get_vq_notification,
2352 	.get_vq_irq = mlx5_get_vq_irq,
2353 	.get_vq_align = mlx5_vdpa_get_vq_align,
2354 	.get_features = mlx5_vdpa_get_features,
2355 	.set_features = mlx5_vdpa_set_features,
2356 	.set_config_cb = mlx5_vdpa_set_config_cb,
2357 	.get_vq_num_max = mlx5_vdpa_get_vq_num_max,
2358 	.get_device_id = mlx5_vdpa_get_device_id,
2359 	.get_vendor_id = mlx5_vdpa_get_vendor_id,
2360 	.get_status = mlx5_vdpa_get_status,
2361 	.set_status = mlx5_vdpa_set_status,
2362 	.reset = mlx5_vdpa_reset,
2363 	.get_config_size = mlx5_vdpa_get_config_size,
2364 	.get_config = mlx5_vdpa_get_config,
2365 	.set_config = mlx5_vdpa_set_config,
2366 	.get_generation = mlx5_vdpa_get_generation,
2367 	.set_map = mlx5_vdpa_set_map,
2368 	.free = mlx5_vdpa_free,
2369 };
2370 
2371 static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
2372 {
2373 	u16 hw_mtu;
2374 	int err;
2375 
2376 	err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu);
2377 	if (err)
2378 		return err;
2379 
2380 	*mtu = hw_mtu - MLX5V_ETH_HARD_MTU;
2381 	return 0;
2382 }
2383 
2384 static int alloc_resources(struct mlx5_vdpa_net *ndev)
2385 {
2386 	struct mlx5_vdpa_net_resources *res = &ndev->res;
2387 	int err;
2388 
2389 	if (res->valid) {
2390 		mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n");
2391 		return -EEXIST;
2392 	}
2393 
2394 	err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn);
2395 	if (err)
2396 		return err;
2397 
2398 	err = create_tis(ndev);
2399 	if (err)
2400 		goto err_tis;
2401 
2402 	res->valid = true;
2403 
2404 	return 0;
2405 
2406 err_tis:
2407 	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
2408 	return err;
2409 }
2410 
2411 static void free_resources(struct mlx5_vdpa_net *ndev)
2412 {
2413 	struct mlx5_vdpa_net_resources *res = &ndev->res;
2414 
2415 	if (!res->valid)
2416 		return;
2417 
2418 	destroy_tis(ndev);
2419 	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
2420 	res->valid = false;
2421 }
2422 
2423 static void init_mvqs(struct mlx5_vdpa_net *ndev)
2424 {
2425 	struct mlx5_vdpa_virtqueue *mvq;
2426 	int i;
2427 
2428 	for (i = 0; i < 2 * mlx5_vdpa_max_qps(ndev->mvdev.max_vqs); ++i) {
2429 		mvq = &ndev->vqs[i];
2430 		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
2431 		mvq->index = i;
2432 		mvq->ndev = ndev;
2433 		mvq->fwqp.fw = true;
2434 	}
2435 	for (; i < ndev->mvdev.max_vqs; i++) {
2436 		mvq = &ndev->vqs[i];
2437 		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
2438 		mvq->index = i;
2439 		mvq->ndev = ndev;
2440 	}
2441 }
2442 
2443 struct mlx5_vdpa_mgmtdev {
2444 	struct vdpa_mgmt_dev mgtdev;
2445 	struct mlx5_adev *madev;
2446 	struct mlx5_vdpa_net *ndev;
2447 };
2448 
2449 static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
2450 {
2451 	u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
2452 	u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
2453 	int err;
2454 
2455 	MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE);
2456 	MLX5_SET(query_vport_state_in, in, op_mod, opmod);
2457 	MLX5_SET(query_vport_state_in, in, vport_number, vport);
2458 	if (vport)
2459 		MLX5_SET(query_vport_state_in, in, other_vport, 1);
2460 
2461 	err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
2462 	if (err)
2463 		return 0;
2464 
2465 	return MLX5_GET(query_vport_state_out, out, state);
2466 }
2467 
2468 static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
2469 {
2470 	if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
2471 	    VPORT_STATE_UP)
2472 		return true;
2473 
2474 	return false;
2475 }
2476 
2477 static void update_carrier(struct work_struct *work)
2478 {
2479 	struct mlx5_vdpa_wq_ent *wqent;
2480 	struct mlx5_vdpa_dev *mvdev;
2481 	struct mlx5_vdpa_net *ndev;
2482 
2483 	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
2484 	mvdev = wqent->mvdev;
2485 	ndev = to_mlx5_vdpa_ndev(mvdev);
2486 	if (get_link_state(mvdev))
2487 		ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
2488 	else
2489 		ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
2490 
2491 	if (ndev->config_cb.callback)
2492 		ndev->config_cb.callback(ndev->config_cb.private);
2493 
2494 	kfree(wqent);
2495 }
2496 
2497 static int event_handler(struct notifier_block *nb, unsigned long event, void *param)
2498 {
2499 	struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb);
2500 	struct mlx5_eqe *eqe = param;
2501 	int ret = NOTIFY_DONE;
2502 	struct mlx5_vdpa_wq_ent *wqent;
2503 
2504 	if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
2505 		switch (eqe->sub_type) {
2506 		case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
2507 		case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
2508 			wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
2509 			if (!wqent)
2510 				return NOTIFY_DONE;
2511 
2512 			wqent->mvdev = &ndev->mvdev;
2513 			INIT_WORK(&wqent->work, update_carrier);
2514 			queue_work(ndev->mvdev.wq, &wqent->work);
2515 			ret = NOTIFY_OK;
2516 			break;
2517 		default:
2518 			return NOTIFY_DONE;
2519 		}
2520 		return ret;
2521 	}
2522 	return ret;
2523 }
2524 
2525 static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
2526 			     const struct vdpa_dev_set_config *add_config)
2527 {
2528 	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
2529 	struct virtio_net_config *config;
2530 	struct mlx5_core_dev *pfmdev;
2531 	struct mlx5_vdpa_dev *mvdev;
2532 	struct mlx5_vdpa_net *ndev;
2533 	struct mlx5_core_dev *mdev;
2534 	u32 max_vqs;
2535 	u16 mtu;
2536 	int err;
2537 
2538 	if (mgtdev->ndev)
2539 		return -ENOSPC;
2540 
2541 	mdev = mgtdev->madev->mdev;
2542 	if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) &
2543 	    MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) {
2544 		dev_warn(mdev->device, "missing support for split virtqueues\n");
2545 		return -EOPNOTSUPP;
2546 	}
2547 
2548 	/* we save one virtqueue for control virtqueue should we require it */
2549 	max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues);
2550 	max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS);
2551 
2552 	ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
2553 				 name, false);
2554 	if (IS_ERR(ndev))
2555 		return PTR_ERR(ndev);
2556 
2557 	ndev->mvdev.max_vqs = max_vqs;
2558 	mvdev = &ndev->mvdev;
2559 	mvdev->mdev = mdev;
2560 	init_mvqs(ndev);
2561 	mutex_init(&ndev->reslock);
2562 	config = &ndev->config;
2563 	err = query_mtu(mdev, &mtu);
2564 	if (err)
2565 		goto err_mtu;
2566 
2567 	ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu);
2568 
2569 	if (get_link_state(mvdev))
2570 		ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
2571 	else
2572 		ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
2573 
2574 	if (add_config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
2575 		memcpy(ndev->config.mac, add_config->net.mac, ETH_ALEN);
2576 	} else {
2577 		err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
2578 		if (err)
2579 			goto err_mtu;
2580 	}
2581 
2582 	if (!is_zero_ether_addr(config->mac)) {
2583 		pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
2584 		err = mlx5_mpfs_add_mac(pfmdev, config->mac);
2585 		if (err)
2586 			goto err_mtu;
2587 
2588 		ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MAC);
2589 	}
2590 
2591 	config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, mlx5_vdpa_max_qps(max_vqs));
2592 	mvdev->vdev.dma_dev = &mdev->pdev->dev;
2593 	err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
2594 	if (err)
2595 		goto err_mpfs;
2596 
2597 	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
2598 		err = mlx5_vdpa_create_mr(mvdev, NULL);
2599 		if (err)
2600 			goto err_res;
2601 	}
2602 
2603 	err = alloc_resources(ndev);
2604 	if (err)
2605 		goto err_mr;
2606 
2607 	mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq");
2608 	if (!mvdev->wq) {
2609 		err = -ENOMEM;
2610 		goto err_res2;
2611 	}
2612 
2613 	ndev->nb.notifier_call = event_handler;
2614 	mlx5_notifier_register(mdev, &ndev->nb);
2615 	ndev->cur_num_vqs = 2 * mlx5_vdpa_max_qps(max_vqs);
2616 	mvdev->vdev.mdev = &mgtdev->mgtdev;
2617 	err = _vdpa_register_device(&mvdev->vdev, ndev->cur_num_vqs + 1);
2618 	if (err)
2619 		goto err_reg;
2620 
2621 	mgtdev->ndev = ndev;
2622 	return 0;
2623 
2624 err_reg:
2625 	destroy_workqueue(mvdev->wq);
2626 err_res2:
2627 	free_resources(ndev);
2628 err_mr:
2629 	mlx5_vdpa_destroy_mr(mvdev);
2630 err_res:
2631 	mlx5_vdpa_free_resources(&ndev->mvdev);
2632 err_mpfs:
2633 	if (!is_zero_ether_addr(config->mac))
2634 		mlx5_mpfs_del_mac(pfmdev, config->mac);
2635 err_mtu:
2636 	mutex_destroy(&ndev->reslock);
2637 	put_device(&mvdev->vdev.dev);
2638 	return err;
2639 }
2640 
2641 static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
2642 {
2643 	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
2644 	struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
2645 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2646 
2647 	mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
2648 	destroy_workqueue(mvdev->wq);
2649 	_vdpa_unregister_device(dev);
2650 	mgtdev->ndev = NULL;
2651 }
2652 
2653 static const struct vdpa_mgmtdev_ops mdev_ops = {
2654 	.dev_add = mlx5_vdpa_dev_add,
2655 	.dev_del = mlx5_vdpa_dev_del,
2656 };
2657 
2658 static struct virtio_device_id id_table[] = {
2659 	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
2660 	{ 0 },
2661 };
2662 
2663 static int mlx5v_probe(struct auxiliary_device *adev,
2664 		       const struct auxiliary_device_id *id)
2665 
2666 {
2667 	struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
2668 	struct mlx5_core_dev *mdev = madev->mdev;
2669 	struct mlx5_vdpa_mgmtdev *mgtdev;
2670 	int err;
2671 
2672 	mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL);
2673 	if (!mgtdev)
2674 		return -ENOMEM;
2675 
2676 	mgtdev->mgtdev.ops = &mdev_ops;
2677 	mgtdev->mgtdev.device = mdev->device;
2678 	mgtdev->mgtdev.id_table = id_table;
2679 	mgtdev->mgtdev.config_attr_mask = (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR);
2680 	mgtdev->madev = madev;
2681 
2682 	err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
2683 	if (err)
2684 		goto reg_err;
2685 
2686 	dev_set_drvdata(&adev->dev, mgtdev);
2687 
2688 	return 0;
2689 
2690 reg_err:
2691 	kfree(mgtdev);
2692 	return err;
2693 }
2694 
2695 static void mlx5v_remove(struct auxiliary_device *adev)
2696 {
2697 	struct mlx5_vdpa_mgmtdev *mgtdev;
2698 
2699 	mgtdev = dev_get_drvdata(&adev->dev);
2700 	vdpa_mgmtdev_unregister(&mgtdev->mgtdev);
2701 	kfree(mgtdev);
2702 }
2703 
2704 static const struct auxiliary_device_id mlx5v_id_table[] = {
2705 	{ .name = MLX5_ADEV_NAME ".vnet", },
2706 	{},
2707 };
2708 
2709 MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table);
2710 
2711 static struct auxiliary_driver mlx5v_driver = {
2712 	.name = "vnet",
2713 	.probe = mlx5v_probe,
2714 	.remove = mlx5v_remove,
2715 	.id_table = mlx5v_id_table,
2716 };
2717 
2718 module_auxiliary_driver(mlx5v_driver);
2719