xref: /openbmc/linux/drivers/vdpa/mlx5/net/mlx5_vnet.c (revision e761cc20)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2020 Mellanox Technologies Ltd. */
3 
4 #include <linux/module.h>
5 #include <linux/vdpa.h>
6 #include <linux/vringh.h>
7 #include <uapi/linux/virtio_net.h>
8 #include <uapi/linux/virtio_ids.h>
9 #include <uapi/linux/vdpa.h>
10 #include <linux/virtio_config.h>
11 #include <linux/auxiliary_bus.h>
12 #include <linux/mlx5/cq.h>
13 #include <linux/mlx5/qp.h>
14 #include <linux/mlx5/device.h>
15 #include <linux/mlx5/driver.h>
16 #include <linux/mlx5/vport.h>
17 #include <linux/mlx5/fs.h>
18 #include <linux/mlx5/mlx5_ifc_vdpa.h>
19 #include <linux/mlx5/mpfs.h>
20 #include "mlx5_vdpa.h"
21 
22 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
23 MODULE_DESCRIPTION("Mellanox VDPA driver");
24 MODULE_LICENSE("Dual BSD/GPL");
25 
26 #define to_mlx5_vdpa_ndev(__mvdev)                                             \
27 	container_of(__mvdev, struct mlx5_vdpa_net, mvdev)
28 #define to_mvdev(__vdev) container_of((__vdev), struct mlx5_vdpa_dev, vdev)
29 
30 #define VALID_FEATURES_MASK                                                                        \
31 	(BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |                                   \
32 	 BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) |   \
33 	 BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |                             \
34 	 BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \
35 	 BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) |   \
36 	 BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |      \
37 	 BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) |                                 \
38 	 BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) |                      \
39 	 BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) |  \
40 	 BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) |           \
41 	 BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) |                          \
42 	 BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) |      \
43 	 BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV))
44 
45 #define VALID_STATUS_MASK                                                                          \
46 	(VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK |        \
47 	 VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)
48 
49 #define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature)))
50 
51 #define MLX5V_UNTAGGED 0x1000
52 
/* Transport objects shared by all data virtqueues of one net device.
 * @valid is set once the resources have been created.
 */
struct mlx5_vdpa_net_resources {
	u32 tisn;	/* transport interface send object number */
	u32 tdn;	/* transport domain number */
	u32 tirn;	/* transport interface receive object number */
	u32 rqtn;	/* receive queue table number */
	bool valid;
};
60 
/* Fragmented buffer holding the CQEs of one completion queue. */
struct mlx5_vdpa_cq_buf {
	struct mlx5_frag_buf_ctrl fbc;	/* fragment addressing state */
	struct mlx5_frag_buf frag_buf;	/* backing pages */
	int cqe_size;			/* size of one CQE in bytes */
	int nent;			/* number of entries in the buffer */
};
67 
/* Completion queue of a virtqueue's notification channel. */
struct mlx5_vdpa_cq {
	struct mlx5_core_cq mcq;	/* core CQ object */
	struct mlx5_vdpa_cq_buf buf;	/* CQE buffer */
	struct mlx5_db db;		/* doorbell record */
	int cqe;			/* number of entries */
};
74 
/* Driver-provided memory (UMEM) registered with firmware for internal
 * virtqueue use; see set_umem_size() for the size computation.
 */
struct mlx5_vdpa_umem {
	struct mlx5_frag_buf_ctrl fbc;
	struct mlx5_frag_buf frag_buf;	/* backing pages */
	int size;			/* size in bytes */
	u32 id;				/* UMEM object id returned by firmware */
};
81 
/* One end of the RC connection used as the completion notification
 * channel (see struct mlx5_vdpa_virtqueue).
 */
struct mlx5_vdpa_qp {
	struct mlx5_core_qp mqp;
	struct mlx5_frag_buf frag_buf;	/* RQ buffer (driver-owned end only) */
	struct mlx5_db db;		/* doorbell record (driver-owned end only) */
	u16 head;	/* RQ producer index, published by rx_post() */
	bool fw;	/* true for the firmware-owned end */
};
89 
/* Saved virtqueue state used to re-create the queue with identical
 * parameters; @restore marks the snapshot as valid — presumably set
 * when the state is captured (confirm against the save path).
 */
struct mlx5_vq_restore_info {
	u32 num_ent;
	u64 desc_addr;
	u64 device_addr;
	u64 driver_addr;
	u16 avail_index;
	u16 used_index;
	bool ready;
	bool restore;
};
100 
/* Per-virtqueue state, pairing the virtio ring parameters with the
 * hardware objects (virtqueue object, CQ, QPs, umems) backing it.
 */
struct mlx5_vdpa_virtqueue {
	bool ready;
	u64 desc_addr;
	u64 device_addr;
	u64 driver_addr;
	u32 num_ent;

	/* Resources for implementing the notification channel from the device
	 * to the driver. fwqp is the firmware end of an RC connection; the
	 * other end is vqqp used by the driver. cq is where completions are
	 * reported.
	 */
	struct mlx5_vdpa_cq cq;
	struct mlx5_vdpa_qp fwqp;
	struct mlx5_vdpa_qp vqqp;

	/* umem resources are required for the virtqueue operation. Their use
	 * is internal and they must be provided by the driver.
	 */
	struct mlx5_vdpa_umem umem1;
	struct mlx5_vdpa_umem umem2;
	struct mlx5_vdpa_umem umem3;

	u32 counter_set_id;	/* virtio queue counters object id */
	bool initialized;	/* hardware resources were created */
	int index;		/* virtqueue index as seen by the vdpa core */
	u32 virtq_id;		/* firmware object id of the virtqueue */
	struct mlx5_vdpa_net *ndev;	/* owning net device */
	u16 avail_idx;		/* written as hw_available_index on create */
	u16 used_idx;		/* written as hw_used_index on create */
	int fw_state;		/* MLX5_VIRTIO_NET_Q_OBJECT_* state */

	/* keep last in the struct */
	struct mlx5_vq_restore_info ri;
};
136 
137 static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
138 {
139 	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) {
140 		if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
141 			return idx < 2;
142 		else
143 			return idx < 3;
144 	}
145 
146 	return idx <= mvdev->max_idx;
147 }
148 
149 #define MLX5V_MACVLAN_SIZE 256
150 
/* State of one mlx5 vdpa net device instance. */
struct mlx5_vdpa_net {
	struct mlx5_vdpa_dev mvdev;
	struct mlx5_vdpa_net_resources res;
	struct virtio_net_config config;
	struct mlx5_vdpa_virtqueue *vqs;
	struct vdpa_callback *event_cbs;	/* per-VQ completion callbacks */

	/* Serialize vq resources creation and destruction. This is required
	 * since memory map might change and we need to destroy and create
	 * resources while the driver is operational.
	 */
	struct rw_semaphore reslock;
	struct mlx5_flow_table *rxft;	/* RX flow steering table */
	bool setup;
	u32 cur_num_vqs;
	u32 rqt_size;
	bool nb_registered;
	struct notifier_block nb;
	struct vdpa_callback config_cb;	/* config-change callback */
	struct mlx5_vdpa_wq_ent cvq_ent;
	struct hlist_head macvlan_hash[MLX5V_MACVLAN_SIZE];
};
173 
/* macvlan_hash entry tracking the steering rules installed for one
 * MAC/VLAN combination, keyed by the packed value in @macvlan.
 */
struct macvlan_node {
	struct hlist_node hlist;
	struct mlx5_flow_handle *ucast_rule;	/* unicast steering rule */
	struct mlx5_flow_handle *mcast_rule;	/* multicast steering rule */
	u64 macvlan;				/* packed lookup key */
};
180 
181 static void free_resources(struct mlx5_vdpa_net *ndev);
182 static void init_mvqs(struct mlx5_vdpa_net *ndev);
183 static int setup_driver(struct mlx5_vdpa_dev *mvdev);
184 static void teardown_driver(struct mlx5_vdpa_net *ndev);
185 
186 static bool mlx5_vdpa_debug;
187 
188 #define MLX5_CVQ_MAX_ENT 16
189 
190 #define MLX5_LOG_VIO_FLAG(_feature)                                                                \
191 	do {                                                                                       \
192 		if (features & BIT_ULL(_feature))                                                  \
193 			mlx5_vdpa_info(mvdev, "%s\n", #_feature);                                  \
194 	} while (0)
195 
196 #define MLX5_LOG_VIO_STAT(_status)                                                                 \
197 	do {                                                                                       \
198 		if (status & (_status))                                                            \
199 			mlx5_vdpa_info(mvdev, "%s\n", #_status);                                   \
200 	} while (0)
201 
/* TODO: cross-endian support */
/* Virtio data is little-endian either for legacy LE guests or whenever
 * VIRTIO_F_VERSION_1 was negotiated.
 */
static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
{
	return virtio_legacy_is_little_endian() ||
		(mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
}
208 
/* Convert a virtio 16-bit value to CPU endianness per negotiated features. */
static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val)
{
	return __virtio16_to_cpu(mlx5_vdpa_is_little_endian(mvdev), val);
}
213 
/* Convert a CPU 16-bit value to virtio endianness per negotiated features. */
static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
{
	return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
}
218 
219 static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
220 {
221 	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
222 		return 2;
223 
224 	return mvdev->max_vqs;
225 }
226 
/* True if @idx designates the control virtqueue. */
static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
{
	return idx == ctrl_vq_idx(mvdev);
}
231 
232 static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
233 {
234 	if (status & ~VALID_STATUS_MASK)
235 		mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n",
236 			       status & ~VALID_STATUS_MASK);
237 
238 	if (!mlx5_vdpa_debug)
239 		return;
240 
241 	mlx5_vdpa_info(mvdev, "driver status %s", set ? "set" : "get");
242 	if (set && !status) {
243 		mlx5_vdpa_info(mvdev, "driver resets the device\n");
244 		return;
245 	}
246 
247 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE);
248 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER);
249 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK);
250 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK);
251 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET);
252 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED);
253 }
254 
/* Log the feature bits read or written by the driver. Invalid bits are
 * always warned about; the per-feature dump is emitted only when
 * mlx5_vdpa_debug is enabled.
 */
static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set)
{
	if (features & ~VALID_FEATURES_MASK)
		mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n",
			       features & ~VALID_FEATURES_MASK);

	if (!mlx5_vdpa_debug)
		return;

	mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads");
	if (!features)
		mlx5_vdpa_info(mvdev, "all feature bits are cleared\n");

	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV);
}
303 
/* Create the transport interface send (TIS) object, bound to the
 * device's transport domain; the TIS number is stored in
 * ndev->res.tisn. Returns 0 on success or a negative error code.
 */
static int create_tis(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
	void *tisc;
	int err;

	tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
	MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn);
	err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn);
	if (err)
		mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err);

	return err;
}
319 
/* Destroy the TIS object created by create_tis(). */
static void destroy_tis(struct mlx5_vdpa_net *ndev)
{
	mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn);
}
324 
325 #define MLX5_VDPA_CQE_SIZE 64
326 #define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE)
327 
/* Allocate a fragmented buffer for @nent 64-byte CQEs on the device's
 * NUMA node and set up its fragment addressing control.
 * Returns 0 on success or a negative error code.
 */
static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent)
{
	struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
	u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE;
	u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE;
	int err;

	err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf,
				       ndev->mvdev.mdev->priv.numa_node);
	if (err)
		return err;

	mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);

	buf->cqe_size = MLX5_VDPA_CQE_SIZE;
	buf->nent = nent;

	return 0;
}
347 
/* Allocate @size bytes of fragmented buffer for a UMEM on the device's
 * NUMA node. Returns 0 on success or a negative error code.
 */
static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size)
{
	struct mlx5_frag_buf *frag_buf = &umem->frag_buf;

	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf,
					ndev->mvdev.mdev->priv.numa_node);
}
355 
/* Free the CQE buffer allocated by cq_frag_buf_alloc(). */
static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf)
{
	mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf);
}
360 
/* Return a pointer to CQE number @n in the CQ buffer. */
static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n)
{
	return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n);
}
365 
366 static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf)
367 {
368 	struct mlx5_cqe64 *cqe64;
369 	void *cqe;
370 	int i;
371 
372 	for (i = 0; i < buf->nent; i++) {
373 		cqe = get_cqe(vcq, i);
374 		cqe64 = cqe;
375 		cqe64->op_own = MLX5_CQE_INVALID << 4;
376 	}
377 }
378 
/* Return the CQE at consumer index @n if it is owned by software:
 * its opcode must not be MLX5_CQE_INVALID and its ownership bit must
 * match the current pass over the ring (the usual mlx5 CQ ownership
 * convention). Returns NULL if no new completion is available.
 */
static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n)
{
	struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1));

	if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe)))
		return cqe64;

	return NULL;
}
389 
/* Advance the RQ head by @n entries and publish the new head through
 * the doorbell record.
 */
static void rx_post(struct mlx5_vdpa_qp *vqp, int n)
{
	vqp->head += n;
	vqp->db.db[0] = cpu_to_be32(vqp->head);
}
395 
396 static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in,
397 		       struct mlx5_vdpa_virtqueue *mvq, u32 num_ent)
398 {
399 	struct mlx5_vdpa_qp *vqp;
400 	__be64 *pas;
401 	void *qpc;
402 
403 	vqp = fw ? &mvq->fwqp : &mvq->vqqp;
404 	MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid);
405 	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
406 	if (vqp->fw) {
407 		/* Firmware QP is allocated by the driver for the firmware's
408 		 * use so we can skip part of the params as they will be chosen by firmware
409 		 */
410 		qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
411 		MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
412 		MLX5_SET(qpc, qpc, no_sq, 1);
413 		return;
414 	}
415 
416 	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
417 	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
418 	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
419 	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
420 	MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index);
421 	MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
422 	MLX5_SET(qpc, qpc, no_sq, 1);
423 	MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn);
424 	MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent));
425 	MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
426 	pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas);
427 	mlx5_fill_page_frag_array(&vqp->frag_buf, pas);
428 }
429 
/* Allocate the RQ buffer (@num_ent data segments) for the driver-owned
 * QP on the device's NUMA node. Returns 0 or a negative error code.
 */
static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent)
{
	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev,
					num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf,
					ndev->mvdev.mdev->priv.numa_node);
}
436 
/* Free the RQ buffer allocated by rq_buf_alloc(). */
static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
{
	mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf);
}
441 
442 static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
443 		     struct mlx5_vdpa_qp *vqp)
444 {
445 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
446 	int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
447 	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
448 	void *qpc;
449 	void *in;
450 	int err;
451 
452 	if (!vqp->fw) {
453 		vqp = &mvq->vqqp;
454 		err = rq_buf_alloc(ndev, vqp, mvq->num_ent);
455 		if (err)
456 			return err;
457 
458 		err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db);
459 		if (err)
460 			goto err_db;
461 		inlen += vqp->frag_buf.npages * sizeof(__be64);
462 	}
463 
464 	in = kzalloc(inlen, GFP_KERNEL);
465 	if (!in) {
466 		err = -ENOMEM;
467 		goto err_kzalloc;
468 	}
469 
470 	qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent);
471 	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
472 	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
473 	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
474 	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
475 	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
476 	if (!vqp->fw)
477 		MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma);
478 	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
479 	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
480 	kfree(in);
481 	if (err)
482 		goto err_kzalloc;
483 
484 	vqp->mqp.uid = ndev->mvdev.res.uid;
485 	vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);
486 
487 	if (!vqp->fw)
488 		rx_post(vqp, mvq->num_ent);
489 
490 	return 0;
491 
492 err_kzalloc:
493 	if (!vqp->fw)
494 		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
495 err_db:
496 	if (!vqp->fw)
497 		rq_buf_free(ndev, vqp);
498 
499 	return err;
500 }
501 
/* Destroy a QP of the notification channel; for the driver-owned end
 * the doorbell record and RQ buffer are freed as well.
 */
static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
{
	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};

	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
	MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
	MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
	if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
		mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn);
	if (!vqp->fw) {
		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
		rq_buf_free(ndev, vqp);
	}
}
516 
/* Return the next software-owned CQE, or NULL if none is pending. */
static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
{
	return get_sw_cqe(cq, cq->mcq.cons_index);
}
521 
/* Consume one completion from the CQ. Returns 0 if a CQE was consumed
 * or -EAGAIN if the queue is empty.
 */
static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
{
	struct mlx5_cqe64 *cqe64;

	cqe64 = next_cqe_sw(vcq);
	if (!cqe64)
		return -EAGAIN;

	vcq->mcq.cons_index++;
	return 0;
}
533 
/* Acknowledge @num consumed completions: update the CQ consumer index,
 * replenish the RQ, and forward the event to the vdpa core callback.
 */
static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
{
	struct mlx5_vdpa_net *ndev = mvq->ndev;
	struct vdpa_callback *event_cb;

	event_cb = &ndev->event_cbs[mvq->index];
	mlx5_cq_set_ci(&mvq->cq.mcq);

	/* make sure CQ consumer update is visible to the hardware before updating
	 * RX doorbell record.
	 */
	dma_wmb();
	rx_post(&mvq->vqqp, num);
	if (event_cb->callback)
		event_cb->callback(event_cb->private);
}
550 
/* CQ completion handler: drain all pending CQEs, acknowledging them in
 * batches of at most half the ring so a continuous completion stream
 * cannot starve the doorbell update, then re-arm the CQ.
 */
static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
{
	struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
	struct mlx5_vdpa_net *ndev = mvq->ndev;
	void __iomem *uar_page = ndev->mvdev.res.uar->map;
	int num = 0;

	while (!mlx5_vdpa_poll_one(&mvq->cq)) {
		num++;
		if (num > mvq->num_ent / 2) {
			/* If completions keep coming while we poll, we want to
			 * let the hardware know that we consumed them by
			 * updating the doorbell record.  We also let vdpa core
			 * know about this so it passes it on the virtio driver
			 * on the guest.
			 */
			mlx5_vdpa_handle_completions(mvq, num);
			num = 0;
		}
	}

	if (num)
		mlx5_vdpa_handle_completions(mvq, num);

	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
}
577 
578 static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
579 {
580 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
581 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
582 	void __iomem *uar_page = ndev->mvdev.res.uar->map;
583 	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
584 	struct mlx5_vdpa_cq *vcq = &mvq->cq;
585 	__be64 *pas;
586 	int inlen;
587 	void *cqc;
588 	void *in;
589 	int err;
590 	int eqn;
591 
592 	err = mlx5_db_alloc(mdev, &vcq->db);
593 	if (err)
594 		return err;
595 
596 	vcq->mcq.set_ci_db = vcq->db.db;
597 	vcq->mcq.arm_db = vcq->db.db + 1;
598 	vcq->mcq.cqe_sz = 64;
599 
600 	err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
601 	if (err)
602 		goto err_db;
603 
604 	cq_frag_buf_init(vcq, &vcq->buf);
605 
606 	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
607 		MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages;
608 	in = kzalloc(inlen, GFP_KERNEL);
609 	if (!in) {
610 		err = -ENOMEM;
611 		goto err_vzalloc;
612 	}
613 
614 	MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid);
615 	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
616 	mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas);
617 
618 	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
619 	MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
620 
621 	/* Use vector 0 by default. Consider adding code to choose least used
622 	 * vector.
623 	 */
624 	err = mlx5_vector2eqn(mdev, 0, &eqn);
625 	if (err)
626 		goto err_vec;
627 
628 	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
629 	MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
630 	MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
631 	MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
632 	MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);
633 
634 	err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
635 	if (err)
636 		goto err_vec;
637 
638 	vcq->mcq.comp = mlx5_vdpa_cq_comp;
639 	vcq->cqe = num_ent;
640 	vcq->mcq.set_ci_db = vcq->db.db;
641 	vcq->mcq.arm_db = vcq->db.db + 1;
642 	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
643 	kfree(in);
644 	return 0;
645 
646 err_vec:
647 	kfree(in);
648 err_vzalloc:
649 	cq_frag_buf_free(ndev, &vcq->buf);
650 err_db:
651 	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
652 	return err;
653 }
654 
/* Destroy the CQ of virtqueue @idx and free its buffer and doorbell.
 * If the firmware destroy command fails, nothing is freed.
 */
static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
{
	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	struct mlx5_vdpa_cq *vcq = &mvq->cq;

	if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) {
		mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn);
		return;
	}
	cq_frag_buf_free(ndev, &vcq->buf);
	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
}
668 
669 static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
670 			  struct mlx5_vdpa_umem **umemp)
671 {
672 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
673 	int p_a;
674 	int p_b;
675 
676 	switch (num) {
677 	case 1:
678 		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_a);
679 		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_b);
680 		*umemp = &mvq->umem1;
681 		break;
682 	case 2:
683 		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_a);
684 		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_b);
685 		*umemp = &mvq->umem2;
686 		break;
687 	case 3:
688 		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_a);
689 		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_b);
690 		*umemp = &mvq->umem3;
691 		break;
692 	}
693 	(*umemp)->size = p_a * mvq->num_ent + p_b;
694 }
695 
/* Free the backing buffer of a UMEM allocated by umem_frag_buf_alloc(). */
static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem)
{
	mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf);
}
700 
/* Allocate the backing buffer for umem @num (1..3) of @mvq and register
 * it with firmware via CREATE_UMEM; the returned object id is stored in
 * umem->id. Returns 0 on success or a negative error code.
 */
static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
{
	int inlen;
	u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {};
	void *um;
	void *in;
	int err;
	__be64 *pas;
	struct mlx5_vdpa_umem *umem;

	set_umem_size(ndev, mvq, num, &umem);
	err = umem_frag_buf_alloc(ndev, umem, umem->size);
	if (err)
		return err;

	/* command length includes one MTT entry per backing page */
	inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_in;
	}

	MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM);
	MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid);
	um = MLX5_ADDR_OF(create_umem_in, in, umem);
	MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages);

	pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]);
	mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW);

	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
	if (err) {
		mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err);
		goto err_cmd;
	}

	kfree(in);
	umem->id = MLX5_GET(create_umem_out, out, umem_id);

	return 0;

err_cmd:
	kfree(in);
err_in:
	umem_frag_buf_free(ndev, umem);
	return err;
}
750 
751 static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
752 {
753 	u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {};
754 	u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {};
755 	struct mlx5_vdpa_umem *umem;
756 
757 	switch (num) {
758 	case 1:
759 		umem = &mvq->umem1;
760 		break;
761 	case 2:
762 		umem = &mvq->umem2;
763 		break;
764 	case 3:
765 		umem = &mvq->umem3;
766 		break;
767 	}
768 
769 	MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM);
770 	MLX5_SET(destroy_umem_in, in, umem_id, umem->id);
771 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
772 		return;
773 
774 	umem_frag_buf_free(ndev, umem);
775 }
776 
/* Create all three umems required by a virtqueue. On failure, umems
 * created so far are destroyed in reverse order.
 * Returns 0 on success or a negative error code.
 */
static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int num;
	int err;

	for (num = 1; num <= 3; num++) {
		err = create_umem(ndev, mvq, num);
		if (err)
			goto err_umem;
	}
	return 0;

err_umem:
	for (num--; num > 0; num--)
		umem_destroy(ndev, mvq, num);

	return err;
}
795 
/* Destroy all three umems of @mvq in reverse creation order. */
static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int num = 3;

	while (num > 0)
		umem_destroy(ndev, mvq, num--);
}
803 
/* Pick the virtio queue layout supported by the device, preferring
 * split rings; warns if neither split nor packed is advertised.
 */
static int get_queue_type(struct mlx5_vdpa_net *ndev)
{
	u32 type_mask;

	type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type);

	/* prefer split queue */
	if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)
		return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT;

	WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED));

	return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED;
}
818 
819 static bool vq_is_tx(u16 idx)
820 {
821 	return idx % 2;
822 }
823 
/* Pack virtio feature bits 12..3 of the virtqueue object's
 * queue_feature_bit_mask_12_3 field from the negotiated feature word.
 */
static u16 get_features_12_3(u64 features)
{
	return (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << 9) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << 8) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << 7) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_CSUM)) << 6);
}
831 
/* True if the device supports virtio queue counter objects. */
static bool counters_supported(const struct mlx5_vdpa_dev *mvdev)
{
	return MLX5_CAP_GEN_64(mvdev->mdev, general_obj_types) &
	       BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
}
837 
/* Create the firmware virtqueue object for @mvq: umems are created
 * first, then the CREATE_GENERAL_OBJECT command is filled with ring
 * addresses, negotiated features, the notification QP and the umems.
 * On success the object id is stored in mvq->virtq_id and fw_state is
 * set to INIT. Returns 0 or a negative error code.
 */
static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
	u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
	void *obj_context;
	void *cmd_hdr;
	void *vq_ctx;
	void *in;
	int err;

	err = umems_create(ndev, mvq);
	if (err)
		return err;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_alloc;
	}

	cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);

	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);

	obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);
	/* resume from the saved ring indices (zero on first creation) */
	MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
	MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
		 get_features_12_3(ndev->mvdev.actual_features));
	vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
	MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));

	if (vq_is_tx(mvq->index))
		MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);

	/* completions are delivered through the fwqp/vqqp RC channel */
	MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
	MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
	MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
	MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
	MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
		 !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
	MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
	MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
	MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
	MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, ndev->mvdev.mr.mkey);
	MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
	MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
	MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
	MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size);
	MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
	MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
	MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
	if (counters_supported(&ndev->mvdev))
		MLX5_SET(virtio_q, vq_ctx, counter_set_id, mvq->counter_set_id);

	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
	if (err)
		goto err_cmd;

	mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
	kfree(in);
	mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);

	return 0;

err_cmd:
	kfree(in);
err_alloc:
	umems_destroy(ndev, mvq);
	return err;
}
911 
/* Destroy the firmware virtqueue object of @mvq and its umems; on
 * success fw_state is reset to NONE. If the destroy command fails the
 * umems are intentionally left registered.
 */
static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {};
	u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {};

	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode,
		 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id);
	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid);
	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type,
		 MLX5_OBJ_TYPE_VIRTIO_NET_Q);
	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) {
		mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
		return;
	}
	mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
	umems_destroy(ndev, mvq);
}
930 
931 static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
932 {
933 	return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn;
934 }
935 
936 static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
937 {
938 	return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn;
939 }
940 
941 static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out,
942 			int *outlen, u32 qpn, u32 rqpn)
943 {
944 	void *qpc;
945 	void *pp;
946 
947 	switch (cmd) {
948 	case MLX5_CMD_OP_2RST_QP:
949 		*inlen = MLX5_ST_SZ_BYTES(qp_2rst_in);
950 		*outlen = MLX5_ST_SZ_BYTES(qp_2rst_out);
951 		*in = kzalloc(*inlen, GFP_KERNEL);
952 		*out = kzalloc(*outlen, GFP_KERNEL);
953 		if (!*in || !*out)
954 			goto outerr;
955 
956 		MLX5_SET(qp_2rst_in, *in, opcode, cmd);
957 		MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid);
958 		MLX5_SET(qp_2rst_in, *in, qpn, qpn);
959 		break;
960 	case MLX5_CMD_OP_RST2INIT_QP:
961 		*inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in);
962 		*outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out);
963 		*in = kzalloc(*inlen, GFP_KERNEL);
964 		*out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL);
965 		if (!*in || !*out)
966 			goto outerr;
967 
968 		MLX5_SET(rst2init_qp_in, *in, opcode, cmd);
969 		MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid);
970 		MLX5_SET(rst2init_qp_in, *in, qpn, qpn);
971 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
972 		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
973 		MLX5_SET(qpc, qpc, rwe, 1);
974 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
975 		MLX5_SET(ads, pp, vhca_port_num, 1);
976 		break;
977 	case MLX5_CMD_OP_INIT2RTR_QP:
978 		*inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in);
979 		*outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out);
980 		*in = kzalloc(*inlen, GFP_KERNEL);
981 		*out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL);
982 		if (!*in || !*out)
983 			goto outerr;
984 
985 		MLX5_SET(init2rtr_qp_in, *in, opcode, cmd);
986 		MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid);
987 		MLX5_SET(init2rtr_qp_in, *in, qpn, qpn);
988 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
989 		MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
990 		MLX5_SET(qpc, qpc, log_msg_max, 30);
991 		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
992 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
993 		MLX5_SET(ads, pp, fl, 1);
994 		break;
995 	case MLX5_CMD_OP_RTR2RTS_QP:
996 		*inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in);
997 		*outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out);
998 		*in = kzalloc(*inlen, GFP_KERNEL);
999 		*out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL);
1000 		if (!*in || !*out)
1001 			goto outerr;
1002 
1003 		MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd);
1004 		MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid);
1005 		MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn);
1006 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
1007 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1008 		MLX5_SET(ads, pp, ack_timeout, 14);
1009 		MLX5_SET(qpc, qpc, retry_count, 7);
1010 		MLX5_SET(qpc, qpc, rnr_retry, 7);
1011 		break;
1012 	default:
1013 		goto outerr_nullify;
1014 	}
1015 
1016 	return;
1017 
1018 outerr:
1019 	kfree(*in);
1020 	kfree(*out);
1021 outerr_nullify:
1022 	*in = NULL;
1023 	*out = NULL;
1024 }
1025 
/* Release a mailbox pair obtained from alloc_inout(); either pointer may be
 * NULL.
 */
static void free_inout(void *in, void *out)
{
	kfree(out);
	kfree(in);
}
1031 
1032 /* Two QPs are used by each virtqueue. One is used by the driver and one by
1033  * firmware. The fw argument indicates whether the subjected QP is the one used
1034  * by firmware.
1035  */
1036 static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd)
1037 {
1038 	int outlen;
1039 	int inlen;
1040 	void *out;
1041 	void *in;
1042 	int err;
1043 
1044 	alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw));
1045 	if (!in || !out)
1046 		return -ENOMEM;
1047 
1048 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen);
1049 	free_inout(in, out);
1050 	return err;
1051 }
1052 
1053 static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1054 {
1055 	int err;
1056 
1057 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP);
1058 	if (err)
1059 		return err;
1060 
1061 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP);
1062 	if (err)
1063 		return err;
1064 
1065 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP);
1066 	if (err)
1067 		return err;
1068 
1069 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP);
1070 	if (err)
1071 		return err;
1072 
1073 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP);
1074 	if (err)
1075 		return err;
1076 
1077 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP);
1078 	if (err)
1079 		return err;
1080 
1081 	return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP);
1082 }
1083 
/* Snapshot of a firmware virtqueue object as returned by query_virtqueue(). */
struct mlx5_virtq_attr {
	u8 state;		/* MLX5_VIRTIO_NET_Q_OBJECT_STATE_* */
	u16 available_index;	/* hw_available_index from the object context */
	u16 used_index;		/* hw_used_index from the object context */
};
1089 
/* Query the firmware virtqueue object of @mvq and fill @attr with its state
 * and hardware available/used indices.
 *
 * Returns 0 on success or a negative errno; @attr is only written on
 * success.
 */
static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
			   struct mlx5_virtq_attr *attr)
{
	int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out);
	u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {};
	void *out;
	void *obj_context;
	void *cmd_hdr;
	int err;

	/* Output mailbox is too large for the stack */
	out = kzalloc(outlen, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr);

	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen);
	if (err)
		goto err_cmd;

	obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context);
	memset(attr, 0, sizeof(*attr));
	attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
	attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);
	attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index);
	kfree(out);
	return 0;

err_cmd:
	kfree(out);
	return err;
}
1126 
1127 static bool is_valid_state_change(int oldstate, int newstate)
1128 {
1129 	switch (oldstate) {
1130 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
1131 		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
1132 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
1133 		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
1134 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
1135 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
1136 	default:
1137 		return false;
1138 	}
1139 }
1140 
/* Transition the firmware virtqueue object of @mvq to @state.
 *
 * Returns 0 without issuing a command if the object was never created,
 * -EINVAL for a transition not allowed by is_valid_state_change(), otherwise
 * the command result. On success the cached fw_state is updated.
 */
static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
{
	int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
	u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {};
	void *obj_context;
	void *cmd_hdr;
	void *in;
	int err;

	/* Nothing to modify if the firmware object does not exist */
	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
		return 0;

	if (!is_valid_state_change(mvq->fw_state, state))
		return -EINVAL;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr);

	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);

	obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context);
	/* Only the state field is being modified */
	MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select,
		   MLX5_VIRTQ_MODIFY_MASK_STATE);
	MLX5_SET(virtio_net_q_object, obj_context, state, state);
	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
	kfree(in);
	if (!err)
		mvq->fw_state = state;

	return err;
}
1178 
/* Create a virtio queue counters object for @mvq and record its id in
 * mvq->counter_set_id. A no-op returning 0 when the device does not support
 * queue counters.
 */
static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	u32 in[MLX5_ST_SZ_DW(create_virtio_q_counters_in)] = {};
	u32 out[MLX5_ST_SZ_DW(create_virtio_q_counters_out)] = {};
	void *cmd_hdr;
	int err;

	if (!counters_supported(&ndev->mvdev))
		return 0;

	cmd_hdr = MLX5_ADDR_OF(create_virtio_q_counters_in, in, hdr);

	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);

	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
	if (err)
		return err;

	mvq->counter_set_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);

	return 0;
}
1203 
/* Destroy the virtio queue counters object of @mvq. A no-op when the device
 * does not support queue counters; failures only produce a warning.
 */
static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	u32 in[MLX5_ST_SZ_DW(destroy_virtio_q_counters_in)] = {};
	u32 out[MLX5_ST_SZ_DW(destroy_virtio_q_counters_out)] = {};

	if (!counters_supported(&ndev->mvdev))
		return;

	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_id, mvq->counter_set_id);
	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.uid, ndev->mvdev.res.uid);
	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
		mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id);
}
1219 
1220 static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1221 {
1222 	u16 idx = mvq->index;
1223 	int err;
1224 
1225 	if (!mvq->num_ent)
1226 		return 0;
1227 
1228 	if (mvq->initialized)
1229 		return 0;
1230 
1231 	err = cq_create(ndev, idx, mvq->num_ent);
1232 	if (err)
1233 		return err;
1234 
1235 	err = qp_create(ndev, mvq, &mvq->fwqp);
1236 	if (err)
1237 		goto err_fwqp;
1238 
1239 	err = qp_create(ndev, mvq, &mvq->vqqp);
1240 	if (err)
1241 		goto err_vqqp;
1242 
1243 	err = connect_qps(ndev, mvq);
1244 	if (err)
1245 		goto err_connect;
1246 
1247 	err = counter_set_alloc(ndev, mvq);
1248 	if (err)
1249 		goto err_counter;
1250 
1251 	err = create_virtqueue(ndev, mvq);
1252 	if (err)
1253 		goto err_connect;
1254 
1255 	if (mvq->ready) {
1256 		err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
1257 		if (err) {
1258 			mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n",
1259 				       idx, err);
1260 			goto err_connect;
1261 		}
1262 	}
1263 
1264 	mvq->initialized = true;
1265 	return 0;
1266 
1267 err_connect:
1268 	counter_set_dealloc(ndev, mvq);
1269 err_counter:
1270 	qp_destroy(ndev, &mvq->vqqp);
1271 err_vqqp:
1272 	qp_destroy(ndev, &mvq->fwqp);
1273 err_fwqp:
1274 	cq_destroy(ndev, idx);
1275 	return err;
1276 }
1277 
/* Suspend a ready virtqueue and cache its current hardware available/used
 * indices in @mvq so they can be restored later. A no-op for queues that
 * are not initialized or not in the RDY state; failures only warn.
 */
static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	struct mlx5_virtq_attr attr;

	if (!mvq->initialized)
		return;

	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
		return;

	if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND))
		mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n");

	/* Snapshot the indices even if the suspend itself only warned */
	if (query_virtqueue(ndev, mvq, &attr)) {
		mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n");
		return;
	}
	mvq->avail_idx = attr.available_index;
	mvq->used_idx = attr.used_index;
}
1298 
1299 static void suspend_vqs(struct mlx5_vdpa_net *ndev)
1300 {
1301 	int i;
1302 
1303 	for (i = 0; i < ndev->mvdev.max_vqs; i++)
1304 		suspend_vq(ndev, &ndev->vqs[i]);
1305 }
1306 
/* Tear down all resources of one data virtqueue in reverse order of
 * setup_vq(), first suspending it so the indices are preserved in @mvq.
 * A no-op for queues that were never initialized.
 */
static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	if (!mvq->initialized)
		return;

	suspend_vq(ndev, mvq);
	destroy_virtqueue(ndev, mvq);
	counter_set_dealloc(ndev, mvq);
	qp_destroy(ndev, &mvq->vqqp);
	qp_destroy(ndev, &mvq->fwqp);
	cq_destroy(ndev, mvq->index);
	mvq->initialized = false;
}
1320 
1321 static int create_rqt(struct mlx5_vdpa_net *ndev)
1322 {
1323 	int rqt_table_size = roundup_pow_of_two(ndev->rqt_size);
1324 	int act_sz = roundup_pow_of_two(ndev->cur_num_vqs / 2);
1325 	__be32 *list;
1326 	void *rqtc;
1327 	int inlen;
1328 	void *in;
1329 	int i, j;
1330 	int err;
1331 
1332 	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + rqt_table_size * MLX5_ST_SZ_BYTES(rq_num);
1333 	in = kzalloc(inlen, GFP_KERNEL);
1334 	if (!in)
1335 		return -ENOMEM;
1336 
1337 	MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid);
1338 	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
1339 
1340 	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1341 	MLX5_SET(rqtc, rqtc, rqt_max_size, rqt_table_size);
1342 	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1343 	for (i = 0, j = 0; i < act_sz; i++, j += 2)
1344 		list[i] = cpu_to_be32(ndev->vqs[j % ndev->cur_num_vqs].virtq_id);
1345 
1346 	MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
1347 	err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
1348 	kfree(in);
1349 	if (err)
1350 		return err;
1351 
1352 	return 0;
1353 }
1354 
1355 #define MLX5_MODIFY_RQT_NUM_RQS ((u64)1)
1356 
1357 static int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
1358 {
1359 	int act_sz = roundup_pow_of_two(num / 2);
1360 	__be32 *list;
1361 	void *rqtc;
1362 	int inlen;
1363 	void *in;
1364 	int i, j;
1365 	int err;
1366 
1367 	inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + act_sz * MLX5_ST_SZ_BYTES(rq_num);
1368 	in = kzalloc(inlen, GFP_KERNEL);
1369 	if (!in)
1370 		return -ENOMEM;
1371 
1372 	MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid);
1373 	MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS);
1374 	rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
1375 	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1376 
1377 	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1378 	for (i = 0, j = 0; i < act_sz; i++, j = j + 2)
1379 		list[i] = cpu_to_be32(ndev->vqs[j % num].virtq_id);
1380 
1381 	MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
1382 	err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn);
1383 	kfree(in);
1384 	if (err)
1385 		return err;
1386 
1387 	return 0;
1388 }
1389 
/* Destroy the RQ table created by create_rqt(). */
static void destroy_rqt(struct mlx5_vdpa_net *ndev)
{
	mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
}
1394 
/* Create an indirectly-dispatching TIR over the device RQ table, using
 * symmetric Toeplitz hashing of the outer IPv4 TCP 4-tuple to spread flows
 * across the table entries. Stores the TIR number in ndev->res.tirn.
 */
static int create_tir(struct mlx5_vdpa_net *ndev)
{
#define HASH_IP_L4PORTS                                                                            \
	(MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT |  \
	 MLX5_HASH_FIELD_SEL_L4_DPORT)
	/* Fixed, well-known Toeplitz key (same one used by other mlx5 ulps) */
	static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
						   0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
						   0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
						   0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
						   0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a };
	void *rss_key;
	void *outer;
	void *tirc;
	void *in;
	int err;

	in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid);
	tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);

	MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
	MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
	rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
	memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key));

	outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
	MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4);
	MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP);
	MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS);

	MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn);
	MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn);

	err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn);
	kfree(in);
	return err;
}
1436 
/* Destroy the TIR created by create_tir(). */
static void destroy_tir(struct mlx5_vdpa_net *ndev)
{
	mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn);
}
1441 
1442 #define MAX_STEERING_ENT 0x8000
1443 #define MAX_STEERING_GROUPS 2
1444 
1445 static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac,
1446 					u16 vid, bool tagged,
1447 					struct mlx5_flow_handle **ucast,
1448 					struct mlx5_flow_handle **mcast)
1449 {
1450 	struct mlx5_flow_destination dest = {};
1451 	struct mlx5_flow_act flow_act = {};
1452 	struct mlx5_flow_handle *rule;
1453 	struct mlx5_flow_spec *spec;
1454 	void *headers_c;
1455 	void *headers_v;
1456 	u8 *dmac_c;
1457 	u8 *dmac_v;
1458 	int err;
1459 
1460 	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1461 	if (!spec)
1462 		return -ENOMEM;
1463 
1464 	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
1465 	headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
1466 	headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
1467 	dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16);
1468 	dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16);
1469 	eth_broadcast_addr(dmac_c);
1470 	ether_addr_copy(dmac_v, mac);
1471 	if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)) {
1472 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
1473 		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid);
1474 	}
1475 	if (tagged) {
1476 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);
1477 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, vid);
1478 	}
1479 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1480 	dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1481 	dest.tir_num = ndev->res.tirn;
1482 	rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, &dest, 1);
1483 	if (IS_ERR(rule))
1484 		return PTR_ERR(rule);
1485 
1486 	*ucast = rule;
1487 
1488 	memset(dmac_c, 0, ETH_ALEN);
1489 	memset(dmac_v, 0, ETH_ALEN);
1490 	dmac_c[0] = 1;
1491 	dmac_v[0] = 1;
1492 	rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, &dest, 1);
1493 	kvfree(spec);
1494 	if (IS_ERR(rule)) {
1495 		err = PTR_ERR(rule);
1496 		goto err_mcast;
1497 	}
1498 
1499 	*mcast = rule;
1500 	return 0;
1501 
1502 err_mcast:
1503 	mlx5_del_flow_rules(*ucast);
1504 	return err;
1505 }
1506 
/* Remove a unicast/multicast rule pair installed by
 * mlx5_vdpa_add_mac_vlan_rules().
 */
static void mlx5_vdpa_del_mac_vlan_rules(struct mlx5_vdpa_net *ndev,
					 struct mlx5_flow_handle *ucast,
					 struct mlx5_flow_handle *mcast)
{
	mlx5_del_flow_rules(ucast);
	mlx5_del_flow_rules(mcast);
}
1514 
1515 static u64 search_val(u8 *mac, u16 vlan, bool tagged)
1516 {
1517 	u64 val;
1518 
1519 	if (!tagged)
1520 		vlan = MLX5V_UNTAGGED;
1521 
1522 	val = (u64)vlan << 48 |
1523 	      (u64)mac[0] << 40 |
1524 	      (u64)mac[1] << 32 |
1525 	      (u64)mac[2] << 24 |
1526 	      (u64)mac[3] << 16 |
1527 	      (u64)mac[4] << 8 |
1528 	      (u64)mac[5];
1529 
1530 	return val;
1531 }
1532 
1533 static struct macvlan_node *mac_vlan_lookup(struct mlx5_vdpa_net *ndev, u64 value)
1534 {
1535 	struct macvlan_node *pos;
1536 	u32 idx;
1537 
1538 	idx = hash_64(value, 8); // tbd 8
1539 	hlist_for_each_entry(pos, &ndev->macvlan_hash[idx], hlist) {
1540 		if (pos->macvlan == value)
1541 			return pos;
1542 	}
1543 	return NULL;
1544 }
1545 
1546 static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged) // vlan -> vid
1547 {
1548 	struct macvlan_node *ptr;
1549 	u64 val;
1550 	u32 idx;
1551 	int err;
1552 
1553 	val = search_val(mac, vlan, tagged);
1554 	if (mac_vlan_lookup(ndev, val))
1555 		return -EEXIST;
1556 
1557 	ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);
1558 	if (!ptr)
1559 		return -ENOMEM;
1560 
1561 	err = mlx5_vdpa_add_mac_vlan_rules(ndev, ndev->config.mac, vlan, tagged,
1562 					   &ptr->ucast_rule, &ptr->mcast_rule);
1563 	if (err)
1564 		goto err_add;
1565 
1566 	ptr->macvlan = val;
1567 	idx = hash_64(val, 8);
1568 	hlist_add_head(&ptr->hlist, &ndev->macvlan_hash[idx]);
1569 	return 0;
1570 
1571 err_add:
1572 	kfree(ptr);
1573 	return err;
1574 }
1575 
1576 static void mac_vlan_del(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged)
1577 {
1578 	struct macvlan_node *ptr;
1579 
1580 	ptr = mac_vlan_lookup(ndev, search_val(mac, vlan, tagged));
1581 	if (!ptr)
1582 		return;
1583 
1584 	hlist_del(&ptr->hlist);
1585 	mlx5_vdpa_del_mac_vlan_rules(ndev, ptr->ucast_rule, ptr->mcast_rule);
1586 	kfree(ptr);
1587 }
1588 
1589 static void clear_mac_vlan_table(struct mlx5_vdpa_net *ndev)
1590 {
1591 	struct macvlan_node *pos;
1592 	struct hlist_node *n;
1593 	int i;
1594 
1595 	for (i = 0; i < MLX5V_MACVLAN_SIZE; i++) {
1596 		hlist_for_each_entry_safe(pos, n, &ndev->macvlan_hash[i], hlist) {
1597 			hlist_del(&pos->hlist);
1598 			mlx5_vdpa_del_mac_vlan_rules(ndev, pos->ucast_rule, pos->mcast_rule);
1599 			kfree(pos);
1600 		}
1601 	}
1602 }
1603 
/* Create the RX flow table in the bypass namespace and install the initial
 * unicast/multicast rules for the configured MAC (untagged). Returns 0 or a
 * negative errno with the table destroyed again.
 */
static int setup_steering(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_flow_table_attr ft_attr = {};
	struct mlx5_flow_namespace *ns;
	int err;

	ft_attr.max_fte = MAX_STEERING_ENT;
	ft_attr.autogroup.max_num_groups = MAX_STEERING_GROUPS;

	ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS);
	if (!ns) {
		mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n");
		return -EOPNOTSUPP;
	}

	ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
	if (IS_ERR(ndev->rxft)) {
		mlx5_vdpa_warn(&ndev->mvdev, "failed to create flow table\n");
		return PTR_ERR(ndev->rxft);
	}

	err = mac_vlan_add(ndev, ndev->config.mac, 0, false);
	if (err)
		goto err_add;

	return 0;

err_add:
	mlx5_destroy_flow_table(ndev->rxft);
	return err;
}
1635 
/* Remove all macvlan rules and then destroy the RX flow table. */
static void teardown_steering(struct mlx5_vdpa_net *ndev)
{
	clear_mac_vlan_table(ndev);
	mlx5_destroy_flow_table(ndev->rxft);
}
1641 
1642 static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1643 {
1644 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1645 	struct mlx5_control_vq *cvq = &mvdev->cvq;
1646 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1647 	struct mlx5_core_dev *pfmdev;
1648 	size_t read;
1649 	u8 mac[ETH_ALEN], mac_back[ETH_ALEN];
1650 
1651 	pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
1652 	switch (cmd) {
1653 	case VIRTIO_NET_CTRL_MAC_ADDR_SET:
1654 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN);
1655 		if (read != ETH_ALEN)
1656 			break;
1657 
1658 		if (!memcmp(ndev->config.mac, mac, 6)) {
1659 			status = VIRTIO_NET_OK;
1660 			break;
1661 		}
1662 
1663 		if (is_zero_ether_addr(mac))
1664 			break;
1665 
1666 		if (!is_zero_ether_addr(ndev->config.mac)) {
1667 			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
1668 				mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
1669 					       ndev->config.mac);
1670 				break;
1671 			}
1672 		}
1673 
1674 		if (mlx5_mpfs_add_mac(pfmdev, mac)) {
1675 			mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
1676 				       mac);
1677 			break;
1678 		}
1679 
1680 		/* backup the original mac address so that if failed to add the forward rules
1681 		 * we could restore it
1682 		 */
1683 		memcpy(mac_back, ndev->config.mac, ETH_ALEN);
1684 
1685 		memcpy(ndev->config.mac, mac, ETH_ALEN);
1686 
1687 		/* Need recreate the flow table entry, so that the packet could forward back
1688 		 */
1689 		mac_vlan_del(ndev, mac_back, 0, false);
1690 
1691 		if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) {
1692 			mlx5_vdpa_warn(mvdev, "failed to insert forward rules, try to restore\n");
1693 
1694 			/* Although it hardly run here, we still need double check */
1695 			if (is_zero_ether_addr(mac_back)) {
1696 				mlx5_vdpa_warn(mvdev, "restore mac failed: Original MAC is zero\n");
1697 				break;
1698 			}
1699 
1700 			/* Try to restore original mac address to MFPS table, and try to restore
1701 			 * the forward rule entry.
1702 			 */
1703 			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
1704 				mlx5_vdpa_warn(mvdev, "restore mac failed: delete MAC %pM from MPFS table failed\n",
1705 					       ndev->config.mac);
1706 			}
1707 
1708 			if (mlx5_mpfs_add_mac(pfmdev, mac_back)) {
1709 				mlx5_vdpa_warn(mvdev, "restore mac failed: insert old MAC %pM into MPFS table failed\n",
1710 					       mac_back);
1711 			}
1712 
1713 			memcpy(ndev->config.mac, mac_back, ETH_ALEN);
1714 
1715 			if (mac_vlan_add(ndev, ndev->config.mac, 0, false))
1716 				mlx5_vdpa_warn(mvdev, "restore forward rules failed: insert forward rules failed\n");
1717 
1718 			break;
1719 		}
1720 
1721 		status = VIRTIO_NET_OK;
1722 		break;
1723 
1724 	default:
1725 		break;
1726 	}
1727 
1728 	return status;
1729 }
1730 
/* Change the number of active queue pairs to @newqps.
 *
 * Shrinking: first repoint the RQT to the smaller set, then tear down the
 * now-unused virtqueues. Growing: set up the new virtqueues first, then
 * repoint the RQT; on any failure the newly created queues are torn down
 * and cur_num_vqs is restored.
 */
static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps)
{
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	int cur_qps = ndev->cur_num_vqs / 2;
	int err;
	int i;

	if (cur_qps > newqps) {
		err = modify_rqt(ndev, 2 * newqps);
		if (err)
			return err;

		/* Tear down the queues no longer referenced by the RQT */
		for (i = ndev->cur_num_vqs - 1; i >= 2 * newqps; i--)
			teardown_vq(ndev, &ndev->vqs[i]);

		ndev->cur_num_vqs = 2 * newqps;
	} else {
		ndev->cur_num_vqs = 2 * newqps;
		for (i = cur_qps * 2; i < 2 * newqps; i++) {
			err = setup_vq(ndev, &ndev->vqs[i]);
			if (err)
				goto clean_added;
		}
		err = modify_rqt(ndev, 2 * newqps);
		if (err)
			goto clean_added;
	}
	return 0;

clean_added:
	/* i is the first vq that failed (or one past the last created one) */
	for (--i; i >= 2 * cur_qps; --i)
		teardown_vq(ndev, &ndev->vqs[i]);

	ndev->cur_num_vqs = 2 * cur_qps;

	return err;
}
1768 
/* Handle a VIRTIO_NET_CTRL_MQ control command: validate the requested
 * number of queue pairs and apply it via change_num_qps().
 *
 * Returns VIRTIO_NET_OK on success (including a request for the already
 * active count), VIRTIO_NET_ERR otherwise.
 */
static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
{
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
	struct mlx5_control_vq *cvq = &mvdev->cvq;
	struct virtio_net_ctrl_mq mq;
	size_t read;
	u16 newqps;

	switch (cmd) {
	case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
		/* This mq feature check aligns with pre-existing userspace
		 * implementation.
		 *
		 * Without it, an untrusted driver could fake a multiqueue config
		 * request down to a non-mq device that may cause kernel to
		 * panic due to uninitialized resources for extra vqs. Even with
		 * a well behaving guest driver, it is not expected to allow
		 * changing the number of vqs on a non-mq device.
		 */
		if (!MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ))
			break;

		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq));
		if (read != sizeof(mq))
			break;

		/* Guest-endian field; reject out-of-range requests */
		newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs);
		if (newqps < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
		    newqps > ndev->rqt_size)
			break;

		if (ndev->cur_num_vqs == 2 * newqps) {
			status = VIRTIO_NET_OK;
			break;
		}

		if (!change_num_qps(mvdev, newqps))
			status = VIRTIO_NET_OK;

		break;
	default:
		break;
	}

	return status;
}
1816 
/* Handle a VIRTIO_NET_CTRL_VLAN control command: add or remove the steering
 * rules for the given VLAN id on the configured MAC. Rejected unless
 * VIRTIO_NET_F_CTRL_VLAN was negotiated.
 *
 * Returns VIRTIO_NET_OK on success, VIRTIO_NET_ERR otherwise.
 */
static virtio_net_ctrl_ack handle_ctrl_vlan(struct mlx5_vdpa_dev *mvdev, u8 cmd)
{
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
	struct mlx5_control_vq *cvq = &mvdev->cvq;
	__virtio16 vlan;
	size_t read;
	u16 id;

	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)))
		return status;

	switch (cmd) {
	case VIRTIO_NET_CTRL_VLAN_ADD:
		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
		if (read != sizeof(vlan))
			break;

		id = mlx5vdpa16_to_cpu(mvdev, vlan);
		if (mac_vlan_add(ndev, ndev->config.mac, id, true))
			break;

		status = VIRTIO_NET_OK;
		break;
	case VIRTIO_NET_CTRL_VLAN_DEL:
		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
		if (read != sizeof(vlan))
			break;

		id = mlx5vdpa16_to_cpu(mvdev, vlan);
		/* Deleting a non-existent vlan is treated as success */
		mac_vlan_del(ndev, ndev->config.mac, id, true);
		status = VIRTIO_NET_OK;
		break;
	default:
		break;
	}

	return status;
}
1856 
/* Workqueue handler that services the control virtqueue.
 *
 * Processes at most one descriptor per invocation and requeues itself while
 * more work may be pending, so the reslock write lock is not held for long
 * stretches. Bails out early if the device is not DRIVER_OK, CTRL_VQ was
 * not negotiated, or the control VQ is not ready.
 */
static void mlx5_cvq_kick_handler(struct work_struct *work)
{
	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
	struct virtio_net_ctrl_hdr ctrl;
	struct mlx5_vdpa_wq_ent *wqent;
	struct mlx5_vdpa_dev *mvdev;
	struct mlx5_control_vq *cvq;
	struct mlx5_vdpa_net *ndev;
	size_t read, write;
	int err;

	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
	mvdev = wqent->mvdev;
	ndev = to_mlx5_vdpa_ndev(mvdev);
	cvq = &mvdev->cvq;

	/* Serializes against state changes and other control operations */
	down_write(&ndev->reslock);

	if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
		goto out;

	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
		goto out;

	if (!cvq->ready)
		goto out;

	while (true) {
		err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head,
					   GFP_ATOMIC);
		if (err <= 0)
			break;

		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl));
		if (read != sizeof(ctrl))
			break;

		cvq->received_desc++;
		switch (ctrl.class) {
		case VIRTIO_NET_CTRL_MAC:
			status = handle_ctrl_mac(mvdev, ctrl.cmd);
			break;
		case VIRTIO_NET_CTRL_MQ:
			status = handle_ctrl_mq(mvdev, ctrl.cmd);
			break;
		case VIRTIO_NET_CTRL_VLAN:
			status = handle_ctrl_vlan(mvdev, ctrl.cmd);
			break;
		default:
			break;
		}

		/* Make sure data is written before advancing index */
		smp_wmb();

		write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status));
		vringh_complete_iotlb(&cvq->vring, cvq->head, write);
		vringh_kiov_cleanup(&cvq->riov);
		vringh_kiov_cleanup(&cvq->wiov);

		if (vringh_need_notify_iotlb(&cvq->vring))
			vringh_notify(&cvq->vring);

		cvq->completed_desc++;
		/* Re-arm ourselves and return so the lock is released between
		 * descriptors
		 */
		queue_work(mvdev->wq, &wqent->work);
		break;
	}

out:
	up_write(&ndev->reslock);
}
1928 
/* vdpa_config_ops.kick_vq: notify the device that virtqueue @idx has new
 * available buffers. A kick on the control VQ schedules the CVQ work item;
 * a kick on a ready data VQ writes the index to the device doorbell.
 */
static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_vdpa_virtqueue *mvq;

	if (!is_index_valid(mvdev, idx))
		return;

	if (unlikely(is_ctrl_vq_idx(mvdev, idx))) {
		if (!mvdev->wq || !mvdev->cvq.ready)
			return;

		queue_work(mvdev->wq, &ndev->cvq_ent.work);
		return;
	}

	mvq = &ndev->vqs[idx];
	if (unlikely(!mvq->ready))
		return;

	iowrite16(idx, ndev->mvdev.res.kick_addr);
}
1952 
1953 static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area,
1954 				    u64 driver_area, u64 device_area)
1955 {
1956 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1957 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1958 	struct mlx5_vdpa_virtqueue *mvq;
1959 
1960 	if (!is_index_valid(mvdev, idx))
1961 		return -EINVAL;
1962 
1963 	if (is_ctrl_vq_idx(mvdev, idx)) {
1964 		mvdev->cvq.desc_addr = desc_area;
1965 		mvdev->cvq.device_addr = device_area;
1966 		mvdev->cvq.driver_addr = driver_area;
1967 		return 0;
1968 	}
1969 
1970 	mvq = &ndev->vqs[idx];
1971 	mvq->desc_addr = desc_area;
1972 	mvq->device_addr = device_area;
1973 	mvq->driver_addr = driver_area;
1974 	return 0;
1975 }
1976 
1977 static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
1978 {
1979 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1980 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1981 	struct mlx5_vdpa_virtqueue *mvq;
1982 
1983 	if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
1984 		return;
1985 
1986 	mvq = &ndev->vqs[idx];
1987 	mvq->num_ent = num;
1988 }
1989 
/* vdpa set_vq_cb op: store the completion callback for virtqueue @idx.
 * The CVQ callback is duplicated into the control VQ state as well.
 * NOTE(review): idx is not checked with is_index_valid() before indexing
 * event_cbs[] — presumably the vdpa core only passes indices within the
 * registered range; confirm against callers.
 */
static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);

	ndev->event_cbs[idx] = *cb;
	if (is_ctrl_vq_idx(mvdev, idx))
		mvdev->cvq.event_cb = *cb;
}
1999 
2000 static void mlx5_cvq_notify(struct vringh *vring)
2001 {
2002 	struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring);
2003 
2004 	if (!cvq->event_cb.callback)
2005 		return;
2006 
2007 	cvq->event_cb.callback(cvq->event_cb.private);
2008 }
2009 
2010 static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready)
2011 {
2012 	struct mlx5_control_vq *cvq = &mvdev->cvq;
2013 
2014 	cvq->ready = ready;
2015 	if (!ready)
2016 		return;
2017 
2018 	cvq->vring.notify = mlx5_cvq_notify;
2019 }
2020 
2021 static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
2022 {
2023 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2024 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2025 	struct mlx5_vdpa_virtqueue *mvq;
2026 	int err;
2027 
2028 	if (!mvdev->actual_features)
2029 		return;
2030 
2031 	if (!is_index_valid(mvdev, idx))
2032 		return;
2033 
2034 	if (is_ctrl_vq_idx(mvdev, idx)) {
2035 		set_cvq_ready(mvdev, ready);
2036 		return;
2037 	}
2038 
2039 	mvq = &ndev->vqs[idx];
2040 	if (!ready) {
2041 		suspend_vq(ndev, mvq);
2042 	} else {
2043 		err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
2044 		if (err) {
2045 			mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
2046 			ready = false;
2047 		}
2048 	}
2049 
2050 
2051 	mvq->ready = ready;
2052 }
2053 
2054 static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
2055 {
2056 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2057 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2058 
2059 	if (!is_index_valid(mvdev, idx))
2060 		return false;
2061 
2062 	if (is_ctrl_vq_idx(mvdev, idx))
2063 		return mvdev->cvq.ready;
2064 
2065 	return ndev->vqs[idx].ready;
2066 }
2067 
2068 static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
2069 				  const struct vdpa_vq_state *state)
2070 {
2071 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2072 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2073 	struct mlx5_vdpa_virtqueue *mvq;
2074 
2075 	if (!is_index_valid(mvdev, idx))
2076 		return -EINVAL;
2077 
2078 	if (is_ctrl_vq_idx(mvdev, idx)) {
2079 		mvdev->cvq.vring.last_avail_idx = state->split.avail_index;
2080 		return 0;
2081 	}
2082 
2083 	mvq = &ndev->vqs[idx];
2084 	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
2085 		mlx5_vdpa_warn(mvdev, "can't modify available index\n");
2086 		return -EINVAL;
2087 	}
2088 
2089 	mvq->used_idx = state->split.avail_index;
2090 	mvq->avail_idx = state->split.avail_index;
2091 	return 0;
2092 }
2093 
/* vdpa get_vq_state op: report the available index of virtqueue @idx.
 * The software CVQ index comes from its vringh state; data queues are
 * queried from the device unless the hardware object has been destroyed,
 * in which case the value saved during suspend is returned.
 */
static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_vdpa_virtqueue *mvq;
	struct mlx5_virtq_attr attr;
	int err;

	if (!is_index_valid(mvdev, idx))
		return -EINVAL;

	if (is_ctrl_vq_idx(mvdev, idx)) {
		state->split.avail_index = mvdev->cvq.vring.last_avail_idx;
		return 0;
	}

	mvq = &ndev->vqs[idx];
	/* If the virtq object was destroyed, use the value saved at
	 * the last minute of suspend_vq. This caters for userspace
	 * that cares about emulating the index after vq is stopped.
	 */
	if (!mvq->initialized) {
		/* Firmware returns a wrong value for the available index.
		 * Since both values should be identical, we take the value of
		 * used_idx which is reported correctly.
		 */
		state->split.avail_index = mvq->used_idx;
		return 0;
	}

	err = query_virtqueue(ndev, mvq, &attr);
	if (err) {
		mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n");
		return err;
	}
	/* see comment above: used_index is the reliable field */
	state->split.avail_index = attr.used_index;
	return 0;
}
2132 
/* vdpa get_vq_align op: virtqueue memory must be page aligned */
static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
{
	return PAGE_SIZE;
}
2137 
2138 static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx)
2139 {
2140 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2141 
2142 	if (is_ctrl_vq_idx(mvdev, idx))
2143 		return MLX5_VDPA_CVQ_GROUP;
2144 
2145 	return MLX5_VDPA_DATAVQ_GROUP;
2146 }
2147 
/* Feature bits as reported by the device capability field
 * device_features_bits_mask; translated to VIRTIO_NET_F_* bits by
 * mlx_to_vritio_features() below.
 */
enum { MLX5_VIRTIO_NET_F_GUEST_CSUM = 1 << 9,
	MLX5_VIRTIO_NET_F_CSUM = 1 << 10,
	MLX5_VIRTIO_NET_F_HOST_TSO6 = 1 << 11,
	MLX5_VIRTIO_NET_F_HOST_TSO4 = 1 << 12,
};
2153 
2154 static u64 mlx_to_vritio_features(u16 dev_features)
2155 {
2156 	u64 result = 0;
2157 
2158 	if (dev_features & MLX5_VIRTIO_NET_F_GUEST_CSUM)
2159 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM);
2160 	if (dev_features & MLX5_VIRTIO_NET_F_CSUM)
2161 		result |= BIT_ULL(VIRTIO_NET_F_CSUM);
2162 	if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO6)
2163 		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6);
2164 	if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO4)
2165 		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4);
2166 
2167 	return result;
2168 }
2169 
2170 static u64 get_supported_features(struct mlx5_core_dev *mdev)
2171 {
2172 	u64 mlx_vdpa_features = 0;
2173 	u16 dev_features;
2174 
2175 	dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mdev, device_features_bits_mask);
2176 	mlx_vdpa_features |= mlx_to_vritio_features(dev_features);
2177 	if (MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_version_1_0))
2178 		mlx_vdpa_features |= BIT_ULL(VIRTIO_F_VERSION_1);
2179 	mlx_vdpa_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
2180 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
2181 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
2182 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ);
2183 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS);
2184 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU);
2185 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VLAN);
2186 
2187 	return mlx_vdpa_features;
2188 }
2189 
/* vdpa get_device_features op: return the feature set this device offers
 * (computed at dev_add time); also logged via print_features().
 */
static u64 mlx5_vdpa_get_device_features(struct vdpa_device *vdev)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);

	print_features(mvdev, ndev->mvdev.mlx_features, false);
	return ndev->mvdev.mlx_features;
}
2198 
2199 static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features)
2200 {
2201 	/* Minimum features to expect */
2202 	if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
2203 		return -EOPNOTSUPP;
2204 
2205 	/* Double check features combination sent down by the driver.
2206 	 * Fail invalid features due to absence of the depended feature.
2207 	 *
2208 	 * Per VIRTIO v1.1 specification, section 5.1.3.1 Feature bit
2209 	 * requirements: "VIRTIO_NET_F_MQ Requires VIRTIO_NET_F_CTRL_VQ".
2210 	 * By failing the invalid features sent down by untrusted drivers,
2211 	 * we're assured the assumption made upon is_index_valid() and
2212 	 * is_ctrl_vq_idx() will not be compromised.
2213 	 */
2214 	if ((features & (BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) ==
2215             BIT_ULL(VIRTIO_NET_F_MQ))
2216 		return -EINVAL;
2217 
2218 	return 0;
2219 }
2220 
2221 static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
2222 {
2223 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2224 	int err;
2225 	int i;
2226 
2227 	for (i = 0; i < mvdev->max_vqs; i++) {
2228 		err = setup_vq(ndev, &ndev->vqs[i]);
2229 		if (err)
2230 			goto err_vq;
2231 	}
2232 
2233 	return 0;
2234 
2235 err_vq:
2236 	for (--i; i >= 0; i--)
2237 		teardown_vq(ndev, &ndev->vqs[i]);
2238 
2239 	return err;
2240 }
2241 
2242 static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
2243 {
2244 	struct mlx5_vdpa_virtqueue *mvq;
2245 	int i;
2246 
2247 	for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) {
2248 		mvq = &ndev->vqs[i];
2249 		if (!mvq->initialized)
2250 			continue;
2251 
2252 		teardown_vq(ndev, mvq);
2253 	}
2254 }
2255 
/* Recompute mvdev->max_idx — the highest valid virtqueue index — from
 * the negotiated CTRL_VQ/MQ features.
 */
static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
{
	if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) {
		if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) {
			/* MQ supported. CVQ index is right above the last data virtqueue's */
			mvdev->max_idx = mvdev->max_vqs;
		} else {
			/* Only CVQ supported. data virtqueues occupy indices 0 and 1.
			 * CVQ gets index 2
			 */
			mvdev->max_idx = 2;
		}
	} else {
		/* Two data virtqueues only: one for rx and one for tx */
		mvdev->max_idx = 1;
	}
}
2273 
2274 static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features)
2275 {
2276 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2277 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2278 	int err;
2279 
2280 	print_features(mvdev, features, true);
2281 
2282 	err = verify_driver_features(mvdev, features);
2283 	if (err)
2284 		return err;
2285 
2286 	ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
2287 	if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ))
2288 		ndev->rqt_size = mlx5vdpa16_to_cpu(mvdev, ndev->config.max_virtqueue_pairs);
2289 	else
2290 		ndev->rqt_size = 1;
2291 
2292 	ndev->cur_num_vqs = 2 * ndev->rqt_size;
2293 
2294 	update_cvq_info(mvdev);
2295 	return err;
2296 }
2297 
2298 static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb)
2299 {
2300 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2301 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2302 
2303 	ndev->config_cb = *cb;
2304 }
2305 
/* Maximum number of descriptors supported per virtqueue */
#define MLX5_VDPA_MAX_VQ_ENTRIES 256
static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev)
{
	return MLX5_VDPA_MAX_VQ_ENTRIES;
}
2311 
/* vdpa get_device_id op: this is a virtio-net device */
static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev)
{
	return VIRTIO_ID_NET;
}
2316 
/* vdpa get_vendor_id op: report Mellanox as the vendor */
static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev)
{
	return PCI_VENDOR_ID_MELLANOX;
}
2321 
/* vdpa get_status op: return the cached device status byte (also logged
 * via print_status()).
 */
static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);

	print_status(mvdev, ndev->mvdev.status, false);
	return ndev->mvdev.status;
}
2330 
/* Snapshot the state of @mvq into its restore info (ri) so the queue can
 * be recreated later by restore_channels_info(). For a queue that was
 * never initialized, attr stays zero-initialized and zero indices are
 * saved.
 */
static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	struct mlx5_vq_restore_info *ri = &mvq->ri;
	struct mlx5_virtq_attr attr = {};
	int err;

	if (mvq->initialized) {
		err = query_virtqueue(ndev, mvq, &attr);
		if (err)
			return err;
	}

	ri->avail_index = attr.available_index;
	ri->used_index = attr.used_index;
	ri->ready = mvq->ready;
	ri->num_ent = mvq->num_ent;
	ri->desc_addr = mvq->desc_addr;
	ri->device_addr = mvq->device_addr;
	ri->driver_addr = mvq->driver_addr;
	/* mark this queue for replay in restore_channels_info() */
	ri->restore = true;
	return 0;
}
2353 
/* Save restore info for every virtqueue prior to teardown.
 * NOTE(review): the return value of save_channel_info() is ignored; when
 * the query fails, that queue's ri stays zeroed with ri->restore unset
 * and is simply not restored — presumably intentional best-effort,
 * confirm.
 */
static int save_channels_info(struct mlx5_vdpa_net *ndev)
{
	int i;

	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
		memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri));
		save_channel_info(ndev, &ndev->vqs[i]);
	}
	return 0;
}
2364 
2365 static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev)
2366 {
2367 	int i;
2368 
2369 	for (i = 0; i < ndev->mvdev.max_vqs; i++)
2370 		memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
2371 }
2372 
/* Reinitialize all virtqueue software state, then replay the parameters
 * saved by save_channels_info() for every queue marked with ri->restore.
 */
static void restore_channels_info(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_virtqueue *mvq;
	struct mlx5_vq_restore_info *ri;
	int i;

	/* wipe transient state (ri is preserved) and re-seed defaults */
	mlx5_clear_vqs(ndev);
	init_mvqs(ndev);
	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
		mvq = &ndev->vqs[i];
		ri = &mvq->ri;
		if (!ri->restore)
			continue;

		mvq->avail_idx = ri->avail_index;
		mvq->used_idx = ri->used_index;
		mvq->ready = ri->ready;
		mvq->num_ent = ri->num_ent;
		mvq->desc_addr = ri->desc_addr;
		mvq->device_addr = ri->device_addr;
		mvq->driver_addr = ri->driver_addr;
	}
}
2396 
/* Handle a change of the memory map: quiesce the virtqueues, save their
 * state, tear down the datapath, rebuild the MR for the new IOTLB, and —
 * only if the driver has already reached DRIVER_OK — restore the queues
 * and bring the datapath back up. Called with reslock held for writing
 * (via mlx5_vdpa_set_map()).
 */
static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev,
				struct vhost_iotlb *iotlb, unsigned int asid)
{
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	int err;

	suspend_vqs(ndev);
	err = save_channels_info(ndev);
	if (err)
		goto err_mr;

	teardown_driver(ndev);
	mlx5_vdpa_destroy_mr(mvdev);
	err = mlx5_vdpa_create_mr(mvdev, iotlb, asid);
	if (err)
		goto err_mr;

	/* nothing to restore before DRIVER_OK; err is 0 on this path */
	if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
		goto err_mr;

	restore_channels_info(ndev);
	err = setup_driver(mvdev);
	if (err)
		goto err_setup;

	return 0;

err_setup:
	mlx5_vdpa_destroy_mr(mvdev);
err_mr:
	return err;
}
2429 
/* reslock must be held for this function */
static int setup_driver(struct mlx5_vdpa_dev *mvdev)
{
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	int err;

	WARN_ON(!rwsem_is_locked(&ndev->reslock));

	/* Bring up the datapath: virtqueues, then RQT, TIR and steering.
	 * A second call while already set up is a warned no-op.
	 */
	if (ndev->setup) {
		mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n");
		err = 0;
		goto out;
	}
	err = setup_virtqueues(mvdev);
	if (err) {
		mlx5_vdpa_warn(mvdev, "setup_virtqueues\n");
		goto out;
	}

	err = create_rqt(ndev);
	if (err) {
		mlx5_vdpa_warn(mvdev, "create_rqt\n");
		goto err_rqt;
	}

	err = create_tir(ndev);
	if (err) {
		mlx5_vdpa_warn(mvdev, "create_tir\n");
		goto err_tir;
	}

	err = setup_steering(ndev);
	if (err) {
		mlx5_vdpa_warn(mvdev, "setup_steering\n");
		goto err_fwd;
	}
	ndev->setup = true;

	return 0;

	/* unwind in reverse order of creation */
err_fwd:
	destroy_tir(ndev);
err_tir:
	destroy_rqt(ndev);
err_rqt:
	teardown_virtqueues(ndev);
out:
	return err;
}
2479 
2480 /* reslock must be held for this function */
2481 static void teardown_driver(struct mlx5_vdpa_net *ndev)
2482 {
2483 
2484 	WARN_ON(!rwsem_is_locked(&ndev->reslock));
2485 
2486 	if (!ndev->setup)
2487 		return;
2488 
2489 	teardown_steering(ndev);
2490 	destroy_tir(ndev);
2491 	destroy_rqt(ndev);
2492 	teardown_virtqueues(ndev);
2493 	ndev->setup = false;
2494 }
2495 
2496 static void clear_vqs_ready(struct mlx5_vdpa_net *ndev)
2497 {
2498 	int i;
2499 
2500 	for (i = 0; i < ndev->mvdev.max_vqs; i++)
2501 		ndev->vqs[i].ready = false;
2502 
2503 	ndev->mvdev.cvq.ready = false;
2504 }
2505 
/* Initialize the vringh instance backing the software control VQ from
 * the guest ring addresses latched in set_vq_address(). No-op (returns
 * 0) when CTRL_VQ was not negotiated.
 */
static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
{
	struct mlx5_control_vq *cvq = &mvdev->cvq;
	int err = 0;

	if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))
		err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
					MLX5_CVQ_MAX_ENT, false,
					(struct vring_desc *)(uintptr_t)cvq->desc_addr,
					(struct vring_avail *)(uintptr_t)cvq->driver_addr,
					(struct vring_used *)(uintptr_t)cvq->device_addr);

	return err;
}
2520 
/* vdpa set_status op: handle driver status transitions. The only
 * transition acted upon is DRIVER_OK becoming set, which triggers CVQ
 * vring setup and datapath bring-up. Clearing DRIVER_OK without a reset
 * is not supported and only logs a warning.
 */
static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	int err;

	print_status(mvdev, status, true);

	down_write(&ndev->reslock);

	/* act only when the DRIVER_OK bit changes */
	if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
		if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
			err = setup_cvq_vring(mvdev);
			if (err) {
				mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n");
				goto err_setup;
			}
			err = setup_driver(mvdev);
			if (err) {
				mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
				goto err_setup;
			}
		} else {
			mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
			goto err_clear;
		}
	}

	ndev->mvdev.status = status;
	up_write(&ndev->reslock);
	return;

err_setup:
	/* on bring-up failure drop the MR and report FAILED to the driver */
	mlx5_vdpa_destroy_mr(&ndev->mvdev);
	ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
err_clear:
	up_write(&ndev->reslock);
}
2559 
2560 static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev)
2561 {
2562 	int i;
2563 
2564 	/* default mapping all groups are mapped to asid 0 */
2565 	for (i = 0; i < MLX5_VDPA_NUMVQ_GROUPS; i++)
2566 		mvdev->group2asid[i] = 0;
2567 }
2568 
/* vdpa reset op: return the device to its initial state — tear down the
 * datapath, clear all software state and negotiated features, remap all
 * groups to ASID 0 and bump the config generation. A fresh MR is
 * recreated when the device supports uid-0 umems.
 */
static int mlx5_vdpa_reset(struct vdpa_device *vdev)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);

	print_status(mvdev, 0, true);
	mlx5_vdpa_info(mvdev, "performing device reset\n");

	down_write(&ndev->reslock);
	teardown_driver(ndev);
	clear_vqs_ready(ndev);
	mlx5_vdpa_destroy_mr(&ndev->mvdev);
	ndev->mvdev.status = 0;
	ndev->cur_num_vqs = 0;
	ndev->mvdev.cvq.received_desc = 0;
	ndev->mvdev.cvq.completed_desc = 0;
	/* event_cbs has max_vqs entries plus one for the control VQ */
	memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1));
	ndev->mvdev.actual_features = 0;
	init_group_to_asid_map(mvdev);
	/* invalidate any cached config (see mlx5_vdpa_get_generation) */
	++mvdev->generation;

	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
		if (mlx5_vdpa_create_mr(mvdev, NULL, 0))
			mlx5_vdpa_warn(mvdev, "create MR failed\n");
	}
	up_write(&ndev->reslock);

	return 0;
}
2598 
/* vdpa get_config_size op: size of the virtio-net config space */
static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev)
{
	return sizeof(struct virtio_net_config);
}
2603 
2604 static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf,
2605 				 unsigned int len)
2606 {
2607 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2608 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2609 
2610 	if (offset + len <= sizeof(struct virtio_net_config))
2611 		memcpy(buf, (u8 *)&ndev->config + offset, len);
2612 }
2613 
/* vdpa set_config op: the config space of this device is read-only */
static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf,
				 unsigned int len)
{
	/* not supported */
}
2619 
/* vdpa get_generation op: config generation counter, incremented on
 * device reset (see mlx5_vdpa_reset()).
 */
static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);

	return mvdev->generation;
}
2626 
/* Apply a new IOTLB for @asid: mlx5_vdpa_handle_set_map() decides whether
 * the existing MR can be kept; when the map actually changed, rebuild the
 * datapath via mlx5_vdpa_change_map(). Caller holds reslock for writing.
 */
static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
			unsigned int asid)
{
	bool change_map;
	int err;

	err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map, asid);
	if (err) {
		mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err);
		return err;
	}

	if (change_map)
		err = mlx5_vdpa_change_map(mvdev, iotlb, asid);

	return err;
}
2644 
2645 static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
2646 			     struct vhost_iotlb *iotlb)
2647 {
2648 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2649 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2650 	int err = -EINVAL;
2651 
2652 	down_write(&ndev->reslock);
2653 	err = set_map_data(mvdev, iotlb, asid);
2654 	up_write(&ndev->reslock);
2655 	return err;
2656 }
2657 
/* vdpa free op: release everything allocated at dev_add time. Invoked by
 * the vdpa core once the device reference count drops to zero. The MPFS
 * MAC entry is removed only if a MAC was actually programmed.
 */
static void mlx5_vdpa_free(struct vdpa_device *vdev)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_core_dev *pfmdev;
	struct mlx5_vdpa_net *ndev;

	ndev = to_mlx5_vdpa_ndev(mvdev);

	free_resources(ndev);
	mlx5_vdpa_destroy_mr(mvdev);
	if (!is_zero_ether_addr(ndev->config.mac)) {
		/* the MPFS entry was added on the physical function */
		pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
		mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
	}
	mlx5_vdpa_free_resources(&ndev->mvdev);
	kfree(ndev->event_cbs);
	kfree(ndev->vqs);
}
2676 
/* vdpa get_vq_notification op: expose the doorbell page for direct guest
 * mapping when it is safe to do so; returns an empty area otherwise
 * (e.g. for the software control VQ).
 */
static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct vdpa_notification_area ret = {};
	struct mlx5_vdpa_net *ndev;
	phys_addr_t addr;

	if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
		return ret;

	/* If SF BAR size is smaller than PAGE_SIZE, do not use direct
	 * notification to avoid the risk of mapping pages that contain BAR of more
	 * than one SF
	 */
	if (MLX5_CAP_GEN(mvdev->mdev, log_min_sf_size) + 12 < PAGE_SHIFT)
		return ret;

	ndev = to_mlx5_vdpa_ndev(mvdev);
	addr = (phys_addr_t)ndev->mvdev.res.phys_kick_addr;
	ret.addr = addr;
	ret.size = PAGE_SIZE;
	return ret;
}
2700 
/* vdpa get_vq_irq op: per-virtqueue interrupts are not exposed */
static int mlx5_get_vq_irq(struct vdpa_device *vdv, u16 idx)
{
	return -EOPNOTSUPP;
}
2705 
/* vdpa get_driver_features op: return the features accepted in
 * set_driver_features() (device features masked by the driver's set).
 */
static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);

	return mvdev->actual_features;
}
2712 
/* Query the hardware descriptor counters attached to @mvq.
 * Returns -EOPNOTSUPP when the device lacks virtio queue counters and
 * -EAGAIN while the queue object is not in the RDY state (the counter
 * set can only be queried on a ready queue).
 */
static int counter_set_query(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
			     u64 *received_desc, u64 *completed_desc)
{
	u32 in[MLX5_ST_SZ_DW(query_virtio_q_counters_in)] = {};
	u32 out[MLX5_ST_SZ_DW(query_virtio_q_counters_out)] = {};
	void *cmd_hdr;
	void *ctx;
	int err;

	if (!counters_supported(&ndev->mvdev))
		return -EOPNOTSUPP;

	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
		return -EAGAIN;

	cmd_hdr = MLX5_ADDR_OF(query_virtio_q_counters_in, in, hdr);

	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->counter_set_id);

	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
	if (err)
		return err;

	ctx = MLX5_ADDR_OF(query_virtio_q_counters_out, out, counters);
	*received_desc = MLX5_GET64(virtio_q_counters, ctx, received_desc);
	*completed_desc = MLX5_GET64(virtio_q_counters, ctx, completed_desc);
	return 0;
}
2744 
/* vdpa get_vendor_vq_stats op: report received/completed descriptor
 * counters for virtqueue @idx as netlink attributes. CVQ counters are
 * maintained in software; data VQ counters are queried from hardware.
 */
static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
					 struct sk_buff *msg,
					 struct netlink_ext_ack *extack)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_vdpa_virtqueue *mvq;
	struct mlx5_control_vq *cvq;
	u64 received_desc;
	u64 completed_desc;
	int err = 0;

	down_read(&ndev->reslock);
	if (!is_index_valid(mvdev, idx)) {
		NL_SET_ERR_MSG_MOD(extack, "virtqueue index is not valid");
		err = -EINVAL;
		goto out_err;
	}

	if (idx == ctrl_vq_idx(mvdev)) {
		/* the CVQ is emulated; counters are kept in software */
		cvq = &mvdev->cvq;
		received_desc = cvq->received_desc;
		completed_desc = cvq->completed_desc;
		goto out;
	}

	mvq = &ndev->vqs[idx];
	err = counter_set_query(ndev, mvq, &received_desc, &completed_desc);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack, "failed to query hardware");
		goto out_err;
	}

out:
	/* any failed nla_put below returns -EMSGSIZE */
	err = -EMSGSIZE;
	if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "received_desc"))
		goto out_err;

	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, received_desc,
			      VDPA_ATTR_PAD))
		goto out_err;

	if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "completed_desc"))
		goto out_err;

	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, completed_desc,
			      VDPA_ATTR_PAD))
		goto out_err;

	err = 0;
out_err:
	up_read(&ndev->reslock);
	return err;
}
2799 
2800 static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev)
2801 {
2802 	struct mlx5_control_vq *cvq;
2803 
2804 	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
2805 		return;
2806 
2807 	cvq = &mvdev->cvq;
2808 	cvq->ready = false;
2809 }
2810 
/* vdpa suspend op: stop device operation — unregister the event
 * notifier, flush pending work, suspend every active data virtqueue and
 * mark the control VQ not ready. Queue state is preserved.
 */
static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_vdpa_virtqueue *mvq;
	int i;

	down_write(&ndev->reslock);
	ndev->nb_registered = false;
	mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
	/* make sure no carrier-update work is still running */
	flush_workqueue(ndev->mvdev.wq);
	for (i = 0; i < ndev->cur_num_vqs; i++) {
		mvq = &ndev->vqs[i];
		suspend_vq(ndev, mvq);
	}
	mlx5_vdpa_cvq_suspend(mvdev);
	up_write(&ndev->reslock);
	return 0;
}
2830 
2831 static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group,
2832 			       unsigned int asid)
2833 {
2834 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2835 
2836 	if (group >= MLX5_VDPA_NUMVQ_GROUPS)
2837 		return -EINVAL;
2838 
2839 	mvdev->group2asid[group] = asid;
2840 	return 0;
2841 }
2842 
/* vdpa_config_ops implementation for the mlx5 vdpa net device */
static const struct vdpa_config_ops mlx5_vdpa_ops = {
	.set_vq_address = mlx5_vdpa_set_vq_address,
	.set_vq_num = mlx5_vdpa_set_vq_num,
	.kick_vq = mlx5_vdpa_kick_vq,
	.set_vq_cb = mlx5_vdpa_set_vq_cb,
	.set_vq_ready = mlx5_vdpa_set_vq_ready,
	.get_vq_ready = mlx5_vdpa_get_vq_ready,
	.set_vq_state = mlx5_vdpa_set_vq_state,
	.get_vq_state = mlx5_vdpa_get_vq_state,
	.get_vendor_vq_stats = mlx5_vdpa_get_vendor_vq_stats,
	.get_vq_notification = mlx5_get_vq_notification,
	.get_vq_irq = mlx5_get_vq_irq,
	.get_vq_align = mlx5_vdpa_get_vq_align,
	.get_vq_group = mlx5_vdpa_get_vq_group,
	.get_device_features = mlx5_vdpa_get_device_features,
	.set_driver_features = mlx5_vdpa_set_driver_features,
	.get_driver_features = mlx5_vdpa_get_driver_features,
	.set_config_cb = mlx5_vdpa_set_config_cb,
	.get_vq_num_max = mlx5_vdpa_get_vq_num_max,
	.get_device_id = mlx5_vdpa_get_device_id,
	.get_vendor_id = mlx5_vdpa_get_vendor_id,
	.get_status = mlx5_vdpa_get_status,
	.set_status = mlx5_vdpa_set_status,
	.reset = mlx5_vdpa_reset,
	.get_config_size = mlx5_vdpa_get_config_size,
	.get_config = mlx5_vdpa_get_config,
	.set_config = mlx5_vdpa_set_config,
	.get_generation = mlx5_vdpa_get_generation,
	.set_map = mlx5_vdpa_set_map,
	.set_group_asid = mlx5_set_group_asid,
	.free = mlx5_vdpa_free,
	.suspend = mlx5_vdpa_suspend,
};
2876 
/* Read the vport MTU and convert it to the virtio-level MTU by removing
 * the Ethernet hard-header overhead (inverse of config_func_mtu()).
 * NOTE(review): assumes hw_mtu >= MLX5V_ETH_HARD_MTU; otherwise the
 * subtraction wraps — presumably guaranteed by the device, confirm.
 */
static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
{
	u16 hw_mtu;
	int err;

	err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu);
	if (err)
		return err;

	*mtu = hw_mtu - MLX5V_ETH_HARD_MTU;
	return 0;
}
2889 
/* Allocate per-device network resources: transport domain plus TIS.
 * Returns -EEXIST if called while the resources are already valid.
 */
static int alloc_resources(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_net_resources *res = &ndev->res;
	int err;

	if (res->valid) {
		mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n");
		return -EEXIST;
	}

	err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn);
	if (err)
		return err;

	err = create_tis(ndev);
	if (err)
		goto err_tis;

	res->valid = true;

	return 0;

err_tis:
	/* release the transport domain allocated above */
	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
	return err;
}
2916 
2917 static void free_resources(struct mlx5_vdpa_net *ndev)
2918 {
2919 	struct mlx5_vdpa_net_resources *res = &ndev->res;
2920 
2921 	if (!res->valid)
2922 		return;
2923 
2924 	destroy_tis(ndev);
2925 	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
2926 	res->valid = false;
2927 }
2928 
2929 static void init_mvqs(struct mlx5_vdpa_net *ndev)
2930 {
2931 	struct mlx5_vdpa_virtqueue *mvq;
2932 	int i;
2933 
2934 	for (i = 0; i < ndev->mvdev.max_vqs; ++i) {
2935 		mvq = &ndev->vqs[i];
2936 		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
2937 		mvq->index = i;
2938 		mvq->ndev = ndev;
2939 		mvq->fwqp.fw = true;
2940 		mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
2941 	}
2942 	for (; i < ndev->mvdev.max_vqs; i++) {
2943 		mvq = &ndev->vqs[i];
2944 		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
2945 		mvq->index = i;
2946 		mvq->ndev = ndev;
2947 	}
2948 }
2949 
/* Management device state. ndev is non-NULL while a vdpa net device is
 * instantiated; at most one device may exist per management device.
 */
struct mlx5_vdpa_mgmtdev {
	struct vdpa_mgmt_dev mgtdev;
	struct mlx5_adev *madev;
	struct mlx5_vdpa_net *ndev;
};
2955 
/* Execute QUERY_VPORT_STATE for @vport. Returns the state field, or 0
 * (treated as link down by the caller) if the command fails.
 */
static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
{
	u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
	u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
	int err;

	MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE);
	MLX5_SET(query_vport_state_in, in, op_mod, opmod);
	MLX5_SET(query_vport_state_in, in, vport_number, vport);
	/* vport 0 is our own vport; other_vport selects a different one */
	if (vport)
		MLX5_SET(query_vport_state_in, in, other_vport, 1);

	err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
	if (err)
		return 0;

	return MLX5_GET(query_vport_state_out, out, state);
}
2974 
2975 static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
2976 {
2977 	if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
2978 	    VPORT_STATE_UP)
2979 		return true;
2980 
2981 	return false;
2982 }
2983 
/* Workqueue handler queued by event_handler(): refresh the LINK_UP bit
 * in the virtio-net config status from the current vport state and fire
 * the driver's config-change callback. Frees the work entry allocated
 * by event_handler().
 */
static void update_carrier(struct work_struct *work)
{
	struct mlx5_vdpa_wq_ent *wqent;
	struct mlx5_vdpa_dev *mvdev;
	struct mlx5_vdpa_net *ndev;

	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
	mvdev = wqent->mvdev;
	ndev = to_mlx5_vdpa_ndev(mvdev);
	if (get_link_state(mvdev))
		ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
	else
		ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);

	/* only notify while the notifier is registered (not suspended) */
	if (ndev->nb_registered && ndev->config_cb.callback)
		ndev->config_cb.callback(ndev->config_cb.private);

	kfree(wqent);
}
3003 
/* mlx5 event notifier callback: on a port up/down change, queue
 * update_carrier() to run in process context. GFP_ATOMIC is used since
 * notifiers may be invoked from atomic context.
 */
static int event_handler(struct notifier_block *nb, unsigned long event, void *param)
{
	struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb);
	struct mlx5_eqe *eqe = param;
	int ret = NOTIFY_DONE;
	struct mlx5_vdpa_wq_ent *wqent;

	if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
		switch (eqe->sub_type) {
		case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
		case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
			/* wqent is freed by update_carrier() */
			wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
			if (!wqent)
				return NOTIFY_DONE;

			wqent->mvdev = &ndev->mvdev;
			INIT_WORK(&wqent->work, update_carrier);
			queue_work(ndev->mvdev.wq, &wqent->work);
			ret = NOTIFY_OK;
			break;
		default:
			return NOTIFY_DONE;
		}
		return ret;
	}
	return ret;
}
3031 
/* Program the NIC vport MTU to @mtu plus the Ethernet hard-header
 * overhead (inverse of the conversion performed by query_mtu()).
 */
static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu)
{
	int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
	void *in;
	int err;

	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(modify_nic_vport_context_in, in, field_select.mtu, 1);
	MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.mtu,
		 mtu + MLX5V_ETH_HARD_MTU);
	MLX5_SET(modify_nic_vport_context_in, in, opcode,
		 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);

	err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);

	kvfree(in);
	return err;
}
3053 
3054 static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
3055 			     const struct vdpa_dev_set_config *add_config)
3056 {
3057 	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
3058 	struct virtio_net_config *config;
3059 	struct mlx5_core_dev *pfmdev;
3060 	struct mlx5_vdpa_dev *mvdev;
3061 	struct mlx5_vdpa_net *ndev;
3062 	struct mlx5_core_dev *mdev;
3063 	u32 max_vqs;
3064 	u16 mtu;
3065 	int err;
3066 
3067 	if (mgtdev->ndev)
3068 		return -ENOSPC;
3069 
3070 	mdev = mgtdev->madev->mdev;
3071 	if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) &
3072 	    MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) {
3073 		dev_warn(mdev->device, "missing support for split virtqueues\n");
3074 		return -EOPNOTSUPP;
3075 	}
3076 
3077 	max_vqs = min_t(int, MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues),
3078 			1 << MLX5_CAP_GEN(mdev, log_max_rqt_size));
3079 	if (max_vqs < 2) {
3080 		dev_warn(mdev->device,
3081 			 "%d virtqueues are supported. At least 2 are required\n",
3082 			 max_vqs);
3083 		return -EAGAIN;
3084 	}
3085 
3086 	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) {
3087 		if (add_config->net.max_vq_pairs > max_vqs / 2)
3088 			return -EINVAL;
3089 		max_vqs = min_t(u32, max_vqs, 2 * add_config->net.max_vq_pairs);
3090 	} else {
3091 		max_vqs = 2;
3092 	}
3093 
3094 	ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
3095 				 MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false);
3096 	if (IS_ERR(ndev))
3097 		return PTR_ERR(ndev);
3098 
3099 	ndev->mvdev.mlx_features = mgtdev->mgtdev.supported_features;
3100 	ndev->mvdev.max_vqs = max_vqs;
3101 	mvdev = &ndev->mvdev;
3102 	mvdev->mdev = mdev;
3103 
3104 	ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
3105 	ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), GFP_KERNEL);
3106 	if (!ndev->vqs || !ndev->event_cbs) {
3107 		err = -ENOMEM;
3108 		goto err_alloc;
3109 	}
3110 
3111 	init_mvqs(ndev);
3112 	init_rwsem(&ndev->reslock);
3113 	config = &ndev->config;
3114 
3115 	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU)) {
3116 		err = config_func_mtu(mdev, add_config->net.mtu);
3117 		if (err)
3118 			goto err_alloc;
3119 	}
3120 
3121 	err = query_mtu(mdev, &mtu);
3122 	if (err)
3123 		goto err_alloc;
3124 
3125 	ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu);
3126 
3127 	if (get_link_state(mvdev))
3128 		ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
3129 	else
3130 		ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
3131 
3132 	if (add_config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
3133 		memcpy(ndev->config.mac, add_config->net.mac, ETH_ALEN);
3134 	} else {
3135 		err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
3136 		if (err)
3137 			goto err_alloc;
3138 	}
3139 
3140 	if (!is_zero_ether_addr(config->mac)) {
3141 		pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
3142 		err = mlx5_mpfs_add_mac(pfmdev, config->mac);
3143 		if (err)
3144 			goto err_alloc;
3145 
3146 		ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MAC);
3147 	}
3148 
3149 	config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, max_vqs / 2);
3150 	mvdev->vdev.dma_dev = &mdev->pdev->dev;
3151 	err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
3152 	if (err)
3153 		goto err_mpfs;
3154 
3155 	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
3156 		err = mlx5_vdpa_create_mr(mvdev, NULL, 0);
3157 		if (err)
3158 			goto err_res;
3159 	}
3160 
3161 	err = alloc_resources(ndev);
3162 	if (err)
3163 		goto err_mr;
3164 
3165 	ndev->cvq_ent.mvdev = mvdev;
3166 	INIT_WORK(&ndev->cvq_ent.work, mlx5_cvq_kick_handler);
3167 	mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq");
3168 	if (!mvdev->wq) {
3169 		err = -ENOMEM;
3170 		goto err_res2;
3171 	}
3172 
3173 	ndev->nb.notifier_call = event_handler;
3174 	mlx5_notifier_register(mdev, &ndev->nb);
3175 	ndev->nb_registered = true;
3176 	mvdev->vdev.mdev = &mgtdev->mgtdev;
3177 	err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1);
3178 	if (err)
3179 		goto err_reg;
3180 
3181 	mgtdev->ndev = ndev;
3182 	return 0;
3183 
3184 err_reg:
3185 	destroy_workqueue(mvdev->wq);
3186 err_res2:
3187 	free_resources(ndev);
3188 err_mr:
3189 	mlx5_vdpa_destroy_mr(mvdev);
3190 err_res:
3191 	mlx5_vdpa_free_resources(&ndev->mvdev);
3192 err_mpfs:
3193 	if (!is_zero_ether_addr(config->mac))
3194 		mlx5_mpfs_del_mac(pfmdev, config->mac);
3195 err_alloc:
3196 	put_device(&mvdev->vdev.dev);
3197 	return err;
3198 }
3199 
/* Tear down and unregister the vdpa device created by mlx5_vdpa_dev_add().
 *
 * Teardown order matters:
 *  1. Unregister the event notifier first so no new update_carrier work
 *     can be queued.
 *  2. Clear mvdev->wq before destroying the workqueue — presumably so
 *     concurrent users observing mvdev->wq see NULL rather than a
 *     destroyed queue; TODO confirm against the queueing sites.
 *  3. Only then unregister the vdpa device and clear mgtdev->ndev so a
 *     new device may be added.
 */
static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
{
	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
	struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct workqueue_struct *wq;

	if (ndev->nb_registered) {
		ndev->nb_registered = false;
		mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
	}
	wq = mvdev->wq;
	mvdev->wq = NULL;
	destroy_workqueue(wq);
	_vdpa_unregister_device(dev);
	mgtdev->ndev = NULL;
}
3217 
/* Management-device callbacks invoked by the vdpa core ("vdpa dev add/del"). */
static const struct vdpa_mgmtdev_ops mdev_ops = {
	.dev_add = mlx5_vdpa_dev_add,
	.dev_del = mlx5_vdpa_dev_del,
};
3222 
/* Virtio device classes this management device can create: net only. */
static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
	{ 0 },
};
3227 
3228 static int mlx5v_probe(struct auxiliary_device *adev,
3229 		       const struct auxiliary_device_id *id)
3230 
3231 {
3232 	struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
3233 	struct mlx5_core_dev *mdev = madev->mdev;
3234 	struct mlx5_vdpa_mgmtdev *mgtdev;
3235 	int err;
3236 
3237 	mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL);
3238 	if (!mgtdev)
3239 		return -ENOMEM;
3240 
3241 	mgtdev->mgtdev.ops = &mdev_ops;
3242 	mgtdev->mgtdev.device = mdev->device;
3243 	mgtdev->mgtdev.id_table = id_table;
3244 	mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
3245 					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP) |
3246 					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU);
3247 	mgtdev->mgtdev.max_supported_vqs =
3248 		MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues) + 1;
3249 	mgtdev->mgtdev.supported_features = get_supported_features(mdev);
3250 	mgtdev->madev = madev;
3251 
3252 	err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
3253 	if (err)
3254 		goto reg_err;
3255 
3256 	auxiliary_set_drvdata(adev, mgtdev);
3257 
3258 	return 0;
3259 
3260 reg_err:
3261 	kfree(mgtdev);
3262 	return err;
3263 }
3264 
3265 static void mlx5v_remove(struct auxiliary_device *adev)
3266 {
3267 	struct mlx5_vdpa_mgmtdev *mgtdev;
3268 
3269 	mgtdev = auxiliary_get_drvdata(adev);
3270 	vdpa_mgmtdev_unregister(&mgtdev->mgtdev);
3271 	kfree(mgtdev);
3272 }
3273 
/* Auxiliary devices this driver binds to: the mlx5 core ".vnet" sub-device. */
static const struct auxiliary_device_id mlx5v_id_table[] = {
	{ .name = MLX5_ADEV_NAME ".vnet", },
	{},
};

MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table);
3280 
/* Auxiliary driver glue; module init/exit are generated by
 * module_auxiliary_driver() below.
 */
static struct auxiliary_driver mlx5v_driver = {
	.name = "vnet",
	.probe = mlx5v_probe,
	.remove = mlx5v_remove,
	.id_table = mlx5v_id_table,
};

module_auxiliary_driver(mlx5v_driver);
3289