xref: /openbmc/linux/drivers/vdpa/mlx5/net/mlx5_vnet.c (revision 2cf1c348)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2020 Mellanox Technologies Ltd. */
3 
4 #include <linux/module.h>
5 #include <linux/vdpa.h>
6 #include <linux/vringh.h>
7 #include <uapi/linux/virtio_net.h>
8 #include <uapi/linux/virtio_ids.h>
9 #include <uapi/linux/vdpa.h>
10 #include <linux/virtio_config.h>
11 #include <linux/auxiliary_bus.h>
12 #include <linux/mlx5/cq.h>
13 #include <linux/mlx5/qp.h>
14 #include <linux/mlx5/device.h>
15 #include <linux/mlx5/driver.h>
16 #include <linux/mlx5/vport.h>
17 #include <linux/mlx5/fs.h>
18 #include <linux/mlx5/mlx5_ifc_vdpa.h>
19 #include <linux/mlx5/mpfs.h>
20 #include "mlx5_vdpa.h"
21 
22 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
23 MODULE_DESCRIPTION("Mellanox VDPA driver");
24 MODULE_LICENSE("Dual BSD/GPL");
25 
/* Container-of helpers to move between the generic vdpa structures and the
 * mlx5-specific wrappers embedding them.
 */
#define to_mlx5_vdpa_ndev(__mvdev)                                             \
	container_of(__mvdev, struct mlx5_vdpa_net, mvdev)
#define to_mvdev(__vdev) container_of((__vdev), struct mlx5_vdpa_dev, vdev)

/* Every virtio-net and virtio transport feature bit this driver recognizes.
 * Bits outside this mask are flagged by print_features().
 */
#define VALID_FEATURES_MASK                                                                        \
	(BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |                                   \
	 BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) |   \
	 BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |                             \
	 BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \
	 BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) |   \
	 BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |      \
	 BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) |                                 \
	 BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) |                      \
	 BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) |  \
	 BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) |           \
	 BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) |                          \
	 BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) |      \
	 BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV))

/* All virtio device-status bits defined by the spec; anything else is
 * reported by print_status().
 */
#define VALID_STATUS_MASK                                                                          \
	(VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK |        \
	 VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)

/* True when the given feature bit was negotiated on this device. */
#define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature)))
50 
/* Per-device transport objects shared by all virtqueues. */
struct mlx5_vdpa_net_resources {
	u32 tisn;	/* transport interface send object number */
	u32 tdn;	/* transport domain number */
	u32 tirn;	/* transport interface receive object number */
	u32 rqtn;	/* receive queue table number */
	bool valid;	/* set once the resources above were created */
};
58 
/* Backing memory for a completion queue's CQE ring. */
struct mlx5_vdpa_cq_buf {
	struct mlx5_frag_buf_ctrl fbc;	/* indexing control over frag_buf */
	struct mlx5_frag_buf frag_buf;	/* the actual CQE storage */
	int cqe_size;			/* bytes per CQE (MLX5_VDPA_CQE_SIZE) */
	int nent;			/* number of CQEs in the ring */
};
65 
/* Completion queue used by a virtqueue's notification channel. */
struct mlx5_vdpa_cq {
	struct mlx5_core_cq mcq;	/* core CQ object (cqn, doorbells) */
	struct mlx5_vdpa_cq_buf buf;	/* CQE ring memory */
	struct mlx5_db db;		/* doorbell record */
	int cqe;			/* ring size, used for index wrapping */
};
72 
/* A device-registered memory region backing internal virtqueue state. */
struct mlx5_vdpa_umem {
	struct mlx5_frag_buf_ctrl fbc;
	struct mlx5_frag_buf frag_buf;	/* host memory backing the umem */
	int size;			/* size in bytes, see set_umem_size() */
	u32 id;				/* umem object id returned by firmware */
};
79 
/* One end of the RC connection implementing virtqueue notifications. */
struct mlx5_vdpa_qp {
	struct mlx5_core_qp mqp;	/* core QP object (qpn, uid) */
	struct mlx5_frag_buf frag_buf;	/* RQ ring (driver-owned QP only) */
	struct mlx5_db db;		/* doorbell record (driver-owned QP only) */
	u16 head;			/* RQ producer index, see rx_post() */
	bool fw;			/* true when this QP is owned by firmware */
};
87 
/* Snapshot of a virtqueue's configuration and indices, used to re-create
 * the queue after its resources were torn down (e.g. on memory map change).
 */
struct mlx5_vq_restore_info {
	u32 num_ent;
	u64 desc_addr;
	u64 device_addr;
	u64 driver_addr;
	u16 avail_index;
	u16 used_index;
	bool ready;
	bool restore;	/* true when this snapshot holds valid state */
};
98 
/* Per-virtqueue state: guest-visible addresses, the hardware virtq object,
 * and the notification channel resources backing it.
 */
struct mlx5_vdpa_virtqueue {
	bool ready;
	u64 desc_addr;
	u64 device_addr;
	u64 driver_addr;
	u32 num_ent;

	/* Resources for implementing the notification channel from the device
	 * to the driver. fwqp is the firmware end of an RC connection; the
	 * other end is vqqp used by the driver. cq is where completions are
	 * reported.
	 */
	struct mlx5_vdpa_cq cq;
	struct mlx5_vdpa_qp fwqp;
	struct mlx5_vdpa_qp vqqp;

	/* umem resources are required for the virtqueue operation. Their use
	 * is internal and they must be provided by the driver.
	 */
	struct mlx5_vdpa_umem umem1;
	struct mlx5_vdpa_umem umem2;
	struct mlx5_vdpa_umem umem3;

	bool initialized;	/* hardware resources were created */
	int index;		/* virtqueue index within the device */
	u32 virtq_id;		/* hardware virtq object id */
	struct mlx5_vdpa_net *ndev;
	u16 avail_idx;
	u16 used_idx;
	int fw_state;

	/* keep last in the struct */
	struct mlx5_vq_restore_info ri;
};
133 
134 static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
135 {
136 	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) {
137 		if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
138 			return idx < 2;
139 		else
140 			return idx < 3;
141 	}
142 
143 	return idx <= mvdev->max_idx;
144 }
145 
/* Top-level state of one mlx5 vDPA network device. */
struct mlx5_vdpa_net {
	struct mlx5_vdpa_dev mvdev;		/* generic mlx5 vdpa device */
	struct mlx5_vdpa_net_resources res;	/* shared transport objects */
	struct virtio_net_config config;	/* device config space */
	struct mlx5_vdpa_virtqueue *vqs;	/* data virtqueues */
	struct vdpa_callback *event_cbs;	/* per-VQ completion callbacks */

	/* Serialize vq resources creation and destruction. This is required
	 * since memory map might change and we need to destroy and create
	 * resources while driver in operational.
	 */
	struct mutex reslock;
	struct mlx5_flow_table *rxft;		/* RX steering table */
	struct mlx5_fc *rx_counter;
	struct mlx5_flow_handle *rx_rule_ucast;
	struct mlx5_flow_handle *rx_rule_mcast;
	bool setup;				/* driver resources are set up */
	u32 cur_num_vqs;			/* currently active data VQs */
	struct notifier_block nb;
	struct vdpa_callback config_cb;		/* config-change callback */
};
167 
168 static void free_resources(struct mlx5_vdpa_net *ndev);
169 static void init_mvqs(struct mlx5_vdpa_net *ndev);
170 static int setup_driver(struct mlx5_vdpa_dev *mvdev);
171 static void teardown_driver(struct mlx5_vdpa_net *ndev);
172 
173 static bool mlx5_vdpa_debug;
174 
175 #define MLX5_CVQ_MAX_ENT 16
176 
/* Debug helpers: log the name of each feature/status bit present in the
 * local `features`/`status` variable of the caller.
 */
#define MLX5_LOG_VIO_FLAG(_feature)                                                                \
	do {                                                                                       \
		if (features & BIT_ULL(_feature))                                                  \
			mlx5_vdpa_info(mvdev, "%s\n", #_feature);                                  \
	} while (0)

#define MLX5_LOG_VIO_STAT(_status)                                                                 \
	do {                                                                                       \
		if (status & (_status))                                                            \
			mlx5_vdpa_info(mvdev, "%s\n", #_status);                                   \
	} while (0)
188 
189 /* TODO: cross-endian support */
190 static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
191 {
192 	return virtio_legacy_is_little_endian() ||
193 		(mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
194 }
195 
196 static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val)
197 {
198 	return __virtio16_to_cpu(mlx5_vdpa_is_little_endian(mvdev), val);
199 }
200 
201 static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
202 {
203 	return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
204 }
205 
206 static inline u32 mlx5_vdpa_max_qps(int max_vqs)
207 {
208 	return max_vqs / 2;
209 }
210 
211 static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
212 {
213 	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
214 		return 2;
215 
216 	return 2 * mlx5_vdpa_max_qps(mvdev->max_vqs);
217 }
218 
219 static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
220 {
221 	return idx == ctrl_vq_idx(mvdev);
222 }
223 
/* Warn about out-of-spec status bits and, when debug logging is enabled,
 * print the name of each status bit in @status. @set distinguishes a
 * driver write from a driver read of the status register.
 */
static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
{
	if (status & ~VALID_STATUS_MASK)
		mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n",
			       status & ~VALID_STATUS_MASK);

	if (!mlx5_vdpa_debug)
		return;

	mlx5_vdpa_info(mvdev, "driver status %s", set ? "set" : "get");
	/* Writing zero to the status register is a device reset request. */
	if (set && !status) {
		mlx5_vdpa_info(mvdev, "driver resets the device\n");
		return;
	}

	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED);
}
246 
/* Warn about feature bits outside VALID_FEATURES_MASK and, when debug
 * logging is enabled, print the name of every bit set in @features.
 * @set distinguishes the driver setting features from reading them.
 */
static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set)
{
	if (features & ~VALID_FEATURES_MASK)
		mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n",
			       features & ~VALID_FEATURES_MASK);

	if (!mlx5_vdpa_debug)
		return;

	mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads");
	if (!features)
		mlx5_vdpa_info(mvdev, "all feature bits are cleared\n");

	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV);
}
295 
296 static int create_tis(struct mlx5_vdpa_net *ndev)
297 {
298 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
299 	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
300 	void *tisc;
301 	int err;
302 
303 	tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
304 	MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn);
305 	err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn);
306 	if (err)
307 		mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err);
308 
309 	return err;
310 }
311 
/* Destroy the TIS object created by create_tis(). */
static void destroy_tis(struct mlx5_vdpa_net *ndev)
{
	mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn);
}
316 
317 #define MLX5_VDPA_CQE_SIZE 64
318 #define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE)
319 
320 static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent)
321 {
322 	struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
323 	u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE;
324 	u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE;
325 	int err;
326 
327 	err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf,
328 				       ndev->mvdev.mdev->priv.numa_node);
329 	if (err)
330 		return err;
331 
332 	mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);
333 
334 	buf->cqe_size = MLX5_VDPA_CQE_SIZE;
335 	buf->nent = nent;
336 
337 	return 0;
338 }
339 
340 static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size)
341 {
342 	struct mlx5_frag_buf *frag_buf = &umem->frag_buf;
343 
344 	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf,
345 					ndev->mvdev.mdev->priv.numa_node);
346 }
347 
/* Free the CQE ring allocated by cq_frag_buf_alloc(). */
static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf)
{
	mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf);
}
352 
/* Return a pointer to CQE number @n in the CQ's ring buffer. */
static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n)
{
	return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n);
}
357 
358 static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf)
359 {
360 	struct mlx5_cqe64 *cqe64;
361 	void *cqe;
362 	int i;
363 
364 	for (i = 0; i < buf->nent; i++) {
365 		cqe = get_cqe(vcq, i);
366 		cqe64 = cqe;
367 		cqe64->op_own = MLX5_CQE_INVALID << 4;
368 	}
369 }
370 
/* Return CQE @n if it holds a completion owned by software, else NULL.
 * The CQE is valid when its opcode is not MLX5_CQE_INVALID and its
 * ownership bit matches the current pass over the ring (derived from the
 * consumer index bit just above the ring size).
 */
static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n)
{
	struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1));

	if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe)))
		return cqe64;

	return NULL;
}
381 
/* Post @n receive WQEs by advancing the RQ head and writing the new
 * producer index to the doorbell record.
 */
static void rx_post(struct mlx5_vdpa_qp *vqp, int n)
{
	vqp->head += n;
	vqp->db.db[0] = cpu_to_be32(vqp->head);
}
387 
/* Fill the create_qp command context for one end of the notification RC
 * connection. @fw selects the firmware-owned end; for it only a minimal
 * context is filled since firmware chooses the remaining parameters. For
 * the driver-owned end the full RQ-only QP context is programmed,
 * including the page list of the preallocated RQ buffer.
 */
static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in,
		       struct mlx5_vdpa_virtqueue *mvq, u32 num_ent)
{
	struct mlx5_vdpa_qp *vqp;
	__be64 *pas;
	void *qpc;

	vqp = fw ? &mvq->fwqp : &mvq->vqqp;
	MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid);
	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
	if (vqp->fw) {
		/* Firmware QP is allocated by the driver for the firmware's
		 * use so we can skip part of the params as they will be chosen by firmware
		 */
		qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
		MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
		MLX5_SET(qpc, qpc, no_sq, 1);
		return;
	}

	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
	MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index);
	MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	/* Notification QP only receives; no send queue is needed. */
	MLX5_SET(qpc, qpc, no_sq, 1);
	MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn);
	MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent));
	MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
	pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas);
	mlx5_fill_page_frag_array(&vqp->frag_buf, pas);
}
421 
422 static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent)
423 {
424 	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev,
425 					num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf,
426 					ndev->mvdev.mdev->priv.numa_node);
427 }
428 
/* Free the RQ ring allocated by rq_buf_alloc(). */
static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
{
	mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf);
}
433 
/* Create one end of the notification RC connection for @mvq. For the
 * driver-owned QP (!vqp->fw) an RQ buffer and a doorbell record are
 * allocated first and the RQ is fully posted after creation; the
 * firmware-owned QP needs neither. Returns 0 on success or a negative
 * errno, with all partially acquired resources released.
 */
static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
		     struct mlx5_vdpa_qp *vqp)
{
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
	void *qpc;
	void *in;
	int err;

	if (!vqp->fw) {
		/* NOTE(review): callers appear to pass &mvq->vqqp here
		 * already for the non-firmware case, making this
		 * reassignment redundant — confirm against call sites.
		 */
		vqp = &mvq->vqqp;
		err = rq_buf_alloc(ndev, vqp, mvq->num_ent);
		if (err)
			return err;

		err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db);
		if (err)
			goto err_db;
		/* Room for the RQ buffer's page list in the command. */
		inlen += vqp->frag_buf.npages * sizeof(__be64);
	}

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_kzalloc;
	}

	qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent);
	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
	if (!vqp->fw)
		MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma);
	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
	kfree(in);
	if (err)
		goto err_kzalloc;

	vqp->mqp.uid = ndev->mvdev.res.uid;
	vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);

	/* Make the whole RQ available to hardware immediately. */
	if (!vqp->fw)
		rx_post(vqp, mvq->num_ent);

	return 0;

err_kzalloc:
	if (!vqp->fw)
		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
err_db:
	if (!vqp->fw)
		rq_buf_free(ndev, vqp);

	return err;
}
493 
/* Destroy a notification QP and, for the driver-owned end, release its
 * doorbell record and RQ buffer.
 */
static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
{
	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};

	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
	MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
	MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
	if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
		mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn);
	if (!vqp->fw) {
		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
		rq_buf_free(ndev, vqp);
	}
}
508 
/* Return the CQE at the current consumer index if software owns it. */
static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
{
	return get_sw_cqe(cq, cq->mcq.cons_index);
}
513 
514 static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
515 {
516 	struct mlx5_cqe64 *cqe64;
517 
518 	cqe64 = next_cqe_sw(vcq);
519 	if (!cqe64)
520 		return -EAGAIN;
521 
522 	vcq->mcq.cons_index++;
523 	return 0;
524 }
525 
/* Acknowledge @num consumed completions: update the CQ consumer index,
 * replenish the RQ doorbell, and forward the event to the vdpa callback
 * registered for this virtqueue.
 */
static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
{
	struct mlx5_vdpa_net *ndev = mvq->ndev;
	struct vdpa_callback *event_cb;

	event_cb = &ndev->event_cbs[mvq->index];
	mlx5_cq_set_ci(&mvq->cq.mcq);

	/* make sure CQ consumer update is visible to the hardware before
	 * updating RX doorbell record.
	 */
	dma_wmb();
	rx_post(&mvq->vqqp, num);
	if (event_cb->callback)
		event_cb->callback(event_cb->private);
}
542 
/* CQ completion handler: drain all available CQEs, acknowledging them in
 * batches, then re-arm the CQ for the next interrupt.
 */
static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
{
	struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
	struct mlx5_vdpa_net *ndev = mvq->ndev;
	void __iomem *uar_page = ndev->mvdev.res.uar->map;
	int num = 0;

	while (!mlx5_vdpa_poll_one(&mvq->cq)) {
		num++;
		if (num > mvq->num_ent / 2) {
			/* If completions keep coming while we poll, we want to
			 * let the hardware know that we consumed them by
			 * updating the doorbell record.  We also let vdpa core
			 * know about this so it passes it on the virtio driver
			 * on the guest.
			 */
			mlx5_vdpa_handle_completions(mvq, num);
			num = 0;
		}
	}

	if (num)
		mlx5_vdpa_handle_completions(mvq, num);

	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
}
569 
/* Create the completion queue for virtqueue @idx with @num_ent entries:
 * allocate the doorbell record and CQE ring, issue the CREATE_CQ command,
 * and arm the CQ. Returns 0 on success or a negative errno with all
 * partially acquired resources released.
 */
static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
{
	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	void __iomem *uar_page = ndev->mvdev.res.uar->map;
	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
	struct mlx5_vdpa_cq *vcq = &mvq->cq;
	__be64 *pas;
	int inlen;
	void *cqc;
	void *in;
	int err;
	int eqn;

	err = mlx5_db_alloc(mdev, &vcq->db);
	if (err)
		return err;

	/* Doorbell record layout: set_ci in word 0, arm in word 1. */
	vcq->mcq.set_ci_db = vcq->db.db;
	vcq->mcq.arm_db = vcq->db.db + 1;
	vcq->mcq.cqe_sz = 64;

	err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
	if (err)
		goto err_db;

	cq_frag_buf_init(vcq, &vcq->buf);

	/* Command length includes the page list of the CQE ring. */
	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
		MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages;
	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_vzalloc;
	}

	MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid);
	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
	mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas);

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
	MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);

	/* Use vector 0 by default. Consider adding code to choose least used
	 * vector.
	 */
	err = mlx5_vector2eqn(mdev, 0, &eqn);
	if (err)
		goto err_vec;

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
	MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
	MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
	MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
	MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);

	err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
	if (err)
		goto err_vec;

	vcq->mcq.comp = mlx5_vdpa_cq_comp;
	vcq->cqe = num_ent;
	vcq->mcq.set_ci_db = vcq->db.db;
	vcq->mcq.arm_db = vcq->db.db + 1;
	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
	kfree(in);
	return 0;

err_vec:
	kfree(in);
err_vzalloc:
	cq_frag_buf_free(ndev, &vcq->buf);
err_db:
	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
	return err;
}
646 
647 static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
648 {
649 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
650 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
651 	struct mlx5_vdpa_cq *vcq = &mvq->cq;
652 
653 	if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) {
654 		mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn);
655 		return;
656 	}
657 	cq_frag_buf_free(ndev, &vcq->buf);
658 	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
659 }
660 
/* Compute the required size of umem @num (1..3) for @mvq from the device
 * capabilities (size = param_a * num_ent + param_b) and return the matching
 * umem via @umemp.
 *
 * NOTE(review): the switch has no default; a @num outside 1..3 would leave
 * p_a/p_b and *umemp uninitialized (UB). All current callers pass 1..3 —
 * confirm before relying on other values.
 */
static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
			  struct mlx5_vdpa_umem **umemp)
{
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	int p_a;
	int p_b;

	switch (num) {
	case 1:
		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_a);
		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_b);
		*umemp = &mvq->umem1;
		break;
	case 2:
		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_a);
		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_b);
		*umemp = &mvq->umem2;
		break;
	case 3:
		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_a);
		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_b);
		*umemp = &mvq->umem3;
		break;
	}
	(*umemp)->size = p_a * mvq->num_ent + p_b;
}
687 
/* Free the host memory backing a umem. */
static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem)
{
	mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf);
}
692 
/* Allocate and register umem @num (1..3) for @mvq with the firmware:
 * size is taken from device capabilities, the backing pages are described
 * by an MTT list in the CREATE_UMEM command. On success the firmware-
 * assigned id is stored in umem->id. Returns 0 or a negative errno.
 */
static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
{
	int inlen;
	u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {};
	void *um;
	void *in;
	int err;
	__be64 *pas;
	struct mlx5_vdpa_umem *umem;

	set_umem_size(ndev, mvq, num, &umem);
	err = umem_frag_buf_alloc(ndev, umem, umem->size);
	if (err)
		return err;

	/* Command length includes one MTT entry per backing page. */
	inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_in;
	}

	MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM);
	MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid);
	um = MLX5_ADDR_OF(create_umem_in, in, umem);
	MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages);

	pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]);
	mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW);

	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
	if (err) {
		mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err);
		goto err_cmd;
	}

	kfree(in);
	umem->id = MLX5_GET(create_umem_out, out, umem_id);

	return 0;

err_cmd:
	kfree(in);
err_in:
	umem_frag_buf_free(ndev, umem);
	return err;
}
742 
743 static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
744 {
745 	u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {};
746 	u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {};
747 	struct mlx5_vdpa_umem *umem;
748 
749 	switch (num) {
750 	case 1:
751 		umem = &mvq->umem1;
752 		break;
753 	case 2:
754 		umem = &mvq->umem2;
755 		break;
756 	case 3:
757 		umem = &mvq->umem3;
758 		break;
759 	}
760 
761 	MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM);
762 	MLX5_SET(destroy_umem_in, in, umem_id, umem->id);
763 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
764 		return;
765 
766 	umem_frag_buf_free(ndev, umem);
767 }
768 
/* Create all three umems required by a virtqueue, unwinding the ones
 * already created on failure. Returns 0 or a negative errno.
 */
static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int num = 1;
	int err;

	while (num <= 3) {
		err = create_umem(ndev, mvq, num);
		if (err)
			goto err_umem;
		num++;
	}
	return 0;

err_umem:
	while (--num > 0)
		umem_destroy(ndev, mvq, num);

	return err;
}
787 
/* Destroy the three virtqueue umems in reverse creation order. */
static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int num = 3;

	while (num > 0) {
		umem_destroy(ndev, mvq, num);
		num--;
	}
}
795 
796 static int get_queue_type(struct mlx5_vdpa_net *ndev)
797 {
798 	u32 type_mask;
799 
800 	type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type);
801 
802 	/* prefer split queue */
803 	if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)
804 		return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT;
805 
806 	WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED));
807 
808 	return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED;
809 }
810 
811 static bool vq_is_tx(u16 idx)
812 {
813 	return idx % 2;
814 }
815 
816 static u16 get_features_12_3(u64 features)
817 {
818 	return (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << 9) |
819 	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << 8) |
820 	       (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << 7) |
821 	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_CSUM)) << 6);
822 }
823 
/* Create the hardware virtq object for @mvq: create the three umems, then
 * issue CREATE_GENERAL_OBJECT with the queue addresses, negotiated
 * features, notification QP and umem ids. On success the object id is
 * stored in mvq->virtq_id. Returns 0 or a negative errno.
 */
static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
	u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
	void *obj_context;
	void *cmd_hdr;
	void *vq_ctx;
	void *in;
	int err;

	err = umems_create(ndev, mvq);
	if (err)
		return err;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_alloc;
	}

	cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);

	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);

	obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);
	/* Start from the saved indices so a re-created queue resumes where
	 * it left off.
	 */
	MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
	MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
		 get_features_12_3(ndev->mvdev.actual_features));
	vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
	MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));

	if (vq_is_tx(mvq->index))
		MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);

	/* Notifications are delivered through the firmware QP. */
	MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
	MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
	MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
	MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
	MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
		 !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
	MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
	MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
	MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
	MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, ndev->mvdev.mr.mkey);
	MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
	MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
	MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
	MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size);
	MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
	MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
	MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);

	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
	if (err)
		goto err_cmd;

	kfree(in);
	mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);

	return 0;

err_cmd:
	kfree(in);
err_alloc:
	umems_destroy(ndev, mvq);
	return err;
}
894 
/* Destroy the hardware virtq object and its umems. If the destroy command
 * fails the umems are deliberately kept, since hardware may still own them.
 */
static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {};
	u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {};

	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode,
		 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id);
	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid);
	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type,
		 MLX5_OBJ_TYPE_VIRTIO_NET_Q);
	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) {
		mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
		return;
	}
	umems_destroy(ndev, mvq);
}
912 
913 static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
914 {
915 	return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn;
916 }
917 
918 static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
919 {
920 	return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn;
921 }
922 
923 static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out,
924 			int *outlen, u32 qpn, u32 rqpn)
925 {
926 	void *qpc;
927 	void *pp;
928 
929 	switch (cmd) {
930 	case MLX5_CMD_OP_2RST_QP:
931 		*inlen = MLX5_ST_SZ_BYTES(qp_2rst_in);
932 		*outlen = MLX5_ST_SZ_BYTES(qp_2rst_out);
933 		*in = kzalloc(*inlen, GFP_KERNEL);
934 		*out = kzalloc(*outlen, GFP_KERNEL);
935 		if (!*in || !*out)
936 			goto outerr;
937 
938 		MLX5_SET(qp_2rst_in, *in, opcode, cmd);
939 		MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid);
940 		MLX5_SET(qp_2rst_in, *in, qpn, qpn);
941 		break;
942 	case MLX5_CMD_OP_RST2INIT_QP:
943 		*inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in);
944 		*outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out);
945 		*in = kzalloc(*inlen, GFP_KERNEL);
946 		*out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL);
947 		if (!*in || !*out)
948 			goto outerr;
949 
950 		MLX5_SET(rst2init_qp_in, *in, opcode, cmd);
951 		MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid);
952 		MLX5_SET(rst2init_qp_in, *in, qpn, qpn);
953 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
954 		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
955 		MLX5_SET(qpc, qpc, rwe, 1);
956 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
957 		MLX5_SET(ads, pp, vhca_port_num, 1);
958 		break;
959 	case MLX5_CMD_OP_INIT2RTR_QP:
960 		*inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in);
961 		*outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out);
962 		*in = kzalloc(*inlen, GFP_KERNEL);
963 		*out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL);
964 		if (!*in || !*out)
965 			goto outerr;
966 
967 		MLX5_SET(init2rtr_qp_in, *in, opcode, cmd);
968 		MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid);
969 		MLX5_SET(init2rtr_qp_in, *in, qpn, qpn);
970 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
971 		MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
972 		MLX5_SET(qpc, qpc, log_msg_max, 30);
973 		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
974 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
975 		MLX5_SET(ads, pp, fl, 1);
976 		break;
977 	case MLX5_CMD_OP_RTR2RTS_QP:
978 		*inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in);
979 		*outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out);
980 		*in = kzalloc(*inlen, GFP_KERNEL);
981 		*out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL);
982 		if (!*in || !*out)
983 			goto outerr;
984 
985 		MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd);
986 		MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid);
987 		MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn);
988 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
989 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
990 		MLX5_SET(ads, pp, ack_timeout, 14);
991 		MLX5_SET(qpc, qpc, retry_count, 7);
992 		MLX5_SET(qpc, qpc, rnr_retry, 7);
993 		break;
994 	default:
995 		goto outerr_nullify;
996 	}
997 
998 	return;
999 
1000 outerr:
1001 	kfree(*in);
1002 	kfree(*out);
1003 outerr_nullify:
1004 	*in = NULL;
1005 	*out = NULL;
1006 }
1007 
/* Release the command mailboxes obtained from alloc_inout(). */
static void free_inout(void *in, void *out)
{
	kfree(out);
	kfree(in);
}
1013 
1014 /* Two QPs are used by each virtqueue. One is used by the driver and one by
1015  * firmware. The fw argument indicates whether the subjected QP is the one used
1016  * by firmware.
1017  */
1018 static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd)
1019 {
1020 	int outlen;
1021 	int inlen;
1022 	void *out;
1023 	void *in;
1024 	int err;
1025 
1026 	alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw));
1027 	if (!in || !out)
1028 		return -ENOMEM;
1029 
1030 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen);
1031 	free_inout(in, out);
1032 	return err;
1033 }
1034 
1035 static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1036 {
1037 	int err;
1038 
1039 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP);
1040 	if (err)
1041 		return err;
1042 
1043 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP);
1044 	if (err)
1045 		return err;
1046 
1047 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP);
1048 	if (err)
1049 		return err;
1050 
1051 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP);
1052 	if (err)
1053 		return err;
1054 
1055 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP);
1056 	if (err)
1057 		return err;
1058 
1059 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP);
1060 	if (err)
1061 		return err;
1062 
1063 	return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP);
1064 }
1065 
/* Snapshot of a virtqueue's firmware state as returned by query_virtqueue(). */
struct mlx5_virtq_attr {
	u8 state;		/* MLX5_VIRTIO_NET_Q_OBJECT_STATE_* */
	u16 available_index;	/* hw_available_index from the virtq object */
	u16 used_index;		/* hw_used_index from the virtq object */
};
1071 
1072 static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
1073 			   struct mlx5_virtq_attr *attr)
1074 {
1075 	int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out);
1076 	u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {};
1077 	void *out;
1078 	void *obj_context;
1079 	void *cmd_hdr;
1080 	int err;
1081 
1082 	out = kzalloc(outlen, GFP_KERNEL);
1083 	if (!out)
1084 		return -ENOMEM;
1085 
1086 	cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1087 
1088 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
1089 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1090 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1091 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1092 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen);
1093 	if (err)
1094 		goto err_cmd;
1095 
1096 	obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context);
1097 	memset(attr, 0, sizeof(*attr));
1098 	attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
1099 	attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);
1100 	attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index);
1101 	kfree(out);
1102 	return 0;
1103 
1104 err_cmd:
1105 	kfree(out);
1106 	return err;
1107 }
1108 
1109 static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
1110 {
1111 	int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
1112 	u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {};
1113 	void *obj_context;
1114 	void *cmd_hdr;
1115 	void *in;
1116 	int err;
1117 
1118 	in = kzalloc(inlen, GFP_KERNEL);
1119 	if (!in)
1120 		return -ENOMEM;
1121 
1122 	cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1123 
1124 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
1125 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1126 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1127 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1128 
1129 	obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context);
1130 	MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select,
1131 		   MLX5_VIRTQ_MODIFY_MASK_STATE);
1132 	MLX5_SET(virtio_net_q_object, obj_context, state, state);
1133 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
1134 	kfree(in);
1135 	if (!err)
1136 		mvq->fw_state = state;
1137 
1138 	return err;
1139 }
1140 
1141 static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1142 {
1143 	u16 idx = mvq->index;
1144 	int err;
1145 
1146 	if (!mvq->num_ent)
1147 		return 0;
1148 
1149 	if (mvq->initialized)
1150 		return 0;
1151 
1152 	err = cq_create(ndev, idx, mvq->num_ent);
1153 	if (err)
1154 		return err;
1155 
1156 	err = qp_create(ndev, mvq, &mvq->fwqp);
1157 	if (err)
1158 		goto err_fwqp;
1159 
1160 	err = qp_create(ndev, mvq, &mvq->vqqp);
1161 	if (err)
1162 		goto err_vqqp;
1163 
1164 	err = connect_qps(ndev, mvq);
1165 	if (err)
1166 		goto err_connect;
1167 
1168 	err = create_virtqueue(ndev, mvq);
1169 	if (err)
1170 		goto err_connect;
1171 
1172 	if (mvq->ready) {
1173 		err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
1174 		if (err) {
1175 			mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n",
1176 				       idx, err);
1177 			goto err_connect;
1178 		}
1179 	}
1180 
1181 	mvq->initialized = true;
1182 	return 0;
1183 
1184 err_connect:
1185 	qp_destroy(ndev, &mvq->vqqp);
1186 err_vqqp:
1187 	qp_destroy(ndev, &mvq->fwqp);
1188 err_fwqp:
1189 	cq_destroy(ndev, idx);
1190 	return err;
1191 }
1192 
1193 static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1194 {
1195 	struct mlx5_virtq_attr attr;
1196 
1197 	if (!mvq->initialized)
1198 		return;
1199 
1200 	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
1201 		return;
1202 
1203 	if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND))
1204 		mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n");
1205 
1206 	if (query_virtqueue(ndev, mvq, &attr)) {
1207 		mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n");
1208 		return;
1209 	}
1210 	mvq->avail_idx = attr.available_index;
1211 	mvq->used_idx = attr.used_index;
1212 }
1213 
1214 static void suspend_vqs(struct mlx5_vdpa_net *ndev)
1215 {
1216 	int i;
1217 
1218 	for (i = 0; i < ndev->mvdev.max_vqs; i++)
1219 		suspend_vq(ndev, &ndev->vqs[i]);
1220 }
1221 
/* Tear down all resources behind an initialized virtqueue, in reverse order
 * of setup_vq(). The queue is suspended first so its indices are latched
 * before the firmware object disappears; mvq->initialized is cleared only
 * at the end because suspend_vq() checks it.
 */
static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	if (!mvq->initialized)
		return;

	suspend_vq(ndev, mvq);
	destroy_virtqueue(ndev, mvq);
	qp_destroy(ndev, &mvq->vqqp);
	qp_destroy(ndev, &mvq->fwqp);
	cq_destroy(ndev, mvq->index);
	mvq->initialized = false;
}
1234 
1235 static int create_rqt(struct mlx5_vdpa_net *ndev)
1236 {
1237 	__be32 *list;
1238 	int max_rqt;
1239 	void *rqtc;
1240 	int inlen;
1241 	void *in;
1242 	int i, j;
1243 	int err;
1244 	int num;
1245 
1246 	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
1247 		num = 1;
1248 	else
1249 		num = ndev->cur_num_vqs / 2;
1250 
1251 	max_rqt = min_t(int, roundup_pow_of_two(num),
1252 			1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
1253 	if (max_rqt < 1)
1254 		return -EOPNOTSUPP;
1255 
1256 	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + max_rqt * MLX5_ST_SZ_BYTES(rq_num);
1257 	in = kzalloc(inlen, GFP_KERNEL);
1258 	if (!in)
1259 		return -ENOMEM;
1260 
1261 	MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid);
1262 	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
1263 
1264 	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1265 	MLX5_SET(rqtc, rqtc, rqt_max_size, max_rqt);
1266 	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1267 	for (i = 0, j = 0; i < max_rqt; i++, j += 2)
1268 		list[i] = cpu_to_be32(ndev->vqs[j % (2 * num)].virtq_id);
1269 
1270 	MLX5_SET(rqtc, rqtc, rqt_actual_size, max_rqt);
1271 	err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
1272 	kfree(in);
1273 	if (err)
1274 		return err;
1275 
1276 	return 0;
1277 }
1278 
1279 #define MLX5_MODIFY_RQT_NUM_RQS ((u64)1)
1280 
1281 static int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
1282 {
1283 	__be32 *list;
1284 	int max_rqt;
1285 	void *rqtc;
1286 	int inlen;
1287 	void *in;
1288 	int i, j;
1289 	int err;
1290 
1291 	max_rqt = min_t(int, roundup_pow_of_two(ndev->cur_num_vqs / 2),
1292 			1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
1293 	if (max_rqt < 1)
1294 		return -EOPNOTSUPP;
1295 
1296 	inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + max_rqt * MLX5_ST_SZ_BYTES(rq_num);
1297 	in = kzalloc(inlen, GFP_KERNEL);
1298 	if (!in)
1299 		return -ENOMEM;
1300 
1301 	MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid);
1302 	MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS);
1303 	rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
1304 	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1305 
1306 	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1307 	for (i = 0, j = 0; i < max_rqt; i++, j += 2)
1308 		list[i] = cpu_to_be32(ndev->vqs[j % num].virtq_id);
1309 
1310 	MLX5_SET(rqtc, rqtc, rqt_actual_size, max_rqt);
1311 	err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn);
1312 	kfree(in);
1313 	if (err)
1314 		return err;
1315 
1316 	return 0;
1317 }
1318 
/* Destroy the RQ table created by create_rqt(). */
static void destroy_rqt(struct mlx5_vdpa_net *ndev)
{
	mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
}
1323 
/* Create the TIR (transport interface receive) object that hashes incoming
 * packets over the RQ table. Uses symmetric Toeplitz RSS on the outer
 * IPv4/TCP 4-tuple with a fixed, well-known hash key. The resulting TIR
 * number is stored in ndev->res.tirn.
 */
static int create_tir(struct mlx5_vdpa_net *ndev)
{
#define HASH_IP_L4PORTS                                                                            \
	(MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT |  \
	 MLX5_HASH_FIELD_SEL_L4_DPORT)
	/* Standard Toeplitz key, same as used by the mlx5 Ethernet driver. */
	static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
						   0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
						   0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
						   0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
						   0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a };
	void *rss_key;
	void *outer;
	void *tirc;
	void *in;
	int err;

	in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid);
	tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
	/* Indirect dispatch: packets are spread via the RQ table, not one RQ. */
	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);

	MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
	MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
	rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
	memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key));

	outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
	MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4);
	MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP);
	MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS);

	MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn);
	MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn);

	err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn);
	kfree(in);
	return err;
}
1365 
/* Destroy the TIR created by create_tir(). */
static void destroy_tir(struct mlx5_vdpa_net *ndev)
{
	mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn);
}
1370 
/* Install the RX steering rules that forward traffic to the device's TIR:
 * one unicast rule matching the device MAC exactly (also attached to a flow
 * counter), and one multicast/broadcast rule matching only the group bit of
 * the destination MAC. Creates the auto-grouped flow table and counter on
 * the way; everything is unwound in reverse on failure.
 */
static int add_fwd_to_tir(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_flow_destination dest[2] = {};
	struct mlx5_flow_table_attr ft_attr = {};
	struct mlx5_flow_act flow_act = {};
	struct mlx5_flow_namespace *ns;
	struct mlx5_flow_spec *spec;
	void *headers_c;
	void *headers_v;
	u8 *dmac_c;
	u8 *dmac_v;
	int err;

	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
	if (!spec)
		return -ENOMEM;

	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
	/* One entry per rule below: unicast + multicast. */
	ft_attr.max_fte = 2;
	ft_attr.autogroup.max_num_groups = 2;

	ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS);
	if (!ns) {
		mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n");
		err = -EOPNOTSUPP;
		goto err_ns;
	}

	ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
	if (IS_ERR(ndev->rxft)) {
		err = PTR_ERR(ndev->rxft);
		goto err_ns;
	}

	ndev->rx_counter = mlx5_fc_create(ndev->mvdev.mdev, false);
	if (IS_ERR(ndev->rx_counter)) {
		err = PTR_ERR(ndev->rx_counter);
		goto err_fc;
	}

	/* Unicast rule: exact match on the device MAC. */
	headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
	dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16);
	memset(dmac_c, 0xff, ETH_ALEN);
	headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
	dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16);
	ether_addr_copy(dmac_v, ndev->config.mac);

	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT;
	dest[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
	dest[0].tir_num = ndev->res.tirn;
	dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
	dest[1].counter_id = mlx5_fc_id(ndev->rx_counter);
	ndev->rx_rule_ucast = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dest, 2);

	if (IS_ERR(ndev->rx_rule_ucast)) {
		err = PTR_ERR(ndev->rx_rule_ucast);
		ndev->rx_rule_ucast = NULL;
		goto err_rule_ucast;
	}

	/* Multicast rule: match only the group bit of the destination MAC. */
	memset(dmac_c, 0, ETH_ALEN);
	memset(dmac_v, 0, ETH_ALEN);
	dmac_c[0] = 1;
	dmac_v[0] = 1;
	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
	ndev->rx_rule_mcast = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dest, 1);
	if (IS_ERR(ndev->rx_rule_mcast)) {
		err = PTR_ERR(ndev->rx_rule_mcast);
		ndev->rx_rule_mcast = NULL;
		goto err_rule_mcast;
	}

	kvfree(spec);
	return 0;

err_rule_mcast:
	mlx5_del_flow_rules(ndev->rx_rule_ucast);
	ndev->rx_rule_ucast = NULL;
err_rule_ucast:
	mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter);
err_fc:
	mlx5_destroy_flow_table(ndev->rxft);
err_ns:
	kvfree(spec);
	return err;
}
1457 
/* Remove the steering rules, counter and flow table installed by
 * add_fwd_to_tir(). rx_rule_ucast doubles as the "rules installed" flag,
 * making the function safe to call when nothing was set up.
 */
static void remove_fwd_to_tir(struct mlx5_vdpa_net *ndev)
{
	if (!ndev->rx_rule_ucast)
		return;

	mlx5_del_flow_rules(ndev->rx_rule_mcast);
	ndev->rx_rule_mcast = NULL;
	mlx5_del_flow_rules(ndev->rx_rule_ucast);
	ndev->rx_rule_ucast = NULL;
	mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter);
	mlx5_destroy_flow_table(ndev->rxft);
}
1470 
1471 static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1472 {
1473 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1474 	struct mlx5_control_vq *cvq = &mvdev->cvq;
1475 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1476 	struct mlx5_core_dev *pfmdev;
1477 	size_t read;
1478 	u8 mac[ETH_ALEN];
1479 
1480 	pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
1481 	switch (cmd) {
1482 	case VIRTIO_NET_CTRL_MAC_ADDR_SET:
1483 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN);
1484 		if (read != ETH_ALEN)
1485 			break;
1486 
1487 		if (!memcmp(ndev->config.mac, mac, 6)) {
1488 			status = VIRTIO_NET_OK;
1489 			break;
1490 		}
1491 
1492 		if (!is_zero_ether_addr(ndev->config.mac)) {
1493 			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
1494 				mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
1495 					       ndev->config.mac);
1496 				break;
1497 			}
1498 		}
1499 
1500 		if (mlx5_mpfs_add_mac(pfmdev, mac)) {
1501 			mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
1502 				       mac);
1503 			break;
1504 		}
1505 
1506 		memcpy(ndev->config.mac, mac, ETH_ALEN);
1507 		status = VIRTIO_NET_OK;
1508 		break;
1509 
1510 	default:
1511 		break;
1512 	}
1513 
1514 	return status;
1515 }
1516 
/* Scale the number of active data virtqueue pairs to @newqps.
 *
 * Shrinking: re-point the RQT at the smaller set first, then tear down the
 * now-unused virtqueues. Growing: set up the new virtqueues first, then
 * re-point the RQT; on failure the queues added so far are torn down and
 * the previous count is restored.
 *
 * Returns 0 on success, negative error otherwise.
 */
static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps)
{
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	int cur_qps = ndev->cur_num_vqs / 2;
	int err;
	int i;

	if (cur_qps > newqps) {
		err = modify_rqt(ndev, 2 * newqps);
		if (err)
			return err;

		for (i = ndev->cur_num_vqs - 1; i >= 2 * newqps; i--)
			teardown_vq(ndev, &ndev->vqs[i]);

		ndev->cur_num_vqs = 2 * newqps;
	} else {
		/* cur_num_vqs is raised before setup_vq() — presumably so the
		 * new indices validate while being set up; NOTE(review):
		 * confirm before reordering these statements.
		 */
		ndev->cur_num_vqs = 2 * newqps;
		for (i = cur_qps * 2; i < 2 * newqps; i++) {
			err = setup_vq(ndev, &ndev->vqs[i]);
			if (err)
				goto clean_added;
		}
		err = modify_rqt(ndev, 2 * newqps);
		if (err)
			goto clean_added;
	}
	return 0;

clean_added:
	/* Undo only the queues added in this call, newest first. */
	for (--i; i >= 2 * cur_qps; --i)
		teardown_vq(ndev, &ndev->vqs[i]);

	ndev->cur_num_vqs = 2 * cur_qps;

	return err;
}
1554 
1555 static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1556 {
1557 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1558 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1559 	struct mlx5_control_vq *cvq = &mvdev->cvq;
1560 	struct virtio_net_ctrl_mq mq;
1561 	size_t read;
1562 	u16 newqps;
1563 
1564 	switch (cmd) {
1565 	case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
1566 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq));
1567 		if (read != sizeof(mq))
1568 			break;
1569 
1570 		newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs);
1571 		if (ndev->cur_num_vqs == 2 * newqps) {
1572 			status = VIRTIO_NET_OK;
1573 			break;
1574 		}
1575 
1576 		if (!change_num_qps(mvdev, newqps))
1577 			status = VIRTIO_NET_OK;
1578 
1579 		break;
1580 	default:
1581 		break;
1582 	}
1583 
1584 	return status;
1585 }
1586 
/* Workqueue handler that drains the control virtqueue: for each descriptor
 * chain it pulls the control header, dispatches to the class handler, pushes
 * the one-byte status back and completes the chain, notifying the guest if
 * needed. Runs from the work item queued by mlx5_vdpa_kick_vq() and frees
 * that work entry on exit.
 */
static void mlx5_cvq_kick_handler(struct work_struct *work)
{
	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
	struct virtio_net_ctrl_hdr ctrl;
	struct mlx5_vdpa_wq_ent *wqent;
	struct mlx5_vdpa_dev *mvdev;
	struct mlx5_control_vq *cvq;
	struct mlx5_vdpa_net *ndev;
	size_t read, write;
	int err;

	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
	mvdev = wqent->mvdev;
	ndev = to_mlx5_vdpa_ndev(mvdev);
	cvq = &mvdev->cvq;
	/* Ignore kicks if CVQ was not negotiated or is not ready yet. */
	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
		goto out;

	if (!cvq->ready)
		goto out;

	while (true) {
		err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head,
					   GFP_ATOMIC);
		if (err <= 0)
			break;

		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl));
		if (read != sizeof(ctrl))
			break;

		switch (ctrl.class) {
		case VIRTIO_NET_CTRL_MAC:
			status = handle_ctrl_mac(mvdev, ctrl.cmd);
			break;
		case VIRTIO_NET_CTRL_MQ:
			status = handle_ctrl_mq(mvdev, ctrl.cmd);
			break;

		default:
			break;
		}

		/* Make sure data is written before advancing index */
		smp_wmb();

		write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status));
		vringh_complete_iotlb(&cvq->vring, cvq->head, write);
		vringh_kiov_cleanup(&cvq->riov);
		vringh_kiov_cleanup(&cvq->wiov);

		if (vringh_need_notify_iotlb(&cvq->vring))
			vringh_notify(&cvq->vring);
	}
out:
	kfree(wqent);
}
1644 
/* vdpa_config_ops.kick_vq: notify the device that @idx has new buffers.
 * The control VQ is emulated in software, so a kick queues the CVQ work
 * item; data VQ kicks are forwarded to hardware via the doorbell register.
 */
static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_vdpa_virtqueue *mvq;
	struct mlx5_vdpa_wq_ent *wqent;

	if (!is_index_valid(mvdev, idx))
		return;

	if (unlikely(is_ctrl_vq_idx(mvdev, idx))) {
		if (!mvdev->cvq.ready)
			return;

		/* GFP_ATOMIC: kicks may arrive from non-sleepable context. */
		wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
		if (!wqent)
			return;

		wqent->mvdev = mvdev;
		INIT_WORK(&wqent->work, mlx5_cvq_kick_handler);
		queue_work(mvdev->wq, &wqent->work);
		return;
	}

	mvq = &ndev->vqs[idx];
	if (unlikely(!mvq->ready))
		return;

	/* Ring the hardware doorbell for this queue. */
	iowrite16(idx, ndev->mvdev.res.kick_addr);
}
1675 
1676 static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area,
1677 				    u64 driver_area, u64 device_area)
1678 {
1679 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1680 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1681 	struct mlx5_vdpa_virtqueue *mvq;
1682 
1683 	if (!is_index_valid(mvdev, idx))
1684 		return -EINVAL;
1685 
1686 	if (is_ctrl_vq_idx(mvdev, idx)) {
1687 		mvdev->cvq.desc_addr = desc_area;
1688 		mvdev->cvq.device_addr = device_area;
1689 		mvdev->cvq.driver_addr = driver_area;
1690 		return 0;
1691 	}
1692 
1693 	mvq = &ndev->vqs[idx];
1694 	mvq->desc_addr = desc_area;
1695 	mvq->device_addr = device_area;
1696 	mvq->driver_addr = driver_area;
1697 	return 0;
1698 }
1699 
1700 static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
1701 {
1702 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1703 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1704 	struct mlx5_vdpa_virtqueue *mvq;
1705 
1706 	if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
1707 		return;
1708 
1709 	mvq = &ndev->vqs[idx];
1710 	mvq->num_ent = num;
1711 }
1712 
1713 static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb)
1714 {
1715 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1716 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1717 
1718 	ndev->event_cbs[idx] = *cb;
1719 }
1720 
1721 static void mlx5_cvq_notify(struct vringh *vring)
1722 {
1723 	struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring);
1724 
1725 	if (!cvq->event_cb.callback)
1726 		return;
1727 
1728 	cvq->event_cb.callback(cvq->event_cb.private);
1729 }
1730 
1731 static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready)
1732 {
1733 	struct mlx5_control_vq *cvq = &mvdev->cvq;
1734 
1735 	cvq->ready = ready;
1736 	if (!ready)
1737 		return;
1738 
1739 	cvq->vring.notify = mlx5_cvq_notify;
1740 }
1741 
1742 static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
1743 {
1744 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1745 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1746 	struct mlx5_vdpa_virtqueue *mvq;
1747 
1748 	if (!mvdev->actual_features)
1749 		return;
1750 
1751 	if (!is_index_valid(mvdev, idx))
1752 		return;
1753 
1754 	if (is_ctrl_vq_idx(mvdev, idx)) {
1755 		set_cvq_ready(mvdev, ready);
1756 		return;
1757 	}
1758 
1759 	mvq = &ndev->vqs[idx];
1760 	if (!ready)
1761 		suspend_vq(ndev, mvq);
1762 
1763 	mvq->ready = ready;
1764 }
1765 
1766 static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
1767 {
1768 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1769 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1770 
1771 	if (!is_index_valid(mvdev, idx))
1772 		return false;
1773 
1774 	if (is_ctrl_vq_idx(mvdev, idx))
1775 		return mvdev->cvq.ready;
1776 
1777 	return ndev->vqs[idx].ready;
1778 }
1779 
1780 static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
1781 				  const struct vdpa_vq_state *state)
1782 {
1783 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1784 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1785 	struct mlx5_vdpa_virtqueue *mvq;
1786 
1787 	if (!is_index_valid(mvdev, idx))
1788 		return -EINVAL;
1789 
1790 	if (is_ctrl_vq_idx(mvdev, idx)) {
1791 		mvdev->cvq.vring.last_avail_idx = state->split.avail_index;
1792 		return 0;
1793 	}
1794 
1795 	mvq = &ndev->vqs[idx];
1796 	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
1797 		mlx5_vdpa_warn(mvdev, "can't modify available index\n");
1798 		return -EINVAL;
1799 	}
1800 
1801 	mvq->used_idx = state->split.avail_index;
1802 	mvq->avail_idx = state->split.avail_index;
1803 	return 0;
1804 }
1805 
/* vdpa_config_ops.get_vq_state: report the available index of virtqueue
 * @idx. For a torn-down data vq the value latched in suspend_vq() is used;
 * otherwise firmware is queried.
 */
static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_vdpa_virtqueue *mvq;
	struct mlx5_virtq_attr attr;
	int err;

	if (!is_index_valid(mvdev, idx))
		return -EINVAL;

	if (is_ctrl_vq_idx(mvdev, idx)) {
		state->split.avail_index = mvdev->cvq.vring.last_avail_idx;
		return 0;
	}

	mvq = &ndev->vqs[idx];
	/* If the virtq object was destroyed, use the value saved at
	 * the last minute of suspend_vq. This caters for userspace
	 * that cares about emulating the index after vq is stopped.
	 */
	if (!mvq->initialized) {
		/* Firmware returns a wrong value for the available index.
		 * Since both values should be identical, we take the value of
		 * used_idx which is reported correctly.
		 */
		state->split.avail_index = mvq->used_idx;
		return 0;
	}

	err = query_virtqueue(ndev, mvq, &attr);
	if (err) {
		mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n");
		return err;
	}
	/* Same firmware quirk as above: report used_index as avail. */
	state->split.avail_index = attr.used_index;
	return 0;
}
1844 
/* vdpa_config_ops.get_vq_align: virtqueue areas must be page aligned. */
static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
{
	return PAGE_SIZE;
}
1849 
/* Device feature bits as reported in the device_features_bits_mask
 * capability; translated to VIRTIO_NET_F_* by mlx_to_vritio_features().
 */
enum { MLX5_VIRTIO_NET_F_GUEST_CSUM = 1 << 9,
	MLX5_VIRTIO_NET_F_CSUM = 1 << 10,
	MLX5_VIRTIO_NET_F_HOST_TSO6 = 1 << 11,
	MLX5_VIRTIO_NET_F_HOST_TSO4 = 1 << 12,
};
1855 
1856 static u64 mlx_to_vritio_features(u16 dev_features)
1857 {
1858 	u64 result = 0;
1859 
1860 	if (dev_features & MLX5_VIRTIO_NET_F_GUEST_CSUM)
1861 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM);
1862 	if (dev_features & MLX5_VIRTIO_NET_F_CSUM)
1863 		result |= BIT_ULL(VIRTIO_NET_F_CSUM);
1864 	if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO6)
1865 		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6);
1866 	if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO4)
1867 		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4);
1868 
1869 	return result;
1870 }
1871 
1872 static u64 get_supported_features(struct mlx5_core_dev *mdev)
1873 {
1874 	u64 mlx_vdpa_features = 0;
1875 	u16 dev_features;
1876 
1877 	dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mdev, device_features_bits_mask);
1878 	mlx_vdpa_features |= mlx_to_vritio_features(dev_features);
1879 	if (MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_version_1_0))
1880 		mlx_vdpa_features |= BIT_ULL(VIRTIO_F_VERSION_1);
1881 	mlx_vdpa_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
1882 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
1883 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
1884 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ);
1885 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS);
1886 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU);
1887 
1888 	return mlx_vdpa_features;
1889 }
1890 
/* vdpa_config_ops.get_device_features: report the offered feature set. */
static u64 mlx5_vdpa_get_device_features(struct vdpa_device *vdev)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);

	print_features(mvdev, ndev->mvdev.mlx_features, false);
	return ndev->mvdev.mlx_features;
}
1899 
1900 static int verify_min_features(struct mlx5_vdpa_dev *mvdev, u64 features)
1901 {
1902 	if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
1903 		return -EOPNOTSUPP;
1904 
1905 	return 0;
1906 }
1907 
/* Set up all data virtqueues and, if negotiated, initialize the software
 * control VQ vringh over the addresses the guest configured. On any failure
 * the already-created data virtqueues are torn down in reverse order.
 */
static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
{
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_control_vq *cvq = &mvdev->cvq;
	int err;
	int i;

	for (i = 0; i < 2 * mlx5_vdpa_max_qps(mvdev->max_vqs); i++) {
		err = setup_vq(ndev, &ndev->vqs[i]);
		if (err)
			goto err_vq;
	}

	if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) {
		err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
					MLX5_CVQ_MAX_ENT, false,
					(struct vring_desc *)(uintptr_t)cvq->desc_addr,
					(struct vring_avail *)(uintptr_t)cvq->driver_addr,
					(struct vring_used *)(uintptr_t)cvq->device_addr);
		if (err)
			goto err_vq;
	}

	return 0;

err_vq:
	/* Unwind the data vqs created so far, newest first. */
	for (--i; i >= 0; i--)
		teardown_vq(ndev, &ndev->vqs[i]);

	return err;
}
1939 
1940 static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
1941 {
1942 	struct mlx5_vdpa_virtqueue *mvq;
1943 	int i;
1944 
1945 	for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) {
1946 		mvq = &ndev->vqs[i];
1947 		if (!mvq->initialized)
1948 			continue;
1949 
1950 		teardown_vq(ndev, mvq);
1951 	}
1952 }
1953 
1954 static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
1955 {
1956 	if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) {
1957 		if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) {
1958 			/* MQ supported. CVQ index is right above the last data virtqueue's */
1959 			mvdev->max_idx = mvdev->max_vqs;
1960 		} else {
1961 			/* Only CVQ supportted. data virtqueues occupy indices 0 and 1.
1962 			 * CVQ gets index 2
1963 			 */
1964 			mvdev->max_idx = 2;
1965 		}
1966 	} else {
1967 		/* Two data virtqueues only: one for rx and one for tx */
1968 		mvdev->max_idx = 1;
1969 	}
1970 }
1971 
1972 static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features)
1973 {
1974 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1975 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1976 	int err;
1977 
1978 	print_features(mvdev, features, true);
1979 
1980 	err = verify_min_features(mvdev, features);
1981 	if (err)
1982 		return err;
1983 
1984 	ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
1985 	if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ))
1986 		ndev->cur_num_vqs = 2 * mlx5vdpa16_to_cpu(mvdev, ndev->config.max_virtqueue_pairs);
1987 	else
1988 		ndev->cur_num_vqs = 2;
1989 
1990 	update_cvq_info(mvdev);
1991 	return err;
1992 }
1993 
/* Store the config-change callback; it is invoked from update_carrier()
 * when the link state changes.
 */
static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);

	ndev->config_cb = *cb;
}
2001 
#define MLX5_VDPA_MAX_VQ_ENTRIES 256
/* Report the maximum number of entries supported per virtqueue. */
static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev)
{
	return MLX5_VDPA_MAX_VQ_ENTRIES;
}
2007 
/* This vdpa device always presents itself as a virtio-net device. */
static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev)
{
	return VIRTIO_ID_NET;
}
2012 
/* Report Mellanox as the vendor of the backing hardware. */
static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev)
{
	return PCI_VENDOR_ID_MELLANOX;
}
2017 
/* Return the cached virtio device status byte, logging it for debugging. */
static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);

	print_status(mvdev, ndev->mvdev.status, false);
	return ndev->mvdev.status;
}
2026 
/* Snapshot a virtqueue's state into its restore_info (ri) so it can be
 * re-created after a memory map change (see restore_channels_info()).
 * Firmware is queried only for initialized queues; for uninitialized ones
 * the zero-initialized attr yields zero avail/used indices. ri->restore is
 * set in both cases. Returns 0 on success or the query_virtqueue() error.
 */
static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	struct mlx5_vq_restore_info *ri = &mvq->ri;
	struct mlx5_virtq_attr attr = {};
	int err;

	if (mvq->initialized) {
		err = query_virtqueue(ndev, mvq, &attr);
		if (err)
			return err;
	}

	ri->avail_index = attr.available_index;
	ri->used_index = attr.used_index;
	ri->ready = mvq->ready;
	ri->num_ent = mvq->num_ent;
	ri->desc_addr = mvq->desc_addr;
	ri->device_addr = mvq->device_addr;
	ri->driver_addr = mvq->driver_addr;
	ri->restore = true;
	return 0;
}
2049 
2050 static int save_channels_info(struct mlx5_vdpa_net *ndev)
2051 {
2052 	int i;
2053 
2054 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
2055 		memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri));
2056 		save_channel_info(ndev, &ndev->vqs[i]);
2057 	}
2058 	return 0;
2059 }
2060 
/* Zero every virtqueue struct up to (but not including) the restore_info
 * member, so the snapshot taken by save_channels_info() survives.
 */
static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev)
{
	int i;

	for (i = 0; i < ndev->mvdev.max_vqs; i++)
		memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
}
2068 
/* Reset all virtqueue software state and re-apply the parameters previously
 * captured by save_channels_info(), for every queue flagged ri->restore.
 */
static void restore_channels_info(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_virtqueue *mvq;
	struct mlx5_vq_restore_info *ri;
	int i;

	/* Wipe hw-owned state (preserving ri), then re-establish defaults. */
	mlx5_clear_vqs(ndev);
	init_mvqs(ndev);
	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
		mvq = &ndev->vqs[i];
		ri = &mvq->ri;
		if (!ri->restore)
			continue;

		mvq->avail_idx = ri->avail_index;
		mvq->used_idx = ri->used_index;
		mvq->ready = ri->ready;
		mvq->num_ent = ri->num_ent;
		mvq->desc_addr = ri->desc_addr;
		mvq->device_addr = ri->device_addr;
		mvq->driver_addr = ri->driver_addr;
	}
}
2092 
/* Re-create the device memory region (MR) after the iotlb mappings changed.
 * The data path is quiesced and torn down first since the old MR is about to
 * go away; when the driver has already reached DRIVER_OK, the virtqueues are
 * then re-created from the saved restore info against the new MR.
 * Returns 0 on success or a negative errno.
 */
static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
{
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	int err;

	suspend_vqs(ndev);
	err = save_channels_info(ndev);
	if (err)
		goto err_mr;

	teardown_driver(ndev);
	mlx5_vdpa_destroy_mr(mvdev);
	err = mlx5_vdpa_create_mr(mvdev, iotlb);
	if (err)
		goto err_mr;

	/* Without DRIVER_OK there is no data path to restore yet. */
	if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
		return 0;

	restore_channels_info(ndev);
	err = setup_driver(mvdev);
	if (err)
		goto err_setup;

	return 0;

err_setup:
	mlx5_vdpa_destroy_mr(mvdev);
err_mr:
	return err;
}
2124 
/* Instantiate the full data path under reslock: virtqueues, the RQT that
 * spreads traffic across them, the TIR, and the steering rule forwarding
 * received packets to the TIR. A second call while already set up only warns
 * and returns 0. On failure, everything created so far is unwound in reverse
 * order. Returns 0 on success or a negative errno.
 */
static int setup_driver(struct mlx5_vdpa_dev *mvdev)
{
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	int err;

	mutex_lock(&ndev->reslock);
	if (ndev->setup) {
		mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n");
		err = 0;
		goto out;
	}
	err = setup_virtqueues(mvdev);
	if (err) {
		mlx5_vdpa_warn(mvdev, "setup_virtqueues\n");
		goto out;
	}

	err = create_rqt(ndev);
	if (err) {
		mlx5_vdpa_warn(mvdev, "create_rqt\n");
		goto err_rqt;
	}

	err = create_tir(ndev);
	if (err) {
		mlx5_vdpa_warn(mvdev, "create_tir\n");
		goto err_tir;
	}

	err = add_fwd_to_tir(ndev);
	if (err) {
		mlx5_vdpa_warn(mvdev, "add_fwd_to_tir\n");
		goto err_fwd;
	}
	ndev->setup = true;
	mutex_unlock(&ndev->reslock);

	return 0;

err_fwd:
	destroy_tir(ndev);
err_tir:
	destroy_rqt(ndev);
err_rqt:
	teardown_virtqueues(ndev);
out:
	mutex_unlock(&ndev->reslock);
	return err;
}
2174 
/* Destroy the data path created by setup_driver(), in reverse creation
 * order, under reslock. No-op if nothing is currently set up.
 */
static void teardown_driver(struct mlx5_vdpa_net *ndev)
{
	mutex_lock(&ndev->reslock);
	if (!ndev->setup)
		goto out;

	remove_fwd_to_tir(ndev);
	destroy_tir(ndev);
	destroy_rqt(ndev);
	teardown_virtqueues(ndev);
	ndev->setup = false;
out:
	mutex_unlock(&ndev->reslock);
}
2189 
2190 static void clear_vqs_ready(struct mlx5_vdpa_net *ndev)
2191 {
2192 	int i;
2193 
2194 	for (i = 0; i < ndev->mvdev.max_vqs; i++)
2195 		ndev->vqs[i].ready = false;
2196 
2197 	ndev->mvdev.cvq.ready = false;
2198 }
2199 
/* Apply a new virtio status byte. Only a transition of the DRIVER_OK bit is
 * acted upon: setting it brings up the data path via setup_driver(); clearing
 * it without a full reset is unsupported and ignored with a warning. On
 * setup failure the MR is destroyed and FAILED is latched into the status.
 */
static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	int err;

	print_status(mvdev, status, true);

	/* XOR with the cached status isolates bits that actually changed. */
	if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
		if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
			err = setup_driver(mvdev);
			if (err) {
				mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
				goto err_setup;
			}
		} else {
			mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
			return;
		}
	}

	ndev->mvdev.status = status;
	return;

err_setup:
	mlx5_vdpa_destroy_mr(&ndev->mvdev);
	ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
}
2228 
/* Full virtio device reset: tear down the data path, clear all ready flags,
 * drop the MR, and zero status, negotiated features and per-vq callbacks.
 * The generation counter is bumped so config readers notice the reset. When
 * the device supports uid 0 umems, a default MR is re-created immediately.
 * Always returns 0.
 */
static int mlx5_vdpa_reset(struct vdpa_device *vdev)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);

	print_status(mvdev, 0, true);
	mlx5_vdpa_info(mvdev, "performing device reset\n");
	teardown_driver(ndev);
	clear_vqs_ready(ndev);
	mlx5_vdpa_destroy_mr(&ndev->mvdev);
	ndev->mvdev.status = 0;
	ndev->cur_num_vqs = 0;
	/* event_cbs has max_vqs + 1 entries: data vqs plus the control vq. */
	memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1));
	ndev->mvdev.actual_features = 0;
	++mvdev->generation;
	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
		if (mlx5_vdpa_create_mr(mvdev, NULL))
			mlx5_vdpa_warn(mvdev, "create MR failed\n");
	}

	return 0;
}
2251 
/* The device config space is exactly a struct virtio_net_config. */
static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev)
{
	return sizeof(struct virtio_net_config);
}
2256 
2257 static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf,
2258 				 unsigned int len)
2259 {
2260 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2261 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2262 
2263 	if (offset + len <= sizeof(struct virtio_net_config))
2264 		memcpy(buf, (u8 *)&ndev->config + offset, len);
2265 }
2266 
/* Writing the device config space is not supported; silently ignore. */
static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf,
				 unsigned int len)
{
	/* not supported */
}
2272 
/* Config generation counter; incremented on reset so readers can detect
 * that the config space may have changed under them.
 */
static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);

	return mvdev->generation;
}
2279 
2280 static int mlx5_vdpa_set_map(struct vdpa_device *vdev, struct vhost_iotlb *iotlb)
2281 {
2282 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2283 	bool change_map;
2284 	int err;
2285 
2286 	err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map);
2287 	if (err) {
2288 		mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err);
2289 		return err;
2290 	}
2291 
2292 	if (change_map)
2293 		return mlx5_vdpa_change_map(mvdev, iotlb);
2294 
2295 	return 0;
2296 }
2297 
/* Final release callback for the vdpa device: free net resources, the MR,
 * the MPFS MAC entry (if one was installed at add time), the core vdpa
 * resources and all per-device allocations.
 */
static void mlx5_vdpa_free(struct vdpa_device *vdev)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_core_dev *pfmdev;
	struct mlx5_vdpa_net *ndev;

	ndev = to_mlx5_vdpa_ndev(mvdev);

	free_resources(ndev);
	mlx5_vdpa_destroy_mr(mvdev);
	/* The MPFS entry lives on the PF; mirror the add path's condition. */
	if (!is_zero_ether_addr(ndev->config.mac)) {
		pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
		mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
	}
	mlx5_vdpa_free_resources(&ndev->mvdev);
	mutex_destroy(&ndev->reslock);
	kfree(ndev->event_cbs);
	kfree(ndev->vqs);
}
2317 
/* Return the doorbell area userspace may mmap to kick queue @idx directly.
 * An empty area is returned for invalid indices, for the control vq (which
 * is emulated in software), or when direct mapping is unsafe (see below).
 */
static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct vdpa_notification_area ret = {};
	struct mlx5_vdpa_net *ndev;
	phys_addr_t addr;

	if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
		return ret;

	/* If SF BAR size is smaller than PAGE_SIZE, do not use direct
	 * notification to avoid the risk of mapping pages that contain BAR of more
	 * than one SF
	 */
	if (MLX5_CAP_GEN(mvdev->mdev, log_min_sf_size) + 12 < PAGE_SHIFT)
		return ret;

	ndev = to_mlx5_vdpa_ndev(mvdev);
	addr = (phys_addr_t)ndev->mvdev.res.phys_kick_addr;
	ret.addr = addr;
	ret.size = PAGE_SIZE;
	return ret;
}
2341 
/* Per-vq interrupt numbers are not exposed by this driver. */
static int mlx5_get_vq_irq(struct vdpa_device *vdv, u16 idx)
{
	return -EOPNOTSUPP;
}
2346 
/* Return the feature set negotiated via mlx5_vdpa_set_driver_features(). */
static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);

	return mvdev->actual_features;
}
2353 
/* vdpa_config_ops implementation; the per-virtqueue handlers are defined
 * earlier in this file.
 */
static const struct vdpa_config_ops mlx5_vdpa_ops = {
	.set_vq_address = mlx5_vdpa_set_vq_address,
	.set_vq_num = mlx5_vdpa_set_vq_num,
	.kick_vq = mlx5_vdpa_kick_vq,
	.set_vq_cb = mlx5_vdpa_set_vq_cb,
	.set_vq_ready = mlx5_vdpa_set_vq_ready,
	.get_vq_ready = mlx5_vdpa_get_vq_ready,
	.set_vq_state = mlx5_vdpa_set_vq_state,
	.get_vq_state = mlx5_vdpa_get_vq_state,
	.get_vq_notification = mlx5_get_vq_notification,
	.get_vq_irq = mlx5_get_vq_irq,
	.get_vq_align = mlx5_vdpa_get_vq_align,
	.get_device_features = mlx5_vdpa_get_device_features,
	.set_driver_features = mlx5_vdpa_set_driver_features,
	.get_driver_features = mlx5_vdpa_get_driver_features,
	.set_config_cb = mlx5_vdpa_set_config_cb,
	.get_vq_num_max = mlx5_vdpa_get_vq_num_max,
	.get_device_id = mlx5_vdpa_get_device_id,
	.get_vendor_id = mlx5_vdpa_get_vendor_id,
	.get_status = mlx5_vdpa_get_status,
	.set_status = mlx5_vdpa_set_status,
	.reset = mlx5_vdpa_reset,
	.get_config_size = mlx5_vdpa_get_config_size,
	.get_config = mlx5_vdpa_get_config,
	.set_config = mlx5_vdpa_set_config,
	.get_generation = mlx5_vdpa_get_generation,
	.set_map = mlx5_vdpa_set_map,
	.free = mlx5_vdpa_free,
};
2383 
2384 static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
2385 {
2386 	u16 hw_mtu;
2387 	int err;
2388 
2389 	err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu);
2390 	if (err)
2391 		return err;
2392 
2393 	*mtu = hw_mtu - MLX5V_ETH_HARD_MTU;
2394 	return 0;
2395 }
2396 
/* Allocate long-lived net resources: a transport domain and a TIS.
 * Refuses (-EEXIST) if resources are already marked valid. On failure the
 * transport domain is released again.
 */
static int alloc_resources(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_net_resources *res = &ndev->res;
	int err;

	if (res->valid) {
		mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n");
		return -EEXIST;
	}

	err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn);
	if (err)
		return err;

	err = create_tis(ndev);
	if (err)
		goto err_tis;

	res->valid = true;

	return 0;

err_tis:
	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
	return err;
}
2423 
/* Release the resources set up by alloc_resources(). Safe to call when
 * nothing was allocated (res->valid guards against double free).
 */
static void free_resources(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_net_resources *res = &ndev->res;

	if (!res->valid)
		return;

	destroy_tis(ndev);
	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
	res->valid = false;
}
2435 
/* Initialize the software state of all virtqueue structs (up to, but not
 * including, the restore_info member). The first 2 * max_qps queues are the
 * usable data queues and get a firmware-owned fwqp; the remainder are only
 * given their index and back-pointer.
 */
static void init_mvqs(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_virtqueue *mvq;
	int i;

	for (i = 0; i < 2 * mlx5_vdpa_max_qps(ndev->mvdev.max_vqs); ++i) {
		mvq = &ndev->vqs[i];
		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
		mvq->index = i;
		mvq->ndev = ndev;
		mvq->fwqp.fw = true;
	}
	for (; i < ndev->mvdev.max_vqs; i++) {
		mvq = &ndev->vqs[i];
		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
		mvq->index = i;
		mvq->ndev = ndev;
	}
}
2455 
/* Management device wrapper: ties the vdpa management interface to the
 * backing mlx5 auxiliary device and the single net device it may create.
 */
struct mlx5_vdpa_mgmtdev {
	struct vdpa_mgmt_dev mgtdev;	/* registered with the vdpa core */
	struct mlx5_adev *madev;	/* backing mlx5 auxiliary device */
	struct mlx5_vdpa_net *ndev;	/* the one net instance, or NULL */
};
2461 
/* Issue QUERY_VPORT_STATE for @vport and return the reported state.
 * On command failure, 0 (down) is returned rather than an error.
 */
static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
{
	u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
	u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
	int err;

	MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE);
	MLX5_SET(query_vport_state_in, in, op_mod, opmod);
	MLX5_SET(query_vport_state_in, in, vport_number, vport);
	/* Non-zero vport means querying another function's vport. */
	if (vport)
		MLX5_SET(query_vport_state_in, in, other_vport, 1);

	err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
	if (err)
		return 0;

	return MLX5_GET(query_vport_state_out, out, state);
}
2480 
2481 static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
2482 {
2483 	if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
2484 	    VPORT_STATE_UP)
2485 		return true;
2486 
2487 	return false;
2488 }
2489 
/* Workqueue handler queued by event_handler() on port change events:
 * refresh VIRTIO_NET_S_LINK_UP in the config space from the current vport
 * state and notify the driver through its config callback. Frees the
 * work entry allocated by the event handler.
 */
static void update_carrier(struct work_struct *work)
{
	struct mlx5_vdpa_wq_ent *wqent;
	struct mlx5_vdpa_dev *mvdev;
	struct mlx5_vdpa_net *ndev;

	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
	mvdev = wqent->mvdev;
	ndev = to_mlx5_vdpa_ndev(mvdev);
	if (get_link_state(mvdev))
		ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
	else
		ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);

	if (ndev->config_cb.callback)
		ndev->config_cb.callback(ndev->config_cb.private);

	kfree(wqent);
}
2509 
/* mlx5 core event notifier. For port up/down events, defer the config-space
 * update to the driver workqueue via update_carrier() — this runs in atomic
 * notifier context, hence GFP_ATOMIC and no direct firmware access here.
 * All other events are passed over with NOTIFY_DONE.
 */
static int event_handler(struct notifier_block *nb, unsigned long event, void *param)
{
	struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb);
	struct mlx5_eqe *eqe = param;
	int ret = NOTIFY_DONE;
	struct mlx5_vdpa_wq_ent *wqent;

	if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
		switch (eqe->sub_type) {
		case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
		case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
			wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
			if (!wqent)
				return NOTIFY_DONE;

			wqent->mvdev = &ndev->mvdev;
			/* wqent is freed by update_carrier() when the work runs. */
			INIT_WORK(&wqent->work, update_carrier);
			queue_work(ndev->mvdev.wq, &wqent->work);
			ret = NOTIFY_OK;
			break;
		default:
			return NOTIFY_DONE;
		}
		return ret;
	}
	return ret;
}
2537 
/* Management-dev .dev_add callback: create and register one mlx5 vdpa net
 * device. Validates device capabilities (split virtqueues, enough queues),
 * honors optional MAC and max-VQ-pair attributes from @add_config, queries
 * MTU/link state into the config space, sets up the MR, net resources and
 * the event workqueue, and finally registers the vdpa device. Only a single
 * device per management dev is allowed (-ENOSPC otherwise). All error paths
 * unwind in reverse order of construction.
 */
static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
			     const struct vdpa_dev_set_config *add_config)
{
	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
	struct virtio_net_config *config;
	struct mlx5_core_dev *pfmdev;
	struct mlx5_vdpa_dev *mvdev;
	struct mlx5_vdpa_net *ndev;
	struct mlx5_core_dev *mdev;
	u32 max_vqs;
	u16 mtu;
	int err;

	if (mgtdev->ndev)
		return -ENOSPC;

	mdev = mgtdev->madev->mdev;
	if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) &
	    MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) {
		dev_warn(mdev->device, "missing support for split virtqueues\n");
		return -EOPNOTSUPP;
	}

	max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues);
	if (max_vqs < 2) {
		dev_warn(mdev->device,
			 "%d virtqueues are supported. At least 2 are required\n",
			 max_vqs);
		return -EAGAIN;
	}

	/* User-requested pair count caps the queue count; default is 1 pair. */
	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) {
		if (add_config->net.max_vq_pairs > max_vqs / 2)
			return -EINVAL;
		max_vqs = min_t(u32, max_vqs, 2 * add_config->net.max_vq_pairs);
	} else {
		max_vqs = 2;
	}

	ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
				 name, false);
	if (IS_ERR(ndev))
		return PTR_ERR(ndev);

	ndev->mvdev.mlx_features = mgtdev->mgtdev.supported_features;
	ndev->mvdev.max_vqs = max_vqs;
	mvdev = &ndev->mvdev;
	mvdev->mdev = mdev;

	ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
	/* One extra slot for the control virtqueue's callback. */
	ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), GFP_KERNEL);
	if (!ndev->vqs || !ndev->event_cbs) {
		err = -ENOMEM;
		goto err_alloc;
	}

	init_mvqs(ndev);
	mutex_init(&ndev->reslock);
	config = &ndev->config;
	err = query_mtu(mdev, &mtu);
	if (err)
		goto err_mtu;

	ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu);

	if (get_link_state(mvdev))
		ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
	else
		ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);

	/* A user-provided MAC wins; otherwise use the vport's MAC. */
	if (add_config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
		memcpy(ndev->config.mac, add_config->net.mac, ETH_ALEN);
	} else {
		err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
		if (err)
			goto err_mtu;
	}

	/* Register the MAC in the PF's MPFS table; removed in mlx5_vdpa_free(). */
	if (!is_zero_ether_addr(config->mac)) {
		pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
		err = mlx5_mpfs_add_mac(pfmdev, config->mac);
		if (err)
			goto err_mtu;

		ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MAC);
	}

	config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, mlx5_vdpa_max_qps(max_vqs));
	mvdev->vdev.dma_dev = &mdev->pdev->dev;
	err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
	if (err)
		goto err_mpfs;

	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
		err = mlx5_vdpa_create_mr(mvdev, NULL);
		if (err)
			goto err_res;
	}

	err = alloc_resources(ndev);
	if (err)
		goto err_mr;

	mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq");
	if (!mvdev->wq) {
		err = -ENOMEM;
		goto err_res2;
	}

	ndev->nb.notifier_call = event_handler;
	mlx5_notifier_register(mdev, &ndev->nb);
	mvdev->vdev.mdev = &mgtdev->mgtdev;
	/* Register with data vqs plus one for the control virtqueue. */
	err = _vdpa_register_device(&mvdev->vdev, 2 * mlx5_vdpa_max_qps(max_vqs) + 1);
	if (err)
		goto err_reg;

	mgtdev->ndev = ndev;
	return 0;

err_reg:
	destroy_workqueue(mvdev->wq);
err_res2:
	free_resources(ndev);
err_mr:
	mlx5_vdpa_destroy_mr(mvdev);
err_res:
	mlx5_vdpa_free_resources(&ndev->mvdev);
err_mpfs:
	if (!is_zero_ether_addr(config->mac))
		mlx5_mpfs_del_mac(pfmdev, config->mac);
err_mtu:
	mutex_destroy(&ndev->reslock);
err_alloc:
	put_device(&mvdev->vdev.dev);
	return err;
}
2674 
/* Management-dev .dev_del callback: stop event delivery, drain the work
 * queue, and unregister the device (which eventually calls mlx5_vdpa_free()).
 */
static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
{
	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
	struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);

	mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
	destroy_workqueue(mvdev->wq);
	_vdpa_unregister_device(dev);
	mgtdev->ndev = NULL;
}
2686 
/* Management-device operations exposed to the vdpa core. */
static const struct vdpa_mgmtdev_ops mdev_ops = {
	.dev_add = mlx5_vdpa_dev_add,
	.dev_del = mlx5_vdpa_dev_del,
};
2691 
/* Virtio device classes this management device can create: net only. */
static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
	{ 0 },
};
2696 
/* Auxiliary-bus probe: allocate the management-device wrapper, describe its
 * capabilities (configurable MAC and max VQ pairs, supported features and
 * queue count) and register it with the vdpa core. Returns 0 on success or
 * a negative errno, freeing the wrapper on failure.
 */
static int mlx5v_probe(struct auxiliary_device *adev,
		       const struct auxiliary_device_id *id)

{
	struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
	struct mlx5_core_dev *mdev = madev->mdev;
	struct mlx5_vdpa_mgmtdev *mgtdev;
	int err;

	mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL);
	if (!mgtdev)
		return -ENOMEM;

	mgtdev->mgtdev.ops = &mdev_ops;
	mgtdev->mgtdev.device = mdev->device;
	mgtdev->mgtdev.id_table = id_table;
	mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP);
	/* +1 accounts for the control virtqueue. */
	mgtdev->mgtdev.max_supported_vqs =
		MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues) + 1;
	mgtdev->mgtdev.supported_features = get_supported_features(mdev);
	mgtdev->madev = madev;

	err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
	if (err)
		goto reg_err;

	auxiliary_set_drvdata(adev, mgtdev);

	return 0;

reg_err:
	kfree(mgtdev);
	return err;
}
2732 
/* Auxiliary-bus remove: unregister the management device (which deletes any
 * vdpa device it created) and free the wrapper.
 */
static void mlx5v_remove(struct auxiliary_device *adev)
{
	struct mlx5_vdpa_mgmtdev *mgtdev;

	mgtdev = auxiliary_get_drvdata(adev);
	vdpa_mgmtdev_unregister(&mgtdev->mgtdev);
	kfree(mgtdev);
}
2741 
/* Bind to the mlx5 core's ".vnet" auxiliary device. */
static const struct auxiliary_device_id mlx5v_id_table[] = {
	{ .name = MLX5_ADEV_NAME ".vnet", },
	{},
};

MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table);

static struct auxiliary_driver mlx5v_driver = {
	.name = "vnet",
	.probe = mlx5v_probe,
	.remove = mlx5v_remove,
	.id_table = mlx5v_id_table,
};

module_auxiliary_driver(mlx5v_driver);
2757