xref: /openbmc/linux/drivers/vdpa/mlx5/net/mlx5_vnet.c (revision bcda5fd3)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2020 Mellanox Technologies Ltd. */
3 
4 #include <linux/module.h>
5 #include <linux/vdpa.h>
6 #include <linux/vringh.h>
7 #include <uapi/linux/virtio_net.h>
8 #include <uapi/linux/virtio_ids.h>
9 #include <linux/virtio_config.h>
10 #include <linux/auxiliary_bus.h>
11 #include <linux/mlx5/cq.h>
12 #include <linux/mlx5/qp.h>
13 #include <linux/mlx5/device.h>
14 #include <linux/mlx5/driver.h>
15 #include <linux/mlx5/vport.h>
16 #include <linux/mlx5/fs.h>
17 #include <linux/mlx5/mlx5_ifc_vdpa.h>
18 #include <linux/mlx5/mpfs.h>
19 #include "mlx5_vdpa.h"
20 
21 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
22 MODULE_DESCRIPTION("Mellanox VDPA driver");
23 MODULE_LICENSE("Dual BSD/GPL");
24 
25 #define to_mlx5_vdpa_ndev(__mvdev)                                             \
26 	container_of(__mvdev, struct mlx5_vdpa_net, mvdev)
27 #define to_mvdev(__vdev) container_of((__vdev), struct mlx5_vdpa_dev, vdev)
28 
29 #define VALID_FEATURES_MASK                                                                        \
30 	(BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |                                   \
31 	 BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) |   \
32 	 BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |                             \
33 	 BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \
34 	 BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) |   \
35 	 BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |      \
36 	 BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) |                                 \
37 	 BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) |                      \
38 	 BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) |  \
39 	 BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) |           \
40 	 BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) |                          \
41 	 BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) |      \
42 	 BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV))
43 
44 #define VALID_STATUS_MASK                                                                          \
45 	(VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK |        \
46 	 VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)
47 
48 struct mlx5_vdpa_net_resources {
49 	u32 tisn;
50 	u32 tdn;
51 	u32 tirn;
52 	u32 rqtn;
53 	bool valid;
54 };
55 
56 struct mlx5_vdpa_cq_buf {
57 	struct mlx5_frag_buf_ctrl fbc;
58 	struct mlx5_frag_buf frag_buf;
59 	int cqe_size;
60 	int nent;
61 };
62 
63 struct mlx5_vdpa_cq {
64 	struct mlx5_core_cq mcq;
65 	struct mlx5_vdpa_cq_buf buf;
66 	struct mlx5_db db;
67 	int cqe;
68 };
69 
70 struct mlx5_vdpa_umem {
71 	struct mlx5_frag_buf_ctrl fbc;
72 	struct mlx5_frag_buf frag_buf;
73 	int size;
74 	u32 id;
75 };
76 
77 struct mlx5_vdpa_qp {
78 	struct mlx5_core_qp mqp;
79 	struct mlx5_frag_buf frag_buf;
80 	struct mlx5_db db;
81 	u16 head;
82 	bool fw;
83 };
84 
85 struct mlx5_vq_restore_info {
86 	u32 num_ent;
87 	u64 desc_addr;
88 	u64 device_addr;
89 	u64 driver_addr;
90 	u16 avail_index;
91 	u16 used_index;
92 	bool ready;
93 	struct vdpa_callback cb;
94 	bool restore;
95 };
96 
97 struct mlx5_vdpa_virtqueue {
98 	bool ready;
99 	u64 desc_addr;
100 	u64 device_addr;
101 	u64 driver_addr;
102 	u32 num_ent;
103 	struct vdpa_callback event_cb;
104 
105 	/* Resources for implementing the notification channel from the device
106 	 * to the driver. fwqp is the firmware end of an RC connection; the
107 	 * other end is vqqp used by the driver. cq is where completions are
108 	 * reported.
109 	 */
110 	struct mlx5_vdpa_cq cq;
111 	struct mlx5_vdpa_qp fwqp;
112 	struct mlx5_vdpa_qp vqqp;
113 
114 	/* umem resources are required for the virtqueue operation. Their use
115 	 * is internal and they must be provided by the driver.
116 	 */
117 	struct mlx5_vdpa_umem umem1;
118 	struct mlx5_vdpa_umem umem2;
119 	struct mlx5_vdpa_umem umem3;
120 
121 	bool initialized;
122 	int index;
123 	u32 virtq_id;
124 	struct mlx5_vdpa_net *ndev;
125 	u16 avail_idx;
126 	u16 used_idx;
127 	int fw_state;
128 
129 	/* keep last in the struct */
130 	struct mlx5_vq_restore_info ri;
131 };
132 
133 /* We will remove this limitation once mlx5_vdpa_alloc_resources()
134  * provides for driver space allocation
135  */
136 #define MLX5_MAX_SUPPORTED_VQS 16
137 
138 struct mlx5_vdpa_net {
139 	struct mlx5_vdpa_dev mvdev;
140 	struct mlx5_vdpa_net_resources res;
141 	struct virtio_net_config config;
142 	struct mlx5_vdpa_virtqueue vqs[MLX5_MAX_SUPPORTED_VQS];
143 
144 	/* Serialize virtqueue resource creation and destruction. This is required
145 	 * since the memory map might change and we need to destroy and create
146 	 * resources while the driver is operational.
147 	 */
148 	struct mutex reslock;
149 	struct mlx5_flow_table *rxft;
150 	struct mlx5_fc *rx_counter;
151 	struct mlx5_flow_handle *rx_rule;
152 	bool setup;
153 	u16 mtu;
154 };
155 
156 static void free_resources(struct mlx5_vdpa_net *ndev);
157 static void init_mvqs(struct mlx5_vdpa_net *ndev);
158 static int setup_driver(struct mlx5_vdpa_net *ndev);
159 static void teardown_driver(struct mlx5_vdpa_net *ndev);
160 
161 static bool mlx5_vdpa_debug;
162 
163 #define MLX5_LOG_VIO_FLAG(_feature)                                                                \
164 	do {                                                                                       \
165 		if (features & BIT_ULL(_feature))                                                  \
166 			mlx5_vdpa_info(mvdev, "%s\n", #_feature);                                  \
167 	} while (0)
168 
169 #define MLX5_LOG_VIO_STAT(_status)                                                                 \
170 	do {                                                                                       \
171 		if (status & (_status))                                                            \
172 			mlx5_vdpa_info(mvdev, "%s\n", #_status);                                   \
173 	} while (0)
174 
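/* Each virtio queue pair consists of one RX and one TX virtqueue, so the
 * number of queue pairs the device can expose is half the number of
 * virtqueues.
 */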
175 static inline u32 mlx5_vdpa_max_qps(int max_vqs)
176 {
177 	return max_vqs / 2;
178 }
179 
180 static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
181 {
182 	if (status & ~VALID_STATUS_MASK)
183 		mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n",
184 			       status & ~VALID_STATUS_MASK);
185 
186 	if (!mlx5_vdpa_debug)
187 		return;
188 
189 	mlx5_vdpa_info(mvdev, "driver status %s", set ? "set" : "get");
190 	if (set && !status) {
191 		mlx5_vdpa_info(mvdev, "driver resets the device\n");
192 		return;
193 	}
194 
195 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE);
196 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER);
197 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK);
198 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK);
199 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET);
200 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED);
201 }
202 
203 static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set)
204 {
205 	if (features & ~VALID_FEATURES_MASK)
206 		mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n",
207 			       features & ~VALID_FEATURES_MASK);
208 
209 	if (!mlx5_vdpa_debug)
210 		return;
211 
212 	mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads");
213 	if (!features)
214 		mlx5_vdpa_info(mvdev, "all feature bits are cleared\n");
215 
216 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM);
217 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM);
218 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
219 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU);
220 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC);
221 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4);
222 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6);
223 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN);
224 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO);
225 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4);
226 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6);
227 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN);
228 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO);
229 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF);
230 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS);
231 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ);
232 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX);
233 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN);
234 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA);
235 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE);
236 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ);
237 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR);
238 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT);
239 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS);
240 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT);
241 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY);
242 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX);
243 	MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY);
244 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT);
245 	MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1);
246 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM);
247 	MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED);
248 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM);
249 	MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV);
250 }
251 
252 static int create_tis(struct mlx5_vdpa_net *ndev)
253 {
254 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
255 	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
256 	void *tisc;
257 	int err;
258 
259 	tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
260 	MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn);
261 	err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn);
262 	if (err)
263 		mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err);
264 
265 	return err;
266 }
267 
268 static void destroy_tis(struct mlx5_vdpa_net *ndev)
269 {
270 	mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn);
271 }
272 
273 #define MLX5_VDPA_CQE_SIZE 64
274 #define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE)
275 
276 static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent)
277 {
278 	struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
279 	u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE;
280 	u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE;
281 	int err;
282 
283 	err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf,
284 				       ndev->mvdev.mdev->priv.numa_node);
285 	if (err)
286 		return err;
287 
288 	mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);
289 
290 	buf->cqe_size = MLX5_VDPA_CQE_SIZE;
291 	buf->nent = nent;
292 
293 	return 0;
294 }
295 
296 static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size)
297 {
298 	struct mlx5_frag_buf *frag_buf = &umem->frag_buf;
299 
300 	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf,
301 					ndev->mvdev.mdev->priv.numa_node);
302 }
303 
304 static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf)
305 {
306 	mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf);
307 }
308 
309 static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n)
310 {
311 	return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n);
312 }
313 
314 static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf)
315 {
316 	struct mlx5_cqe64 *cqe64;
317 	void *cqe;
318 	int i;
319 
320 	for (i = 0; i < buf->nent; i++) {
321 		cqe = get_cqe(vcq, i);
322 		cqe64 = cqe;
323 		cqe64->op_own = MLX5_CQE_INVALID << 4;
324 	}
325 }
326 
327 static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n)
328 {
329 	struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1));
330 
331 	if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
332 	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe)))
333 		return cqe64;
334 
335 	return NULL;
336 }
337 
338 static void rx_post(struct mlx5_vdpa_qp *vqp, int n)
339 {
340 	vqp->head += n;
341 	vqp->db.db[0] = cpu_to_be32(vqp->head);
342 }
343 
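/* Fill the create_qp command for one end of the notification channel. For
 * the firmware-owned QP only a zero-length RQ and no SQ are requested, as
 * the remaining attributes are chosen by firmware; the driver-owned QP is
 * an RQ-only RC QP bound to the virtqueue CQ and its receive buffer pages.
 */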
344 static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in,
345 		       struct mlx5_vdpa_virtqueue *mvq, u32 num_ent)
346 {
347 	struct mlx5_vdpa_qp *vqp;
348 	__be64 *pas;
349 	void *qpc;
350 
351 	vqp = fw ? &mvq->fwqp : &mvq->vqqp;
352 	MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid);
353 	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
354 	if (vqp->fw) {
355 		/* The firmware QP is allocated by the driver for the firmware's
356 		 * use, so we can skip some parameters; they will be chosen by firmware.
357 		 */
358 		qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
359 		MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
360 		MLX5_SET(qpc, qpc, no_sq, 1);
361 		return;
362 	}
363 
364 	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
365 	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
366 	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
367 	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
368 	MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index);
369 	MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
370 	MLX5_SET(qpc, qpc, no_sq, 1);
371 	MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn);
372 	MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent));
373 	MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
374 	pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas);
375 	mlx5_fill_page_frag_array(&vqp->frag_buf, pas);
376 }
377 
378 static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent)
379 {
380 	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev,
381 					num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf,
382 					ndev->mvdev.mdev->priv.numa_node);
383 }
384 
385 static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
386 {
387 	mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf);
388 }
389 
390 static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
391 		     struct mlx5_vdpa_qp *vqp)
392 {
393 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
394 	int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
395 	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
396 	void *qpc;
397 	void *in;
398 	int err;
399 
400 	if (!vqp->fw) {
401 		vqp = &mvq->vqqp;
402 		err = rq_buf_alloc(ndev, vqp, mvq->num_ent);
403 		if (err)
404 			return err;
405 
406 		err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db);
407 		if (err)
408 			goto err_db;
409 		inlen += vqp->frag_buf.npages * sizeof(__be64);
410 	}
411 
412 	in = kzalloc(inlen, GFP_KERNEL);
413 	if (!in) {
414 		err = -ENOMEM;
415 		goto err_kzalloc;
416 	}
417 
418 	qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent);
419 	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
420 	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
421 	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
422 	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
423 	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
424 	if (!vqp->fw)
425 		MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma);
426 	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
427 	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
428 	kfree(in);
429 	if (err)
430 		goto err_kzalloc;
431 
432 	vqp->mqp.uid = ndev->mvdev.res.uid;
433 	vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);
434 
435 	if (!vqp->fw)
436 		rx_post(vqp, mvq->num_ent);
437 
438 	return 0;
439 
440 err_kzalloc:
441 	if (!vqp->fw)
442 		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
443 err_db:
444 	if (!vqp->fw)
445 		rq_buf_free(ndev, vqp);
446 
447 	return err;
448 }
449 
450 static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
451 {
452 	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
453 
454 	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
455 	MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
456 	MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
457 	if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
458 		mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn);
459 	if (!vqp->fw) {
460 		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
461 		rq_buf_free(ndev, vqp);
462 	}
463 }
464 
465 static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
466 {
467 	return get_sw_cqe(cq, cq->mcq.cons_index);
468 }
469 
470 static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
471 {
472 	struct mlx5_cqe64 *cqe64;
473 
474 	cqe64 = next_cqe_sw(vcq);
475 	if (!cqe64)
476 		return -EAGAIN;
477 
478 	vcq->mcq.cons_index++;
479 	return 0;
480 }
481 
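/* Acknowledge the CQEs consumed so far: advance the CQ consumer index,
 * re-post the receive descriptors on the driver QP and notify the vdpa
 * core through the registered event callback.
 */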
482 static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
483 {
484 	mlx5_cq_set_ci(&mvq->cq.mcq);
485 
486 	/* Make sure the CQ consumer index update is visible to the hardware
487 	 * before updating the RX doorbell record.
488 	 */
489 	dma_wmb();
490 	rx_post(&mvq->vqqp, num);
491 	if (mvq->event_cb.callback)
492 		mvq->event_cb.callback(mvq->event_cb.private);
493 }
494 
495 static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
496 {
497 	struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
498 	struct mlx5_vdpa_net *ndev = mvq->ndev;
499 	void __iomem *uar_page = ndev->mvdev.res.uar->map;
500 	int num = 0;
501 
502 	while (!mlx5_vdpa_poll_one(&mvq->cq)) {
503 		num++;
504 		if (num > mvq->num_ent / 2) {
505 			/* If completions keep coming while we poll, we want to
506 			 * let the hardware know that we consumed them by
507 			 * updating the doorbell record. We also let the vdpa core
508 			 * know about this so it passes it on to the virtio driver
509 			 * in the guest.
510 			 */
511 			mlx5_vdpa_handle_completions(mvq, num);
512 			num = 0;
513 		}
514 	}
515 
516 	if (num)
517 		mlx5_vdpa_handle_completions(mvq, num);
518 
519 	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
520 }
521 
522 static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
523 {
524 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
525 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
526 	void __iomem *uar_page = ndev->mvdev.res.uar->map;
527 	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
528 	struct mlx5_vdpa_cq *vcq = &mvq->cq;
529 	__be64 *pas;
530 	int inlen;
531 	void *cqc;
532 	void *in;
533 	int err;
534 	int eqn;
535 
536 	err = mlx5_db_alloc(mdev, &vcq->db);
537 	if (err)
538 		return err;
539 
540 	vcq->mcq.set_ci_db = vcq->db.db;
541 	vcq->mcq.arm_db = vcq->db.db + 1;
542 	vcq->mcq.cqe_sz = 64;
543 
544 	err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
545 	if (err)
546 		goto err_db;
547 
548 	cq_frag_buf_init(vcq, &vcq->buf);
549 
550 	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
551 		MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages;
552 	in = kzalloc(inlen, GFP_KERNEL);
553 	if (!in) {
554 		err = -ENOMEM;
555 		goto err_vzalloc;
556 	}
557 
558 	MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid);
559 	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
560 	mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas);
561 
562 	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
563 	MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
564 
565 	/* Use vector 0 by default. Consider adding code to choose the least
566 	 * used vector.
567 	 */
568 	err = mlx5_vector2eqn(mdev, 0, &eqn);
569 	if (err)
570 		goto err_vec;
571 
572 	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
573 	MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
574 	MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
575 	MLX5_SET(cqc, cqc, c_eqn, eqn);
576 	MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);
577 
578 	err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
579 	if (err)
580 		goto err_vec;
581 
582 	vcq->mcq.comp = mlx5_vdpa_cq_comp;
583 	vcq->cqe = num_ent;
584 	vcq->mcq.set_ci_db = vcq->db.db;
585 	vcq->mcq.arm_db = vcq->db.db + 1;
586 	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
587 	kfree(in);
588 	return 0;
589 
590 err_vec:
591 	kfree(in);
592 err_vzalloc:
593 	cq_frag_buf_free(ndev, &vcq->buf);
594 err_db:
595 	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
596 	return err;
597 }
598 
599 static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
600 {
601 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
602 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
603 	struct mlx5_vdpa_cq *vcq = &mvq->cq;
604 
605 	if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) {
606 		mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn);
607 		return;
608 	}
609 	cq_frag_buf_free(ndev, &vcq->buf);
610 	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
611 }
612 
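/* The device reports the required size of each umem as a pair of
 * parameters (a, b); the buffer must hold a * queue_size + b bytes,
 * e.g. for a 256-entry queue with a = 128 and b = 4096 this is 36864 bytes.
 */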
613 static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
614 			  struct mlx5_vdpa_umem **umemp)
615 {
616 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
617 	int p_a;
618 	int p_b;
619 
620 	switch (num) {
621 	case 1:
622 		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_a);
623 		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_b);
624 		*umemp = &mvq->umem1;
625 		break;
626 	case 2:
627 		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_a);
628 		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_b);
629 		*umemp = &mvq->umem2;
630 		break;
631 	case 3:
632 		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_a);
633 		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_b);
634 		*umemp = &mvq->umem3;
635 		break;
636 	}
637 	(*umemp)->size = p_a * mvq->num_ent + p_b;
638 }
639 
640 static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem)
641 {
642 	mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf);
643 }
644 
645 static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
646 {
647 	int inlen;
648 	u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {};
649 	void *um;
650 	void *in;
651 	int err;
652 	__be64 *pas;
653 	struct mlx5_vdpa_umem *umem;
654 
655 	set_umem_size(ndev, mvq, num, &umem);
656 	err = umem_frag_buf_alloc(ndev, umem, umem->size);
657 	if (err)
658 		return err;
659 
660 	inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages;
661 
662 	in = kzalloc(inlen, GFP_KERNEL);
663 	if (!in) {
664 		err = -ENOMEM;
665 		goto err_in;
666 	}
667 
668 	MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM);
669 	MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid);
670 	um = MLX5_ADDR_OF(create_umem_in, in, umem);
671 	MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
672 	MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages);
673 
674 	pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]);
675 	mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW);
676 
677 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
678 	if (err) {
679 		mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err);
680 		goto err_cmd;
681 	}
682 
683 	kfree(in);
684 	umem->id = MLX5_GET(create_umem_out, out, umem_id);
685 
686 	return 0;
687 
688 err_cmd:
689 	kfree(in);
690 err_in:
691 	umem_frag_buf_free(ndev, umem);
692 	return err;
693 }
694 
695 static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
696 {
697 	u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {};
698 	u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {};
699 	struct mlx5_vdpa_umem *umem;
700 
701 	switch (num) {
702 	case 1:
703 		umem = &mvq->umem1;
704 		break;
705 	case 2:
706 		umem = &mvq->umem2;
707 		break;
708 	case 3:
709 		umem = &mvq->umem3;
710 		break;
711 	}
712 
713 	MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM);
714 	MLX5_SET(destroy_umem_in, in, umem_id, umem->id);
715 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
716 		return;
717 
718 	umem_frag_buf_free(ndev, umem);
719 }
720 
721 static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
722 {
723 	int num;
724 	int err;
725 
726 	for (num = 1; num <= 3; num++) {
727 		err = create_umem(ndev, mvq, num);
728 		if (err)
729 			goto err_umem;
730 	}
731 	return 0;
732 
733 err_umem:
734 	for (num--; num > 0; num--)
735 		umem_destroy(ndev, mvq, num);
736 
737 	return err;
738 }
739 
740 static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
741 {
742 	int num;
743 
744 	for (num = 3; num > 0; num--)
745 		umem_destroy(ndev, mvq, num);
746 }
747 
748 static int get_queue_type(struct mlx5_vdpa_net *ndev)
749 {
750 	u32 type_mask;
751 
752 	type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type);
753 
754 	/* prefer split queue */
755 	if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)
756 		return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT;
757 
758 	WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED));
759 
760 	return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED;
761 }
762 
763 static bool vq_is_tx(u16 idx)
764 {
765 	return idx % 2;
766 }
767 
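/* Pack the negotiated offload features into the layout used by the
 * queue_feature_bit_mask_12_3 field of the virtqueue object:
 * bit 9 - HOST_TSO4, bit 8 - HOST_TSO6, bit 7 - CSUM, bit 6 - GUEST_CSUM.
 */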
768 static u16 get_features_12_3(u64 features)
769 {
770 	return (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << 9) |
771 	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << 8) |
772 	       (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << 7) |
773 	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_CSUM)) << 6);
774 }
775 
776 static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
777 {
778 	int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
779 	u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
780 	void *obj_context;
781 	void *cmd_hdr;
782 	void *vq_ctx;
783 	void *in;
784 	int err;
785 
786 	err = umems_create(ndev, mvq);
787 	if (err)
788 		return err;
789 
790 	in = kzalloc(inlen, GFP_KERNEL);
791 	if (!in) {
792 		err = -ENOMEM;
793 		goto err_alloc;
794 	}
795 
796 	cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);
797 
798 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
799 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
800 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
801 
802 	obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);
803 	MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
804 	MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
805 	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
806 		 get_features_12_3(ndev->mvdev.actual_features));
807 	vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
808 	MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));
809 
810 	if (vq_is_tx(mvq->index))
811 		MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);
812 
813 	MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
814 	MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
815 	MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
816 	MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
817 	MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
818 		 !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
819 	MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
820 	MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
821 	MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
822 	MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, ndev->mvdev.mr.mkey.key);
823 	MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
824 	MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
825 	MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
826 	MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size);
827 	MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
828 	MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
829 	MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
830 	if (MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, eth_frame_offload_type))
831 		MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0, 1);
832 
833 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
834 	if (err)
835 		goto err_cmd;
836 
837 	kfree(in);
838 	mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
839 
840 	return 0;
841 
842 err_cmd:
843 	kfree(in);
844 err_alloc:
845 	umems_destroy(ndev, mvq);
846 	return err;
847 }
848 
849 static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
850 {
851 	u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {};
852 	u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {};
853 
854 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode,
855 		 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
856 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id);
857 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid);
858 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type,
859 		 MLX5_OBJ_TYPE_VIRTIO_NET_Q);
860 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) {
861 		mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
862 		return;
863 	}
864 	umems_destroy(ndev, mvq);
865 }
866 
867 static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
868 {
869 	return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn;
870 }
871 
872 static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
873 {
874 	return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn;
875 }
876 
877 static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out,
878 			int *outlen, u32 qpn, u32 rqpn)
879 {
880 	void *qpc;
881 	void *pp;
882 
883 	switch (cmd) {
884 	case MLX5_CMD_OP_2RST_QP:
885 		*inlen = MLX5_ST_SZ_BYTES(qp_2rst_in);
886 		*outlen = MLX5_ST_SZ_BYTES(qp_2rst_out);
887 		*in = kzalloc(*inlen, GFP_KERNEL);
888 		*out = kzalloc(*outlen, GFP_KERNEL);
889 		if (!*in || !*out)
890 			goto outerr;
891 
892 		MLX5_SET(qp_2rst_in, *in, opcode, cmd);
893 		MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid);
894 		MLX5_SET(qp_2rst_in, *in, qpn, qpn);
895 		break;
896 	case MLX5_CMD_OP_RST2INIT_QP:
897 		*inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in);
898 		*outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out);
899 		*in = kzalloc(*inlen, GFP_KERNEL);
900 		*out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL);
901 		if (!*in || !*out)
902 			goto outerr;
903 
904 		MLX5_SET(rst2init_qp_in, *in, opcode, cmd);
905 		MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid);
906 		MLX5_SET(rst2init_qp_in, *in, qpn, qpn);
907 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
908 		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
909 		MLX5_SET(qpc, qpc, rwe, 1);
910 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
911 		MLX5_SET(ads, pp, vhca_port_num, 1);
912 		break;
913 	case MLX5_CMD_OP_INIT2RTR_QP:
914 		*inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in);
915 		*outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out);
916 		*in = kzalloc(*inlen, GFP_KERNEL);
917 		*out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL);
918 		if (!*in || !*out)
919 			goto outerr;
920 
921 		MLX5_SET(init2rtr_qp_in, *in, opcode, cmd);
922 		MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid);
923 		MLX5_SET(init2rtr_qp_in, *in, qpn, qpn);
924 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
925 		MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
926 		MLX5_SET(qpc, qpc, log_msg_max, 30);
927 		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
928 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
929 		MLX5_SET(ads, pp, fl, 1);
930 		break;
931 	case MLX5_CMD_OP_RTR2RTS_QP:
932 		*inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in);
933 		*outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out);
934 		*in = kzalloc(*inlen, GFP_KERNEL);
935 		*out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL);
936 		if (!*in || !*out)
937 			goto outerr;
938 
939 		MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd);
940 		MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid);
941 		MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn);
942 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
943 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
944 		MLX5_SET(ads, pp, ack_timeout, 14);
945 		MLX5_SET(qpc, qpc, retry_count, 7);
946 		MLX5_SET(qpc, qpc, rnr_retry, 7);
947 		break;
948 	default:
949 		goto outerr_nullify;
950 	}
951 
952 	return;
953 
954 outerr:
955 	kfree(*in);
956 	kfree(*out);
957 outerr_nullify:
958 	*in = NULL;
959 	*out = NULL;
960 }
961 
962 static void free_inout(void *in, void *out)
963 {
964 	kfree(in);
965 	kfree(out);
966 }
967 
968 /* Two QPs are used by each virtqueue. One is used by the driver and one by
969  * firmware. The fw argument indicates whether the QP being modified is the
970  * one used by firmware.
971  */
972 static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd)
973 {
974 	int outlen;
975 	int inlen;
976 	void *out;
977 	void *in;
978 	int err;
979 
980 	alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw));
981 	if (!in || !out)
982 		return -ENOMEM;
983 
984 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen);
985 	free_inout(in, out);
986 	return err;
987 }
988 
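/* Bring both ends of the RC connection through the standard state machine
 * (RESET -> INIT -> RTR), each side using the other as its remote QP; only
 * the firmware QP, which posts the notifications, is moved on to RTS.
 */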
989 static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
990 {
991 	int err;
992 
993 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP);
994 	if (err)
995 		return err;
996 
997 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP);
998 	if (err)
999 		return err;
1000 
1001 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP);
1002 	if (err)
1003 		return err;
1004 
1005 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP);
1006 	if (err)
1007 		return err;
1008 
1009 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP);
1010 	if (err)
1011 		return err;
1012 
1013 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP);
1014 	if (err)
1015 		return err;
1016 
1017 	return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP);
1018 }
1019 
1020 struct mlx5_virtq_attr {
1021 	u8 state;
1022 	u16 available_index;
1023 	u16 used_index;
1024 };
1025 
1026 static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
1027 			   struct mlx5_virtq_attr *attr)
1028 {
1029 	int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out);
1030 	u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {};
1031 	void *out;
1032 	void *obj_context;
1033 	void *cmd_hdr;
1034 	int err;
1035 
1036 	out = kzalloc(outlen, GFP_KERNEL);
1037 	if (!out)
1038 		return -ENOMEM;
1039 
1040 	cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1041 
1042 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
1043 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1044 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1045 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1046 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen);
1047 	if (err)
1048 		goto err_cmd;
1049 
1050 	obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context);
1051 	memset(attr, 0, sizeof(*attr));
1052 	attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
1053 	attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);
1054 	attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index);
1055 	kfree(out);
1056 	return 0;
1057 
1058 err_cmd:
1059 	kfree(out);
1060 	return err;
1061 }
1062 
1063 static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
1064 {
1065 	int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
1066 	u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {};
1067 	void *obj_context;
1068 	void *cmd_hdr;
1069 	void *in;
1070 	int err;
1071 
1072 	in = kzalloc(inlen, GFP_KERNEL);
1073 	if (!in)
1074 		return -ENOMEM;
1075 
1076 	cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1077 
1078 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
1079 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1080 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1081 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1082 
1083 	obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context);
1084 	MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select,
1085 		   MLX5_VIRTQ_MODIFY_MASK_STATE);
1086 	MLX5_SET(virtio_net_q_object, obj_context, state, state);
1087 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
1088 	kfree(in);
1089 	if (!err)
1090 		mvq->fw_state = state;
1091 
1092 	return err;
1093 }
1094 
1095 static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1096 {
1097 	u16 idx = mvq->index;
1098 	int err;
1099 
1100 	if (!mvq->num_ent)
1101 		return 0;
1102 
1103 	if (mvq->initialized) {
1104 		mlx5_vdpa_warn(&ndev->mvdev, "attempt re init\n");
1105 		return -EINVAL;
1106 	}
1107 
1108 	err = cq_create(ndev, idx, mvq->num_ent);
1109 	if (err)
1110 		return err;
1111 
1112 	err = qp_create(ndev, mvq, &mvq->fwqp);
1113 	if (err)
1114 		goto err_fwqp;
1115 
1116 	err = qp_create(ndev, mvq, &mvq->vqqp);
1117 	if (err)
1118 		goto err_vqqp;
1119 
1120 	err = connect_qps(ndev, mvq);
1121 	if (err)
1122 		goto err_connect;
1123 
1124 	err = create_virtqueue(ndev, mvq);
1125 	if (err)
1126 		goto err_connect;
1127 
1128 	if (mvq->ready) {
1129 		err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
1130 		if (err) {
1131 			mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n",
1132 				       idx, err);
1133 			goto err_connect;
1134 		}
1135 	}
1136 
1137 	mvq->initialized = true;
1138 	return 0;
1139 
1140 err_connect:
1141 	qp_destroy(ndev, &mvq->vqqp);
1142 err_vqqp:
1143 	qp_destroy(ndev, &mvq->fwqp);
1144 err_fwqp:
1145 	cq_destroy(ndev, idx);
1146 	return err;
1147 }
1148 
1149 static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1150 {
1151 	struct mlx5_virtq_attr attr;
1152 
1153 	if (!mvq->initialized)
1154 		return;
1155 
1156 	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
1157 		return;
1158 
1159 	if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND))
1160 		mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n");
1161 
1162 	if (query_virtqueue(ndev, mvq, &attr)) {
1163 		mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n");
1164 		return;
1165 	}
1166 	mvq->avail_idx = attr.available_index;
1167 	mvq->used_idx = attr.used_index;
1168 }
1169 
1170 static void suspend_vqs(struct mlx5_vdpa_net *ndev)
1171 {
1172 	int i;
1173 
1174 	for (i = 0; i < MLX5_MAX_SUPPORTED_VQS; i++)
1175 		suspend_vq(ndev, &ndev->vqs[i]);
1176 }
1177 
1178 static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1179 {
1180 	if (!mvq->initialized)
1181 		return;
1182 
1183 	suspend_vq(ndev, mvq);
1184 	destroy_virtqueue(ndev, mvq);
1185 	qp_destroy(ndev, &mvq->vqqp);
1186 	qp_destroy(ndev, &mvq->fwqp);
1187 	cq_destroy(ndev, mvq->index);
1188 	mvq->initialized = false;
1189 }
1190 
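/* Build an RQ table listing the virtqueue objects of the initialized RX
 * (even index) virtqueues; TX virtqueues are skipped. The table is later
 * referenced by the TIR as its indirection table.
 */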
1191 static int create_rqt(struct mlx5_vdpa_net *ndev)
1192 {
1193 	int log_max_rqt;
1194 	__be32 *list;
1195 	void *rqtc;
1196 	int inlen;
1197 	void *in;
1198 	int i, j;
1199 	int err;
1200 
1201 	log_max_rqt = min_t(int, 1, MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
1202 	if (log_max_rqt < 1)
1203 		return -EOPNOTSUPP;
1204 
1205 	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + (1 << log_max_rqt) * MLX5_ST_SZ_BYTES(rq_num);
1206 	in = kzalloc(inlen, GFP_KERNEL);
1207 	if (!in)
1208 		return -ENOMEM;
1209 
1210 	MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid);
1211 	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
1212 
1213 	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1214 	MLX5_SET(rqtc, rqtc, rqt_max_size, 1 << log_max_rqt);
1215 	MLX5_SET(rqtc, rqtc, rqt_actual_size, 1);
1216 	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1217 	for (i = 0, j = 0; j < ndev->mvdev.max_vqs; j++) {
1218 		if (!ndev->vqs[j].initialized)
1219 			continue;
1220 
1221 		if (!vq_is_tx(ndev->vqs[j].index)) {
1222 			list[i] = cpu_to_be32(ndev->vqs[j].virtq_id);
1223 			i++;
1224 		}
1225 	}
1226 
1227 	err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
1228 	kfree(in);
1229 	if (err)
1230 		return err;
1231 
1232 	return 0;
1233 }
1234 
1235 static void destroy_rqt(struct mlx5_vdpa_net *ndev)
1236 {
1237 	mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
1238 }
1239 
1240 static int create_tir(struct mlx5_vdpa_net *ndev)
1241 {
1242 #define HASH_IP_L4PORTS                                                                            \
1243 	(MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT |  \
1244 	 MLX5_HASH_FIELD_SEL_L4_DPORT)
1245 	static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
1246 						   0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
1247 						   0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
1248 						   0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
1249 						   0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a };
1250 	void *rss_key;
1251 	void *outer;
1252 	void *tirc;
1253 	void *in;
1254 	int err;
1255 
1256 	in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL);
1257 	if (!in)
1258 		return -ENOMEM;
1259 
1260 	MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid);
1261 	tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
1262 	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
1263 
1264 	MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
1265 	MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
1266 	rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
1267 	memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key));
1268 
1269 	outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
1270 	MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4);
1271 	MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP);
1272 	MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS);
1273 
1274 	MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn);
1275 	MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn);
1276 
1277 	err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn);
1278 	kfree(in);
1279 	return err;
1280 }
1281 
1282 static void destroy_tir(struct mlx5_vdpa_net *ndev)
1283 {
1284 	mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn);
1285 }
1286 
1287 static int add_fwd_to_tir(struct mlx5_vdpa_net *ndev)
1288 {
1289 	struct mlx5_flow_destination dest[2] = {};
1290 	struct mlx5_flow_table_attr ft_attr = {};
1291 	struct mlx5_flow_act flow_act = {};
1292 	struct mlx5_flow_namespace *ns;
1293 	int err;
1294 
1295 	/* for now, one entry, match all, forward to tir */
1296 	ft_attr.max_fte = 1;
1297 	ft_attr.autogroup.max_num_groups = 1;
1298 
1299 	ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS);
1300 	if (!ns) {
1301 		mlx5_vdpa_warn(&ndev->mvdev, "get flow namespace\n");
1302 		return -EOPNOTSUPP;
1303 	}
1304 
1305 	ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
1306 	if (IS_ERR(ndev->rxft))
1307 		return PTR_ERR(ndev->rxft);
1308 
1309 	ndev->rx_counter = mlx5_fc_create(ndev->mvdev.mdev, false);
1310 	if (IS_ERR(ndev->rx_counter)) {
1311 		err = PTR_ERR(ndev->rx_counter);
1312 		goto err_fc;
1313 	}
1314 
1315 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT;
1316 	dest[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1317 	dest[0].tir_num = ndev->res.tirn;
1318 	dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1319 	dest[1].counter_id = mlx5_fc_id(ndev->rx_counter);
1320 	ndev->rx_rule = mlx5_add_flow_rules(ndev->rxft, NULL, &flow_act, dest, 2);
1321 	if (IS_ERR(ndev->rx_rule)) {
1322 		err = PTR_ERR(ndev->rx_rule);
1323 		ndev->rx_rule = NULL;
1324 		goto err_rule;
1325 	}
1326 
1327 	return 0;
1328 
1329 err_rule:
1330 	mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter);
1331 err_fc:
1332 	mlx5_destroy_flow_table(ndev->rxft);
1333 	return err;
1334 }
1335 
1336 static void remove_fwd_to_tir(struct mlx5_vdpa_net *ndev)
1337 {
1338 	if (!ndev->rx_rule)
1339 		return;
1340 
1341 	mlx5_del_flow_rules(ndev->rx_rule);
1342 	mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter);
1343 	mlx5_destroy_flow_table(ndev->rxft);
1344 
1345 	ndev->rx_rule = NULL;
1346 }
1347 
1348 static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
1349 {
1350 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1351 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1352 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
1353 
1354 	if (unlikely(!mvq->ready))
1355 		return;
1356 
1357 	iowrite16(idx, ndev->mvdev.res.kick_addr);
1358 }
1359 
1360 static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area,
1361 				    u64 driver_area, u64 device_area)
1362 {
1363 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1364 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1365 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
1366 
1367 	mvq->desc_addr = desc_area;
1368 	mvq->device_addr = device_area;
1369 	mvq->driver_addr = driver_area;
1370 	return 0;
1371 }
1372 
1373 static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
1374 {
1375 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1376 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1377 	struct mlx5_vdpa_virtqueue *mvq;
1378 
1379 	mvq = &ndev->vqs[idx];
1380 	mvq->num_ent = num;
1381 }
1382 
1383 static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb)
1384 {
1385 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1386 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1387 	struct mlx5_vdpa_virtqueue *vq = &ndev->vqs[idx];
1388 
1389 	vq->event_cb = *cb;
1390 }
1391 
1392 static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
1393 {
1394 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1395 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1396 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
1397 
1398 	if (!ready)
1399 		suspend_vq(ndev, mvq);
1400 
1401 	mvq->ready = ready;
1402 }
1403 
1404 static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
1405 {
1406 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1407 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1408 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
1409 
1410 	return mvq->ready;
1411 }
1412 
1413 static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
1414 				  const struct vdpa_vq_state *state)
1415 {
1416 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1417 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1418 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
1419 
1420 	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
1421 		mlx5_vdpa_warn(mvdev, "can't modify available index\n");
1422 		return -EINVAL;
1423 	}
1424 
1425 	mvq->used_idx = state->split.avail_index;
1426 	mvq->avail_idx = state->split.avail_index;
1427 	return 0;
1428 }
1429 
1430 static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state)
1431 {
1432 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1433 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1434 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
1435 	struct mlx5_virtq_attr attr;
1436 	int err;
1437 
1438 	/* If the virtq object was destroyed, use the value saved by the last
1439 	 * call to suspend_vq. This caters for userspace that cares about
1440 	 * emulating the index after the vq is stopped.
1441 	 */
1442 	if (!mvq->initialized) {
1443 		/* Firmware returns a wrong value for the available index.
1444 		 * Since both values should be identical, we take the value of
1445 		 * used_idx which is reported correctly.
1446 		 */
1447 		state->split.avail_index = mvq->used_idx;
1448 		return 0;
1449 	}
1450 
1451 	err = query_virtqueue(ndev, mvq, &attr);
1452 	if (err) {
1453 		mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n");
1454 		return err;
1455 	}
1456 	state->split.avail_index = attr.used_index;
1457 	return 0;
1458 }
1459 
1460 static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
1461 {
1462 	return PAGE_SIZE;
1463 }
1464 
1465 enum { MLX5_VIRTIO_NET_F_GUEST_CSUM = 1 << 9,
1466 	MLX5_VIRTIO_NET_F_CSUM = 1 << 10,
1467 	MLX5_VIRTIO_NET_F_HOST_TSO6 = 1 << 11,
1468 	MLX5_VIRTIO_NET_F_HOST_TSO4 = 1 << 12,
1469 };
1470 
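/* Translate the device capabilities advertised in device_features_bits_mask
 * into the corresponding VIRTIO_NET_F_* feature bits.
 */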
1471 static u64 mlx_to_virtio_features(u16 dev_features)
1472 {
1473 	u64 result = 0;
1474 
1475 	if (dev_features & MLX5_VIRTIO_NET_F_GUEST_CSUM)
1476 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM);
1477 	if (dev_features & MLX5_VIRTIO_NET_F_CSUM)
1478 		result |= BIT_ULL(VIRTIO_NET_F_CSUM);
1479 	if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO6)
1480 		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6);
1481 	if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO4)
1482 		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4);
1483 
1484 	return result;
1485 }
1486 
1487 static u64 mlx5_vdpa_get_features(struct vdpa_device *vdev)
1488 {
1489 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1490 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1491 	u16 dev_features;
1492 
1493 	dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, device_features_bits_mask);
1494 	ndev->mvdev.mlx_features = mlx_to_virtio_features(dev_features);
1495 	if (MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, virtio_version_1_0))
1496 		ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_VERSION_1);
1497 	ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
1498 	print_features(mvdev, ndev->mvdev.mlx_features, false);
1499 	return ndev->mvdev.mlx_features;
1500 }
1501 
1502 static int verify_min_features(struct mlx5_vdpa_dev *mvdev, u64 features)
1503 {
1504 	if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
1505 		return -EOPNOTSUPP;
1506 
1507 	return 0;
1508 }
1509 
1510 static int setup_virtqueues(struct mlx5_vdpa_net *ndev)
1511 {
1512 	int err;
1513 	int i;
1514 
1515 	for (i = 0; i < 2 * mlx5_vdpa_max_qps(ndev->mvdev.max_vqs); i++) {
1516 		err = setup_vq(ndev, &ndev->vqs[i]);
1517 		if (err)
1518 			goto err_vq;
1519 	}
1520 
1521 	return 0;
1522 
1523 err_vq:
1524 	for (--i; i >= 0; i--)
1525 		teardown_vq(ndev, &ndev->vqs[i]);
1526 
1527 	return err;
1528 }
1529 
1530 static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
1531 {
1532 	struct mlx5_vdpa_virtqueue *mvq;
1533 	int i;
1534 
1535 	for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) {
1536 		mvq = &ndev->vqs[i];
1537 		if (!mvq->initialized)
1538 			continue;
1539 
1540 		teardown_vq(ndev, mvq);
1541 	}
1542 }
1543 
1544 /* TODO: cross-endian support */
1545 static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
1546 {
1547 	return virtio_legacy_is_little_endian() ||
1548 		(mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
1549 }
1550 
1551 static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
1552 {
1553 	return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
1554 }
1555 
1556 static int mlx5_vdpa_set_features(struct vdpa_device *vdev, u64 features)
1557 {
1558 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1559 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1560 	int err;
1561 
1562 	print_features(mvdev, features, true);
1563 
1564 	err = verify_min_features(mvdev, features);
1565 	if (err)
1566 		return err;
1567 
1568 	ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
1569 	ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, ndev->mtu);
1570 	ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
1571 	return err;
1572 }
1573 
1574 static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb)
1575 {
1576 	/* not implemented */
1577 	mlx5_vdpa_warn(to_mvdev(vdev), "set config callback not supported\n");
1578 }
1579 
1580 #define MLX5_VDPA_MAX_VQ_ENTRIES 256
1581 static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev)
1582 {
1583 	return MLX5_VDPA_MAX_VQ_ENTRIES;
1584 }
1585 
1586 static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev)
1587 {
1588 	return VIRTIO_ID_NET;
1589 }
1590 
1591 static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev)
1592 {
1593 	return PCI_VENDOR_ID_MELLANOX;
1594 }
1595 
1596 static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev)
1597 {
1598 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1599 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1600 
1601 	print_status(mvdev, ndev->mvdev.status, false);
1602 	return ndev->mvdev.status;
1603 }
1604 
1605 static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1606 {
1607 	struct mlx5_vq_restore_info *ri = &mvq->ri;
1608 	struct mlx5_virtq_attr attr;
1609 	int err;
1610 
1611 	if (!mvq->initialized)
1612 		return 0;
1613 
1614 	err = query_virtqueue(ndev, mvq, &attr);
1615 	if (err)
1616 		return err;
1617 
1618 	ri->avail_index = attr.available_index;
1619 	ri->used_index = attr.used_index;
1620 	ri->ready = mvq->ready;
1621 	ri->num_ent = mvq->num_ent;
1622 	ri->desc_addr = mvq->desc_addr;
1623 	ri->device_addr = mvq->device_addr;
1624 	ri->driver_addr = mvq->driver_addr;
1625 	ri->cb = mvq->event_cb;
1626 	ri->restore = true;
1627 	return 0;
1628 }
1629 
1630 static int save_channels_info(struct mlx5_vdpa_net *ndev)
1631 {
1632 	int i;
1633 
1634 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
1635 		memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri));
1636 		save_channel_info(ndev, &ndev->vqs[i]);
1637 	}
1638 	return 0;
1639 }
1640 
1641 static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev)
1642 {
1643 	int i;
1644 
1645 	for (i = 0; i < ndev->mvdev.max_vqs; i++)
1646 		memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
1647 }
1648 
1649 static void restore_channels_info(struct mlx5_vdpa_net *ndev)
1650 {
1651 	struct mlx5_vdpa_virtqueue *mvq;
1652 	struct mlx5_vq_restore_info *ri;
1653 	int i;
1654 
1655 	mlx5_clear_vqs(ndev);
1656 	init_mvqs(ndev);
1657 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
1658 		mvq = &ndev->vqs[i];
1659 		ri = &mvq->ri;
1660 		if (!ri->restore)
1661 			continue;
1662 
1663 		mvq->avail_idx = ri->avail_index;
1664 		mvq->used_idx = ri->used_index;
1665 		mvq->ready = ri->ready;
1666 		mvq->num_ent = ri->num_ent;
1667 		mvq->desc_addr = ri->desc_addr;
1668 		mvq->device_addr = ri->device_addr;
1669 		mvq->driver_addr = ri->driver_addr;
1670 		mvq->event_cb = ri->cb;
1671 	}
1672 }
1673 
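/* Handle a change of the memory map: suspend the virtqueues, save their
 * state, tear down the datapath and rebuild the memory key from the new
 * iotlb. If the guest driver already set DRIVER_OK, restore the saved
 * virtqueue state and recreate the datapath.
 */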
1674 static int mlx5_vdpa_change_map(struct mlx5_vdpa_net *ndev, struct vhost_iotlb *iotlb)
1675 {
1676 	int err;
1677 
1678 	suspend_vqs(ndev);
1679 	err = save_channels_info(ndev);
1680 	if (err)
1681 		goto err_mr;
1682 
1683 	teardown_driver(ndev);
1684 	mlx5_vdpa_destroy_mr(&ndev->mvdev);
1685 	err = mlx5_vdpa_create_mr(&ndev->mvdev, iotlb);
1686 	if (err)
1687 		goto err_mr;
1688 
1689 	if (!(ndev->mvdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
1690 		return 0;
1691 
1692 	restore_channels_info(ndev);
1693 	err = setup_driver(ndev);
1694 	if (err)
1695 		goto err_setup;
1696 
1697 	return 0;
1698 
1699 err_setup:
1700 	mlx5_vdpa_destroy_mr(&ndev->mvdev);
1701 err_mr:
1702 	return err;
1703 }
1704 
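/* Create the datapath under reslock: the virtqueue objects, the RQ table,
 * the TIR and the flow rule that forwards all received traffic to the TIR.
 */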
1705 static int setup_driver(struct mlx5_vdpa_net *ndev)
1706 {
1707 	int err;
1708 
1709 	mutex_lock(&ndev->reslock);
1710 	if (ndev->setup) {
1711 		mlx5_vdpa_warn(&ndev->mvdev, "setup driver called for already setup driver\n");
1712 		err = 0;
1713 		goto out;
1714 	}
1715 	err = setup_virtqueues(ndev);
1716 	if (err) {
1717 		mlx5_vdpa_warn(&ndev->mvdev, "setup_virtqueues\n");
1718 		goto out;
1719 	}
1720 
1721 	err = create_rqt(ndev);
1722 	if (err) {
1723 		mlx5_vdpa_warn(&ndev->mvdev, "create_rqt\n");
1724 		goto err_rqt;
1725 	}
1726 
1727 	err = create_tir(ndev);
1728 	if (err) {
1729 		mlx5_vdpa_warn(&ndev->mvdev, "create_tir\n");
1730 		goto err_tir;
1731 	}
1732 
1733 	err = add_fwd_to_tir(ndev);
1734 	if (err) {
1735 		mlx5_vdpa_warn(&ndev->mvdev, "add_fwd_to_tir\n");
1736 		goto err_fwd;
1737 	}
1738 	ndev->setup = true;
1739 	mutex_unlock(&ndev->reslock);
1740 
1741 	return 0;
1742 
1743 err_fwd:
1744 	destroy_tir(ndev);
1745 err_tir:
1746 	destroy_rqt(ndev);
1747 err_rqt:
1748 	teardown_virtqueues(ndev);
1749 out:
1750 	mutex_unlock(&ndev->reslock);
1751 	return err;
1752 }
1753 
1754 static void teardown_driver(struct mlx5_vdpa_net *ndev)
1755 {
1756 	mutex_lock(&ndev->reslock);
1757 	if (!ndev->setup)
1758 		goto out;
1759 
1760 	remove_fwd_to_tir(ndev);
1761 	destroy_tir(ndev);
1762 	destroy_rqt(ndev);
1763 	teardown_virtqueues(ndev);
1764 	ndev->setup = false;
1765 out:
1766 	mutex_unlock(&ndev->reslock);
1767 }
1768 
1769 static void clear_vqs_ready(struct mlx5_vdpa_net *ndev)
1770 {
1771 	int i;
1772 
1773 	for (i = 0; i < ndev->mvdev.max_vqs; i++)
1774 		ndev->vqs[i].ready = false;
1775 }
1776 
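/*
 * A zero status means device reset: tear down the datapath, mark all
 * virtqueues not ready, destroy the MR and bump the config generation.
 * Setting DRIVER_OK sets up the datapath; clearing DRIVER_OK without a
 * reset is not supported and only triggers a warning.
 */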
1777 static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
1778 {
1779 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1780 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1781 	int err;
1782 
1783 	print_status(mvdev, status, true);
1784 	if (!status) {
1785 		mlx5_vdpa_info(mvdev, "performing device reset\n");
1786 		teardown_driver(ndev);
1787 		clear_vqs_ready(ndev);
1788 		mlx5_vdpa_destroy_mr(&ndev->mvdev);
1789 		ndev->mvdev.status = 0;
1790 		ndev->mvdev.mlx_features = 0;
1791 		++mvdev->generation;
1792 		if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
1793 			if (mlx5_vdpa_create_mr(mvdev, NULL))
1794 				mlx5_vdpa_warn(mvdev, "create MR failed\n");
1795 		}
1796 		return;
1797 	}
1798 
1799 	if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
1800 		if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
1801 			err = setup_driver(ndev);
1802 			if (err) {
1803 				mlx5_vdpa_warn(mvdev, "failed to set up driver\n");
1804 				goto err_setup;
1805 			}
1806 		} else {
1807 			mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
1808 			return;
1809 		}
1810 	}
1811 
1812 	ndev->mvdev.status = status;
1813 	return;
1814 
1815 err_setup:
1816 	mlx5_vdpa_destroy_mr(&ndev->mvdev);
1817 	ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
1818 }
1819 
1820 static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev)
1821 {
1822 	return sizeof(struct virtio_net_config);
1823 }
1824 
1825 static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf,
1826 				 unsigned int len)
1827 {
1828 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1829 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1830 
1831 	if (offset + len <= sizeof(struct virtio_net_config))
1832 		memcpy(buf, (u8 *)&ndev->config + offset, len);
1833 }
1834 
1835 static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf,
1836 				 unsigned int len)
1837 {
1838 	/* not supported */
1839 }
1840 
1841 static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
1842 {
1843 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1844 
1845 	return mvdev->generation;
1846 }
1847 
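/*
 * Update the device mappings from the iotlb.  mlx5_vdpa_handle_set_map()
 * reports through change_map whether a full remap is required; if so,
 * mlx5_vdpa_change_map() is taken.
 */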
1848 static int mlx5_vdpa_set_map(struct vdpa_device *vdev, struct vhost_iotlb *iotlb)
1849 {
1850 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1851 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1852 	bool change_map;
1853 	int err;
1854 
1855 	err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map);
1856 	if (err) {
1857 		mlx5_vdpa_warn(mvdev, "set map failed (%d)\n", err);
1858 		return err;
1859 	}
1860 
1861 	if (change_map)
1862 		return mlx5_vdpa_change_map(ndev, iotlb);
1863 
1864 	return 0;
1865 }
1866 
1867 static void mlx5_vdpa_free(struct vdpa_device *vdev)
1868 {
1869 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1870 	struct mlx5_core_dev *pfmdev;
1871 	struct mlx5_vdpa_net *ndev;
1872 
1873 	ndev = to_mlx5_vdpa_ndev(mvdev);
1874 
1875 	free_resources(ndev);
1876 	mlx5_vdpa_destroy_mr(mvdev);
1877 	if (!is_zero_ether_addr(ndev->config.mac)) {
1878 		pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
1879 		mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
1880 	}
1881 	mlx5_vdpa_free_resources(&ndev->mvdev);
1882 	mutex_destroy(&ndev->reslock);
1883 }
1884 
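/*
 * Report the physical kick (doorbell) address and size so that the
 * notification area can be mapped for direct doorbell writes.
 */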
1885 static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
1886 {
1887 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1888 	struct vdpa_notification_area ret = {};
1889 	struct mlx5_vdpa_net *ndev;
1890 	phys_addr_t addr;
1891 
1892 	/* If the SF BAR size is smaller than PAGE_SIZE, do not use direct
1893 	 * notification, to avoid the risk of mapping pages that contain the
1894 	 * BARs of more than one SF.
1895 	 */
1896 	if (MLX5_CAP_GEN(mvdev->mdev, log_min_sf_size) + 12 < PAGE_SHIFT)
1897 		return ret;
1898 
1899 	ndev = to_mlx5_vdpa_ndev(mvdev);
1900 	addr = (phys_addr_t)ndev->mvdev.res.phys_kick_addr;
1901 	ret.addr = addr;
1902 	ret.size = PAGE_SIZE;
1903 	return ret;
1904 }
1905 
1906 static int mlx5_get_vq_irq(struct vdpa_device *vdev, u16 idx)
1907 {
1908 	return -EOPNOTSUPP;
1909 }
1910 
1911 static const struct vdpa_config_ops mlx5_vdpa_ops = {
1912 	.set_vq_address = mlx5_vdpa_set_vq_address,
1913 	.set_vq_num = mlx5_vdpa_set_vq_num,
1914 	.kick_vq = mlx5_vdpa_kick_vq,
1915 	.set_vq_cb = mlx5_vdpa_set_vq_cb,
1916 	.set_vq_ready = mlx5_vdpa_set_vq_ready,
1917 	.get_vq_ready = mlx5_vdpa_get_vq_ready,
1918 	.set_vq_state = mlx5_vdpa_set_vq_state,
1919 	.get_vq_state = mlx5_vdpa_get_vq_state,
1920 	.get_vq_notification = mlx5_get_vq_notification,
1921 	.get_vq_irq = mlx5_get_vq_irq,
1922 	.get_vq_align = mlx5_vdpa_get_vq_align,
1923 	.get_features = mlx5_vdpa_get_features,
1924 	.set_features = mlx5_vdpa_set_features,
1925 	.set_config_cb = mlx5_vdpa_set_config_cb,
1926 	.get_vq_num_max = mlx5_vdpa_get_vq_num_max,
1927 	.get_device_id = mlx5_vdpa_get_device_id,
1928 	.get_vendor_id = mlx5_vdpa_get_vendor_id,
1929 	.get_status = mlx5_vdpa_get_status,
1930 	.set_status = mlx5_vdpa_set_status,
1931 	.get_config_size = mlx5_vdpa_get_config_size,
1932 	.get_config = mlx5_vdpa_get_config,
1933 	.set_config = mlx5_vdpa_set_config,
1934 	.get_generation = mlx5_vdpa_get_generation,
1935 	.set_map = mlx5_vdpa_set_map,
1936 	.free = mlx5_vdpa_free,
1937 };
1938 
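/*
 * The vport MTU includes the Ethernet hard header; subtract that
 * overhead to get the MTU reported to the virtio driver.
 */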
1939 static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
1940 {
1941 	u16 hw_mtu;
1942 	int err;
1943 
1944 	err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu);
1945 	if (err)
1946 		return err;
1947 
1948 	*mtu = hw_mtu - MLX5V_ETH_HARD_MTU;
1949 	return 0;
1950 }
1951 
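/*
 * Allocate the per-device networking resources: a transport domain and
 * a TIS.  free_resources() below releases them again.
 */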
1952 static int alloc_resources(struct mlx5_vdpa_net *ndev)
1953 {
1954 	struct mlx5_vdpa_net_resources *res = &ndev->res;
1955 	int err;
1956 
1957 	if (res->valid) {
1958 		mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n");
1959 		return -EEXIST;
1960 	}
1961 
1962 	err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn);
1963 	if (err)
1964 		return err;
1965 
1966 	err = create_tis(ndev);
1967 	if (err)
1968 		goto err_tis;
1969 
1970 	res->valid = true;
1971 
1972 	return 0;
1973 
1974 err_tis:
1975 	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
1976 	return err;
1977 }
1978 
1979 static void free_resources(struct mlx5_vdpa_net *ndev)
1980 {
1981 	struct mlx5_vdpa_net_resources *res = &ndev->res;
1982 
1983 	if (!res->valid)
1984 		return;
1985 
1986 	destroy_tis(ndev);
1987 	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
1988 	res->valid = false;
1989 }
1990 
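/*
 * Initialize the virtqueue array.  The data virtqueues are backed by a
 * firmware-owned QP (fwqp.fw = true); the remaining entries, reserved
 * for the control virtqueue, are not.
 */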
1991 static void init_mvqs(struct mlx5_vdpa_net *ndev)
1992 {
1993 	struct mlx5_vdpa_virtqueue *mvq;
1994 	int i;
1995 
1996 	for (i = 0; i < 2 * mlx5_vdpa_max_qps(ndev->mvdev.max_vqs); ++i) {
1997 		mvq = &ndev->vqs[i];
1998 		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
1999 		mvq->index = i;
2000 		mvq->ndev = ndev;
2001 		mvq->fwqp.fw = true;
2002 	}
2003 	for (; i < ndev->mvdev.max_vqs; i++) {
2004 		mvq = &ndev->vqs[i];
2005 		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
2006 		mvq->index = i;
2007 		mvq->ndev = ndev;
2008 	}
2009 }
2010 
2011 struct mlx5_vdpa_mgmtdev {
2012 	struct vdpa_mgmt_dev mgtdev;
2013 	struct mlx5_adev *madev;
2014 	struct mlx5_vdpa_net *ndev;
2015 };
2016 
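/*
 * Management device callback that adds a single vDPA net instance:
 * check for split-virtqueue support, query MTU and MAC (registering the
 * MAC with MPFS), allocate the core resources and register the vdpa
 * device.  Only one instance per management device is supported.
 */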
2017 static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
2018 {
2019 	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
2020 	struct virtio_net_config *config;
2021 	struct mlx5_core_dev *pfmdev;
2022 	struct mlx5_vdpa_dev *mvdev;
2023 	struct mlx5_vdpa_net *ndev;
2024 	struct mlx5_core_dev *mdev;
2025 	u32 max_vqs;
2026 	int err;
2027 
2028 	if (mgtdev->ndev)
2029 		return -ENOSPC;
2030 
2031 	mdev = mgtdev->madev->mdev;
2032 	if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) &
2033 	    MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) {
2034 		dev_warn(mdev->device, "missing support for split virtqueues\n");
2035 		return -EOPNOTSUPP;
2036 	}
2037 
2038 	/* we reserve one virtqueue for the control virtqueue, should we require it */
2039 	max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues);
2040 	max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS);
2041 
2042 	ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
2043 				 name);
2044 	if (IS_ERR(ndev))
2045 		return PTR_ERR(ndev);
2046 
2047 	ndev->mvdev.max_vqs = max_vqs;
2048 	mvdev = &ndev->mvdev;
2049 	mvdev->mdev = mdev;
2050 	init_mvqs(ndev);
2051 	mutex_init(&ndev->reslock);
2052 	config = &ndev->config;
2053 	err = query_mtu(mdev, &ndev->mtu);
2054 	if (err)
2055 		goto err_mtu;
2056 
2057 	err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
2058 	if (err)
2059 		goto err_mtu;
2060 
2061 	if (!is_zero_ether_addr(config->mac)) {
2062 		pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
2063 		err = mlx5_mpfs_add_mac(pfmdev, config->mac);
2064 		if (err)
2065 			goto err_mtu;
2066 	}
2067 
2068 	mvdev->vdev.dma_dev = &mdev->pdev->dev;
2069 	err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
2070 	if (err)
2071 		goto err_mpfs;
2072 
2073 	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
2074 		err = mlx5_vdpa_create_mr(mvdev, NULL);
2075 		if (err)
2076 			goto err_res;
2077 	}
2078 
2079 	err = alloc_resources(ndev);
2080 	if (err)
2081 		goto err_mr;
2082 
2083 	mvdev->vdev.mdev = &mgtdev->mgtdev;
2084 	err = _vdpa_register_device(&mvdev->vdev, 2 * mlx5_vdpa_max_qps(max_vqs));
2085 	if (err)
2086 		goto err_reg;
2087 
2088 	mgtdev->ndev = ndev;
2089 	return 0;
2090 
2091 err_reg:
2092 	free_resources(ndev);
2093 err_mr:
2094 	mlx5_vdpa_destroy_mr(mvdev);
2095 err_res:
2096 	mlx5_vdpa_free_resources(&ndev->mvdev);
2097 err_mpfs:
2098 	if (!is_zero_ether_addr(config->mac))
2099 		mlx5_mpfs_del_mac(pfmdev, config->mac);
2100 err_mtu:
2101 	mutex_destroy(&ndev->reslock);
2102 	put_device(&mvdev->vdev.dev);
2103 	return err;
2104 }
2105 
2106 static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
2107 {
2108 	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
2109 
2110 	_vdpa_unregister_device(dev);
2111 	mgtdev->ndev = NULL;
2112 }
2113 
2114 static const struct vdpa_mgmtdev_ops mdev_ops = {
2115 	.dev_add = mlx5_vdpa_dev_add,
2116 	.dev_del = mlx5_vdpa_dev_del,
2117 };
2118 
2119 static struct virtio_device_id id_table[] = {
2120 	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
2121 	{ 0 },
2122 };
2123 
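/*
 * Auxiliary bus probe: allocate the management device wrapper and
 * register it with the vdpa core so instances can be added through the
 * vdpa management interface.
 */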
2124 static int mlx5v_probe(struct auxiliary_device *adev,
2125 		       const struct auxiliary_device_id *id)
2126 
2127 {
2128 	struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
2129 	struct mlx5_core_dev *mdev = madev->mdev;
2130 	struct mlx5_vdpa_mgmtdev *mgtdev;
2131 	int err;
2132 
2133 	mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL);
2134 	if (!mgtdev)
2135 		return -ENOMEM;
2136 
2137 	mgtdev->mgtdev.ops = &mdev_ops;
2138 	mgtdev->mgtdev.device = mdev->device;
2139 	mgtdev->mgtdev.id_table = id_table;
2140 	mgtdev->madev = madev;
2141 
2142 	err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
2143 	if (err)
2144 		goto reg_err;
2145 
2146 	dev_set_drvdata(&adev->dev, mgtdev);
2147 
2148 	return 0;
2149 
2150 reg_err:
2151 	kfree(mgtdev);
2152 	return err;
2153 }
2154 
2155 static void mlx5v_remove(struct auxiliary_device *adev)
2156 {
2157 	struct mlx5_vdpa_mgmtdev *mgtdev;
2158 
2159 	mgtdev = dev_get_drvdata(&adev->dev);
2160 	vdpa_mgmtdev_unregister(&mgtdev->mgtdev);
2161 	kfree(mgtdev);
2162 }
2163 
2164 static const struct auxiliary_device_id mlx5v_id_table[] = {
2165 	{ .name = MLX5_ADEV_NAME ".vnet", },
2166 	{},
2167 };
2168 
2169 MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table);
2170 
2171 static struct auxiliary_driver mlx5v_driver = {
2172 	.name = "vnet",
2173 	.probe = mlx5v_probe,
2174 	.remove = mlx5v_remove,
2175 	.id_table = mlx5v_id_table,
2176 };
2177 
2178 module_auxiliary_driver(mlx5v_driver);
2179