xref: /openbmc/linux/drivers/vdpa/mlx5/net/mlx5_vnet.c (revision c0ecca6604b80e438b032578634c6e133c7028f6)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2020 Mellanox Technologies Ltd. */
3 
4 #include <linux/module.h>
5 #include <linux/vdpa.h>
6 #include <linux/vringh.h>
7 #include <uapi/linux/virtio_net.h>
8 #include <uapi/linux/virtio_ids.h>
9 #include <linux/virtio_config.h>
10 #include <linux/auxiliary_bus.h>
11 #include <linux/mlx5/cq.h>
12 #include <linux/mlx5/qp.h>
13 #include <linux/mlx5/device.h>
14 #include <linux/mlx5/driver.h>
15 #include <linux/mlx5/vport.h>
16 #include <linux/mlx5/fs.h>
17 #include <linux/mlx5/mlx5_ifc_vdpa.h>
18 #include <linux/mlx5/mpfs.h>
19 #include "mlx5_vdpa.h"
20 
21 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
22 MODULE_DESCRIPTION("Mellanox VDPA driver");
23 MODULE_LICENSE("Dual BSD/GPL");
24 
/* Map a pointer to the mvdev member embedded in struct mlx5_vdpa_net back to
 * the struct mlx5_vdpa_net that contains it.
 */
#define to_mlx5_vdpa_ndev(__mvdev)                                             \
	container_of(__mvdev, struct mlx5_vdpa_net, mvdev)
/* Map a pointer to the vdev member of struct mlx5_vdpa_dev back to the
 * containing struct mlx5_vdpa_dev.
 */
#define to_mvdev(__vdev) container_of((__vdev), struct mlx5_vdpa_dev, vdev)
28 
/* Every virtio feature bit this file knows by name. print_features() warns
 * about any feature bit that falls outside this mask.
 */
#define VALID_FEATURES_MASK                                                                        \
	(BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |                                   \
	 BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) |   \
	 BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |                             \
	 BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \
	 BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) |   \
	 BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |      \
	 BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) |                                 \
	 BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) |                      \
	 BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) |  \
	 BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) |           \
	 BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) |                          \
	 BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) |      \
	 BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV))
43 
/* Every virtio device status bit this file knows by name. print_status()
 * warns about any status bit that falls outside this mask.
 */
#define VALID_STATUS_MASK                                                                          \
	(VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK |        \
	 VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)
47 
/* Device-wide object numbers owned by a struct mlx5_vdpa_net. */
struct mlx5_vdpa_net_resources {
	/* TIS number; filled in by create_tis() and attached to TX virtqueues */
	u32 tisn;
	/* transport domain number placed in the TIS context by create_tis() */
	u32 tdn;
	/* NOTE(review): tirn, rqtn and valid are not used in this part of the
	 * file; presumably TIR number, RQ table number and a "resources
	 * allocated" flag - confirm against the rest of the driver.
	 */
	u32 tirn;
	u32 rqtn;
	bool valid;
};
55 
/* Backing memory of a completion queue; set up by cq_frag_buf_alloc(). */
struct mlx5_vdpa_cq_buf {
	/* fragment-buffer control used by get_cqe() to locate an entry */
	struct mlx5_frag_buf_ctrl fbc;
	/* the fragmented buffer holding the CQEs */
	struct mlx5_frag_buf frag_buf;
	/* size of one CQE; set to MLX5_VDPA_CQE_SIZE */
	int cqe_size;
	/* number of CQEs the buffer was allocated for */
	int nent;
};
62 
/* Completion queue used for a virtqueue's notification channel. */
struct mlx5_vdpa_cq {
	/* mlx5 core CQ object; cq_create() points its doorbells into db */
	struct mlx5_core_cq mcq;
	struct mlx5_vdpa_cq_buf buf;
	/* doorbell record: db.db is the consumer index, db.db + 1 the arm word */
	struct mlx5_db db;
	/* number of entries, set by cq_create(); get_sw_cqe() uses (cqe - 1)
	 * as an index mask, so it is expected to be a power of two
	 */
	int cqe;
};
69 
/* One of the three driver-provided memory regions a virtqueue needs. */
struct mlx5_vdpa_umem {
	struct mlx5_frag_buf_ctrl fbc;
	/* memory backing the umem; allocated by umem_frag_buf_alloc() */
	struct mlx5_frag_buf frag_buf;
	/* size computed by umem_size() from the device capabilities */
	int size;
	/* umem id returned by the CREATE_UMEM command */
	u32 id;
};
76 
/* One end of the QP connection used to signal virtqueue events. */
struct mlx5_vdpa_qp {
	/* core QP object; qp_create() stores the QP number and uid here */
	struct mlx5_core_qp mqp;
	/* receive queue buffer; only allocated when fw is false */
	struct mlx5_frag_buf frag_buf;
	/* doorbell record; only allocated when fw is false */
	struct mlx5_db db;
	/* running count of posted receive entries, written to db by rx_post() */
	u16 head;
	/* true for the QP handed to firmware, false for the driver's QP */
	bool fw;
};
84 
/* Snapshot of a virtqueue's configuration.
 *
 * NOTE(review): the code that fills and consumes this structure is outside
 * this part of the file; the fields mirror those of struct
 * mlx5_vdpa_virtqueue, presumably so a queue can be re-created with the same
 * parameters - confirm against the save/restore helpers.
 */
struct mlx5_vq_restore_info {
	u32 num_ent;
	u64 desc_addr;
	u64 device_addr;
	u64 driver_addr;
	u16 avail_index;
	u16 used_index;
	bool ready;
	struct vdpa_callback cb;
	bool restore;
};
96 
/* Per-virtqueue state: the ring parameters plus the device objects created to
 * back the queue.
 */
struct mlx5_vdpa_virtqueue {
	bool ready;
	/* ring addresses programmed into the queue object by
	 * create_virtqueue(): descriptor table, used ring (device_addr) and
	 * available ring (driver_addr)
	 */
	u64 desc_addr;
	u64 device_addr;
	u64 driver_addr;
	/* number of ring entries; also sizes the CQ, RQ and umems */
	u32 num_ent;
	/* callback invoked by mlx5_vdpa_handle_completions() */
	struct vdpa_callback event_cb;

	/* Resources for implementing the notification channel from the device
	 * to the driver. fwqp is the firmware end of an RC connection; the
	 * other end is vqqp used by the driver. cq is where completions are
	 * reported.
	 */
	struct mlx5_vdpa_cq cq;
	struct mlx5_vdpa_qp fwqp;
	struct mlx5_vdpa_qp vqqp;

	/* umem resources are required for the virtqueue operation. Their use
	 * is internal and they must be provided by the driver.
	 */
	struct mlx5_vdpa_umem umem1;
	struct mlx5_vdpa_umem umem2;
	struct mlx5_vdpa_umem umem3;

	bool initialized;
	/* queue index; odd indices are TX queues (see vq_is_tx()) */
	int index;
	/* object id returned when the queue object is created */
	u32 virtq_id;
	/* owning device */
	struct mlx5_vdpa_net *ndev;
	/* initial hardware available/used indices used at queue creation */
	u16 avail_idx;
	u16 used_idx;
	int fw_state;

	/* keep last in the struct */
	struct mlx5_vq_restore_info ri;
};
132 
/* Number of entries in the statically sized vqs[] array of struct
 * mlx5_vdpa_net. We will remove this limitation once
 * mlx5_vdpa_alloc_resources() provides for driver space allocation.
 */
#define MLX5_MAX_SUPPORTED_VQS 16
137 
/* Driver state of one mlx5 vDPA network device. */
struct mlx5_vdpa_net {
	/* embedded generic device; to_mlx5_vdpa_ndev() maps it back here */
	struct mlx5_vdpa_dev mvdev;
	struct mlx5_vdpa_net_resources res;
	struct virtio_net_config config;
	struct mlx5_vdpa_virtqueue vqs[MLX5_MAX_SUPPORTED_VQS];

	/* Serialize vq resources creation and destruction. This is required
	 * since the memory map might change and we need to destroy and create
	 * resources while the driver is operational.
	 */
	struct mutex reslock;
	/* NOTE(review): the receive steering objects below and the setup/mtu
	 * fields are not used in this part of the file - see their users
	 * elsewhere in the driver.
	 */
	struct mlx5_flow_table *rxft;
	struct mlx5_fc *rx_counter;
	struct mlx5_flow_handle *rx_rule;
	bool setup;
	u16 mtu;
};
155 
/* Forward declarations of file-local helpers that are defined further down. */
static void free_resources(struct mlx5_vdpa_net *ndev);
static void init_mvqs(struct mlx5_vdpa_net *ndev);
static int setup_driver(struct mlx5_vdpa_net *ndev);
static void teardown_driver(struct mlx5_vdpa_net *ndev);
160 
/* When false, print_status() and print_features() only emit their warnings
 * about unknown bits and skip the per-bit listing.
 */
static bool mlx5_vdpa_debug;
162 
/* Log the name of @_feature if that bit number is set. Relies on variables
 * named "features" and "mvdev" being in scope at the point of use.
 */
#define MLX5_LOG_VIO_FLAG(_feature)                                                                \
	do {                                                                                       \
		if (features & BIT_ULL(_feature))                                                  \
			mlx5_vdpa_info(mvdev, "%s\n", #_feature);                                  \
	} while (0)
168 
/* Log the name of @_status if that status mask is set. Relies on variables
 * named "status" and "mvdev" being in scope at the point of use.
 */
#define MLX5_LOG_VIO_STAT(_status)                                                                 \
	do {                                                                                       \
		if (status & (_status))                                                            \
			mlx5_vdpa_info(mvdev, "%s\n", #_status);                                   \
	} while (0)
174 
175 static inline u32 mlx5_vdpa_max_qps(int max_vqs)
176 {
177 	return max_vqs / 2;
178 }
179 
180 static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
181 {
182 	if (status & ~VALID_STATUS_MASK)
183 		mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n",
184 			       status & ~VALID_STATUS_MASK);
185 
186 	if (!mlx5_vdpa_debug)
187 		return;
188 
189 	mlx5_vdpa_info(mvdev, "driver status %s", set ? "set" : "get");
190 	if (set && !status) {
191 		mlx5_vdpa_info(mvdev, "driver resets the device\n");
192 		return;
193 	}
194 
195 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE);
196 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER);
197 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK);
198 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK);
199 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET);
200 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED);
201 }
202 
203 static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set)
204 {
205 	if (features & ~VALID_FEATURES_MASK)
206 		mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n",
207 			       features & ~VALID_FEATURES_MASK);
208 
209 	if (!mlx5_vdpa_debug)
210 		return;
211 
212 	mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads");
213 	if (!features)
214 		mlx5_vdpa_info(mvdev, "all feature bits are cleared\n");
215 
216 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM);
217 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM);
218 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
219 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU);
220 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC);
221 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4);
222 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6);
223 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN);
224 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO);
225 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4);
226 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6);
227 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN);
228 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO);
229 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF);
230 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS);
231 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ);
232 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX);
233 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN);
234 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA);
235 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE);
236 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ);
237 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR);
238 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT);
239 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS);
240 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT);
241 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY);
242 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX);
243 	MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY);
244 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT);
245 	MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1);
246 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM);
247 	MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED);
248 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM);
249 	MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV);
250 }
251 
252 static int create_tis(struct mlx5_vdpa_net *ndev)
253 {
254 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
255 	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
256 	void *tisc;
257 	int err;
258 
259 	tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
260 	MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn);
261 	err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn);
262 	if (err)
263 		mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err);
264 
265 	return err;
266 }
267 
268 static void destroy_tis(struct mlx5_vdpa_net *ndev)
269 {
270 	mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn);
271 }
272 
/* Size of one completion queue entry and its base-2 logarithm; used by
 * cq_frag_buf_alloc() to size and index the CQ buffer.
 */
#define MLX5_VDPA_CQE_SIZE 64
#define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE)
275 
276 static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent)
277 {
278 	struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
279 	u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE;
280 	u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE;
281 	int err;
282 
283 	err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf,
284 				       ndev->mvdev.mdev->priv.numa_node);
285 	if (err)
286 		return err;
287 
288 	mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);
289 
290 	buf->cqe_size = MLX5_VDPA_CQE_SIZE;
291 	buf->nent = nent;
292 
293 	return 0;
294 }
295 
296 static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size)
297 {
298 	struct mlx5_frag_buf *frag_buf = &umem->frag_buf;
299 
300 	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf,
301 					ndev->mvdev.mdev->priv.numa_node);
302 }
303 
304 static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf)
305 {
306 	mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf);
307 }
308 
309 static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n)
310 {
311 	return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n);
312 }
313 
314 static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf)
315 {
316 	struct mlx5_cqe64 *cqe64;
317 	void *cqe;
318 	int i;
319 
320 	for (i = 0; i < buf->nent; i++) {
321 		cqe = get_cqe(vcq, i);
322 		cqe64 = cqe;
323 		cqe64->op_own = MLX5_CQE_INVALID << 4;
324 	}
325 }
326 
327 static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n)
328 {
329 	struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1));
330 
331 	if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
332 	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe)))
333 		return cqe64;
334 
335 	return NULL;
336 }
337 
338 static void rx_post(struct mlx5_vdpa_qp *vqp, int n)
339 {
340 	vqp->head += n;
341 	vqp->db.db[0] = cpu_to_be32(vqp->head);
342 }
343 
344 static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in,
345 		       struct mlx5_vdpa_virtqueue *mvq, u32 num_ent)
346 {
347 	struct mlx5_vdpa_qp *vqp;
348 	__be64 *pas;
349 	void *qpc;
350 
351 	vqp = fw ? &mvq->fwqp : &mvq->vqqp;
352 	MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid);
353 	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
354 	if (vqp->fw) {
355 		/* Firmware QP is allocated by the driver for the firmware's
356 		 * use so we can skip part of the params as they will be chosen by firmware
357 		 */
358 		qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
359 		MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
360 		MLX5_SET(qpc, qpc, no_sq, 1);
361 		return;
362 	}
363 
364 	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
365 	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
366 	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
367 	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
368 	MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index);
369 	MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
370 	MLX5_SET(qpc, qpc, no_sq, 1);
371 	MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn);
372 	MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent));
373 	MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
374 	pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas);
375 	mlx5_fill_page_frag_array(&vqp->frag_buf, pas);
376 }
377 
378 static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent)
379 {
380 	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev,
381 					num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf,
382 					ndev->mvdev.mdev->priv.numa_node);
383 }
384 
385 static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
386 {
387 	mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf);
388 }
389 
/* Create one of the two QPs that form the notification channel of @mvq.
 *
 * @ndev: owning device
 * @mvq:  virtqueue the QP belongs to
 * @vqp:  QP to create; vqp->fw tells whether it is the firmware end
 *
 * For the driver QP (!vqp->fw) the receive buffer and doorbell record are
 * allocated first and, once the QP exists, all mvq->num_ent receive entries
 * are posted. The firmware QP needs neither.
 *
 * Returns 0 on success or a negative errno; on failure everything allocated
 * here has been released.
 */
static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
		     struct mlx5_vdpa_qp *vqp)
{
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
	void *qpc;
	void *in;
	int err;

	if (!vqp->fw) {
		vqp = &mvq->vqqp;
		err = rq_buf_alloc(ndev, vqp, mvq->num_ent);
		if (err)
			return err;

		err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db);
		if (err)
			goto err_db;
		/* room for one page address per RQ buffer page */
		inlen += vqp->frag_buf.npages * sizeof(__be64);
	}

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_kzalloc;
	}

	qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent);
	/* fields common to both QP kinds */
	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
	if (!vqp->fw)
		MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma);
	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
	/* the mailbox is no longer needed whether or not the command worked */
	kfree(in);
	if (err)
		goto err_kzalloc;

	vqp->mqp.uid = ndev->mvdev.res.uid;
	vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);

	if (!vqp->fw)
		rx_post(vqp, mvq->num_ent);

	return 0;

	/* unwind in reverse order; only the driver QP allocated anything */
err_kzalloc:
	if (!vqp->fw)
		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
err_db:
	if (!vqp->fw)
		rq_buf_free(ndev, vqp);

	return err;
}
449 
450 static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
451 {
452 	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
453 
454 	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
455 	MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
456 	MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
457 	if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
458 		mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn);
459 	if (!vqp->fw) {
460 		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
461 		rq_buf_free(ndev, vqp);
462 	}
463 }
464 
465 static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
466 {
467 	return get_sw_cqe(cq, cq->mcq.cons_index);
468 }
469 
470 static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
471 {
472 	struct mlx5_cqe64 *cqe64;
473 
474 	cqe64 = next_cqe_sw(vcq);
475 	if (!cqe64)
476 		return -EAGAIN;
477 
478 	vcq->mcq.cons_index++;
479 	return 0;
480 }
481 
/* Acknowledge @num consumed completions of @mvq and notify the upper layer.
 *
 * Publishes the CQ consumer index, re-posts @num receive entries on the
 * driver QP and then invokes the virtqueue's event callback, if one is set.
 */
static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
{
	mlx5_cq_set_ci(&mvq->cq.mcq);

	/* make sure CQ consumer update is visible to the hardware before updating
	 * RX doorbell record.
	 */
	dma_wmb();
	rx_post(&mvq->vqqp, num);
	if (mvq->event_cb.callback)
		mvq->event_cb.callback(mvq->event_cb.private);
}
494 
495 static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
496 {
497 	struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
498 	struct mlx5_vdpa_net *ndev = mvq->ndev;
499 	void __iomem *uar_page = ndev->mvdev.res.uar->map;
500 	int num = 0;
501 
502 	while (!mlx5_vdpa_poll_one(&mvq->cq)) {
503 		num++;
504 		if (num > mvq->num_ent / 2) {
505 			/* If completions keep coming while we poll, we want to
506 			 * let the hardware know that we consumed them by
507 			 * updating the doorbell record.  We also let vdpa core
508 			 * know about this so it passes it on the virtio driver
509 			 * on the guest.
510 			 */
511 			mlx5_vdpa_handle_completions(mvq, num);
512 			num = 0;
513 		}
514 	}
515 
516 	if (num)
517 		mlx5_vdpa_handle_completions(mvq, num);
518 
519 	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
520 }
521 
/* Create and arm the completion queue of virtqueue @idx.
 *
 * @ndev:    owning device
 * @idx:     index into ndev->vqs[]
 * @num_ent: number of CQ entries; ilog2() of it is programmed as the CQ size
 *
 * Allocates the doorbell record and the CQE buffer, marks all CQEs invalid,
 * issues the create command and installs mlx5_vdpa_cq_comp() as the
 * completion handler.
 *
 * Returns 0 on success or a negative errno; on failure the doorbell record
 * and buffer have been released.
 */
static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
{
	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	void __iomem *uar_page = ndev->mvdev.res.uar->map;
	/* NOTE(review): not initialised here; presumably written by
	 * mlx5_core_create_cq() - confirm.
	 */
	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
	struct mlx5_vdpa_cq *vcq = &mvq->cq;
	unsigned int irqn;
	__be64 *pas;
	int inlen;
	void *cqc;
	void *in;
	int err;
	int eqn;

	err = mlx5_db_alloc(mdev, &vcq->db);
	if (err)
		return err;

	/* first doorbell word is the consumer index, second is the arm word */
	vcq->mcq.set_ci_db = vcq->db.db;
	vcq->mcq.arm_db = vcq->db.db + 1;
	vcq->mcq.cqe_sz = 64;

	err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
	if (err)
		goto err_db;

	cq_frag_buf_init(vcq, &vcq->buf);

	/* fixed part of the mailbox plus one page address per buffer page */
	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
		MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages;
	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_vzalloc;
	}

	MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid);
	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
	mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas);

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
	MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);

	/* Use vector 0 by default. Consider adding code to choose least used
	 * vector.
	 */
	err = mlx5_vector2eqn(mdev, 0, &eqn, &irqn);
	if (err)
		goto err_vec;

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
	MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
	MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
	MLX5_SET(cqc, cqc, c_eqn, eqn);
	MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);

	err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
	if (err)
		goto err_vec;

	vcq->mcq.comp = mlx5_vdpa_cq_comp;
	vcq->cqe = num_ent;
	/* same doorbell pointers as assigned before the create command */
	vcq->mcq.set_ci_db = vcq->db.db;
	vcq->mcq.arm_db = vcq->db.db + 1;
	/* request a notification for the next completion */
	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
	kfree(in);
	return 0;

err_vec:
	kfree(in);
err_vzalloc:
	cq_frag_buf_free(ndev, &vcq->buf);
err_db:
	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
	return err;
}
599 
600 static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
601 {
602 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
603 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
604 	struct mlx5_vdpa_cq *vcq = &mvq->cq;
605 
606 	if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) {
607 		mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn);
608 		return;
609 	}
610 	cq_frag_buf_free(ndev, &vcq->buf);
611 	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
612 }
613 
614 static int umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
615 		     struct mlx5_vdpa_umem **umemp)
616 {
617 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
618 	int p_a;
619 	int p_b;
620 
621 	switch (num) {
622 	case 1:
623 		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_a);
624 		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_b);
625 		*umemp = &mvq->umem1;
626 		break;
627 	case 2:
628 		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_a);
629 		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_b);
630 		*umemp = &mvq->umem2;
631 		break;
632 	case 3:
633 		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_a);
634 		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_b);
635 		*umemp = &mvq->umem3;
636 		break;
637 	}
638 	return p_a * mvq->num_ent + p_b;
639 }
640 
641 static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem)
642 {
643 	mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf);
644 }
645 
646 static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
647 {
648 	int inlen;
649 	u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {};
650 	void *um;
651 	void *in;
652 	int err;
653 	__be64 *pas;
654 	int size;
655 	struct mlx5_vdpa_umem *umem;
656 
657 	size = umem_size(ndev, mvq, num, &umem);
658 	if (size < 0)
659 		return size;
660 
661 	umem->size = size;
662 	err = umem_frag_buf_alloc(ndev, umem, size);
663 	if (err)
664 		return err;
665 
666 	inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages;
667 
668 	in = kzalloc(inlen, GFP_KERNEL);
669 	if (!in) {
670 		err = -ENOMEM;
671 		goto err_in;
672 	}
673 
674 	MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM);
675 	MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid);
676 	um = MLX5_ADDR_OF(create_umem_in, in, umem);
677 	MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
678 	MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages);
679 
680 	pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]);
681 	mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW);
682 
683 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
684 	if (err) {
685 		mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err);
686 		goto err_cmd;
687 	}
688 
689 	kfree(in);
690 	umem->id = MLX5_GET(create_umem_out, out, umem_id);
691 
692 	return 0;
693 
694 err_cmd:
695 	kfree(in);
696 err_in:
697 	umem_frag_buf_free(ndev, umem);
698 	return err;
699 }
700 
701 static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
702 {
703 	u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {};
704 	u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {};
705 	struct mlx5_vdpa_umem *umem;
706 
707 	switch (num) {
708 	case 1:
709 		umem = &mvq->umem1;
710 		break;
711 	case 2:
712 		umem = &mvq->umem2;
713 		break;
714 	case 3:
715 		umem = &mvq->umem3;
716 		break;
717 	}
718 
719 	MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM);
720 	MLX5_SET(destroy_umem_in, in, umem_id, umem->id);
721 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
722 		return;
723 
724 	umem_frag_buf_free(ndev, umem);
725 }
726 
/* Create umems 1 to 3 of @mvq.
 *
 * Returns 0 when all three exist. If one fails, the umems created before it
 * are destroyed in reverse order and the error is returned.
 */
static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int created;
	int err;

	for (created = 0; created < 3; created++) {
		/* umems are numbered from 1 */
		err = create_umem(ndev, mvq, created + 1);
		if (err)
			break;
	}

	if (created == 3)
		return 0;

	while (created > 0)
		umem_destroy(ndev, mvq, created--);

	return err;
}
745 
/* Destroy umems 3, 2 and 1 of @mvq, in that order. */
static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int num = 3;

	while (num > 0)
		umem_destroy(ndev, mvq, num--);
}
753 
754 static int get_queue_type(struct mlx5_vdpa_net *ndev)
755 {
756 	u32 type_mask;
757 
758 	type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type);
759 
760 	/* prefer split queue */
761 	if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED)
762 		return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED;
763 
764 	WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT));
765 
766 	return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT;
767 }
768 
769 static bool vq_is_tx(u16 idx)
770 {
771 	return idx % 2;
772 }
773 
774 static u16 get_features_12_3(u64 features)
775 {
776 	return (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << 9) |
777 	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << 8) |
778 	       (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << 7) |
779 	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_CSUM)) << 6);
780 }
781 
782 static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
783 {
784 	int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
785 	u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
786 	void *obj_context;
787 	void *cmd_hdr;
788 	void *vq_ctx;
789 	void *in;
790 	int err;
791 
792 	err = umems_create(ndev, mvq);
793 	if (err)
794 		return err;
795 
796 	in = kzalloc(inlen, GFP_KERNEL);
797 	if (!in) {
798 		err = -ENOMEM;
799 		goto err_alloc;
800 	}
801 
802 	cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);
803 
804 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
805 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
806 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
807 
808 	obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);
809 	MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
810 	MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
811 	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
812 		 get_features_12_3(ndev->mvdev.actual_features));
813 	vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
814 	MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));
815 
816 	if (vq_is_tx(mvq->index))
817 		MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);
818 
819 	MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
820 	MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
821 	MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
822 	MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
823 	MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
824 		 !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
825 	MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
826 	MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
827 	MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
828 	MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, ndev->mvdev.mr.mkey.key);
829 	MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
830 	MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
831 	MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
832 	MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem1.size);
833 	MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
834 	MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem1.size);
835 	MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
836 	if (MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, eth_frame_offload_type))
837 		MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0, 1);
838 
839 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
840 	if (err)
841 		goto err_cmd;
842 
843 	kfree(in);
844 	mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
845 
846 	return 0;
847 
848 err_cmd:
849 	kfree(in);
850 err_alloc:
851 	umems_destroy(ndev, mvq);
852 	return err;
853 }
854 
855 static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
856 {
857 	u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {};
858 	u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {};
859 
860 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode,
861 		 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
862 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id);
863 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid);
864 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type,
865 		 MLX5_OBJ_TYPE_VIRTIO_NET_Q);
866 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) {
867 		mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
868 		return;
869 	}
870 	umems_destroy(ndev, mvq);
871 }
872 
873 static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
874 {
875 	return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn;
876 }
877 
878 static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
879 {
880 	return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn;
881 }
882 
883 static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out,
884 			int *outlen, u32 qpn, u32 rqpn)
885 {
886 	void *qpc;
887 	void *pp;
888 
889 	switch (cmd) {
890 	case MLX5_CMD_OP_2RST_QP:
891 		*inlen = MLX5_ST_SZ_BYTES(qp_2rst_in);
892 		*outlen = MLX5_ST_SZ_BYTES(qp_2rst_out);
893 		*in = kzalloc(*inlen, GFP_KERNEL);
894 		*out = kzalloc(*outlen, GFP_KERNEL);
895 		if (!*in || !*out)
896 			goto outerr;
897 
898 		MLX5_SET(qp_2rst_in, *in, opcode, cmd);
899 		MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid);
900 		MLX5_SET(qp_2rst_in, *in, qpn, qpn);
901 		break;
902 	case MLX5_CMD_OP_RST2INIT_QP:
903 		*inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in);
904 		*outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out);
905 		*in = kzalloc(*inlen, GFP_KERNEL);
906 		*out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL);
907 		if (!*in || !*out)
908 			goto outerr;
909 
910 		MLX5_SET(rst2init_qp_in, *in, opcode, cmd);
911 		MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid);
912 		MLX5_SET(rst2init_qp_in, *in, qpn, qpn);
913 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
914 		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
915 		MLX5_SET(qpc, qpc, rwe, 1);
916 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
917 		MLX5_SET(ads, pp, vhca_port_num, 1);
918 		break;
919 	case MLX5_CMD_OP_INIT2RTR_QP:
920 		*inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in);
921 		*outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out);
922 		*in = kzalloc(*inlen, GFP_KERNEL);
923 		*out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL);
924 		if (!*in || !*out)
925 			goto outerr;
926 
927 		MLX5_SET(init2rtr_qp_in, *in, opcode, cmd);
928 		MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid);
929 		MLX5_SET(init2rtr_qp_in, *in, qpn, qpn);
930 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
931 		MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
932 		MLX5_SET(qpc, qpc, log_msg_max, 30);
933 		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
934 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
935 		MLX5_SET(ads, pp, fl, 1);
936 		break;
937 	case MLX5_CMD_OP_RTR2RTS_QP:
938 		*inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in);
939 		*outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out);
940 		*in = kzalloc(*inlen, GFP_KERNEL);
941 		*out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL);
942 		if (!*in || !*out)
943 			goto outerr;
944 
945 		MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd);
946 		MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid);
947 		MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn);
948 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
949 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
950 		MLX5_SET(ads, pp, ack_timeout, 14);
951 		MLX5_SET(qpc, qpc, retry_count, 7);
952 		MLX5_SET(qpc, qpc, rnr_retry, 7);
953 		break;
954 	default:
955 		goto outerr_nullify;
956 	}
957 
958 	return;
959 
960 outerr:
961 	kfree(*in);
962 	kfree(*out);
963 outerr_nullify:
964 	*in = NULL;
965 	*out = NULL;
966 }
967 
/* Release a command input/output buffer pair such as the one set up by
 * alloc_inout(). kfree() tolerates NULL, so either pointer may be unset.
 */
static void free_inout(void *in, void *out)
{
	kfree(out);
	kfree(in);
}
973 
974 /* Two QPs are used by each virtqueue. One is used by the driver and one by
975  * firmware. The fw argument indicates whether the subjected QP is the one used
976  * by firmware.
977  */
978 static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd)
979 {
980 	int outlen;
981 	int inlen;
982 	void *out;
983 	void *in;
984 	int err;
985 
986 	alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw));
987 	if (!in || !out)
988 		return -ENOMEM;
989 
990 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen);
991 	free_inout(in, out);
992 	return err;
993 }
994 
995 static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
996 {
997 	int err;
998 
999 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP);
1000 	if (err)
1001 		return err;
1002 
1003 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP);
1004 	if (err)
1005 		return err;
1006 
1007 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP);
1008 	if (err)
1009 		return err;
1010 
1011 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP);
1012 	if (err)
1013 		return err;
1014 
1015 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP);
1016 	if (err)
1017 		return err;
1018 
1019 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP);
1020 	if (err)
1021 		return err;
1022 
1023 	return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP);
1024 }
1025 
1026 struct mlx5_virtq_attr {
1027 	u8 state;
1028 	u16 available_index;
1029 	u16 used_index;
1030 };
1031 
1032 static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
1033 			   struct mlx5_virtq_attr *attr)
1034 {
1035 	int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out);
1036 	u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {};
1037 	void *out;
1038 	void *obj_context;
1039 	void *cmd_hdr;
1040 	int err;
1041 
1042 	out = kzalloc(outlen, GFP_KERNEL);
1043 	if (!out)
1044 		return -ENOMEM;
1045 
1046 	cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1047 
1048 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
1049 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1050 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1051 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1052 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen);
1053 	if (err)
1054 		goto err_cmd;
1055 
1056 	obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context);
1057 	memset(attr, 0, sizeof(*attr));
1058 	attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
1059 	attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);
1060 	attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index);
1061 	kfree(out);
1062 	return 0;
1063 
1064 err_cmd:
1065 	kfree(out);
1066 	return err;
1067 }
1068 
1069 static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
1070 {
1071 	int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
1072 	u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {};
1073 	void *obj_context;
1074 	void *cmd_hdr;
1075 	void *in;
1076 	int err;
1077 
1078 	in = kzalloc(inlen, GFP_KERNEL);
1079 	if (!in)
1080 		return -ENOMEM;
1081 
1082 	cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1083 
1084 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
1085 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1086 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1087 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1088 
1089 	obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context);
1090 	MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select,
1091 		   MLX5_VIRTQ_MODIFY_MASK_STATE);
1092 	MLX5_SET(virtio_net_q_object, obj_context, state, state);
1093 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
1094 	kfree(in);
1095 	if (!err)
1096 		mvq->fw_state = state;
1097 
1098 	return err;
1099 }
1100 
1101 static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1102 {
1103 	u16 idx = mvq->index;
1104 	int err;
1105 
1106 	if (!mvq->num_ent)
1107 		return 0;
1108 
1109 	if (mvq->initialized) {
1110 		mlx5_vdpa_warn(&ndev->mvdev, "attempt re init\n");
1111 		return -EINVAL;
1112 	}
1113 
1114 	err = cq_create(ndev, idx, mvq->num_ent);
1115 	if (err)
1116 		return err;
1117 
1118 	err = qp_create(ndev, mvq, &mvq->fwqp);
1119 	if (err)
1120 		goto err_fwqp;
1121 
1122 	err = qp_create(ndev, mvq, &mvq->vqqp);
1123 	if (err)
1124 		goto err_vqqp;
1125 
1126 	err = connect_qps(ndev, mvq);
1127 	if (err)
1128 		goto err_connect;
1129 
1130 	err = create_virtqueue(ndev, mvq);
1131 	if (err)
1132 		goto err_connect;
1133 
1134 	if (mvq->ready) {
1135 		err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
1136 		if (err) {
1137 			mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n",
1138 				       idx, err);
1139 			goto err_connect;
1140 		}
1141 	}
1142 
1143 	mvq->initialized = true;
1144 	return 0;
1145 
1146 err_connect:
1147 	qp_destroy(ndev, &mvq->vqqp);
1148 err_vqqp:
1149 	qp_destroy(ndev, &mvq->fwqp);
1150 err_fwqp:
1151 	cq_destroy(ndev, idx);
1152 	return err;
1153 }
1154 
1155 static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1156 {
1157 	struct mlx5_virtq_attr attr;
1158 
1159 	if (!mvq->initialized)
1160 		return;
1161 
1162 	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
1163 		return;
1164 
1165 	if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND))
1166 		mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n");
1167 
1168 	if (query_virtqueue(ndev, mvq, &attr)) {
1169 		mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n");
1170 		return;
1171 	}
1172 	mvq->avail_idx = attr.available_index;
1173 	mvq->used_idx = attr.used_index;
1174 }
1175 
1176 static void suspend_vqs(struct mlx5_vdpa_net *ndev)
1177 {
1178 	int i;
1179 
1180 	for (i = 0; i < MLX5_MAX_SUPPORTED_VQS; i++)
1181 		suspend_vq(ndev, &ndev->vqs[i]);
1182 }
1183 
1184 static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1185 {
1186 	if (!mvq->initialized)
1187 		return;
1188 
1189 	suspend_vq(ndev, mvq);
1190 	destroy_virtqueue(ndev, mvq);
1191 	qp_destroy(ndev, &mvq->vqqp);
1192 	qp_destroy(ndev, &mvq->fwqp);
1193 	cq_destroy(ndev, mvq->index);
1194 	mvq->initialized = false;
1195 }
1196 
1197 static int create_rqt(struct mlx5_vdpa_net *ndev)
1198 {
1199 	int log_max_rqt;
1200 	__be32 *list;
1201 	void *rqtc;
1202 	int inlen;
1203 	void *in;
1204 	int i, j;
1205 	int err;
1206 
1207 	log_max_rqt = min_t(int, 1, MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
1208 	if (log_max_rqt < 1)
1209 		return -EOPNOTSUPP;
1210 
1211 	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + (1 << log_max_rqt) * MLX5_ST_SZ_BYTES(rq_num);
1212 	in = kzalloc(inlen, GFP_KERNEL);
1213 	if (!in)
1214 		return -ENOMEM;
1215 
1216 	MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid);
1217 	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
1218 
1219 	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1220 	MLX5_SET(rqtc, rqtc, rqt_max_size, 1 << log_max_rqt);
1221 	MLX5_SET(rqtc, rqtc, rqt_actual_size, 1);
1222 	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1223 	for (i = 0, j = 0; j < ndev->mvdev.max_vqs; j++) {
1224 		if (!ndev->vqs[j].initialized)
1225 			continue;
1226 
1227 		if (!vq_is_tx(ndev->vqs[j].index)) {
1228 			list[i] = cpu_to_be32(ndev->vqs[j].virtq_id);
1229 			i++;
1230 		}
1231 	}
1232 
1233 	err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
1234 	kfree(in);
1235 	if (err)
1236 		return err;
1237 
1238 	return 0;
1239 }
1240 
1241 static void destroy_rqt(struct mlx5_vdpa_net *ndev)
1242 {
1243 	mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
1244 }
1245 
1246 static int create_tir(struct mlx5_vdpa_net *ndev)
1247 {
1248 #define HASH_IP_L4PORTS                                                                            \
1249 	(MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT |  \
1250 	 MLX5_HASH_FIELD_SEL_L4_DPORT)
1251 	static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
1252 						   0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
1253 						   0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
1254 						   0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
1255 						   0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a };
1256 	void *rss_key;
1257 	void *outer;
1258 	void *tirc;
1259 	void *in;
1260 	int err;
1261 
1262 	in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL);
1263 	if (!in)
1264 		return -ENOMEM;
1265 
1266 	MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid);
1267 	tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
1268 	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
1269 
1270 	MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
1271 	MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
1272 	rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
1273 	memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key));
1274 
1275 	outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
1276 	MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4);
1277 	MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP);
1278 	MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS);
1279 
1280 	MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn);
1281 	MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn);
1282 
1283 	err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn);
1284 	kfree(in);
1285 	return err;
1286 }
1287 
1288 static void destroy_tir(struct mlx5_vdpa_net *ndev)
1289 {
1290 	mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn);
1291 }
1292 
1293 static int add_fwd_to_tir(struct mlx5_vdpa_net *ndev)
1294 {
1295 	struct mlx5_flow_destination dest[2] = {};
1296 	struct mlx5_flow_table_attr ft_attr = {};
1297 	struct mlx5_flow_act flow_act = {};
1298 	struct mlx5_flow_namespace *ns;
1299 	int err;
1300 
1301 	/* for now, one entry, match all, forward to tir */
1302 	ft_attr.max_fte = 1;
1303 	ft_attr.autogroup.max_num_groups = 1;
1304 
1305 	ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS);
1306 	if (!ns) {
1307 		mlx5_vdpa_warn(&ndev->mvdev, "get flow namespace\n");
1308 		return -EOPNOTSUPP;
1309 	}
1310 
1311 	ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
1312 	if (IS_ERR(ndev->rxft))
1313 		return PTR_ERR(ndev->rxft);
1314 
1315 	ndev->rx_counter = mlx5_fc_create(ndev->mvdev.mdev, false);
1316 	if (IS_ERR(ndev->rx_counter)) {
1317 		err = PTR_ERR(ndev->rx_counter);
1318 		goto err_fc;
1319 	}
1320 
1321 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT;
1322 	dest[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1323 	dest[0].tir_num = ndev->res.tirn;
1324 	dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1325 	dest[1].counter_id = mlx5_fc_id(ndev->rx_counter);
1326 	ndev->rx_rule = mlx5_add_flow_rules(ndev->rxft, NULL, &flow_act, dest, 2);
1327 	if (IS_ERR(ndev->rx_rule)) {
1328 		err = PTR_ERR(ndev->rx_rule);
1329 		ndev->rx_rule = NULL;
1330 		goto err_rule;
1331 	}
1332 
1333 	return 0;
1334 
1335 err_rule:
1336 	mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter);
1337 err_fc:
1338 	mlx5_destroy_flow_table(ndev->rxft);
1339 	return err;
1340 }
1341 
1342 static void remove_fwd_to_tir(struct mlx5_vdpa_net *ndev)
1343 {
1344 	if (!ndev->rx_rule)
1345 		return;
1346 
1347 	mlx5_del_flow_rules(ndev->rx_rule);
1348 	mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter);
1349 	mlx5_destroy_flow_table(ndev->rxft);
1350 
1351 	ndev->rx_rule = NULL;
1352 }
1353 
1354 static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
1355 {
1356 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1357 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1358 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
1359 
1360 	if (unlikely(!mvq->ready))
1361 		return;
1362 
1363 	iowrite16(idx, ndev->mvdev.res.kick_addr);
1364 }
1365 
1366 static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area,
1367 				    u64 driver_area, u64 device_area)
1368 {
1369 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1370 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1371 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
1372 
1373 	mvq->desc_addr = desc_area;
1374 	mvq->device_addr = device_area;
1375 	mvq->driver_addr = driver_area;
1376 	return 0;
1377 }
1378 
1379 static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
1380 {
1381 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1382 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1383 	struct mlx5_vdpa_virtqueue *mvq;
1384 
1385 	mvq = &ndev->vqs[idx];
1386 	mvq->num_ent = num;
1387 }
1388 
1389 static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb)
1390 {
1391 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1392 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1393 	struct mlx5_vdpa_virtqueue *vq = &ndev->vqs[idx];
1394 
1395 	vq->event_cb = *cb;
1396 }
1397 
1398 static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
1399 {
1400 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1401 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1402 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
1403 
1404 	if (!ready)
1405 		suspend_vq(ndev, mvq);
1406 
1407 	mvq->ready = ready;
1408 }
1409 
1410 static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
1411 {
1412 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1413 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1414 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
1415 
1416 	return mvq->ready;
1417 }
1418 
1419 static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
1420 				  const struct vdpa_vq_state *state)
1421 {
1422 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1423 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1424 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
1425 
1426 	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
1427 		mlx5_vdpa_warn(mvdev, "can't modify available index\n");
1428 		return -EINVAL;
1429 	}
1430 
1431 	mvq->used_idx = state->avail_index;
1432 	mvq->avail_idx = state->avail_index;
1433 	return 0;
1434 }
1435 
1436 static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state)
1437 {
1438 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1439 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1440 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
1441 	struct mlx5_virtq_attr attr;
1442 	int err;
1443 
1444 	/* If the virtq object was destroyed, use the value saved at
1445 	 * the last minute of suspend_vq. This caters for userspace
1446 	 * that cares about emulating the index after vq is stopped.
1447 	 */
1448 	if (!mvq->initialized) {
1449 		/* Firmware returns a wrong value for the available index.
1450 		 * Since both values should be identical, we take the value of
1451 		 * used_idx which is reported correctly.
1452 		 */
1453 		state->avail_index = mvq->used_idx;
1454 		return 0;
1455 	}
1456 
1457 	err = query_virtqueue(ndev, mvq, &attr);
1458 	if (err) {
1459 		mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n");
1460 		return err;
1461 	}
1462 	state->avail_index = attr.used_index;
1463 	return 0;
1464 }
1465 
1466 static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
1467 {
1468 	return PAGE_SIZE;
1469 }
1470 
/* Bits tested by mlx_to_vritio_features() in the 16-bit feature mask it is
 * given.
 */
enum {
	MLX5_VIRTIO_NET_F_GUEST_CSUM = 1 << 9,
	MLX5_VIRTIO_NET_F_CSUM = 1 << 10,
	MLX5_VIRTIO_NET_F_HOST_TSO6 = 1 << 11,
	MLX5_VIRTIO_NET_F_HOST_TSO4 = 1 << 12,
};
1476 
1477 static u64 mlx_to_vritio_features(u16 dev_features)
1478 {
1479 	u64 result = 0;
1480 
1481 	if (dev_features & MLX5_VIRTIO_NET_F_GUEST_CSUM)
1482 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM);
1483 	if (dev_features & MLX5_VIRTIO_NET_F_CSUM)
1484 		result |= BIT_ULL(VIRTIO_NET_F_CSUM);
1485 	if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO6)
1486 		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6);
1487 	if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO4)
1488 		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4);
1489 
1490 	return result;
1491 }
1492 
1493 static u64 mlx5_vdpa_get_features(struct vdpa_device *vdev)
1494 {
1495 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1496 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1497 	u16 dev_features;
1498 
1499 	dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, device_features_bits_mask);
1500 	ndev->mvdev.mlx_features = mlx_to_vritio_features(dev_features);
1501 	if (MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, virtio_version_1_0))
1502 		ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_VERSION_1);
1503 	ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
1504 	print_features(mvdev, ndev->mvdev.mlx_features, false);
1505 	return ndev->mvdev.mlx_features;
1506 }
1507 
1508 static int verify_min_features(struct mlx5_vdpa_dev *mvdev, u64 features)
1509 {
1510 	if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
1511 		return -EOPNOTSUPP;
1512 
1513 	return 0;
1514 }
1515 
1516 static int setup_virtqueues(struct mlx5_vdpa_net *ndev)
1517 {
1518 	int err;
1519 	int i;
1520 
1521 	for (i = 0; i < 2 * mlx5_vdpa_max_qps(ndev->mvdev.max_vqs); i++) {
1522 		err = setup_vq(ndev, &ndev->vqs[i]);
1523 		if (err)
1524 			goto err_vq;
1525 	}
1526 
1527 	return 0;
1528 
1529 err_vq:
1530 	for (--i; i >= 0; i--)
1531 		teardown_vq(ndev, &ndev->vqs[i]);
1532 
1533 	return err;
1534 }
1535 
1536 static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
1537 {
1538 	struct mlx5_vdpa_virtqueue *mvq;
1539 	int i;
1540 
1541 	for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) {
1542 		mvq = &ndev->vqs[i];
1543 		if (!mvq->initialized)
1544 			continue;
1545 
1546 		teardown_vq(ndev, mvq);
1547 	}
1548 }
1549 
1550 /* TODO: cross-endian support */
1551 static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
1552 {
1553 	return virtio_legacy_is_little_endian() ||
1554 		(mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
1555 }
1556 
1557 static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
1558 {
1559 	return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
1560 }
1561 
1562 static int mlx5_vdpa_set_features(struct vdpa_device *vdev, u64 features)
1563 {
1564 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1565 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1566 	int err;
1567 
1568 	print_features(mvdev, features, true);
1569 
1570 	err = verify_min_features(mvdev, features);
1571 	if (err)
1572 		return err;
1573 
1574 	ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
1575 	ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, ndev->mtu);
1576 	ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
1577 	return err;
1578 }
1579 
/* Config change callbacks are not implemented; @cb is ignored and a warning
 * is logged.
 */
static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);

	mlx5_vdpa_warn(mvdev, "set config callback not supported\n");
}
1585 
1586 #define MLX5_VDPA_MAX_VQ_ENTRIES 256
1587 static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev)
1588 {
1589 	return MLX5_VDPA_MAX_VQ_ENTRIES;
1590 }
1591 
1592 static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev)
1593 {
1594 	return VIRTIO_ID_NET;
1595 }
1596 
1597 static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev)
1598 {
1599 	return PCI_VENDOR_ID_MELLANOX;
1600 }
1601 
1602 static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev)
1603 {
1604 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1605 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1606 
1607 	print_status(mvdev, ndev->mvdev.status, false);
1608 	return ndev->mvdev.status;
1609 }
1610 
1611 static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1612 {
1613 	struct mlx5_vq_restore_info *ri = &mvq->ri;
1614 	struct mlx5_virtq_attr attr;
1615 	int err;
1616 
1617 	if (!mvq->initialized)
1618 		return 0;
1619 
1620 	err = query_virtqueue(ndev, mvq, &attr);
1621 	if (err)
1622 		return err;
1623 
1624 	ri->avail_index = attr.available_index;
1625 	ri->used_index = attr.used_index;
1626 	ri->ready = mvq->ready;
1627 	ri->num_ent = mvq->num_ent;
1628 	ri->desc_addr = mvq->desc_addr;
1629 	ri->device_addr = mvq->device_addr;
1630 	ri->driver_addr = mvq->driver_addr;
1631 	ri->cb = mvq->event_cb;
1632 	ri->restore = true;
1633 	return 0;
1634 }
1635 
1636 static int save_channels_info(struct mlx5_vdpa_net *ndev)
1637 {
1638 	int i;
1639 
1640 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
1641 		memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri));
1642 		save_channel_info(ndev, &ndev->vqs[i]);
1643 	}
1644 	return 0;
1645 }
1646 
1647 static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev)
1648 {
1649 	int i;
1650 
1651 	for (i = 0; i < ndev->mvdev.max_vqs; i++)
1652 		memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
1653 }
1654 
1655 static void restore_channels_info(struct mlx5_vdpa_net *ndev)
1656 {
1657 	struct mlx5_vdpa_virtqueue *mvq;
1658 	struct mlx5_vq_restore_info *ri;
1659 	int i;
1660 
1661 	mlx5_clear_vqs(ndev);
1662 	init_mvqs(ndev);
1663 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
1664 		mvq = &ndev->vqs[i];
1665 		ri = &mvq->ri;
1666 		if (!ri->restore)
1667 			continue;
1668 
1669 		mvq->avail_idx = ri->avail_index;
1670 		mvq->used_idx = ri->used_index;
1671 		mvq->ready = ri->ready;
1672 		mvq->num_ent = ri->num_ent;
1673 		mvq->desc_addr = ri->desc_addr;
1674 		mvq->device_addr = ri->device_addr;
1675 		mvq->driver_addr = ri->driver_addr;
1676 		mvq->event_cb = ri->cb;
1677 	}
1678 }
1679 
1680 static int mlx5_vdpa_change_map(struct mlx5_vdpa_net *ndev, struct vhost_iotlb *iotlb)
1681 {
1682 	int err;
1683 
1684 	suspend_vqs(ndev);
1685 	err = save_channels_info(ndev);
1686 	if (err)
1687 		goto err_mr;
1688 
1689 	teardown_driver(ndev);
1690 	mlx5_vdpa_destroy_mr(&ndev->mvdev);
1691 	err = mlx5_vdpa_create_mr(&ndev->mvdev, iotlb);
1692 	if (err)
1693 		goto err_mr;
1694 
1695 	if (!(ndev->mvdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
1696 		return 0;
1697 
1698 	restore_channels_info(ndev);
1699 	err = setup_driver(ndev);
1700 	if (err)
1701 		goto err_setup;
1702 
1703 	return 0;
1704 
1705 err_setup:
1706 	mlx5_vdpa_destroy_mr(&ndev->mvdev);
1707 err_mr:
1708 	return err;
1709 }
1710 
1711 static int setup_driver(struct mlx5_vdpa_net *ndev)
1712 {
1713 	int err;
1714 
1715 	mutex_lock(&ndev->reslock);
1716 	if (ndev->setup) {
1717 		mlx5_vdpa_warn(&ndev->mvdev, "setup driver called for already setup driver\n");
1718 		err = 0;
1719 		goto out;
1720 	}
1721 	err = setup_virtqueues(ndev);
1722 	if (err) {
1723 		mlx5_vdpa_warn(&ndev->mvdev, "setup_virtqueues\n");
1724 		goto out;
1725 	}
1726 
1727 	err = create_rqt(ndev);
1728 	if (err) {
1729 		mlx5_vdpa_warn(&ndev->mvdev, "create_rqt\n");
1730 		goto err_rqt;
1731 	}
1732 
1733 	err = create_tir(ndev);
1734 	if (err) {
1735 		mlx5_vdpa_warn(&ndev->mvdev, "create_tir\n");
1736 		goto err_tir;
1737 	}
1738 
1739 	err = add_fwd_to_tir(ndev);
1740 	if (err) {
1741 		mlx5_vdpa_warn(&ndev->mvdev, "add_fwd_to_tir\n");
1742 		goto err_fwd;
1743 	}
1744 	ndev->setup = true;
1745 	mutex_unlock(&ndev->reslock);
1746 
1747 	return 0;
1748 
1749 err_fwd:
1750 	destroy_tir(ndev);
1751 err_tir:
1752 	destroy_rqt(ndev);
1753 err_rqt:
1754 	teardown_virtqueues(ndev);
1755 out:
1756 	mutex_unlock(&ndev->reslock);
1757 	return err;
1758 }
1759 
1760 static void teardown_driver(struct mlx5_vdpa_net *ndev)
1761 {
1762 	mutex_lock(&ndev->reslock);
1763 	if (!ndev->setup)
1764 		goto out;
1765 
1766 	remove_fwd_to_tir(ndev);
1767 	destroy_tir(ndev);
1768 	destroy_rqt(ndev);
1769 	teardown_virtqueues(ndev);
1770 	ndev->setup = false;
1771 out:
1772 	mutex_unlock(&ndev->reslock);
1773 }
1774 
1775 static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
1776 {
1777 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1778 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1779 	int err;
1780 
1781 	print_status(mvdev, status, true);
1782 	if (!status) {
1783 		mlx5_vdpa_info(mvdev, "performing device reset\n");
1784 		teardown_driver(ndev);
1785 		mlx5_vdpa_destroy_mr(&ndev->mvdev);
1786 		ndev->mvdev.status = 0;
1787 		ndev->mvdev.mlx_features = 0;
1788 		++mvdev->generation;
1789 		return;
1790 	}
1791 
1792 	if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
1793 		if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
1794 			err = setup_driver(ndev);
1795 			if (err) {
1796 				mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
1797 				goto err_setup;
1798 			}
1799 		} else {
1800 			mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
1801 			return;
1802 		}
1803 	}
1804 
1805 	ndev->mvdev.status = status;
1806 	return;
1807 
1808 err_setup:
1809 	mlx5_vdpa_destroy_mr(&ndev->mvdev);
1810 	ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
1811 }
1812 
1813 static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev)
1814 {
1815 	return sizeof(struct virtio_net_config);
1816 }
1817 
1818 static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf,
1819 				 unsigned int len)
1820 {
1821 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1822 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1823 
1824 	if (offset + len <= sizeof(struct virtio_net_config))
1825 		memcpy(buf, (u8 *)&ndev->config + offset, len);
1826 }
1827 
/* Writing the config space is not supported; the request is ignored. */
static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf,
				 unsigned int len)
{
}
1833 
1834 static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
1835 {
1836 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1837 
1838 	return mvdev->generation;
1839 }
1840 
1841 static int mlx5_vdpa_set_map(struct vdpa_device *vdev, struct vhost_iotlb *iotlb)
1842 {
1843 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1844 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1845 	bool change_map;
1846 	int err;
1847 
1848 	err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map);
1849 	if (err) {
1850 		mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err);
1851 		return err;
1852 	}
1853 
1854 	if (change_map)
1855 		return mlx5_vdpa_change_map(ndev, iotlb);
1856 
1857 	return 0;
1858 }
1859 
1860 static void mlx5_vdpa_free(struct vdpa_device *vdev)
1861 {
1862 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1863 	struct mlx5_core_dev *pfmdev;
1864 	struct mlx5_vdpa_net *ndev;
1865 
1866 	ndev = to_mlx5_vdpa_ndev(mvdev);
1867 
1868 	free_resources(ndev);
1869 	if (!is_zero_ether_addr(ndev->config.mac)) {
1870 		pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
1871 		mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
1872 	}
1873 	mlx5_vdpa_free_resources(&ndev->mvdev);
1874 	mutex_destroy(&ndev->reslock);
1875 }
1876 
1877 static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
1878 {
1879 	struct vdpa_notification_area ret = {};
1880 
1881 	return ret;
1882 }
1883 
1884 static int mlx5_get_vq_irq(struct vdpa_device *vdv, u16 idx)
1885 {
1886 	return -EOPNOTSUPP;
1887 }
1888 
1889 static const struct vdpa_config_ops mlx5_vdpa_ops = {
1890 	.set_vq_address = mlx5_vdpa_set_vq_address,
1891 	.set_vq_num = mlx5_vdpa_set_vq_num,
1892 	.kick_vq = mlx5_vdpa_kick_vq,
1893 	.set_vq_cb = mlx5_vdpa_set_vq_cb,
1894 	.set_vq_ready = mlx5_vdpa_set_vq_ready,
1895 	.get_vq_ready = mlx5_vdpa_get_vq_ready,
1896 	.set_vq_state = mlx5_vdpa_set_vq_state,
1897 	.get_vq_state = mlx5_vdpa_get_vq_state,
1898 	.get_vq_notification = mlx5_get_vq_notification,
1899 	.get_vq_irq = mlx5_get_vq_irq,
1900 	.get_vq_align = mlx5_vdpa_get_vq_align,
1901 	.get_features = mlx5_vdpa_get_features,
1902 	.set_features = mlx5_vdpa_set_features,
1903 	.set_config_cb = mlx5_vdpa_set_config_cb,
1904 	.get_vq_num_max = mlx5_vdpa_get_vq_num_max,
1905 	.get_device_id = mlx5_vdpa_get_device_id,
1906 	.get_vendor_id = mlx5_vdpa_get_vendor_id,
1907 	.get_status = mlx5_vdpa_get_status,
1908 	.set_status = mlx5_vdpa_set_status,
1909 	.get_config_size = mlx5_vdpa_get_config_size,
1910 	.get_config = mlx5_vdpa_get_config,
1911 	.set_config = mlx5_vdpa_set_config,
1912 	.get_generation = mlx5_vdpa_get_generation,
1913 	.set_map = mlx5_vdpa_set_map,
1914 	.free = mlx5_vdpa_free,
1915 };
1916 
1917 static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
1918 {
1919 	u16 hw_mtu;
1920 	int err;
1921 
1922 	err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu);
1923 	if (err)
1924 		return err;
1925 
1926 	*mtu = hw_mtu - MLX5V_ETH_HARD_MTU;
1927 	return 0;
1928 }
1929 
1930 static int alloc_resources(struct mlx5_vdpa_net *ndev)
1931 {
1932 	struct mlx5_vdpa_net_resources *res = &ndev->res;
1933 	int err;
1934 
1935 	if (res->valid) {
1936 		mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n");
1937 		return -EEXIST;
1938 	}
1939 
1940 	err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn);
1941 	if (err)
1942 		return err;
1943 
1944 	err = create_tis(ndev);
1945 	if (err)
1946 		goto err_tis;
1947 
1948 	res->valid = true;
1949 
1950 	return 0;
1951 
1952 err_tis:
1953 	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
1954 	return err;
1955 }
1956 
1957 static void free_resources(struct mlx5_vdpa_net *ndev)
1958 {
1959 	struct mlx5_vdpa_net_resources *res = &ndev->res;
1960 
1961 	if (!res->valid)
1962 		return;
1963 
1964 	destroy_tis(ndev);
1965 	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
1966 	res->valid = false;
1967 }
1968 
1969 static void init_mvqs(struct mlx5_vdpa_net *ndev)
1970 {
1971 	struct mlx5_vdpa_virtqueue *mvq;
1972 	int i;
1973 
1974 	for (i = 0; i < 2 * mlx5_vdpa_max_qps(ndev->mvdev.max_vqs); ++i) {
1975 		mvq = &ndev->vqs[i];
1976 		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
1977 		mvq->index = i;
1978 		mvq->ndev = ndev;
1979 		mvq->fwqp.fw = true;
1980 	}
1981 	for (; i < ndev->mvdev.max_vqs; i++) {
1982 		mvq = &ndev->vqs[i];
1983 		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
1984 		mvq->index = i;
1985 		mvq->ndev = ndev;
1986 	}
1987 }
1988 
1989 struct mlx5_vdpa_mgmtdev {
1990 	struct vdpa_mgmt_dev mgtdev;
1991 	struct mlx5_adev *madev;
1992 	struct mlx5_vdpa_net *ndev;
1993 };
1994 
1995 static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
1996 {
1997 	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
1998 	struct virtio_net_config *config;
1999 	struct mlx5_core_dev *pfmdev;
2000 	struct mlx5_vdpa_dev *mvdev;
2001 	struct mlx5_vdpa_net *ndev;
2002 	struct mlx5_core_dev *mdev;
2003 	u32 max_vqs;
2004 	int err;
2005 
2006 	if (mgtdev->ndev)
2007 		return -ENOSPC;
2008 
2009 	mdev = mgtdev->madev->mdev;
2010 	/* we save one virtqueue for control virtqueue should we require it */
2011 	max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues);
2012 	max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS);
2013 
2014 	ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
2015 				 name);
2016 	if (IS_ERR(ndev))
2017 		return PTR_ERR(ndev);
2018 
2019 	ndev->mvdev.max_vqs = max_vqs;
2020 	mvdev = &ndev->mvdev;
2021 	mvdev->mdev = mdev;
2022 	init_mvqs(ndev);
2023 	mutex_init(&ndev->reslock);
2024 	config = &ndev->config;
2025 	err = query_mtu(mdev, &ndev->mtu);
2026 	if (err)
2027 		goto err_mtu;
2028 
2029 	err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
2030 	if (err)
2031 		goto err_mtu;
2032 
2033 	if (!is_zero_ether_addr(config->mac)) {
2034 		pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
2035 		err = mlx5_mpfs_add_mac(pfmdev, config->mac);
2036 		if (err)
2037 			goto err_mtu;
2038 	}
2039 
2040 	mvdev->vdev.dma_dev = mdev->device;
2041 	err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
2042 	if (err)
2043 		goto err_mpfs;
2044 
2045 	err = alloc_resources(ndev);
2046 	if (err)
2047 		goto err_res;
2048 
2049 	mvdev->vdev.mdev = &mgtdev->mgtdev;
2050 	err = _vdpa_register_device(&mvdev->vdev, 2 * mlx5_vdpa_max_qps(max_vqs));
2051 	if (err)
2052 		goto err_reg;
2053 
2054 	mgtdev->ndev = ndev;
2055 	return 0;
2056 
2057 err_reg:
2058 	free_resources(ndev);
2059 err_res:
2060 	mlx5_vdpa_free_resources(&ndev->mvdev);
2061 err_mpfs:
2062 	if (!is_zero_ether_addr(config->mac))
2063 		mlx5_mpfs_del_mac(pfmdev, config->mac);
2064 err_mtu:
2065 	mutex_destroy(&ndev->reslock);
2066 	put_device(&mvdev->vdev.dev);
2067 	return err;
2068 }
2069 
2070 static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
2071 {
2072 	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
2073 
2074 	_vdpa_unregister_device(dev);
2075 	mgtdev->ndev = NULL;
2076 }
2077 
2078 static const struct vdpa_mgmtdev_ops mdev_ops = {
2079 	.dev_add = mlx5_vdpa_dev_add,
2080 	.dev_del = mlx5_vdpa_dev_del,
2081 };
2082 
2083 static struct virtio_device_id id_table[] = {
2084 	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
2085 	{ 0 },
2086 };
2087 
2088 static int mlx5v_probe(struct auxiliary_device *adev,
2089 		       const struct auxiliary_device_id *id)
2090 
2091 {
2092 	struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
2093 	struct mlx5_core_dev *mdev = madev->mdev;
2094 	struct mlx5_vdpa_mgmtdev *mgtdev;
2095 	int err;
2096 
2097 	mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL);
2098 	if (!mgtdev)
2099 		return -ENOMEM;
2100 
2101 	mgtdev->mgtdev.ops = &mdev_ops;
2102 	mgtdev->mgtdev.device = mdev->device;
2103 	mgtdev->mgtdev.id_table = id_table;
2104 	mgtdev->madev = madev;
2105 
2106 	err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
2107 	if (err)
2108 		goto reg_err;
2109 
2110 	dev_set_drvdata(&adev->dev, mgtdev);
2111 
2112 	return 0;
2113 
2114 reg_err:
2115 	kfree(mgtdev);
2116 	return err;
2117 }
2118 
2119 static void mlx5v_remove(struct auxiliary_device *adev)
2120 {
2121 	struct mlx5_vdpa_mgmtdev *mgtdev;
2122 
2123 	mgtdev = dev_get_drvdata(&adev->dev);
2124 	vdpa_mgmtdev_unregister(&mgtdev->mgtdev);
2125 	kfree(mgtdev);
2126 }
2127 
2128 static const struct auxiliary_device_id mlx5v_id_table[] = {
2129 	{ .name = MLX5_ADEV_NAME ".vnet", },
2130 	{},
2131 };
2132 
2133 MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table);
2134 
2135 static struct auxiliary_driver mlx5v_driver = {
2136 	.name = "vnet",
2137 	.probe = mlx5v_probe,
2138 	.remove = mlx5v_remove,
2139 	.id_table = mlx5v_id_table,
2140 };
2141 
2142 module_auxiliary_driver(mlx5v_driver);
2143