xref: /openbmc/linux/drivers/vdpa/mlx5/net/mlx5_vnet.c (revision 2a9eb57e)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2020 Mellanox Technologies Ltd. */
3 
4 #include <linux/module.h>
5 #include <linux/vdpa.h>
6 #include <linux/vringh.h>
7 #include <uapi/linux/virtio_net.h>
8 #include <uapi/linux/virtio_ids.h>
9 #include <uapi/linux/vdpa.h>
10 #include <linux/virtio_config.h>
11 #include <linux/auxiliary_bus.h>
12 #include <linux/mlx5/cq.h>
13 #include <linux/mlx5/qp.h>
14 #include <linux/mlx5/device.h>
15 #include <linux/mlx5/driver.h>
16 #include <linux/mlx5/vport.h>
17 #include <linux/mlx5/fs.h>
18 #include <linux/mlx5/mlx5_ifc_vdpa.h>
19 #include <linux/mlx5/mpfs.h>
20 #include "mlx5_vdpa.h"
21 
/* Module metadata: author, description and license declarations. */
22 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
23 MODULE_DESCRIPTION("Mellanox VDPA driver");
24 MODULE_LICENSE("Dual BSD/GPL");
25 
/* Map a pointer to the embedded mvdev member back to the struct
 * mlx5_vdpa_net that contains it.
 */
26 #define to_mlx5_vdpa_ndev(__mvdev)                                             \
27 	container_of(__mvdev, struct mlx5_vdpa_net, mvdev)
/* Map a pointer to the embedded vdev member back to the struct
 * mlx5_vdpa_dev that contains it.
 */
28 #define to_mvdev(__vdev) container_of((__vdev), struct mlx5_vdpa_dev, vdev)
29 
/* Union of every virtio feature bit that print_features() can name.
 * print_features() warns about any bit set outside this mask.
 */
30 #define VALID_FEATURES_MASK                                                                        \
31 	(BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |                                   \
32 	 BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) |   \
33 	 BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |                             \
34 	 BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \
35 	 BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) |   \
36 	 BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |      \
37 	 BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) |                                 \
38 	 BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) |                      \
39 	 BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) |  \
40 	 BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) |           \
41 	 BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) |                          \
42 	 BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) |      \
43 	 BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV))
44 
/* Union of every device status bit that print_status() can name.
 * print_status() warns about any bit set outside this mask.
 */
45 #define VALID_STATUS_MASK                                                                          \
46 	(VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK |        \
47 	 VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)
48 
/* Evaluates to 1 when _feature is set in _mvdev->actual_features, else 0. */
49 #define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature)))
50 
/* NOTE(review): not referenced in this part of the file; presumably a marker
 * for untagged traffic chosen just above the 12-bit VLAN ID range - confirm.
 */
51 #define MLX5V_UNTAGGED 0x1000
52 
/* Object numbers of the per-device transport resources. */
53 struct mlx5_vdpa_net_resources {
54 	u32 tisn;	/* filled in by create_tis(), released by destroy_tis() */
55 	u32 tdn;	/* programmed as the TIS transport_domain in create_tis() */
56 	u32 tirn;	/* NOTE(review): presumably a TIR number - confirm */
57 	u32 rqtn;	/* NOTE(review): presumably an RQ table number - confirm */
58 	bool valid;	/* NOTE(review): presumably set while the resources exist - confirm */
59 };
60 
/* Backing storage of a completion queue; set up by cq_frag_buf_alloc(). */
61 struct mlx5_vdpa_cq_buf {
62 	struct mlx5_frag_buf_ctrl fbc;	/* initialised by mlx5_init_fbc(); used by get_cqe() */
63 	struct mlx5_frag_buf frag_buf;	/* nent * MLX5_VDPA_CQE_SIZE bytes */
64 	int cqe_size;			/* set to MLX5_VDPA_CQE_SIZE */
65 	int nent;			/* number of entries requested at allocation */
66 };
67 
/* Completion queue used for virtqueue notifications; see cq_create(). */
68 struct mlx5_vdpa_cq {
69 	struct mlx5_core_cq mcq;	/* core CQ object; mlx5_vdpa_cq_comp() is its handler */
70 	struct mlx5_vdpa_cq_buf buf;	/* CQE storage */
71 	struct mlx5_db db;		/* db.db[0] is the set-ci record, db.db[1] the arm record */
72 	int cqe;			/* number of entries; used as index mask/owner bit in get_sw_cqe() */
73 };
74 
/* One of the three per-virtqueue umem objects; see create_umem(). */
75 struct mlx5_vdpa_umem {
76 	struct mlx5_frag_buf_ctrl fbc;	/* not touched in this part of the file */
77 	struct mlx5_frag_buf frag_buf;	/* memory registered as the umem */
78 	int size;			/* bytes; computed by set_umem_size() */
79 	u32 id;				/* umem_id returned by the create command */
80 };
81 
/* One end of the QP pair used as the notification channel of a virtqueue. */
82 struct mlx5_vdpa_qp {
83 	struct mlx5_core_qp mqp;	/* qpn and uid are filled in by qp_create() */
84 	struct mlx5_frag_buf frag_buf;	/* receive queue buffer; allocated only when !fw */
85 	struct mlx5_db db;		/* doorbell record; allocated only when !fw */
86 	u16 head;			/* running count of posted receives; see rx_post() */
87 	bool fw;			/* true for the firmware end of the connection */
88 };
89 
/* Snapshot of virtqueue parameters. The fields mirror those of struct
 * mlx5_vdpa_virtqueue.
 * NOTE(review): not used in this part of the file; presumably saved before
 * the queue resources are torn down and re-applied afterwards - confirm.
 */
90 struct mlx5_vq_restore_info {
91 	u32 num_ent;
92 	u64 desc_addr;
93 	u64 device_addr;
94 	u64 driver_addr;
95 	u16 avail_index;
96 	u16 used_index;
97 	bool ready;
98 	bool restore;
99 };
100 
/* Driver state of a single virtqueue and the hardware objects backing it. */
101 struct mlx5_vdpa_virtqueue {
102 	bool ready;
103 	u64 desc_addr;		/* programmed as desc_addr by create_virtqueue() */
104 	u64 device_addr;	/* programmed as used_addr by create_virtqueue() */
105 	u64 driver_addr;	/* programmed as available_addr by create_virtqueue() */
106 	u32 num_ent;		/* programmed as queue_size by create_virtqueue() */
107 
108 	/* Resources for implementing the notification channel from the device
109 	 * to the driver. fwqp is the firmware end of an RC connection; the
110 	 * other end is vqqp used by the driver. cq is where completions are
111 	 * reported.
112 	 */
113 	struct mlx5_vdpa_cq cq;
114 	struct mlx5_vdpa_qp fwqp;
115 	struct mlx5_vdpa_qp vqqp;
116 
117 	/* umem resources are required for the virtqueue operation. Their use
118 	 * is internal and they must be provided by the driver.
119 	 */
120 	struct mlx5_vdpa_umem umem1;
121 	struct mlx5_vdpa_umem umem2;
122 	struct mlx5_vdpa_umem umem3;
123 
124 	u32 counter_set_id;	/* programmed only when counters_supported() */
125 	bool initialized;
126 	int index;		/* queue_index; odd indices are TX, see vq_is_tx() */
127 	u32 virtq_id;		/* obj_id returned by create_virtqueue() */
128 	struct mlx5_vdpa_net *ndev;	/* owning net device */
129 	u16 avail_idx;		/* programmed as hw_available_index */
130 	u16 used_idx;		/* programmed as hw_used_index */
131 	int fw_state;		/* MLX5_VIRTIO_NET_Q_OBJECT_* state tracked by the driver */
132 
133 	/* keep last in the struct */
134 	struct mlx5_vq_restore_info ri;
135 };
136 
137 static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
138 {
139 	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) {
140 		if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
141 			return idx < 2;
142 		else
143 			return idx < 3;
144 	}
145 
146 	return idx <= mvdev->max_idx;
147 }
148 
/* Number of buckets in mlx5_vdpa_net.macvlan_hash. */
149 #define MLX5V_MACVLAN_SIZE 256
150 
/* Per-device state of the mlx5 vDPA network device. */
151 struct mlx5_vdpa_net {
152 	struct mlx5_vdpa_dev mvdev;	/* embedded; see to_mlx5_vdpa_ndev() */
153 	struct mlx5_vdpa_net_resources res;
154 	struct virtio_net_config config;
155 	struct mlx5_vdpa_virtqueue *vqs;	/* indexed by virtqueue index */
156 	struct vdpa_callback *event_cbs;	/* indexed by virtqueue index */
157 
158 	/* Serialize vq resources creation and destruction. This is required
159 	 * since the memory map might change and we need to destroy and create
160 	 * resources while the driver is operational.
161 	 */
162 	struct rw_semaphore reslock;
163 	struct mlx5_flow_table *rxft;
164 	bool setup;
165 	u32 cur_num_vqs;
166 	u32 rqt_size;
167 	bool nb_registered;
168 	struct notifier_block nb;
169 	struct vdpa_callback config_cb;
170 	struct mlx5_vdpa_wq_ent cvq_ent;
171 	struct hlist_head macvlan_hash[MLX5V_MACVLAN_SIZE];
172 };
173 
/* Hash list node carrying a pair of flow rule handles and a 64-bit key.
 * NOTE(review): presumably linked into mlx5_vdpa_net.macvlan_hash, with
 * macvlan combining a MAC address and a VLAN id - confirm against the users.
 */
174 struct macvlan_node {
175 	struct hlist_node hlist;
176 	struct mlx5_flow_handle *ucast_rule;
177 	struct mlx5_flow_handle *mcast_rule;
178 	u64 macvlan;
179 };
180 
/* Forward declarations; the definitions are not in this part of the file. */
181 static void free_resources(struct mlx5_vdpa_net *ndev);
182 static void init_mvqs(struct mlx5_vdpa_net *ndev);
183 static int setup_driver(struct mlx5_vdpa_dev *mvdev);
184 static void teardown_driver(struct mlx5_vdpa_net *ndev);
185 
/* When true, print_status() and print_features() log every bit by name.
 * Nothing in this part of the file assigns it, so it keeps its zero
 * initial value here.
 */
186 static bool mlx5_vdpa_debug;
187 
/* NOTE(review): not referenced in this part of the file; presumably the
 * maximum number of control virtqueue entries - confirm.
 */
188 #define MLX5_CVQ_MAX_ENT 16
189 
/* Log the name of _feature when its bit is set. Expects variables named
 * 'features' and 'mvdev' to be in scope at the point of use.
 */
190 #define MLX5_LOG_VIO_FLAG(_feature)                                                                \
191 	do {                                                                                       \
192 		if (features & BIT_ULL(_feature))                                                  \
193 			mlx5_vdpa_info(mvdev, "%s\n", #_feature);                                  \
194 	} while (0)
195 
/* Log the name of _status when any of its bits is set. Expects variables
 * named 'status' and 'mvdev' to be in scope at the point of use.
 */
196 #define MLX5_LOG_VIO_STAT(_status)                                                                 \
197 	do {                                                                                       \
198 		if (status & (_status))                                                            \
199 			mlx5_vdpa_info(mvdev, "%s\n", #_status);                                   \
200 	} while (0)
201 
202 /* TODO: cross-endian support */
203 static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
204 {
205 	return virtio_legacy_is_little_endian() ||
206 		(mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
207 }
208 
209 static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val)
210 {
211 	return __virtio16_to_cpu(mlx5_vdpa_is_little_endian(mvdev), val);
212 }
213 
214 static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
215 {
216 	return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
217 }
218 
219 static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
220 {
221 	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
222 		return 2;
223 
224 	return mvdev->max_vqs;
225 }
226 
227 static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
228 {
229 	return idx == ctrl_vq_idx(mvdev);
230 }
231 
232 static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
233 {
234 	if (status & ~VALID_STATUS_MASK)
235 		mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n",
236 			       status & ~VALID_STATUS_MASK);
237 
238 	if (!mlx5_vdpa_debug)
239 		return;
240 
241 	mlx5_vdpa_info(mvdev, "driver status %s", set ? "set" : "get");
242 	if (set && !status) {
243 		mlx5_vdpa_info(mvdev, "driver resets the device\n");
244 		return;
245 	}
246 
247 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE);
248 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER);
249 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK);
250 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK);
251 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET);
252 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED);
253 }
254 
255 static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set)
256 {
257 	if (features & ~VALID_FEATURES_MASK)
258 		mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n",
259 			       features & ~VALID_FEATURES_MASK);
260 
261 	if (!mlx5_vdpa_debug)
262 		return;
263 
264 	mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads");
265 	if (!features)
266 		mlx5_vdpa_info(mvdev, "all feature bits are cleared\n");
267 
268 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM);
269 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM);
270 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
271 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU);
272 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC);
273 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4);
274 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6);
275 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN);
276 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO);
277 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4);
278 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6);
279 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN);
280 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO);
281 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF);
282 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS);
283 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ);
284 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX);
285 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN);
286 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA);
287 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE);
288 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ);
289 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR);
290 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT);
291 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS);
292 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT);
293 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY);
294 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX);
295 	MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY);
296 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT);
297 	MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1);
298 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM);
299 	MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED);
300 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM);
301 	MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV);
302 }
303 
304 static int create_tis(struct mlx5_vdpa_net *ndev)
305 {
306 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
307 	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
308 	void *tisc;
309 	int err;
310 
311 	tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
312 	MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn);
313 	err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn);
314 	if (err)
315 		mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err);
316 
317 	return err;
318 }
319 
320 static void destroy_tis(struct mlx5_vdpa_net *ndev)
321 {
322 	mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn);
323 }
324 
/* Size in bytes of each entry of the completion queues created here. */
325 #define MLX5_VDPA_CQE_SIZE 64
/* log2 of MLX5_VDPA_CQE_SIZE. */
326 #define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE)
327 
328 static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent)
329 {
330 	struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
331 	u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE;
332 	u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE;
333 	int err;
334 
335 	err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf,
336 				       ndev->mvdev.mdev->priv.numa_node);
337 	if (err)
338 		return err;
339 
340 	mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);
341 
342 	buf->cqe_size = MLX5_VDPA_CQE_SIZE;
343 	buf->nent = nent;
344 
345 	return 0;
346 }
347 
348 static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size)
349 {
350 	struct mlx5_frag_buf *frag_buf = &umem->frag_buf;
351 
352 	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf,
353 					ndev->mvdev.mdev->priv.numa_node);
354 }
355 
356 static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf)
357 {
358 	mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf);
359 }
360 
361 static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n)
362 {
363 	return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n);
364 }
365 
366 static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf)
367 {
368 	struct mlx5_cqe64 *cqe64;
369 	void *cqe;
370 	int i;
371 
372 	for (i = 0; i < buf->nent; i++) {
373 		cqe = get_cqe(vcq, i);
374 		cqe64 = cqe;
375 		cqe64->op_own = MLX5_CQE_INVALID << 4;
376 	}
377 }
378 
379 static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n)
380 {
381 	struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1));
382 
383 	if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
384 	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe)))
385 		return cqe64;
386 
387 	return NULL;
388 }
389 
390 static void rx_post(struct mlx5_vdpa_qp *vqp, int n)
391 {
392 	vqp->head += n;
393 	vqp->db.db[0] = cpu_to_be32(vqp->head);
394 }
395 
396 static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in,
397 		       struct mlx5_vdpa_virtqueue *mvq, u32 num_ent)
398 {
399 	struct mlx5_vdpa_qp *vqp;
400 	__be64 *pas;
401 	void *qpc;
402 
403 	vqp = fw ? &mvq->fwqp : &mvq->vqqp;
404 	MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid);
405 	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
406 	if (vqp->fw) {
407 		/* Firmware QP is allocated by the driver for the firmware's
408 		 * use so we can skip part of the params as they will be chosen by firmware
409 		 */
410 		qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
411 		MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
412 		MLX5_SET(qpc, qpc, no_sq, 1);
413 		return;
414 	}
415 
416 	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
417 	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
418 	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
419 	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
420 	MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index);
421 	MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
422 	MLX5_SET(qpc, qpc, no_sq, 1);
423 	MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn);
424 	MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent));
425 	MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
426 	pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas);
427 	mlx5_fill_page_frag_array(&vqp->frag_buf, pas);
428 }
429 
430 static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent)
431 {
432 	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev,
433 					num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf,
434 					ndev->mvdev.mdev->priv.numa_node);
435 }
436 
437 static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
438 {
439 	mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf);
440 }
441 
442 static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
443 		     struct mlx5_vdpa_qp *vqp)
444 {
445 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
446 	int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
447 	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
448 	void *qpc;
449 	void *in;
450 	int err;
451 
452 	if (!vqp->fw) {
453 		vqp = &mvq->vqqp;
454 		err = rq_buf_alloc(ndev, vqp, mvq->num_ent);
455 		if (err)
456 			return err;
457 
458 		err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db);
459 		if (err)
460 			goto err_db;
461 		inlen += vqp->frag_buf.npages * sizeof(__be64);
462 	}
463 
464 	in = kzalloc(inlen, GFP_KERNEL);
465 	if (!in) {
466 		err = -ENOMEM;
467 		goto err_kzalloc;
468 	}
469 
470 	qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent);
471 	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
472 	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
473 	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
474 	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
475 	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
476 	if (!vqp->fw)
477 		MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma);
478 	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
479 	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
480 	kfree(in);
481 	if (err)
482 		goto err_kzalloc;
483 
484 	vqp->mqp.uid = ndev->mvdev.res.uid;
485 	vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);
486 
487 	if (!vqp->fw)
488 		rx_post(vqp, mvq->num_ent);
489 
490 	return 0;
491 
492 err_kzalloc:
493 	if (!vqp->fw)
494 		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
495 err_db:
496 	if (!vqp->fw)
497 		rq_buf_free(ndev, vqp);
498 
499 	return err;
500 }
501 
502 static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
503 {
504 	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
505 
506 	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
507 	MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
508 	MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
509 	if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
510 		mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn);
511 	if (!vqp->fw) {
512 		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
513 		rq_buf_free(ndev, vqp);
514 	}
515 }
516 
517 static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
518 {
519 	return get_sw_cqe(cq, cq->mcq.cons_index);
520 }
521 
522 static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
523 {
524 	struct mlx5_cqe64 *cqe64;
525 
526 	cqe64 = next_cqe_sw(vcq);
527 	if (!cqe64)
528 		return -EAGAIN;
529 
530 	vcq->mcq.cons_index++;
531 	return 0;
532 }
533 
534 static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
535 {
536 	struct mlx5_vdpa_net *ndev = mvq->ndev;
537 	struct vdpa_callback *event_cb;
538 
539 	event_cb = &ndev->event_cbs[mvq->index];
540 	mlx5_cq_set_ci(&mvq->cq.mcq);
541 
542 	/* make sure CQ cosumer update is visible to the hardware before updating
543 	 * RX doorbell record.
544 	 */
545 	dma_wmb();
546 	rx_post(&mvq->vqqp, num);
547 	if (event_cb->callback)
548 		event_cb->callback(event_cb->private);
549 }
550 
551 static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
552 {
553 	struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
554 	struct mlx5_vdpa_net *ndev = mvq->ndev;
555 	void __iomem *uar_page = ndev->mvdev.res.uar->map;
556 	int num = 0;
557 
558 	while (!mlx5_vdpa_poll_one(&mvq->cq)) {
559 		num++;
560 		if (num > mvq->num_ent / 2) {
561 			/* If completions keep coming while we poll, we want to
562 			 * let the hardware know that we consumed them by
563 			 * updating the doorbell record.  We also let vdpa core
564 			 * know about this so it passes it on the virtio driver
565 			 * on the guest.
566 			 */
567 			mlx5_vdpa_handle_completions(mvq, num);
568 			num = 0;
569 		}
570 	}
571 
572 	if (num)
573 		mlx5_vdpa_handle_completions(mvq, num);
574 
575 	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
576 }
577 
578 static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
579 {
580 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
581 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
582 	void __iomem *uar_page = ndev->mvdev.res.uar->map;
583 	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
584 	struct mlx5_vdpa_cq *vcq = &mvq->cq;
585 	__be64 *pas;
586 	int inlen;
587 	void *cqc;
588 	void *in;
589 	int err;
590 	int eqn;
591 
592 	err = mlx5_db_alloc(mdev, &vcq->db);
593 	if (err)
594 		return err;
595 
596 	vcq->mcq.set_ci_db = vcq->db.db;
597 	vcq->mcq.arm_db = vcq->db.db + 1;
598 	vcq->mcq.cqe_sz = 64;
599 
600 	err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
601 	if (err)
602 		goto err_db;
603 
604 	cq_frag_buf_init(vcq, &vcq->buf);
605 
606 	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
607 		MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages;
608 	in = kzalloc(inlen, GFP_KERNEL);
609 	if (!in) {
610 		err = -ENOMEM;
611 		goto err_vzalloc;
612 	}
613 
614 	MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid);
615 	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
616 	mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas);
617 
618 	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
619 	MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
620 
621 	/* Use vector 0 by default. Consider adding code to choose least used
622 	 * vector.
623 	 */
624 	err = mlx5_vector2eqn(mdev, 0, &eqn);
625 	if (err)
626 		goto err_vec;
627 
628 	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
629 	MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
630 	MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
631 	MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
632 	MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);
633 
634 	err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
635 	if (err)
636 		goto err_vec;
637 
638 	vcq->mcq.comp = mlx5_vdpa_cq_comp;
639 	vcq->cqe = num_ent;
640 	vcq->mcq.set_ci_db = vcq->db.db;
641 	vcq->mcq.arm_db = vcq->db.db + 1;
642 	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
643 	kfree(in);
644 	return 0;
645 
646 err_vec:
647 	kfree(in);
648 err_vzalloc:
649 	cq_frag_buf_free(ndev, &vcq->buf);
650 err_db:
651 	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
652 	return err;
653 }
654 
655 static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
656 {
657 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
658 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
659 	struct mlx5_vdpa_cq *vcq = &mvq->cq;
660 
661 	if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) {
662 		mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn);
663 		return;
664 	}
665 	cq_frag_buf_free(ndev, &vcq->buf);
666 	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
667 }
668 
669 static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
670 			  struct mlx5_vdpa_umem **umemp)
671 {
672 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
673 	int p_a;
674 	int p_b;
675 
676 	switch (num) {
677 	case 1:
678 		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_a);
679 		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_b);
680 		*umemp = &mvq->umem1;
681 		break;
682 	case 2:
683 		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_a);
684 		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_b);
685 		*umemp = &mvq->umem2;
686 		break;
687 	case 3:
688 		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_a);
689 		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_b);
690 		*umemp = &mvq->umem3;
691 		break;
692 	}
693 	(*umemp)->size = p_a * mvq->num_ent + p_b;
694 }
695 
696 static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem)
697 {
698 	mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf);
699 }
700 
701 static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
702 {
703 	int inlen;
704 	u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {};
705 	void *um;
706 	void *in;
707 	int err;
708 	__be64 *pas;
709 	struct mlx5_vdpa_umem *umem;
710 
711 	set_umem_size(ndev, mvq, num, &umem);
712 	err = umem_frag_buf_alloc(ndev, umem, umem->size);
713 	if (err)
714 		return err;
715 
716 	inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages;
717 
718 	in = kzalloc(inlen, GFP_KERNEL);
719 	if (!in) {
720 		err = -ENOMEM;
721 		goto err_in;
722 	}
723 
724 	MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM);
725 	MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid);
726 	um = MLX5_ADDR_OF(create_umem_in, in, umem);
727 	MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
728 	MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages);
729 
730 	pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]);
731 	mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW);
732 
733 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
734 	if (err) {
735 		mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err);
736 		goto err_cmd;
737 	}
738 
739 	kfree(in);
740 	umem->id = MLX5_GET(create_umem_out, out, umem_id);
741 
742 	return 0;
743 
744 err_cmd:
745 	kfree(in);
746 err_in:
747 	umem_frag_buf_free(ndev, umem);
748 	return err;
749 }
750 
751 static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
752 {
753 	u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {};
754 	u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {};
755 	struct mlx5_vdpa_umem *umem;
756 
757 	switch (num) {
758 	case 1:
759 		umem = &mvq->umem1;
760 		break;
761 	case 2:
762 		umem = &mvq->umem2;
763 		break;
764 	case 3:
765 		umem = &mvq->umem3;
766 		break;
767 	}
768 
769 	MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM);
770 	MLX5_SET(destroy_umem_in, in, umem_id, umem->id);
771 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
772 		return;
773 
774 	umem_frag_buf_free(ndev, umem);
775 }
776 
/* Create umems 1 to 3 of @mvq.
 *
 * Returns 0 on success. If one creation fails, the umems created before it
 * are destroyed in reverse order and the error is returned.
 */
static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int num;
	int err;

	for (num = 1; num <= 3; num++) {
		err = create_umem(ndev, mvq, num);
		if (!err)
			continue;

		/* Unwind whatever was created before the failing one. */
		while (--num > 0)
			umem_destroy(ndev, mvq, num);

		return err;
	}

	return 0;
}
795 
/* Destroy the three umems of @mvq, last one first. */
static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int num = 3;

	while (num > 0) {
		umem_destroy(ndev, mvq, num);
		num--;
	}
}
803 
804 static int get_queue_type(struct mlx5_vdpa_net *ndev)
805 {
806 	u32 type_mask;
807 
808 	type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type);
809 
810 	/* prefer split queue */
811 	if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)
812 		return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT;
813 
814 	WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED));
815 
816 	return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED;
817 }
818 
819 static bool vq_is_tx(u16 idx)
820 {
821 	return idx % 2;
822 }
823 
824 static u16 get_features_12_3(u64 features)
825 {
826 	return (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << 9) |
827 	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << 8) |
828 	       (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << 7) |
829 	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_CSUM)) << 6);
830 }
831 
832 static bool counters_supported(const struct mlx5_vdpa_dev *mvdev)
833 {
834 	return MLX5_CAP_GEN_64(mvdev->mdev, general_obj_types) &
835 	       BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
836 }
837 
838 static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
839 {
840 	int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
841 	u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
842 	void *obj_context;
843 	void *cmd_hdr;
844 	void *vq_ctx;
845 	void *in;
846 	int err;
847 
848 	err = umems_create(ndev, mvq);
849 	if (err)
850 		return err;
851 
852 	in = kzalloc(inlen, GFP_KERNEL);
853 	if (!in) {
854 		err = -ENOMEM;
855 		goto err_alloc;
856 	}
857 
858 	cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);
859 
860 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
861 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
862 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
863 
864 	obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);
865 	MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
866 	MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
867 	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
868 		 get_features_12_3(ndev->mvdev.actual_features));
869 	vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
870 	MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));
871 
872 	if (vq_is_tx(mvq->index))
873 		MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);
874 
875 	MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
876 	MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
877 	MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
878 	MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
879 	MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
880 		 !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
881 	MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
882 	MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
883 	MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
884 	MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, ndev->mvdev.mr.mkey);
885 	MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
886 	MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
887 	MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
888 	MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size);
889 	MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
890 	MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
891 	MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
892 	if (counters_supported(&ndev->mvdev))
893 		MLX5_SET(virtio_q, vq_ctx, counter_set_id, mvq->counter_set_id);
894 
895 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
896 	if (err)
897 		goto err_cmd;
898 
899 	mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
900 	kfree(in);
901 	mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
902 
903 	return 0;
904 
905 err_cmd:
906 	kfree(in);
907 err_alloc:
908 	umems_destroy(ndev, mvq);
909 	return err;
910 }
911 
912 static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
913 {
914 	u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {};
915 	u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {};
916 
917 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode,
918 		 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
919 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id);
920 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid);
921 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type,
922 		 MLX5_OBJ_TYPE_VIRTIO_NET_Q);
923 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) {
924 		mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
925 		return;
926 	}
927 	mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
928 	umems_destroy(ndev, mvq);
929 }
930 
931 static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
932 {
933 	return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn;
934 }
935 
936 static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
937 {
938 	return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn;
939 }
940 
941 static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out,
942 			int *outlen, u32 qpn, u32 rqpn)
943 {
944 	void *qpc;
945 	void *pp;
946 
947 	switch (cmd) {
948 	case MLX5_CMD_OP_2RST_QP:
949 		*inlen = MLX5_ST_SZ_BYTES(qp_2rst_in);
950 		*outlen = MLX5_ST_SZ_BYTES(qp_2rst_out);
951 		*in = kzalloc(*inlen, GFP_KERNEL);
952 		*out = kzalloc(*outlen, GFP_KERNEL);
953 		if (!*in || !*out)
954 			goto outerr;
955 
956 		MLX5_SET(qp_2rst_in, *in, opcode, cmd);
957 		MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid);
958 		MLX5_SET(qp_2rst_in, *in, qpn, qpn);
959 		break;
960 	case MLX5_CMD_OP_RST2INIT_QP:
961 		*inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in);
962 		*outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out);
963 		*in = kzalloc(*inlen, GFP_KERNEL);
964 		*out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL);
965 		if (!*in || !*out)
966 			goto outerr;
967 
968 		MLX5_SET(rst2init_qp_in, *in, opcode, cmd);
969 		MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid);
970 		MLX5_SET(rst2init_qp_in, *in, qpn, qpn);
971 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
972 		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
973 		MLX5_SET(qpc, qpc, rwe, 1);
974 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
975 		MLX5_SET(ads, pp, vhca_port_num, 1);
976 		break;
977 	case MLX5_CMD_OP_INIT2RTR_QP:
978 		*inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in);
979 		*outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out);
980 		*in = kzalloc(*inlen, GFP_KERNEL);
981 		*out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL);
982 		if (!*in || !*out)
983 			goto outerr;
984 
985 		MLX5_SET(init2rtr_qp_in, *in, opcode, cmd);
986 		MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid);
987 		MLX5_SET(init2rtr_qp_in, *in, qpn, qpn);
988 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
989 		MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
990 		MLX5_SET(qpc, qpc, log_msg_max, 30);
991 		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
992 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
993 		MLX5_SET(ads, pp, fl, 1);
994 		break;
995 	case MLX5_CMD_OP_RTR2RTS_QP:
996 		*inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in);
997 		*outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out);
998 		*in = kzalloc(*inlen, GFP_KERNEL);
999 		*out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL);
1000 		if (!*in || !*out)
1001 			goto outerr;
1002 
1003 		MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd);
1004 		MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid);
1005 		MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn);
1006 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
1007 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1008 		MLX5_SET(ads, pp, ack_timeout, 14);
1009 		MLX5_SET(qpc, qpc, retry_count, 7);
1010 		MLX5_SET(qpc, qpc, rnr_retry, 7);
1011 		break;
1012 	default:
1013 		goto outerr_nullify;
1014 	}
1015 
1016 	return;
1017 
1018 outerr:
1019 	kfree(*in);
1020 	kfree(*out);
1021 outerr_nullify:
1022 	*in = NULL;
1023 	*out = NULL;
1024 }
1025 
/* Release a pair of command buffers obtained from alloc_inout(). */
static void free_inout(void *in, void *out)
{
	kfree(out);
	kfree(in);
}
1031 
1032 /* Two QPs are used by each virtqueue. One is used by the driver and one by
1033  * firmware. The fw argument indicates whether the subjected QP is the one used
1034  * by firmware.
1035  */
1036 static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd)
1037 {
1038 	int outlen;
1039 	int inlen;
1040 	void *out;
1041 	void *in;
1042 	int err;
1043 
1044 	alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw));
1045 	if (!in || !out)
1046 		return -ENOMEM;
1047 
1048 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen);
1049 	free_inout(in, out);
1050 	return err;
1051 }
1052 
1053 static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1054 {
1055 	int err;
1056 
1057 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP);
1058 	if (err)
1059 		return err;
1060 
1061 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP);
1062 	if (err)
1063 		return err;
1064 
1065 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP);
1066 	if (err)
1067 		return err;
1068 
1069 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP);
1070 	if (err)
1071 		return err;
1072 
1073 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP);
1074 	if (err)
1075 		return err;
1076 
1077 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP);
1078 	if (err)
1079 		return err;
1080 
1081 	return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP);
1082 }
1083 
/* Virtqueue object attributes as read back by query_virtqueue(). */
struct mlx5_virtq_attr {
	u8 state;		/* "state" field of the queried object */
	u16 available_index;	/* "hw_available_index" field of the queried object */
	u16 used_index;		/* "hw_used_index" field of the queried object */
};
1089 
1090 static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
1091 			   struct mlx5_virtq_attr *attr)
1092 {
1093 	int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out);
1094 	u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {};
1095 	void *out;
1096 	void *obj_context;
1097 	void *cmd_hdr;
1098 	int err;
1099 
1100 	out = kzalloc(outlen, GFP_KERNEL);
1101 	if (!out)
1102 		return -ENOMEM;
1103 
1104 	cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1105 
1106 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
1107 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1108 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1109 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1110 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen);
1111 	if (err)
1112 		goto err_cmd;
1113 
1114 	obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context);
1115 	memset(attr, 0, sizeof(*attr));
1116 	attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
1117 	attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);
1118 	attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index);
1119 	kfree(out);
1120 	return 0;
1121 
1122 err_cmd:
1123 	kfree(out);
1124 	return err;
1125 }
1126 
1127 static bool is_valid_state_change(int oldstate, int newstate)
1128 {
1129 	switch (oldstate) {
1130 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
1131 		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
1132 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
1133 		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
1134 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
1135 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
1136 	default:
1137 		return false;
1138 	}
1139 }
1140 
1141 static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
1142 {
1143 	int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
1144 	u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {};
1145 	void *obj_context;
1146 	void *cmd_hdr;
1147 	void *in;
1148 	int err;
1149 
1150 	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
1151 		return 0;
1152 
1153 	if (!is_valid_state_change(mvq->fw_state, state))
1154 		return -EINVAL;
1155 
1156 	in = kzalloc(inlen, GFP_KERNEL);
1157 	if (!in)
1158 		return -ENOMEM;
1159 
1160 	cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1161 
1162 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
1163 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1164 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1165 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1166 
1167 	obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context);
1168 	MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select,
1169 		   MLX5_VIRTQ_MODIFY_MASK_STATE);
1170 	MLX5_SET(virtio_net_q_object, obj_context, state, state);
1171 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
1172 	kfree(in);
1173 	if (!err)
1174 		mvq->fw_state = state;
1175 
1176 	return err;
1177 }
1178 
1179 static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1180 {
1181 	u32 in[MLX5_ST_SZ_DW(create_virtio_q_counters_in)] = {};
1182 	u32 out[MLX5_ST_SZ_DW(create_virtio_q_counters_out)] = {};
1183 	void *cmd_hdr;
1184 	int err;
1185 
1186 	if (!counters_supported(&ndev->mvdev))
1187 		return 0;
1188 
1189 	cmd_hdr = MLX5_ADDR_OF(create_virtio_q_counters_in, in, hdr);
1190 
1191 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
1192 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
1193 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1194 
1195 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
1196 	if (err)
1197 		return err;
1198 
1199 	mvq->counter_set_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
1200 
1201 	return 0;
1202 }
1203 
1204 static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1205 {
1206 	u32 in[MLX5_ST_SZ_DW(destroy_virtio_q_counters_in)] = {};
1207 	u32 out[MLX5_ST_SZ_DW(destroy_virtio_q_counters_out)] = {};
1208 
1209 	if (!counters_supported(&ndev->mvdev))
1210 		return;
1211 
1212 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
1213 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_id, mvq->counter_set_id);
1214 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.uid, ndev->mvdev.res.uid);
1215 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
1216 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
1217 		mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id);
1218 }
1219 
1220 static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1221 {
1222 	u16 idx = mvq->index;
1223 	int err;
1224 
1225 	if (!mvq->num_ent)
1226 		return 0;
1227 
1228 	if (mvq->initialized)
1229 		return 0;
1230 
1231 	err = cq_create(ndev, idx, mvq->num_ent);
1232 	if (err)
1233 		return err;
1234 
1235 	err = qp_create(ndev, mvq, &mvq->fwqp);
1236 	if (err)
1237 		goto err_fwqp;
1238 
1239 	err = qp_create(ndev, mvq, &mvq->vqqp);
1240 	if (err)
1241 		goto err_vqqp;
1242 
1243 	err = connect_qps(ndev, mvq);
1244 	if (err)
1245 		goto err_connect;
1246 
1247 	err = counter_set_alloc(ndev, mvq);
1248 	if (err)
1249 		goto err_counter;
1250 
1251 	err = create_virtqueue(ndev, mvq);
1252 	if (err)
1253 		goto err_connect;
1254 
1255 	if (mvq->ready) {
1256 		err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
1257 		if (err) {
1258 			mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n",
1259 				       idx, err);
1260 			goto err_connect;
1261 		}
1262 	}
1263 
1264 	mvq->initialized = true;
1265 	return 0;
1266 
1267 err_connect:
1268 	counter_set_dealloc(ndev, mvq);
1269 err_counter:
1270 	qp_destroy(ndev, &mvq->vqqp);
1271 err_vqqp:
1272 	qp_destroy(ndev, &mvq->fwqp);
1273 err_fwqp:
1274 	cq_destroy(ndev, idx);
1275 	return err;
1276 }
1277 
1278 static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1279 {
1280 	struct mlx5_virtq_attr attr;
1281 
1282 	if (!mvq->initialized)
1283 		return;
1284 
1285 	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
1286 		return;
1287 
1288 	if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND))
1289 		mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n");
1290 
1291 	if (query_virtqueue(ndev, mvq, &attr)) {
1292 		mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n");
1293 		return;
1294 	}
1295 	mvq->avail_idx = attr.available_index;
1296 	mvq->used_idx = attr.used_index;
1297 }
1298 
1299 static void suspend_vqs(struct mlx5_vdpa_net *ndev)
1300 {
1301 	int i;
1302 
1303 	for (i = 0; i < ndev->mvdev.max_vqs; i++)
1304 		suspend_vq(ndev, &ndev->vqs[i]);
1305 }
1306 
1307 static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1308 {
1309 	if (!mvq->initialized)
1310 		return;
1311 
1312 	suspend_vq(ndev, mvq);
1313 	destroy_virtqueue(ndev, mvq);
1314 	counter_set_dealloc(ndev, mvq);
1315 	qp_destroy(ndev, &mvq->vqqp);
1316 	qp_destroy(ndev, &mvq->fwqp);
1317 	cq_destroy(ndev, mvq->index);
1318 	mvq->initialized = false;
1319 }
1320 
1321 static int create_rqt(struct mlx5_vdpa_net *ndev)
1322 {
1323 	__be32 *list;
1324 	void *rqtc;
1325 	int inlen;
1326 	void *in;
1327 	int i, j;
1328 	int err;
1329 
1330 	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + ndev->rqt_size * MLX5_ST_SZ_BYTES(rq_num);
1331 	in = kzalloc(inlen, GFP_KERNEL);
1332 	if (!in)
1333 		return -ENOMEM;
1334 
1335 	MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid);
1336 	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
1337 
1338 	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1339 	MLX5_SET(rqtc, rqtc, rqt_max_size, ndev->rqt_size);
1340 	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1341 	for (i = 0, j = 0; i < ndev->rqt_size; i++, j += 2)
1342 		list[i] = cpu_to_be32(ndev->vqs[j % ndev->cur_num_vqs].virtq_id);
1343 
1344 	MLX5_SET(rqtc, rqtc, rqt_actual_size, ndev->rqt_size);
1345 	err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
1346 	kfree(in);
1347 	if (err)
1348 		return err;
1349 
1350 	return 0;
1351 }
1352 
1353 #define MLX5_MODIFY_RQT_NUM_RQS ((u64)1)
1354 
1355 static int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
1356 {
1357 	__be32 *list;
1358 	void *rqtc;
1359 	int inlen;
1360 	void *in;
1361 	int i, j;
1362 	int err;
1363 
1364 	inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + ndev->rqt_size * MLX5_ST_SZ_BYTES(rq_num);
1365 	in = kzalloc(inlen, GFP_KERNEL);
1366 	if (!in)
1367 		return -ENOMEM;
1368 
1369 	MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid);
1370 	MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS);
1371 	rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
1372 	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1373 
1374 	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1375 	for (i = 0, j = 0; i < ndev->rqt_size; i++, j += 2)
1376 		list[i] = cpu_to_be32(ndev->vqs[j % num].virtq_id);
1377 
1378 	MLX5_SET(rqtc, rqtc, rqt_actual_size, ndev->rqt_size);
1379 	err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn);
1380 	kfree(in);
1381 	if (err)
1382 		return err;
1383 
1384 	return 0;
1385 }
1386 
1387 static void destroy_rqt(struct mlx5_vdpa_net *ndev)
1388 {
1389 	mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
1390 }
1391 
1392 static int create_tir(struct mlx5_vdpa_net *ndev)
1393 {
1394 #define HASH_IP_L4PORTS                                                                            \
1395 	(MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT |  \
1396 	 MLX5_HASH_FIELD_SEL_L4_DPORT)
1397 	static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
1398 						   0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
1399 						   0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
1400 						   0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
1401 						   0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a };
1402 	void *rss_key;
1403 	void *outer;
1404 	void *tirc;
1405 	void *in;
1406 	int err;
1407 
1408 	in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL);
1409 	if (!in)
1410 		return -ENOMEM;
1411 
1412 	MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid);
1413 	tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
1414 	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
1415 
1416 	MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
1417 	MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
1418 	rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
1419 	memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key));
1420 
1421 	outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
1422 	MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4);
1423 	MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP);
1424 	MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS);
1425 
1426 	MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn);
1427 	MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn);
1428 
1429 	err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn);
1430 	kfree(in);
1431 	return err;
1432 }
1433 
1434 static void destroy_tir(struct mlx5_vdpa_net *ndev)
1435 {
1436 	mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn);
1437 }
1438 
/* Sizing of the auto-grouped RX flow table created by setup_steering():
 * maximum number of flow table entries and maximum number of flow groups.
 */
#define MAX_STEERING_ENT 0x8000
#define MAX_STEERING_GROUPS 2
1441 
1442 static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac,
1443 					u16 vid, bool tagged,
1444 					struct mlx5_flow_handle **ucast,
1445 					struct mlx5_flow_handle **mcast)
1446 {
1447 	struct mlx5_flow_destination dest = {};
1448 	struct mlx5_flow_act flow_act = {};
1449 	struct mlx5_flow_handle *rule;
1450 	struct mlx5_flow_spec *spec;
1451 	void *headers_c;
1452 	void *headers_v;
1453 	u8 *dmac_c;
1454 	u8 *dmac_v;
1455 	int err;
1456 
1457 	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1458 	if (!spec)
1459 		return -ENOMEM;
1460 
1461 	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
1462 	headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
1463 	headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
1464 	dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16);
1465 	dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16);
1466 	eth_broadcast_addr(dmac_c);
1467 	ether_addr_copy(dmac_v, mac);
1468 	MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
1469 	if (tagged) {
1470 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);
1471 		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid);
1472 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, vid);
1473 	}
1474 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1475 	dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1476 	dest.tir_num = ndev->res.tirn;
1477 	rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, &dest, 1);
1478 	if (IS_ERR(rule))
1479 		return PTR_ERR(rule);
1480 
1481 	*ucast = rule;
1482 
1483 	memset(dmac_c, 0, ETH_ALEN);
1484 	memset(dmac_v, 0, ETH_ALEN);
1485 	dmac_c[0] = 1;
1486 	dmac_v[0] = 1;
1487 	rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, &dest, 1);
1488 	kvfree(spec);
1489 	if (IS_ERR(rule)) {
1490 		err = PTR_ERR(rule);
1491 		goto err_mcast;
1492 	}
1493 
1494 	*mcast = rule;
1495 	return 0;
1496 
1497 err_mcast:
1498 	mlx5_del_flow_rules(*ucast);
1499 	return err;
1500 }
1501 
/* Remove a unicast/multicast rule pair, unicast first. @ndev is not used. */
static void mlx5_vdpa_del_mac_vlan_rules(struct mlx5_vdpa_net *ndev,
					 struct mlx5_flow_handle *ucast,
					 struct mlx5_flow_handle *mcast)
{
	struct mlx5_flow_handle *rules[] = { ucast, mcast };
	int i;

	for (i = 0; i < ARRAY_SIZE(rules); i++)
		mlx5_del_flow_rules(rules[i]);
}
1509 
1510 static u64 search_val(u8 *mac, u16 vlan, bool tagged)
1511 {
1512 	u64 val;
1513 
1514 	if (!tagged)
1515 		vlan = MLX5V_UNTAGGED;
1516 
1517 	val = (u64)vlan << 48 |
1518 	      (u64)mac[0] << 40 |
1519 	      (u64)mac[1] << 32 |
1520 	      (u64)mac[2] << 24 |
1521 	      (u64)mac[3] << 16 |
1522 	      (u64)mac[4] << 8 |
1523 	      (u64)mac[5];
1524 
1525 	return val;
1526 }
1527 
1528 static struct macvlan_node *mac_vlan_lookup(struct mlx5_vdpa_net *ndev, u64 value)
1529 {
1530 	struct macvlan_node *pos;
1531 	u32 idx;
1532 
1533 	idx = hash_64(value, 8); // tbd 8
1534 	hlist_for_each_entry(pos, &ndev->macvlan_hash[idx], hlist) {
1535 		if (pos->macvlan == value)
1536 			return pos;
1537 	}
1538 	return NULL;
1539 }
1540 
1541 static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged) // vlan -> vid
1542 {
1543 	struct macvlan_node *ptr;
1544 	u64 val;
1545 	u32 idx;
1546 	int err;
1547 
1548 	val = search_val(mac, vlan, tagged);
1549 	if (mac_vlan_lookup(ndev, val))
1550 		return -EEXIST;
1551 
1552 	ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);
1553 	if (!ptr)
1554 		return -ENOMEM;
1555 
1556 	err = mlx5_vdpa_add_mac_vlan_rules(ndev, ndev->config.mac, vlan, tagged,
1557 					   &ptr->ucast_rule, &ptr->mcast_rule);
1558 	if (err)
1559 		goto err_add;
1560 
1561 	ptr->macvlan = val;
1562 	idx = hash_64(val, 8);
1563 	hlist_add_head(&ptr->hlist, &ndev->macvlan_hash[idx]);
1564 	return 0;
1565 
1566 err_add:
1567 	kfree(ptr);
1568 	return err;
1569 }
1570 
1571 static void mac_vlan_del(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged)
1572 {
1573 	struct macvlan_node *ptr;
1574 
1575 	ptr = mac_vlan_lookup(ndev, search_val(mac, vlan, tagged));
1576 	if (!ptr)
1577 		return;
1578 
1579 	hlist_del(&ptr->hlist);
1580 	mlx5_vdpa_del_mac_vlan_rules(ndev, ptr->ucast_rule, ptr->mcast_rule);
1581 	kfree(ptr);
1582 }
1583 
1584 static void clear_mac_vlan_table(struct mlx5_vdpa_net *ndev)
1585 {
1586 	struct macvlan_node *pos;
1587 	struct hlist_node *n;
1588 	int i;
1589 
1590 	for (i = 0; i < MLX5V_MACVLAN_SIZE; i++) {
1591 		hlist_for_each_entry_safe(pos, n, &ndev->macvlan_hash[i], hlist) {
1592 			hlist_del(&pos->hlist);
1593 			mlx5_vdpa_del_mac_vlan_rules(ndev, pos->ucast_rule, pos->mcast_rule);
1594 			kfree(pos);
1595 		}
1596 	}
1597 }
1598 
1599 static int setup_steering(struct mlx5_vdpa_net *ndev)
1600 {
1601 	struct mlx5_flow_table_attr ft_attr = {};
1602 	struct mlx5_flow_namespace *ns;
1603 	int err;
1604 
1605 	ft_attr.max_fte = MAX_STEERING_ENT;
1606 	ft_attr.autogroup.max_num_groups = MAX_STEERING_GROUPS;
1607 
1608 	ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS);
1609 	if (!ns) {
1610 		mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n");
1611 		return -EOPNOTSUPP;
1612 	}
1613 
1614 	ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
1615 	if (IS_ERR(ndev->rxft)) {
1616 		mlx5_vdpa_warn(&ndev->mvdev, "failed to create flow table\n");
1617 		return PTR_ERR(ndev->rxft);
1618 	}
1619 
1620 	err = mac_vlan_add(ndev, ndev->config.mac, 0, false);
1621 	if (err)
1622 		goto err_add;
1623 
1624 	return 0;
1625 
1626 err_add:
1627 	mlx5_destroy_flow_table(ndev->rxft);
1628 	return err;
1629 }
1630 
1631 static void teardown_steering(struct mlx5_vdpa_net *ndev)
1632 {
1633 	clear_mac_vlan_table(ndev);
1634 	mlx5_destroy_flow_table(ndev->rxft);
1635 }
1636 
1637 static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1638 {
1639 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1640 	struct mlx5_control_vq *cvq = &mvdev->cvq;
1641 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1642 	struct mlx5_core_dev *pfmdev;
1643 	size_t read;
1644 	u8 mac[ETH_ALEN], mac_back[ETH_ALEN];
1645 
1646 	pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
1647 	switch (cmd) {
1648 	case VIRTIO_NET_CTRL_MAC_ADDR_SET:
1649 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN);
1650 		if (read != ETH_ALEN)
1651 			break;
1652 
1653 		if (!memcmp(ndev->config.mac, mac, 6)) {
1654 			status = VIRTIO_NET_OK;
1655 			break;
1656 		}
1657 
1658 		if (is_zero_ether_addr(mac))
1659 			break;
1660 
1661 		if (!is_zero_ether_addr(ndev->config.mac)) {
1662 			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
1663 				mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
1664 					       ndev->config.mac);
1665 				break;
1666 			}
1667 		}
1668 
1669 		if (mlx5_mpfs_add_mac(pfmdev, mac)) {
1670 			mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
1671 				       mac);
1672 			break;
1673 		}
1674 
1675 		/* backup the original mac address so that if failed to add the forward rules
1676 		 * we could restore it
1677 		 */
1678 		memcpy(mac_back, ndev->config.mac, ETH_ALEN);
1679 
1680 		memcpy(ndev->config.mac, mac, ETH_ALEN);
1681 
1682 		/* Need recreate the flow table entry, so that the packet could forward back
1683 		 */
1684 		mac_vlan_del(ndev, ndev->config.mac, 0, false);
1685 
1686 		if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) {
1687 			mlx5_vdpa_warn(mvdev, "failed to insert forward rules, try to restore\n");
1688 
1689 			/* Although it hardly run here, we still need double check */
1690 			if (is_zero_ether_addr(mac_back)) {
1691 				mlx5_vdpa_warn(mvdev, "restore mac failed: Original MAC is zero\n");
1692 				break;
1693 			}
1694 
1695 			/* Try to restore original mac address to MFPS table, and try to restore
1696 			 * the forward rule entry.
1697 			 */
1698 			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
1699 				mlx5_vdpa_warn(mvdev, "restore mac failed: delete MAC %pM from MPFS table failed\n",
1700 					       ndev->config.mac);
1701 			}
1702 
1703 			if (mlx5_mpfs_add_mac(pfmdev, mac_back)) {
1704 				mlx5_vdpa_warn(mvdev, "restore mac failed: insert old MAC %pM into MPFS table failed\n",
1705 					       mac_back);
1706 			}
1707 
1708 			memcpy(ndev->config.mac, mac_back, ETH_ALEN);
1709 
1710 			if (mac_vlan_add(ndev, ndev->config.mac, 0, false))
1711 				mlx5_vdpa_warn(mvdev, "restore forward rules failed: insert forward rules failed\n");
1712 
1713 			break;
1714 		}
1715 
1716 		status = VIRTIO_NET_OK;
1717 		break;
1718 
1719 	default:
1720 		break;
1721 	}
1722 
1723 	return status;
1724 }
1725 
1726 static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps)
1727 {
1728 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1729 	int cur_qps = ndev->cur_num_vqs / 2;
1730 	int err;
1731 	int i;
1732 
1733 	if (cur_qps > newqps) {
1734 		err = modify_rqt(ndev, 2 * newqps);
1735 		if (err)
1736 			return err;
1737 
1738 		for (i = ndev->cur_num_vqs - 1; i >= 2 * newqps; i--)
1739 			teardown_vq(ndev, &ndev->vqs[i]);
1740 
1741 		ndev->cur_num_vqs = 2 * newqps;
1742 	} else {
1743 		ndev->cur_num_vqs = 2 * newqps;
1744 		for (i = cur_qps * 2; i < 2 * newqps; i++) {
1745 			err = setup_vq(ndev, &ndev->vqs[i]);
1746 			if (err)
1747 				goto clean_added;
1748 		}
1749 		err = modify_rqt(ndev, 2 * newqps);
1750 		if (err)
1751 			goto clean_added;
1752 	}
1753 	return 0;
1754 
1755 clean_added:
1756 	for (--i; i >= 2 * cur_qps; --i)
1757 		teardown_vq(ndev, &ndev->vqs[i]);
1758 
1759 	ndev->cur_num_vqs = 2 * cur_qps;
1760 
1761 	return err;
1762 }
1763 
1764 static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1765 {
1766 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1767 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1768 	struct mlx5_control_vq *cvq = &mvdev->cvq;
1769 	struct virtio_net_ctrl_mq mq;
1770 	size_t read;
1771 	u16 newqps;
1772 
1773 	switch (cmd) {
1774 	case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
1775 		/* This mq feature check aligns with pre-existing userspace
1776 		 * implementation.
1777 		 *
1778 		 * Without it, an untrusted driver could fake a multiqueue config
1779 		 * request down to a non-mq device that may cause kernel to
1780 		 * panic due to uninitialized resources for extra vqs. Even with
1781 		 * a well behaving guest driver, it is not expected to allow
1782 		 * changing the number of vqs on a non-mq device.
1783 		 */
1784 		if (!MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ))
1785 			break;
1786 
1787 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq));
1788 		if (read != sizeof(mq))
1789 			break;
1790 
1791 		newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs);
1792 		if (newqps < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1793 		    newqps > ndev->rqt_size)
1794 			break;
1795 
1796 		if (ndev->cur_num_vqs == 2 * newqps) {
1797 			status = VIRTIO_NET_OK;
1798 			break;
1799 		}
1800 
1801 		if (!change_num_qps(mvdev, newqps))
1802 			status = VIRTIO_NET_OK;
1803 
1804 		break;
1805 	default:
1806 		break;
1807 	}
1808 
1809 	return status;
1810 }
1811 
1812 static virtio_net_ctrl_ack handle_ctrl_vlan(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1813 {
1814 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1815 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1816 	struct mlx5_control_vq *cvq = &mvdev->cvq;
1817 	__virtio16 vlan;
1818 	size_t read;
1819 	u16 id;
1820 
1821 	switch (cmd) {
1822 	case VIRTIO_NET_CTRL_VLAN_ADD:
1823 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
1824 		if (read != sizeof(vlan))
1825 			break;
1826 
1827 		id = mlx5vdpa16_to_cpu(mvdev, vlan);
1828 		if (mac_vlan_add(ndev, ndev->config.mac, id, true))
1829 			break;
1830 
1831 		status = VIRTIO_NET_OK;
1832 		break;
1833 	case VIRTIO_NET_CTRL_VLAN_DEL:
1834 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
1835 		if (read != sizeof(vlan))
1836 			break;
1837 
1838 		id = mlx5vdpa16_to_cpu(mvdev, vlan);
1839 		mac_vlan_del(ndev, ndev->config.mac, id, true);
1840 		status = VIRTIO_NET_OK;
1841 		break;
1842 	default:
1843 		break;
1844 	}
1845 
1846 	return status;
1847 }
1848 
1849 static void mlx5_cvq_kick_handler(struct work_struct *work)
1850 {
1851 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1852 	struct virtio_net_ctrl_hdr ctrl;
1853 	struct mlx5_vdpa_wq_ent *wqent;
1854 	struct mlx5_vdpa_dev *mvdev;
1855 	struct mlx5_control_vq *cvq;
1856 	struct mlx5_vdpa_net *ndev;
1857 	size_t read, write;
1858 	int err;
1859 
1860 	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
1861 	mvdev = wqent->mvdev;
1862 	ndev = to_mlx5_vdpa_ndev(mvdev);
1863 	cvq = &mvdev->cvq;
1864 
1865 	down_write(&ndev->reslock);
1866 
1867 	if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
1868 		goto out;
1869 
1870 	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
1871 		goto out;
1872 
1873 	if (!cvq->ready)
1874 		goto out;
1875 
1876 	while (true) {
1877 		err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head,
1878 					   GFP_ATOMIC);
1879 		if (err <= 0)
1880 			break;
1881 
1882 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl));
1883 		if (read != sizeof(ctrl))
1884 			break;
1885 
1886 		cvq->received_desc++;
1887 		switch (ctrl.class) {
1888 		case VIRTIO_NET_CTRL_MAC:
1889 			status = handle_ctrl_mac(mvdev, ctrl.cmd);
1890 			break;
1891 		case VIRTIO_NET_CTRL_MQ:
1892 			status = handle_ctrl_mq(mvdev, ctrl.cmd);
1893 			break;
1894 		case VIRTIO_NET_CTRL_VLAN:
1895 			status = handle_ctrl_vlan(mvdev, ctrl.cmd);
1896 			break;
1897 		default:
1898 			break;
1899 		}
1900 
1901 		/* Make sure data is written before advancing index */
1902 		smp_wmb();
1903 
1904 		write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status));
1905 		vringh_complete_iotlb(&cvq->vring, cvq->head, write);
1906 		vringh_kiov_cleanup(&cvq->riov);
1907 		vringh_kiov_cleanup(&cvq->wiov);
1908 
1909 		if (vringh_need_notify_iotlb(&cvq->vring))
1910 			vringh_notify(&cvq->vring);
1911 
1912 		cvq->completed_desc++;
1913 		queue_work(mvdev->wq, &wqent->work);
1914 		break;
1915 	}
1916 
1917 out:
1918 	up_write(&ndev->reslock);
1919 }
1920 
1921 static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
1922 {
1923 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1924 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1925 	struct mlx5_vdpa_virtqueue *mvq;
1926 
1927 	if (!is_index_valid(mvdev, idx))
1928 		return;
1929 
1930 	if (unlikely(is_ctrl_vq_idx(mvdev, idx))) {
1931 		if (!mvdev->wq || !mvdev->cvq.ready)
1932 			return;
1933 
1934 		queue_work(mvdev->wq, &ndev->cvq_ent.work);
1935 		return;
1936 	}
1937 
1938 	mvq = &ndev->vqs[idx];
1939 	if (unlikely(!mvq->ready))
1940 		return;
1941 
1942 	iowrite16(idx, ndev->mvdev.res.kick_addr);
1943 }
1944 
1945 static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area,
1946 				    u64 driver_area, u64 device_area)
1947 {
1948 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1949 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1950 	struct mlx5_vdpa_virtqueue *mvq;
1951 
1952 	if (!is_index_valid(mvdev, idx))
1953 		return -EINVAL;
1954 
1955 	if (is_ctrl_vq_idx(mvdev, idx)) {
1956 		mvdev->cvq.desc_addr = desc_area;
1957 		mvdev->cvq.device_addr = device_area;
1958 		mvdev->cvq.driver_addr = driver_area;
1959 		return 0;
1960 	}
1961 
1962 	mvq = &ndev->vqs[idx];
1963 	mvq->desc_addr = desc_area;
1964 	mvq->device_addr = device_area;
1965 	mvq->driver_addr = driver_area;
1966 	return 0;
1967 }
1968 
1969 static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
1970 {
1971 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1972 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1973 	struct mlx5_vdpa_virtqueue *mvq;
1974 
1975 	if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
1976 		return;
1977 
1978 	mvq = &ndev->vqs[idx];
1979 	mvq->num_ent = num;
1980 }
1981 
1982 static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb)
1983 {
1984 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1985 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1986 
1987 	ndev->event_cbs[idx] = *cb;
1988 	if (is_ctrl_vq_idx(mvdev, idx))
1989 		mvdev->cvq.event_cb = *cb;
1990 }
1991 
1992 static void mlx5_cvq_notify(struct vringh *vring)
1993 {
1994 	struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring);
1995 
1996 	if (!cvq->event_cb.callback)
1997 		return;
1998 
1999 	cvq->event_cb.callback(cvq->event_cb.private);
2000 }
2001 
2002 static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready)
2003 {
2004 	struct mlx5_control_vq *cvq = &mvdev->cvq;
2005 
2006 	cvq->ready = ready;
2007 	if (!ready)
2008 		return;
2009 
2010 	cvq->vring.notify = mlx5_cvq_notify;
2011 }
2012 
2013 static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
2014 {
2015 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2016 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2017 	struct mlx5_vdpa_virtqueue *mvq;
2018 	int err;
2019 
2020 	if (!mvdev->actual_features)
2021 		return;
2022 
2023 	if (!is_index_valid(mvdev, idx))
2024 		return;
2025 
2026 	if (is_ctrl_vq_idx(mvdev, idx)) {
2027 		set_cvq_ready(mvdev, ready);
2028 		return;
2029 	}
2030 
2031 	mvq = &ndev->vqs[idx];
2032 	if (!ready) {
2033 		suspend_vq(ndev, mvq);
2034 	} else {
2035 		err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
2036 		if (err) {
2037 			mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
2038 			ready = false;
2039 		}
2040 	}
2041 
2042 
2043 	mvq->ready = ready;
2044 }
2045 
2046 static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
2047 {
2048 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2049 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2050 
2051 	if (!is_index_valid(mvdev, idx))
2052 		return false;
2053 
2054 	if (is_ctrl_vq_idx(mvdev, idx))
2055 		return mvdev->cvq.ready;
2056 
2057 	return ndev->vqs[idx].ready;
2058 }
2059 
2060 static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
2061 				  const struct vdpa_vq_state *state)
2062 {
2063 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2064 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2065 	struct mlx5_vdpa_virtqueue *mvq;
2066 
2067 	if (!is_index_valid(mvdev, idx))
2068 		return -EINVAL;
2069 
2070 	if (is_ctrl_vq_idx(mvdev, idx)) {
2071 		mvdev->cvq.vring.last_avail_idx = state->split.avail_index;
2072 		return 0;
2073 	}
2074 
2075 	mvq = &ndev->vqs[idx];
2076 	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
2077 		mlx5_vdpa_warn(mvdev, "can't modify available index\n");
2078 		return -EINVAL;
2079 	}
2080 
2081 	mvq->used_idx = state->split.avail_index;
2082 	mvq->avail_idx = state->split.avail_index;
2083 	return 0;
2084 }
2085 
2086 static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state)
2087 {
2088 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2089 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2090 	struct mlx5_vdpa_virtqueue *mvq;
2091 	struct mlx5_virtq_attr attr;
2092 	int err;
2093 
2094 	if (!is_index_valid(mvdev, idx))
2095 		return -EINVAL;
2096 
2097 	if (is_ctrl_vq_idx(mvdev, idx)) {
2098 		state->split.avail_index = mvdev->cvq.vring.last_avail_idx;
2099 		return 0;
2100 	}
2101 
2102 	mvq = &ndev->vqs[idx];
2103 	/* If the virtq object was destroyed, use the value saved at
2104 	 * the last minute of suspend_vq. This caters for userspace
2105 	 * that cares about emulating the index after vq is stopped.
2106 	 */
2107 	if (!mvq->initialized) {
2108 		/* Firmware returns a wrong value for the available index.
2109 		 * Since both values should be identical, we take the value of
2110 		 * used_idx which is reported correctly.
2111 		 */
2112 		state->split.avail_index = mvq->used_idx;
2113 		return 0;
2114 	}
2115 
2116 	err = query_virtqueue(ndev, mvq, &attr);
2117 	if (err) {
2118 		mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n");
2119 		return err;
2120 	}
2121 	state->split.avail_index = attr.used_index;
2122 	return 0;
2123 }
2124 
2125 static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
2126 {
2127 	return PAGE_SIZE;
2128 }
2129 
2130 static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx)
2131 {
2132 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2133 
2134 	if (is_ctrl_vq_idx(mvdev, idx))
2135 		return MLX5_VDPA_CVQ_GROUP;
2136 
2137 	return MLX5_VDPA_DATAVQ_GROUP;
2138 }
2139 
/* Feature bits as tested by mlx_to_vritio_features() on the 16-bit device
 * feature mask it is given.
 */
enum {
	MLX5_VIRTIO_NET_F_GUEST_CSUM = 1 << 9,
	MLX5_VIRTIO_NET_F_CSUM = 1 << 10,
	MLX5_VIRTIO_NET_F_HOST_TSO6 = 1 << 11,
	MLX5_VIRTIO_NET_F_HOST_TSO4 = 1 << 12,
};
2145 
2146 static u64 mlx_to_vritio_features(u16 dev_features)
2147 {
2148 	u64 result = 0;
2149 
2150 	if (dev_features & MLX5_VIRTIO_NET_F_GUEST_CSUM)
2151 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM);
2152 	if (dev_features & MLX5_VIRTIO_NET_F_CSUM)
2153 		result |= BIT_ULL(VIRTIO_NET_F_CSUM);
2154 	if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO6)
2155 		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6);
2156 	if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO4)
2157 		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4);
2158 
2159 	return result;
2160 }
2161 
2162 static u64 get_supported_features(struct mlx5_core_dev *mdev)
2163 {
2164 	u64 mlx_vdpa_features = 0;
2165 	u16 dev_features;
2166 
2167 	dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mdev, device_features_bits_mask);
2168 	mlx_vdpa_features |= mlx_to_vritio_features(dev_features);
2169 	if (MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_version_1_0))
2170 		mlx_vdpa_features |= BIT_ULL(VIRTIO_F_VERSION_1);
2171 	mlx_vdpa_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
2172 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
2173 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
2174 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ);
2175 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS);
2176 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU);
2177 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VLAN);
2178 
2179 	return mlx_vdpa_features;
2180 }
2181 
2182 static u64 mlx5_vdpa_get_device_features(struct vdpa_device *vdev)
2183 {
2184 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2185 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2186 
2187 	print_features(mvdev, ndev->mvdev.mlx_features, false);
2188 	return ndev->mvdev.mlx_features;
2189 }
2190 
2191 static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features)
2192 {
2193 	/* Minimum features to expect */
2194 	if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
2195 		return -EOPNOTSUPP;
2196 
2197 	/* Double check features combination sent down by the driver.
2198 	 * Fail invalid features due to absence of the depended feature.
2199 	 *
2200 	 * Per VIRTIO v1.1 specification, section 5.1.3.1 Feature bit
2201 	 * requirements: "VIRTIO_NET_F_MQ Requires VIRTIO_NET_F_CTRL_VQ".
2202 	 * By failing the invalid features sent down by untrusted drivers,
2203 	 * we're assured the assumption made upon is_index_valid() and
2204 	 * is_ctrl_vq_idx() will not be compromised.
2205 	 */
2206 	if ((features & (BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) ==
2207             BIT_ULL(VIRTIO_NET_F_MQ))
2208 		return -EINVAL;
2209 
2210 	return 0;
2211 }
2212 
2213 static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
2214 {
2215 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2216 	int err;
2217 	int i;
2218 
2219 	for (i = 0; i < mvdev->max_vqs; i++) {
2220 		err = setup_vq(ndev, &ndev->vqs[i]);
2221 		if (err)
2222 			goto err_vq;
2223 	}
2224 
2225 	return 0;
2226 
2227 err_vq:
2228 	for (--i; i >= 0; i--)
2229 		teardown_vq(ndev, &ndev->vqs[i]);
2230 
2231 	return err;
2232 }
2233 
2234 static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
2235 {
2236 	struct mlx5_vdpa_virtqueue *mvq;
2237 	int i;
2238 
2239 	for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) {
2240 		mvq = &ndev->vqs[i];
2241 		if (!mvq->initialized)
2242 			continue;
2243 
2244 		teardown_vq(ndev, mvq);
2245 	}
2246 }
2247 
2248 static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
2249 {
2250 	if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) {
2251 		if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) {
2252 			/* MQ supported. CVQ index is right above the last data virtqueue's */
2253 			mvdev->max_idx = mvdev->max_vqs;
2254 		} else {
2255 			/* Only CVQ supportted. data virtqueues occupy indices 0 and 1.
2256 			 * CVQ gets index 2
2257 			 */
2258 			mvdev->max_idx = 2;
2259 		}
2260 	} else {
2261 		/* Two data virtqueues only: one for rx and one for tx */
2262 		mvdev->max_idx = 1;
2263 	}
2264 }
2265 
2266 static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features)
2267 {
2268 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2269 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2270 	int err;
2271 
2272 	print_features(mvdev, features, true);
2273 
2274 	err = verify_driver_features(mvdev, features);
2275 	if (err)
2276 		return err;
2277 
2278 	ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
2279 	if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ))
2280 		ndev->rqt_size = mlx5vdpa16_to_cpu(mvdev, ndev->config.max_virtqueue_pairs);
2281 	else
2282 		ndev->rqt_size = 1;
2283 
2284 	ndev->cur_num_vqs = 2 * ndev->rqt_size;
2285 
2286 	update_cvq_info(mvdev);
2287 	return err;
2288 }
2289 
2290 static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb)
2291 {
2292 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2293 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2294 
2295 	ndev->config_cb = *cb;
2296 }
2297 
2298 #define MLX5_VDPA_MAX_VQ_ENTRIES 256
2299 static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev)
2300 {
2301 	return MLX5_VDPA_MAX_VQ_ENTRIES;
2302 }
2303 
2304 static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev)
2305 {
2306 	return VIRTIO_ID_NET;
2307 }
2308 
2309 static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev)
2310 {
2311 	return PCI_VENDOR_ID_MELLANOX;
2312 }
2313 
2314 static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev)
2315 {
2316 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2317 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2318 
2319 	print_status(mvdev, ndev->mvdev.status, false);
2320 	return ndev->mvdev.status;
2321 }
2322 
2323 static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
2324 {
2325 	struct mlx5_vq_restore_info *ri = &mvq->ri;
2326 	struct mlx5_virtq_attr attr = {};
2327 	int err;
2328 
2329 	if (mvq->initialized) {
2330 		err = query_virtqueue(ndev, mvq, &attr);
2331 		if (err)
2332 			return err;
2333 	}
2334 
2335 	ri->avail_index = attr.available_index;
2336 	ri->used_index = attr.used_index;
2337 	ri->ready = mvq->ready;
2338 	ri->num_ent = mvq->num_ent;
2339 	ri->desc_addr = mvq->desc_addr;
2340 	ri->device_addr = mvq->device_addr;
2341 	ri->driver_addr = mvq->driver_addr;
2342 	ri->restore = true;
2343 	return 0;
2344 }
2345 
2346 static int save_channels_info(struct mlx5_vdpa_net *ndev)
2347 {
2348 	int i;
2349 
2350 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
2351 		memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri));
2352 		save_channel_info(ndev, &ndev->vqs[i]);
2353 	}
2354 	return 0;
2355 }
2356 
2357 static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev)
2358 {
2359 	int i;
2360 
2361 	for (i = 0; i < ndev->mvdev.max_vqs; i++)
2362 		memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
2363 }
2364 
2365 static void restore_channels_info(struct mlx5_vdpa_net *ndev)
2366 {
2367 	struct mlx5_vdpa_virtqueue *mvq;
2368 	struct mlx5_vq_restore_info *ri;
2369 	int i;
2370 
2371 	mlx5_clear_vqs(ndev);
2372 	init_mvqs(ndev);
2373 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
2374 		mvq = &ndev->vqs[i];
2375 		ri = &mvq->ri;
2376 		if (!ri->restore)
2377 			continue;
2378 
2379 		mvq->avail_idx = ri->avail_index;
2380 		mvq->used_idx = ri->used_index;
2381 		mvq->ready = ri->ready;
2382 		mvq->num_ent = ri->num_ent;
2383 		mvq->desc_addr = ri->desc_addr;
2384 		mvq->device_addr = ri->device_addr;
2385 		mvq->driver_addr = ri->driver_addr;
2386 	}
2387 }
2388 
2389 static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
2390 {
2391 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2392 	int err;
2393 
2394 	suspend_vqs(ndev);
2395 	err = save_channels_info(ndev);
2396 	if (err)
2397 		goto err_mr;
2398 
2399 	teardown_driver(ndev);
2400 	mlx5_vdpa_destroy_mr(mvdev);
2401 	err = mlx5_vdpa_create_mr(mvdev, iotlb);
2402 	if (err)
2403 		goto err_mr;
2404 
2405 	if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
2406 		goto err_mr;
2407 
2408 	restore_channels_info(ndev);
2409 	err = setup_driver(mvdev);
2410 	if (err)
2411 		goto err_setup;
2412 
2413 	return 0;
2414 
2415 err_setup:
2416 	mlx5_vdpa_destroy_mr(mvdev);
2417 err_mr:
2418 	return err;
2419 }
2420 
2421 /* reslock must be held for this function */
2422 static int setup_driver(struct mlx5_vdpa_dev *mvdev)
2423 {
2424 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2425 	int err;
2426 
2427 	WARN_ON(!rwsem_is_locked(&ndev->reslock));
2428 
2429 	if (ndev->setup) {
2430 		mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n");
2431 		err = 0;
2432 		goto out;
2433 	}
2434 	err = setup_virtqueues(mvdev);
2435 	if (err) {
2436 		mlx5_vdpa_warn(mvdev, "setup_virtqueues\n");
2437 		goto out;
2438 	}
2439 
2440 	err = create_rqt(ndev);
2441 	if (err) {
2442 		mlx5_vdpa_warn(mvdev, "create_rqt\n");
2443 		goto err_rqt;
2444 	}
2445 
2446 	err = create_tir(ndev);
2447 	if (err) {
2448 		mlx5_vdpa_warn(mvdev, "create_tir\n");
2449 		goto err_tir;
2450 	}
2451 
2452 	err = setup_steering(ndev);
2453 	if (err) {
2454 		mlx5_vdpa_warn(mvdev, "setup_steering\n");
2455 		goto err_fwd;
2456 	}
2457 	ndev->setup = true;
2458 
2459 	return 0;
2460 
2461 err_fwd:
2462 	destroy_tir(ndev);
2463 err_tir:
2464 	destroy_rqt(ndev);
2465 err_rqt:
2466 	teardown_virtqueues(ndev);
2467 out:
2468 	return err;
2469 }
2470 
2471 /* reslock must be held for this function */
2472 static void teardown_driver(struct mlx5_vdpa_net *ndev)
2473 {
2474 
2475 	WARN_ON(!rwsem_is_locked(&ndev->reslock));
2476 
2477 	if (!ndev->setup)
2478 		return;
2479 
2480 	teardown_steering(ndev);
2481 	destroy_tir(ndev);
2482 	destroy_rqt(ndev);
2483 	teardown_virtqueues(ndev);
2484 	ndev->setup = false;
2485 }
2486 
2487 static void clear_vqs_ready(struct mlx5_vdpa_net *ndev)
2488 {
2489 	int i;
2490 
2491 	for (i = 0; i < ndev->mvdev.max_vqs; i++)
2492 		ndev->vqs[i].ready = false;
2493 
2494 	ndev->mvdev.cvq.ready = false;
2495 }
2496 
2497 static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
2498 {
2499 	struct mlx5_control_vq *cvq = &mvdev->cvq;
2500 	int err = 0;
2501 
2502 	if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))
2503 		err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
2504 					MLX5_CVQ_MAX_ENT, false,
2505 					(struct vring_desc *)(uintptr_t)cvq->desc_addr,
2506 					(struct vring_avail *)(uintptr_t)cvq->driver_addr,
2507 					(struct vring_used *)(uintptr_t)cvq->device_addr);
2508 
2509 	return err;
2510 }
2511 
2512 static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
2513 {
2514 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2515 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2516 	int err;
2517 
2518 	print_status(mvdev, status, true);
2519 
2520 	down_write(&ndev->reslock);
2521 
2522 	if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
2523 		if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
2524 			err = setup_cvq_vring(mvdev);
2525 			if (err) {
2526 				mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n");
2527 				goto err_setup;
2528 			}
2529 			err = setup_driver(mvdev);
2530 			if (err) {
2531 				mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
2532 				goto err_setup;
2533 			}
2534 		} else {
2535 			mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
2536 			goto err_clear;
2537 		}
2538 	}
2539 
2540 	ndev->mvdev.status = status;
2541 	up_write(&ndev->reslock);
2542 	return;
2543 
2544 err_setup:
2545 	mlx5_vdpa_destroy_mr(&ndev->mvdev);
2546 	ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
2547 err_clear:
2548 	up_write(&ndev->reslock);
2549 }
2550 
2551 static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev)
2552 {
2553 	int i;
2554 
2555 	/* default mapping all groups are mapped to asid 0 */
2556 	for (i = 0; i < MLX5_VDPA_NUMVQ_GROUPS; i++)
2557 		mvdev->group2asid[i] = 0;
2558 }
2559 
2560 static int mlx5_vdpa_reset(struct vdpa_device *vdev)
2561 {
2562 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2563 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2564 
2565 	print_status(mvdev, 0, true);
2566 	mlx5_vdpa_info(mvdev, "performing device reset\n");
2567 
2568 	down_write(&ndev->reslock);
2569 	teardown_driver(ndev);
2570 	clear_vqs_ready(ndev);
2571 	mlx5_vdpa_destroy_mr(&ndev->mvdev);
2572 	ndev->mvdev.status = 0;
2573 	ndev->cur_num_vqs = 0;
2574 	ndev->mvdev.cvq.received_desc = 0;
2575 	ndev->mvdev.cvq.completed_desc = 0;
2576 	memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1));
2577 	ndev->mvdev.actual_features = 0;
2578 	init_group_to_asid_map(mvdev);
2579 	++mvdev->generation;
2580 
2581 	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
2582 		if (mlx5_vdpa_create_mr(mvdev, NULL))
2583 			mlx5_vdpa_warn(mvdev, "create MR failed\n");
2584 	}
2585 	up_write(&ndev->reslock);
2586 
2587 	return 0;
2588 }
2589 
2590 static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev)
2591 {
2592 	return sizeof(struct virtio_net_config);
2593 }
2594 
2595 static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf,
2596 				 unsigned int len)
2597 {
2598 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2599 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2600 
2601 	if (offset + len <= sizeof(struct virtio_net_config))
2602 		memcpy(buf, (u8 *)&ndev->config + offset, len);
2603 }
2604 
/* Writing the device config space is not supported: the request is ignored. */
static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf,
				 unsigned int len)
{
}
2610 
2611 static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
2612 {
2613 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2614 
2615 	return mvdev->generation;
2616 }
2617 
2618 static int set_map_control(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
2619 {
2620 	u64 start = 0ULL, last = 0ULL - 1;
2621 	struct vhost_iotlb_map *map;
2622 	int err = 0;
2623 
2624 	spin_lock(&mvdev->cvq.iommu_lock);
2625 	vhost_iotlb_reset(mvdev->cvq.iotlb);
2626 
2627 	for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
2628 	     map = vhost_iotlb_itree_next(map, start, last)) {
2629 		err = vhost_iotlb_add_range(mvdev->cvq.iotlb, map->start,
2630 					    map->last, map->addr, map->perm);
2631 		if (err)
2632 			goto out;
2633 	}
2634 
2635 out:
2636 	spin_unlock(&mvdev->cvq.iommu_lock);
2637 	return err;
2638 }
2639 
2640 static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
2641 {
2642 	bool change_map;
2643 	int err;
2644 
2645 	err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map);
2646 	if (err) {
2647 		mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err);
2648 		return err;
2649 	}
2650 
2651 	if (change_map)
2652 		err = mlx5_vdpa_change_map(mvdev, iotlb);
2653 
2654 	return err;
2655 }
2656 
2657 static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
2658 			     struct vhost_iotlb *iotlb)
2659 {
2660 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2661 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2662 	int err = -EINVAL;
2663 
2664 	down_write(&ndev->reslock);
2665 	if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
2666 		err = set_map_data(mvdev, iotlb);
2667 		if (err)
2668 			goto out;
2669 	}
2670 
2671 	if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid)
2672 		err = set_map_control(mvdev, iotlb);
2673 
2674 out:
2675 	up_write(&ndev->reslock);
2676 	return err;
2677 }
2678 
2679 static void mlx5_vdpa_free(struct vdpa_device *vdev)
2680 {
2681 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2682 	struct mlx5_core_dev *pfmdev;
2683 	struct mlx5_vdpa_net *ndev;
2684 
2685 	ndev = to_mlx5_vdpa_ndev(mvdev);
2686 
2687 	free_resources(ndev);
2688 	mlx5_vdpa_destroy_mr(mvdev);
2689 	if (!is_zero_ether_addr(ndev->config.mac)) {
2690 		pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
2691 		mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
2692 	}
2693 	mlx5_vdpa_free_resources(&ndev->mvdev);
2694 	kfree(ndev->event_cbs);
2695 	kfree(ndev->vqs);
2696 }
2697 
2698 static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
2699 {
2700 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2701 	struct vdpa_notification_area ret = {};
2702 	struct mlx5_vdpa_net *ndev;
2703 	phys_addr_t addr;
2704 
2705 	if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
2706 		return ret;
2707 
2708 	/* If SF BAR size is smaller than PAGE_SIZE, do not use direct
2709 	 * notification to avoid the risk of mapping pages that contain BAR of more
2710 	 * than one SF
2711 	 */
2712 	if (MLX5_CAP_GEN(mvdev->mdev, log_min_sf_size) + 12 < PAGE_SHIFT)
2713 		return ret;
2714 
2715 	ndev = to_mlx5_vdpa_ndev(mvdev);
2716 	addr = (phys_addr_t)ndev->mvdev.res.phys_kick_addr;
2717 	ret.addr = addr;
2718 	ret.size = PAGE_SIZE;
2719 	return ret;
2720 }
2721 
2722 static int mlx5_get_vq_irq(struct vdpa_device *vdv, u16 idx)
2723 {
2724 	return -EOPNOTSUPP;
2725 }
2726 
2727 static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev)
2728 {
2729 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2730 
2731 	return mvdev->actual_features;
2732 }
2733 
2734 static int counter_set_query(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
2735 			     u64 *received_desc, u64 *completed_desc)
2736 {
2737 	u32 in[MLX5_ST_SZ_DW(query_virtio_q_counters_in)] = {};
2738 	u32 out[MLX5_ST_SZ_DW(query_virtio_q_counters_out)] = {};
2739 	void *cmd_hdr;
2740 	void *ctx;
2741 	int err;
2742 
2743 	if (!counters_supported(&ndev->mvdev))
2744 		return -EOPNOTSUPP;
2745 
2746 	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
2747 		return -EAGAIN;
2748 
2749 	cmd_hdr = MLX5_ADDR_OF(query_virtio_q_counters_in, in, hdr);
2750 
2751 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
2752 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
2753 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
2754 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->counter_set_id);
2755 
2756 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
2757 	if (err)
2758 		return err;
2759 
2760 	ctx = MLX5_ADDR_OF(query_virtio_q_counters_out, out, counters);
2761 	*received_desc = MLX5_GET64(virtio_q_counters, ctx, received_desc);
2762 	*completed_desc = MLX5_GET64(virtio_q_counters, ctx, completed_desc);
2763 	return 0;
2764 }
2765 
2766 static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
2767 					 struct sk_buff *msg,
2768 					 struct netlink_ext_ack *extack)
2769 {
2770 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2771 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2772 	struct mlx5_vdpa_virtqueue *mvq;
2773 	struct mlx5_control_vq *cvq;
2774 	u64 received_desc;
2775 	u64 completed_desc;
2776 	int err = 0;
2777 
2778 	down_read(&ndev->reslock);
2779 	if (!is_index_valid(mvdev, idx)) {
2780 		NL_SET_ERR_MSG_MOD(extack, "virtqueue index is not valid");
2781 		err = -EINVAL;
2782 		goto out_err;
2783 	}
2784 
2785 	if (idx == ctrl_vq_idx(mvdev)) {
2786 		cvq = &mvdev->cvq;
2787 		received_desc = cvq->received_desc;
2788 		completed_desc = cvq->completed_desc;
2789 		goto out;
2790 	}
2791 
2792 	mvq = &ndev->vqs[idx];
2793 	err = counter_set_query(ndev, mvq, &received_desc, &completed_desc);
2794 	if (err) {
2795 		NL_SET_ERR_MSG_MOD(extack, "failed to query hardware");
2796 		goto out_err;
2797 	}
2798 
2799 out:
2800 	err = -EMSGSIZE;
2801 	if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "received_desc"))
2802 		goto out_err;
2803 
2804 	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, received_desc,
2805 			      VDPA_ATTR_PAD))
2806 		goto out_err;
2807 
2808 	if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "completed_desc"))
2809 		goto out_err;
2810 
2811 	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, completed_desc,
2812 			      VDPA_ATTR_PAD))
2813 		goto out_err;
2814 
2815 	err = 0;
2816 out_err:
2817 	up_read(&ndev->reslock);
2818 	return err;
2819 }
2820 
2821 static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev)
2822 {
2823 	struct mlx5_control_vq *cvq;
2824 
2825 	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
2826 		return;
2827 
2828 	cvq = &mvdev->cvq;
2829 	cvq->ready = false;
2830 }
2831 
2832 static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
2833 {
2834 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2835 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2836 	struct mlx5_vdpa_virtqueue *mvq;
2837 	int i;
2838 
2839 	down_write(&ndev->reslock);
2840 	mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
2841 	ndev->nb_registered = false;
2842 	flush_workqueue(ndev->mvdev.wq);
2843 	for (i = 0; i < ndev->cur_num_vqs; i++) {
2844 		mvq = &ndev->vqs[i];
2845 		suspend_vq(ndev, mvq);
2846 	}
2847 	mlx5_vdpa_cvq_suspend(mvdev);
2848 	up_write(&ndev->reslock);
2849 	return 0;
2850 }
2851 
2852 static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group,
2853 			       unsigned int asid)
2854 {
2855 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2856 
2857 	if (group >= MLX5_VDPA_NUMVQ_GROUPS)
2858 		return -EINVAL;
2859 
2860 	mvdev->group2asid[group] = asid;
2861 	return 0;
2862 }
2863 
2864 static const struct vdpa_config_ops mlx5_vdpa_ops = {
2865 	.set_vq_address = mlx5_vdpa_set_vq_address,
2866 	.set_vq_num = mlx5_vdpa_set_vq_num,
2867 	.kick_vq = mlx5_vdpa_kick_vq,
2868 	.set_vq_cb = mlx5_vdpa_set_vq_cb,
2869 	.set_vq_ready = mlx5_vdpa_set_vq_ready,
2870 	.get_vq_ready = mlx5_vdpa_get_vq_ready,
2871 	.set_vq_state = mlx5_vdpa_set_vq_state,
2872 	.get_vq_state = mlx5_vdpa_get_vq_state,
2873 	.get_vendor_vq_stats = mlx5_vdpa_get_vendor_vq_stats,
2874 	.get_vq_notification = mlx5_get_vq_notification,
2875 	.get_vq_irq = mlx5_get_vq_irq,
2876 	.get_vq_align = mlx5_vdpa_get_vq_align,
2877 	.get_vq_group = mlx5_vdpa_get_vq_group,
2878 	.get_device_features = mlx5_vdpa_get_device_features,
2879 	.set_driver_features = mlx5_vdpa_set_driver_features,
2880 	.get_driver_features = mlx5_vdpa_get_driver_features,
2881 	.set_config_cb = mlx5_vdpa_set_config_cb,
2882 	.get_vq_num_max = mlx5_vdpa_get_vq_num_max,
2883 	.get_device_id = mlx5_vdpa_get_device_id,
2884 	.get_vendor_id = mlx5_vdpa_get_vendor_id,
2885 	.get_status = mlx5_vdpa_get_status,
2886 	.set_status = mlx5_vdpa_set_status,
2887 	.reset = mlx5_vdpa_reset,
2888 	.get_config_size = mlx5_vdpa_get_config_size,
2889 	.get_config = mlx5_vdpa_get_config,
2890 	.set_config = mlx5_vdpa_set_config,
2891 	.get_generation = mlx5_vdpa_get_generation,
2892 	.set_map = mlx5_vdpa_set_map,
2893 	.set_group_asid = mlx5_set_group_asid,
2894 	.free = mlx5_vdpa_free,
2895 	.suspend = mlx5_vdpa_suspend,
2896 };
2897 
2898 static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
2899 {
2900 	u16 hw_mtu;
2901 	int err;
2902 
2903 	err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu);
2904 	if (err)
2905 		return err;
2906 
2907 	*mtu = hw_mtu - MLX5V_ETH_HARD_MTU;
2908 	return 0;
2909 }
2910 
2911 static int alloc_resources(struct mlx5_vdpa_net *ndev)
2912 {
2913 	struct mlx5_vdpa_net_resources *res = &ndev->res;
2914 	int err;
2915 
2916 	if (res->valid) {
2917 		mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n");
2918 		return -EEXIST;
2919 	}
2920 
2921 	err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn);
2922 	if (err)
2923 		return err;
2924 
2925 	err = create_tis(ndev);
2926 	if (err)
2927 		goto err_tis;
2928 
2929 	res->valid = true;
2930 
2931 	return 0;
2932 
2933 err_tis:
2934 	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
2935 	return err;
2936 }
2937 
2938 static void free_resources(struct mlx5_vdpa_net *ndev)
2939 {
2940 	struct mlx5_vdpa_net_resources *res = &ndev->res;
2941 
2942 	if (!res->valid)
2943 		return;
2944 
2945 	destroy_tis(ndev);
2946 	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
2947 	res->valid = false;
2948 }
2949 
2950 static void init_mvqs(struct mlx5_vdpa_net *ndev)
2951 {
2952 	struct mlx5_vdpa_virtqueue *mvq;
2953 	int i;
2954 
2955 	for (i = 0; i < ndev->mvdev.max_vqs; ++i) {
2956 		mvq = &ndev->vqs[i];
2957 		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
2958 		mvq->index = i;
2959 		mvq->ndev = ndev;
2960 		mvq->fwqp.fw = true;
2961 		mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
2962 	}
2963 	for (; i < ndev->mvdev.max_vqs; i++) {
2964 		mvq = &ndev->vqs[i];
2965 		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
2966 		mvq->index = i;
2967 		mvq->ndev = ndev;
2968 	}
2969 }
2970 
2971 struct mlx5_vdpa_mgmtdev {
2972 	struct vdpa_mgmt_dev mgtdev;
2973 	struct mlx5_adev *madev;
2974 	struct mlx5_vdpa_net *ndev;
2975 };
2976 
2977 static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
2978 {
2979 	u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
2980 	u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
2981 	int err;
2982 
2983 	MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE);
2984 	MLX5_SET(query_vport_state_in, in, op_mod, opmod);
2985 	MLX5_SET(query_vport_state_in, in, vport_number, vport);
2986 	if (vport)
2987 		MLX5_SET(query_vport_state_in, in, other_vport, 1);
2988 
2989 	err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
2990 	if (err)
2991 		return 0;
2992 
2993 	return MLX5_GET(query_vport_state_out, out, state);
2994 }
2995 
2996 static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
2997 {
2998 	if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
2999 	    VPORT_STATE_UP)
3000 		return true;
3001 
3002 	return false;
3003 }
3004 
3005 static void update_carrier(struct work_struct *work)
3006 {
3007 	struct mlx5_vdpa_wq_ent *wqent;
3008 	struct mlx5_vdpa_dev *mvdev;
3009 	struct mlx5_vdpa_net *ndev;
3010 
3011 	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
3012 	mvdev = wqent->mvdev;
3013 	ndev = to_mlx5_vdpa_ndev(mvdev);
3014 	if (get_link_state(mvdev))
3015 		ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
3016 	else
3017 		ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
3018 
3019 	if (ndev->config_cb.callback)
3020 		ndev->config_cb.callback(ndev->config_cb.private);
3021 
3022 	kfree(wqent);
3023 }
3024 
3025 static int event_handler(struct notifier_block *nb, unsigned long event, void *param)
3026 {
3027 	struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb);
3028 	struct mlx5_eqe *eqe = param;
3029 	int ret = NOTIFY_DONE;
3030 	struct mlx5_vdpa_wq_ent *wqent;
3031 
3032 	if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
3033 		switch (eqe->sub_type) {
3034 		case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
3035 		case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
3036 			down_read(&ndev->reslock);
3037 			if (!ndev->nb_registered) {
3038 				up_read(&ndev->reslock);
3039 				return NOTIFY_DONE;
3040 			}
3041 			wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
3042 			if (!wqent) {
3043 				up_read(&ndev->reslock);
3044 				return NOTIFY_DONE;
3045 			}
3046 
3047 			wqent->mvdev = &ndev->mvdev;
3048 			INIT_WORK(&wqent->work, update_carrier);
3049 			queue_work(ndev->mvdev.wq, &wqent->work);
3050 			up_read(&ndev->reslock);
3051 			ret = NOTIFY_OK;
3052 			break;
3053 		default:
3054 			return NOTIFY_DONE;
3055 		}
3056 		return ret;
3057 	}
3058 	return ret;
3059 }
3060 
3061 static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu)
3062 {
3063 	int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
3064 	void *in;
3065 	int err;
3066 
3067 	in = kvzalloc(inlen, GFP_KERNEL);
3068 	if (!in)
3069 		return -ENOMEM;
3070 
3071 	MLX5_SET(modify_nic_vport_context_in, in, field_select.mtu, 1);
3072 	MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.mtu,
3073 		 mtu + MLX5V_ETH_HARD_MTU);
3074 	MLX5_SET(modify_nic_vport_context_in, in, opcode,
3075 		 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
3076 
3077 	err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);
3078 
3079 	kvfree(in);
3080 	return err;
3081 }
3082 
3083 static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
3084 			     const struct vdpa_dev_set_config *add_config)
3085 {
3086 	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
3087 	struct virtio_net_config *config;
3088 	struct mlx5_core_dev *pfmdev;
3089 	struct mlx5_vdpa_dev *mvdev;
3090 	struct mlx5_vdpa_net *ndev;
3091 	struct mlx5_core_dev *mdev;
3092 	u32 max_vqs;
3093 	u16 mtu;
3094 	int err;
3095 
3096 	if (mgtdev->ndev)
3097 		return -ENOSPC;
3098 
3099 	mdev = mgtdev->madev->mdev;
3100 	if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) &
3101 	    MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) {
3102 		dev_warn(mdev->device, "missing support for split virtqueues\n");
3103 		return -EOPNOTSUPP;
3104 	}
3105 
3106 	max_vqs = min_t(int, MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues),
3107 			1 << MLX5_CAP_GEN(mdev, log_max_rqt_size));
3108 	if (max_vqs < 2) {
3109 		dev_warn(mdev->device,
3110 			 "%d virtqueues are supported. At least 2 are required\n",
3111 			 max_vqs);
3112 		return -EAGAIN;
3113 	}
3114 
3115 	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) {
3116 		if (add_config->net.max_vq_pairs > max_vqs / 2)
3117 			return -EINVAL;
3118 		max_vqs = min_t(u32, max_vqs, 2 * add_config->net.max_vq_pairs);
3119 	} else {
3120 		max_vqs = 2;
3121 	}
3122 
3123 	ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
3124 				 MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false);
3125 	if (IS_ERR(ndev))
3126 		return PTR_ERR(ndev);
3127 
3128 	ndev->mvdev.mlx_features = mgtdev->mgtdev.supported_features;
3129 	ndev->mvdev.max_vqs = max_vqs;
3130 	mvdev = &ndev->mvdev;
3131 	mvdev->mdev = mdev;
3132 
3133 	ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
3134 	ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), GFP_KERNEL);
3135 	if (!ndev->vqs || !ndev->event_cbs) {
3136 		err = -ENOMEM;
3137 		goto err_alloc;
3138 	}
3139 
3140 	init_mvqs(ndev);
3141 	init_rwsem(&ndev->reslock);
3142 	config = &ndev->config;
3143 
3144 	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU)) {
3145 		err = config_func_mtu(mdev, add_config->net.mtu);
3146 		if (err)
3147 			goto err_alloc;
3148 	}
3149 
3150 	err = query_mtu(mdev, &mtu);
3151 	if (err)
3152 		goto err_alloc;
3153 
3154 	ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu);
3155 
3156 	if (get_link_state(mvdev))
3157 		ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
3158 	else
3159 		ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
3160 
3161 	if (add_config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
3162 		memcpy(ndev->config.mac, add_config->net.mac, ETH_ALEN);
3163 	} else {
3164 		err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
3165 		if (err)
3166 			goto err_alloc;
3167 	}
3168 
3169 	if (!is_zero_ether_addr(config->mac)) {
3170 		pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
3171 		err = mlx5_mpfs_add_mac(pfmdev, config->mac);
3172 		if (err)
3173 			goto err_alloc;
3174 
3175 		ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MAC);
3176 	}
3177 
3178 	config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, max_vqs / 2);
3179 	mvdev->vdev.dma_dev = &mdev->pdev->dev;
3180 	err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
3181 	if (err)
3182 		goto err_mpfs;
3183 
3184 	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
3185 		err = mlx5_vdpa_create_mr(mvdev, NULL);
3186 		if (err)
3187 			goto err_res;
3188 	}
3189 
3190 	err = alloc_resources(ndev);
3191 	if (err)
3192 		goto err_mr;
3193 
3194 	ndev->cvq_ent.mvdev = mvdev;
3195 	INIT_WORK(&ndev->cvq_ent.work, mlx5_cvq_kick_handler);
3196 	mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq");
3197 	if (!mvdev->wq) {
3198 		err = -ENOMEM;
3199 		goto err_res2;
3200 	}
3201 
3202 	ndev->nb.notifier_call = event_handler;
3203 	mlx5_notifier_register(mdev, &ndev->nb);
3204 	ndev->nb_registered = true;
3205 	mvdev->vdev.mdev = &mgtdev->mgtdev;
3206 	err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1);
3207 	if (err)
3208 		goto err_reg;
3209 
3210 	mgtdev->ndev = ndev;
3211 	return 0;
3212 
3213 err_reg:
3214 	destroy_workqueue(mvdev->wq);
3215 err_res2:
3216 	free_resources(ndev);
3217 err_mr:
3218 	mlx5_vdpa_destroy_mr(mvdev);
3219 err_res:
3220 	mlx5_vdpa_free_resources(&ndev->mvdev);
3221 err_mpfs:
3222 	if (!is_zero_ether_addr(config->mac))
3223 		mlx5_mpfs_del_mac(pfmdev, config->mac);
3224 err_alloc:
3225 	put_device(&mvdev->vdev.dev);
3226 	return err;
3227 }
3228 
3229 static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
3230 {
3231 	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
3232 	struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
3233 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3234 	struct workqueue_struct *wq;
3235 
3236 	if (ndev->nb_registered) {
3237 		mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
3238 		ndev->nb_registered = false;
3239 	}
3240 	wq = mvdev->wq;
3241 	mvdev->wq = NULL;
3242 	destroy_workqueue(wq);
3243 	_vdpa_unregister_device(dev);
3244 	mgtdev->ndev = NULL;
3245 }
3246 
3247 static const struct vdpa_mgmtdev_ops mdev_ops = {
3248 	.dev_add = mlx5_vdpa_dev_add,
3249 	.dev_del = mlx5_vdpa_dev_del,
3250 };
3251 
3252 static struct virtio_device_id id_table[] = {
3253 	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
3254 	{ 0 },
3255 };
3256 
3257 static int mlx5v_probe(struct auxiliary_device *adev,
3258 		       const struct auxiliary_device_id *id)
3259 
3260 {
3261 	struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
3262 	struct mlx5_core_dev *mdev = madev->mdev;
3263 	struct mlx5_vdpa_mgmtdev *mgtdev;
3264 	int err;
3265 
3266 	mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL);
3267 	if (!mgtdev)
3268 		return -ENOMEM;
3269 
3270 	mgtdev->mgtdev.ops = &mdev_ops;
3271 	mgtdev->mgtdev.device = mdev->device;
3272 	mgtdev->mgtdev.id_table = id_table;
3273 	mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
3274 					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP) |
3275 					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU);
3276 	mgtdev->mgtdev.max_supported_vqs =
3277 		MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues) + 1;
3278 	mgtdev->mgtdev.supported_features = get_supported_features(mdev);
3279 	mgtdev->madev = madev;
3280 
3281 	err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
3282 	if (err)
3283 		goto reg_err;
3284 
3285 	auxiliary_set_drvdata(adev, mgtdev);
3286 
3287 	return 0;
3288 
3289 reg_err:
3290 	kfree(mgtdev);
3291 	return err;
3292 }
3293 
3294 static void mlx5v_remove(struct auxiliary_device *adev)
3295 {
3296 	struct mlx5_vdpa_mgmtdev *mgtdev;
3297 
3298 	mgtdev = auxiliary_get_drvdata(adev);
3299 	vdpa_mgmtdev_unregister(&mgtdev->mgtdev);
3300 	kfree(mgtdev);
3301 }
3302 
3303 static const struct auxiliary_device_id mlx5v_id_table[] = {
3304 	{ .name = MLX5_ADEV_NAME ".vnet", },
3305 	{},
3306 };
3307 
3308 MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table);
3309 
3310 static struct auxiliary_driver mlx5v_driver = {
3311 	.name = "vnet",
3312 	.probe = mlx5v_probe,
3313 	.remove = mlx5v_remove,
3314 	.id_table = mlx5v_id_table,
3315 };
3316 
3317 module_auxiliary_driver(mlx5v_driver);
3318