xref: /openbmc/linux/drivers/vdpa/mlx5/net/mlx5_vnet.c (revision cc3519b8)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2020 Mellanox Technologies Ltd. */
3 
4 #include <linux/module.h>
5 #include <linux/vdpa.h>
6 #include <linux/vringh.h>
7 #include <uapi/linux/virtio_net.h>
8 #include <uapi/linux/virtio_ids.h>
9 #include <uapi/linux/vdpa.h>
10 #include <linux/virtio_config.h>
11 #include <linux/auxiliary_bus.h>
12 #include <linux/mlx5/cq.h>
13 #include <linux/mlx5/qp.h>
14 #include <linux/mlx5/device.h>
15 #include <linux/mlx5/driver.h>
16 #include <linux/mlx5/vport.h>
17 #include <linux/mlx5/fs.h>
18 #include <linux/mlx5/mlx5_ifc_vdpa.h>
19 #include <linux/mlx5/mpfs.h>
20 #include "mlx5_vdpa.h"
21 #include "mlx5_vnet.h"
22 
23 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
24 MODULE_DESCRIPTION("Mellanox VDPA driver");
25 MODULE_LICENSE("Dual BSD/GPL");
26 
27 #define VALID_FEATURES_MASK                                                                        \
28 	(BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |                                   \
29 	 BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) |   \
30 	 BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |                             \
31 	 BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \
32 	 BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) |   \
33 	 BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |      \
34 	 BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) |                                 \
35 	 BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) |                      \
36 	 BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) |  \
37 	 BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) |           \
38 	 BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) |                          \
39 	 BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) |      \
40 	 BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV))
41 
42 #define VALID_STATUS_MASK                                                                          \
43 	(VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK |        \
44 	 VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)
45 
46 #define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature)))
47 
48 #define MLX5V_UNTAGGED 0x1000
49 
50 struct mlx5_vdpa_cq_buf {
51 	struct mlx5_frag_buf_ctrl fbc;
52 	struct mlx5_frag_buf frag_buf;
53 	int cqe_size;
54 	int nent;
55 };
56 
57 struct mlx5_vdpa_cq {
58 	struct mlx5_core_cq mcq;
59 	struct mlx5_vdpa_cq_buf buf;
60 	struct mlx5_db db;
61 	int cqe;
62 };
63 
64 struct mlx5_vdpa_umem {
65 	struct mlx5_frag_buf_ctrl fbc;
66 	struct mlx5_frag_buf frag_buf;
67 	int size;
68 	u32 id;
69 };
70 
71 struct mlx5_vdpa_qp {
72 	struct mlx5_core_qp mqp;
73 	struct mlx5_frag_buf frag_buf;
74 	struct mlx5_db db;
75 	u16 head;
76 	bool fw;
77 };
78 
79 struct mlx5_vq_restore_info {
80 	u32 num_ent;
81 	u64 desc_addr;
82 	u64 device_addr;
83 	u64 driver_addr;
84 	u16 avail_index;
85 	u16 used_index;
86 	struct msi_map map;
87 	bool ready;
88 	bool restore;
89 };
90 
91 struct mlx5_vdpa_virtqueue {
92 	bool ready;
93 	u64 desc_addr;
94 	u64 device_addr;
95 	u64 driver_addr;
96 	u32 num_ent;
97 
98 	/* Resources for implementing the notification channel from the device
99 	 * to the driver. fwqp is the firmware end of an RC connection; the
100 	 * other end is vqqp used by the driver. cq is where completions are
101 	 * reported.
102 	 */
103 	struct mlx5_vdpa_cq cq;
104 	struct mlx5_vdpa_qp fwqp;
105 	struct mlx5_vdpa_qp vqqp;
106 
107 	/* umem resources are required for virtqueue operation. Their use is
108 	 * internal and they must be provided by the driver.
109 	 */
110 	struct mlx5_vdpa_umem umem1;
111 	struct mlx5_vdpa_umem umem2;
112 	struct mlx5_vdpa_umem umem3;
113 
114 	u32 counter_set_id;
115 	bool initialized;
116 	int index;
117 	u32 virtq_id;
118 	struct mlx5_vdpa_net *ndev;
119 	u16 avail_idx;
120 	u16 used_idx;
121 	int fw_state;
122 	struct msi_map map;
123 
124 	/* keep last in the struct */
125 	struct mlx5_vq_restore_info ri;
126 };
127 
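/* A virtqueue index is valid only if it addresses a queue the negotiated
 * features expose: without VIRTIO_NET_F_MQ only the first RX/TX pair is
 * usable (plus index 2 for the control VQ when VIRTIO_NET_F_CTRL_VQ is
 * negotiated); with MQ any index up to max_idx is accepted.
 */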
128 static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
129 {
130 	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) {
131 		if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
132 			return idx < 2;
133 		else
134 			return idx < 3;
135 	}
136 
137 	return idx <= mvdev->max_idx;
138 }
139 
140 static void free_resources(struct mlx5_vdpa_net *ndev);
141 static void init_mvqs(struct mlx5_vdpa_net *ndev);
142 static int setup_driver(struct mlx5_vdpa_dev *mvdev);
143 static void teardown_driver(struct mlx5_vdpa_net *ndev);
144 
145 static bool mlx5_vdpa_debug;
146 
147 #define MLX5_LOG_VIO_FLAG(_feature)                                                                \
148 	do {                                                                                       \
149 		if (features & BIT_ULL(_feature))                                                  \
150 			mlx5_vdpa_info(mvdev, "%s\n", #_feature);                                  \
151 	} while (0)
152 
153 #define MLX5_LOG_VIO_STAT(_status)                                                                 \
154 	do {                                                                                       \
155 		if (status & (_status))                                                            \
156 			mlx5_vdpa_info(mvdev, "%s\n", #_status);                                   \
157 	} while (0)
158 
159 /* TODO: cross-endian support */
160 static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
161 {
162 	return virtio_legacy_is_little_endian() ||
163 		(mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
164 }
165 
166 static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val)
167 {
168 	return __virtio16_to_cpu(mlx5_vdpa_is_little_endian(mvdev), val);
169 }
170 
171 static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
172 {
173 	return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
174 }
175 
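/* The control virtqueue always follows the data virtqueues: index 2 when
 * VIRTIO_NET_F_MQ is not negotiated, max_vqs otherwise.
 */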
176 static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
177 {
178 	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
179 		return 2;
180 
181 	return mvdev->max_vqs;
182 }
183 
184 static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
185 {
186 	return idx == ctrl_vq_idx(mvdev);
187 }
188 
189 static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
190 {
191 	if (status & ~VALID_STATUS_MASK)
192 		mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n",
193 			       status & ~VALID_STATUS_MASK);
194 
195 	if (!mlx5_vdpa_debug)
196 		return;
197 
198 	mlx5_vdpa_info(mvdev, "driver status %s", set ? "set" : "get");
199 	if (set && !status) {
200 		mlx5_vdpa_info(mvdev, "driver resets the device\n");
201 		return;
202 	}
203 
204 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE);
205 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER);
206 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK);
207 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK);
208 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET);
209 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED);
210 }
211 
212 static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set)
213 {
214 	if (features & ~VALID_FEATURES_MASK)
215 		mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n",
216 			       features & ~VALID_FEATURES_MASK);
217 
218 	if (!mlx5_vdpa_debug)
219 		return;
220 
221 	mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads");
222 	if (!features)
223 		mlx5_vdpa_info(mvdev, "all feature bits are cleared\n");
224 
225 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM);
226 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM);
227 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
228 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU);
229 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC);
230 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4);
231 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6);
232 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN);
233 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO);
234 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4);
235 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6);
236 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN);
237 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO);
238 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF);
239 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS);
240 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ);
241 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX);
242 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN);
243 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA);
244 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE);
245 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ);
246 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR);
247 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT);
248 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS);
249 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT);
250 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY);
251 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX);
252 	MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY);
253 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT);
254 	MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1);
255 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM);
256 	MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED);
257 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM);
258 	MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV);
259 }
260 
261 static int create_tis(struct mlx5_vdpa_net *ndev)
262 {
263 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
264 	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
265 	void *tisc;
266 	int err;
267 
268 	tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
269 	MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn);
270 	err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn);
271 	if (err)
272 		mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err);
273 
274 	return err;
275 }
276 
277 static void destroy_tis(struct mlx5_vdpa_net *ndev)
278 {
279 	mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn);
280 }
281 
282 #define MLX5_VDPA_CQE_SIZE 64
283 #define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE)
284 
285 static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent)
286 {
287 	struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
288 	u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE;
289 	u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE;
290 	int err;
291 
292 	err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf,
293 				       ndev->mvdev.mdev->priv.numa_node);
294 	if (err)
295 		return err;
296 
297 	mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);
298 
299 	buf->cqe_size = MLX5_VDPA_CQE_SIZE;
300 	buf->nent = nent;
301 
302 	return 0;
303 }
304 
305 static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size)
306 {
307 	struct mlx5_frag_buf *frag_buf = &umem->frag_buf;
308 
309 	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf,
310 					ndev->mvdev.mdev->priv.numa_node);
311 }
312 
313 static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf)
314 {
315 	mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf);
316 }
317 
318 static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n)
319 {
320 	return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n);
321 }
322 
323 static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf)
324 {
325 	struct mlx5_cqe64 *cqe64;
326 	void *cqe;
327 	int i;
328 
329 	for (i = 0; i < buf->nent; i++) {
330 		cqe = get_cqe(vcq, i);
331 		cqe64 = cqe;
332 		cqe64->op_own = MLX5_CQE_INVALID << 4;
333 	}
334 }
335 
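/* Return the CQE at index n if it is owned by software, i.e. it carries a
 * valid opcode and its ownership bit matches the current pass over the CQ
 * ring; return NULL otherwise.
 */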
336 static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n)
337 {
338 	struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1));
339 
340 	if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
341 	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe)))
342 		return cqe64;
343 
344 	return NULL;
345 }
346 
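/* Advance the receive queue head by n entries and publish the new head
 * through the doorbell record.
 */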
347 static void rx_post(struct mlx5_vdpa_qp *vqp, int n)
348 {
349 	vqp->head += n;
350 	vqp->db.db[0] = cpu_to_be32(vqp->head);
351 }
352 
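/* Fill the create_qp command mailbox. The firmware-owned QP needs only a
 * minimal context; the driver-owned QP gets a full RC, receive-only context
 * bound to the virtqueue's CQ.
 */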
353 static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in,
354 		       struct mlx5_vdpa_virtqueue *mvq, u32 num_ent)
355 {
356 	struct mlx5_vdpa_qp *vqp;
357 	__be64 *pas;
358 	void *qpc;
359 
360 	vqp = fw ? &mvq->fwqp : &mvq->vqqp;
361 	MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid);
362 	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
363 	if (vqp->fw) {
364 		/* Firmware QP is allocated by the driver for the firmware's
365 		 * use, so we can skip some of the params as they will be chosen by the firmware
366 		 */
367 		qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
368 		MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
369 		MLX5_SET(qpc, qpc, no_sq, 1);
370 		return;
371 	}
372 
373 	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
374 	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
375 	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
376 	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
377 	MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index);
378 	MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
379 	MLX5_SET(qpc, qpc, no_sq, 1);
380 	MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn);
381 	MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent));
382 	MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
383 	pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas);
384 	mlx5_fill_page_frag_array(&vqp->frag_buf, pas);
385 }
386 
387 static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent)
388 {
389 	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev,
390 					num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf,
391 					ndev->mvdev.mdev->priv.numa_node);
392 }
393 
394 static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
395 {
396 	mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf);
397 }
398 
399 static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
400 		     struct mlx5_vdpa_qp *vqp)
401 {
402 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
403 	int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
404 	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
405 	void *qpc;
406 	void *in;
407 	int err;
408 
409 	if (!vqp->fw) {
410 		vqp = &mvq->vqqp;
411 		err = rq_buf_alloc(ndev, vqp, mvq->num_ent);
412 		if (err)
413 			return err;
414 
415 		err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db);
416 		if (err)
417 			goto err_db;
418 		inlen += vqp->frag_buf.npages * sizeof(__be64);
419 	}
420 
421 	in = kzalloc(inlen, GFP_KERNEL);
422 	if (!in) {
423 		err = -ENOMEM;
424 		goto err_kzalloc;
425 	}
426 
427 	qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent);
428 	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
429 	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
430 	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
431 	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
432 	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
433 	if (!vqp->fw)
434 		MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma);
435 	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
436 	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
437 	kfree(in);
438 	if (err)
439 		goto err_kzalloc;
440 
441 	vqp->mqp.uid = ndev->mvdev.res.uid;
442 	vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);
443 
444 	if (!vqp->fw)
445 		rx_post(vqp, mvq->num_ent);
446 
447 	return 0;
448 
449 err_kzalloc:
450 	if (!vqp->fw)
451 		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
452 err_db:
453 	if (!vqp->fw)
454 		rq_buf_free(ndev, vqp);
455 
456 	return err;
457 }
458 
459 static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
460 {
461 	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
462 
463 	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
464 	MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
465 	MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
466 	if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
467 		mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn);
468 	if (!vqp->fw) {
469 		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
470 		rq_buf_free(ndev, vqp);
471 	}
472 }
473 
474 static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
475 {
476 	return get_sw_cqe(cq, cq->mcq.cons_index);
477 }
478 
479 static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
480 {
481 	struct mlx5_cqe64 *cqe64;
482 
483 	cqe64 = next_cqe_sw(vcq);
484 	if (!cqe64)
485 		return -EAGAIN;
486 
487 	vcq->mcq.cons_index++;
488 	return 0;
489 }
490 
491 static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
492 {
493 	struct mlx5_vdpa_net *ndev = mvq->ndev;
494 	struct vdpa_callback *event_cb;
495 
496 	event_cb = &ndev->event_cbs[mvq->index];
497 	mlx5_cq_set_ci(&mvq->cq.mcq);
498 
499 	/* make sure the CQ consumer update is visible to the hardware before
500 	 * updating the RX doorbell record.
501 	 */
502 	dma_wmb();
503 	rx_post(&mvq->vqqp, num);
504 	if (event_cb->callback)
505 		event_cb->callback(event_cb->private);
506 }
507 
508 static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
509 {
510 	struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
511 	struct mlx5_vdpa_net *ndev = mvq->ndev;
512 	void __iomem *uar_page = ndev->mvdev.res.uar->map;
513 	int num = 0;
514 
515 	while (!mlx5_vdpa_poll_one(&mvq->cq)) {
516 		num++;
517 		if (num > mvq->num_ent / 2) {
518 			/* If completions keep coming while we poll, we want to
519 			 * let the hardware know that we consumed them by
520 			 * updating the doorbell record. We also let the vdpa core
521 			 * know about this so it passes it on to the virtio driver
522 			 * in the guest.
523 			 */
524 			mlx5_vdpa_handle_completions(mvq, num);
525 			num = 0;
526 		}
527 	}
528 
529 	if (num)
530 		mlx5_vdpa_handle_completions(mvq, num);
531 
532 	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
533 }
534 
535 static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
536 {
537 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
538 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
539 	void __iomem *uar_page = ndev->mvdev.res.uar->map;
540 	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
541 	struct mlx5_vdpa_cq *vcq = &mvq->cq;
542 	__be64 *pas;
543 	int inlen;
544 	void *cqc;
545 	void *in;
546 	int err;
547 	int eqn;
548 
549 	err = mlx5_db_alloc(mdev, &vcq->db);
550 	if (err)
551 		return err;
552 
553 	vcq->mcq.set_ci_db = vcq->db.db;
554 	vcq->mcq.arm_db = vcq->db.db + 1;
555 	vcq->mcq.cqe_sz = 64;
556 
557 	err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
558 	if (err)
559 		goto err_db;
560 
561 	cq_frag_buf_init(vcq, &vcq->buf);
562 
563 	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
564 		MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages;
565 	in = kzalloc(inlen, GFP_KERNEL);
566 	if (!in) {
567 		err = -ENOMEM;
568 		goto err_vzalloc;
569 	}
570 
571 	MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid);
572 	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
573 	mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas);
574 
575 	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
576 	MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
577 
578 	/* Use vector 0 by default. Consider adding code to choose the least
579 	 * used vector.
580 	 */
581 	err = mlx5_comp_eqn_get(mdev, 0, &eqn);
582 	if (err)
583 		goto err_vec;
584 
585 	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
586 	MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
587 	MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
588 	MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
589 	MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);
590 
591 	err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
592 	if (err)
593 		goto err_vec;
594 
595 	vcq->mcq.comp = mlx5_vdpa_cq_comp;
596 	vcq->cqe = num_ent;
597 	vcq->mcq.set_ci_db = vcq->db.db;
598 	vcq->mcq.arm_db = vcq->db.db + 1;
599 	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
600 	kfree(in);
601 	return 0;
602 
603 err_vec:
604 	kfree(in);
605 err_vzalloc:
606 	cq_frag_buf_free(ndev, &vcq->buf);
607 err_db:
608 	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
609 	return err;
610 }
611 
612 static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
613 {
614 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
615 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
616 	struct mlx5_vdpa_cq *vcq = &mvq->cq;
617 
618 	if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) {
619 		mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn);
620 		return;
621 	}
622 	cq_frag_buf_free(ndev, &vcq->buf);
623 	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
624 }
625 
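/* Query the device's VDPA emulation capabilities for the umem buffer sizing
 * parameters. Each umem size is later computed as param_a * queue_size +
 * param_b (see set_umem_size()).
 */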
626 static int read_umem_params(struct mlx5_vdpa_net *ndev)
627 {
628 	u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {};
629 	u16 opmod = (MLX5_CAP_VDPA_EMULATION << 1) | (HCA_CAP_OPMOD_GET_CUR & 0x01);
630 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
631 	int out_size;
632 	void *caps;
633 	void *out;
634 	int err;
635 
636 	out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out);
637 	out = kzalloc(out_size, GFP_KERNEL);
638 	if (!out)
639 		return -ENOMEM;
640 
641 	MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
642 	MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
643 	err = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out);
644 	if (err) {
645 		mlx5_vdpa_warn(&ndev->mvdev,
646 			"Failed reading vdpa umem capabilities with err %d\n", err);
647 		goto out;
648 	}
649 
650 	caps = MLX5_ADDR_OF(query_hca_cap_out, out, capability);
651 
652 	ndev->umem_1_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_a);
653 	ndev->umem_1_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_b);
654 
655 	ndev->umem_2_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_a);
656 	ndev->umem_2_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_b);
657 
658 	ndev->umem_3_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_a);
659 	ndev->umem_3_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_b);
660 
661 out:
662 	kfree(out);
663 	return 0;
664 }
665 
666 static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
667 			  struct mlx5_vdpa_umem **umemp)
668 {
669 	u32 p_a;
670 	u32 p_b;
671 
672 	switch (num) {
673 	case 1:
674 		p_a = ndev->umem_1_buffer_param_a;
675 		p_b = ndev->umem_1_buffer_param_b;
676 		*umemp = &mvq->umem1;
677 		break;
678 	case 2:
679 		p_a = ndev->umem_2_buffer_param_a;
680 		p_b = ndev->umem_2_buffer_param_b;
681 		*umemp = &mvq->umem2;
682 		break;
683 	case 3:
684 		p_a = ndev->umem_3_buffer_param_a;
685 		p_b = ndev->umem_3_buffer_param_b;
686 		*umemp = &mvq->umem3;
687 		break;
688 	}
689 
690 	(*umemp)->size = p_a * mvq->num_ent + p_b;
691 }
692 
693 static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem)
694 {
695 	mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf);
696 }
697 
698 static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
699 {
700 	int inlen;
701 	u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {};
702 	void *um;
703 	void *in;
704 	int err;
705 	__be64 *pas;
706 	struct mlx5_vdpa_umem *umem;
707 
708 	set_umem_size(ndev, mvq, num, &umem);
709 	err = umem_frag_buf_alloc(ndev, umem, umem->size);
710 	if (err)
711 		return err;
712 
713 	inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages;
714 
715 	in = kzalloc(inlen, GFP_KERNEL);
716 	if (!in) {
717 		err = -ENOMEM;
718 		goto err_in;
719 	}
720 
721 	MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM);
722 	MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid);
723 	um = MLX5_ADDR_OF(create_umem_in, in, umem);
724 	MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
725 	MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages);
726 
727 	pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]);
728 	mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW);
729 
730 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
731 	if (err) {
732 		mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err);
733 		goto err_cmd;
734 	}
735 
736 	kfree(in);
737 	umem->id = MLX5_GET(create_umem_out, out, umem_id);
738 
739 	return 0;
740 
741 err_cmd:
742 	kfree(in);
743 err_in:
744 	umem_frag_buf_free(ndev, umem);
745 	return err;
746 }
747 
748 static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
749 {
750 	u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {};
751 	u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {};
752 	struct mlx5_vdpa_umem *umem;
753 
754 	switch (num) {
755 	case 1:
756 		umem = &mvq->umem1;
757 		break;
758 	case 2:
759 		umem = &mvq->umem2;
760 		break;
761 	case 3:
762 		umem = &mvq->umem3;
763 		break;
764 	}
765 
766 	MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM);
767 	MLX5_SET(destroy_umem_in, in, umem_id, umem->id);
768 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
769 		return;
770 
771 	umem_frag_buf_free(ndev, umem);
772 }
773 
774 static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
775 {
776 	int num;
777 	int err;
778 
779 	for (num = 1; num <= 3; num++) {
780 		err = create_umem(ndev, mvq, num);
781 		if (err)
782 			goto err_umem;
783 	}
784 	return 0;
785 
786 err_umem:
787 	for (num--; num > 0; num--)
788 		umem_destroy(ndev, mvq, num);
789 
790 	return err;
791 }
792 
793 static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
794 {
795 	int num;
796 
797 	for (num = 3; num > 0; num--)
798 		umem_destroy(ndev, mvq, num);
799 }
800 
801 static int get_queue_type(struct mlx5_vdpa_net *ndev)
802 {
803 	u32 type_mask;
804 
805 	type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type);
806 
807 	/* prefer split queue */
808 	if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)
809 		return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT;
810 
811 	WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED));
812 
813 	return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED;
814 }
815 
816 static bool vq_is_tx(u16 idx)
817 {
818 	return idx % 2;
819 }
820 
821 enum {
822 	MLX5_VIRTIO_NET_F_MRG_RXBUF = 2,
823 	MLX5_VIRTIO_NET_F_HOST_ECN = 4,
824 	MLX5_VIRTIO_NET_F_GUEST_ECN = 6,
825 	MLX5_VIRTIO_NET_F_GUEST_TSO6 = 7,
826 	MLX5_VIRTIO_NET_F_GUEST_TSO4 = 8,
827 	MLX5_VIRTIO_NET_F_GUEST_CSUM = 9,
828 	MLX5_VIRTIO_NET_F_CSUM = 10,
829 	MLX5_VIRTIO_NET_F_HOST_TSO6 = 11,
830 	MLX5_VIRTIO_NET_F_HOST_TSO4 = 12,
831 };
832 
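/* Translate negotiated virtio-net feature bits into the bit positions used
 * by the virtio_net_q object feature mask (see the MLX5_VIRTIO_NET_F_* enum
 * above).
 */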
833 static u16 get_features(u64 features)
834 {
835 	return (!!(features & BIT_ULL(VIRTIO_NET_F_MRG_RXBUF)) << MLX5_VIRTIO_NET_F_MRG_RXBUF) |
836 	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_ECN)) << MLX5_VIRTIO_NET_F_HOST_ECN) |
837 	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_ECN)) << MLX5_VIRTIO_NET_F_GUEST_ECN) |
838 	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO6)) << MLX5_VIRTIO_NET_F_GUEST_TSO6) |
839 	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO4)) << MLX5_VIRTIO_NET_F_GUEST_TSO4) |
840 	       (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << MLX5_VIRTIO_NET_F_CSUM) |
841 	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << MLX5_VIRTIO_NET_F_HOST_TSO6) |
842 	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << MLX5_VIRTIO_NET_F_HOST_TSO4);
843 }
844 
845 static bool counters_supported(const struct mlx5_vdpa_dev *mvdev)
846 {
847 	return MLX5_CAP_GEN_64(mvdev->mdev, general_obj_types) &
848 	       BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
849 }
850 
851 static bool msix_mode_supported(struct mlx5_vdpa_dev *mvdev)
852 {
853 	return MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, event_mode) &
854 		(1 << MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE) &&
855 		pci_msix_can_alloc_dyn(mvdev->mdev->pdev);
856 }
857 
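/* Create the VIRTIO_NET_Q object backing a virtqueue: allocate its umems and
 * wire in the ring addresses, the memory key of the mapped guest memory, the
 * event channel (MSI-X vector or firmware QP) and, if supported, the counter
 * set.
 */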
858 static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
859 {
860 	int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
861 	u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
862 	void *obj_context;
863 	u16 mlx_features;
864 	void *cmd_hdr;
865 	void *vq_ctx;
866 	void *in;
867 	int err;
868 
869 	err = umems_create(ndev, mvq);
870 	if (err)
871 		return err;
872 
873 	in = kzalloc(inlen, GFP_KERNEL);
874 	if (!in) {
875 		err = -ENOMEM;
876 		goto err_alloc;
877 	}
878 
879 	mlx_features = get_features(ndev->mvdev.actual_features);
880 	cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);
881 
882 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
883 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
884 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
885 
886 	obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);
887 	MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
888 	MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
889 	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
890 		 mlx_features >> 3);
891 	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0,
892 		 mlx_features & 7);
893 	vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
894 	MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));
895 
896 	if (vq_is_tx(mvq->index))
897 		MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);
898 
899 	if (mvq->map.virq) {
900 		MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE);
901 		MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->map.index);
902 	} else {
903 		MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
904 		MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
905 	}
906 
907 	MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
908 	MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
909 	MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
910 		 !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
911 	MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
912 	MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
913 	MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
914 	MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, ndev->mvdev.mr.mkey);
915 	MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
916 	MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
917 	MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
918 	MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size);
919 	MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
920 	MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
921 	MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
922 	if (counters_supported(&ndev->mvdev))
923 		MLX5_SET(virtio_q, vq_ctx, counter_set_id, mvq->counter_set_id);
924 
925 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
926 	if (err)
927 		goto err_cmd;
928 
929 	mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
930 	kfree(in);
931 	mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
932 
933 	return 0;
934 
935 err_cmd:
936 	kfree(in);
937 err_alloc:
938 	umems_destroy(ndev, mvq);
939 	return err;
940 }
941 
942 static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
943 {
944 	u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {};
945 	u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {};
946 
947 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode,
948 		 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
949 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id);
950 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid);
951 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type,
952 		 MLX5_OBJ_TYPE_VIRTIO_NET_Q);
953 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) {
954 		mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
955 		return;
956 	}
957 	mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
958 	umems_destroy(ndev, mvq);
959 }
960 
961 static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
962 {
963 	return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn;
964 }
965 
966 static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
967 {
968 	return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn;
969 }
970 
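/* Allocate and fill the command mailboxes for the requested QP state
 * transition. On failure both *in and *out are set to NULL, which the caller
 * treats as -ENOMEM.
 */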
971 static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out,
972 			int *outlen, u32 qpn, u32 rqpn)
973 {
974 	void *qpc;
975 	void *pp;
976 
977 	switch (cmd) {
978 	case MLX5_CMD_OP_2RST_QP:
979 		*inlen = MLX5_ST_SZ_BYTES(qp_2rst_in);
980 		*outlen = MLX5_ST_SZ_BYTES(qp_2rst_out);
981 		*in = kzalloc(*inlen, GFP_KERNEL);
982 		*out = kzalloc(*outlen, GFP_KERNEL);
983 		if (!*in || !*out)
984 			goto outerr;
985 
986 		MLX5_SET(qp_2rst_in, *in, opcode, cmd);
987 		MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid);
988 		MLX5_SET(qp_2rst_in, *in, qpn, qpn);
989 		break;
990 	case MLX5_CMD_OP_RST2INIT_QP:
991 		*inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in);
992 		*outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out);
993 		*in = kzalloc(*inlen, GFP_KERNEL);
994 		*out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL);
995 		if (!*in || !*out)
996 			goto outerr;
997 
998 		MLX5_SET(rst2init_qp_in, *in, opcode, cmd);
999 		MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid);
1000 		MLX5_SET(rst2init_qp_in, *in, qpn, qpn);
1001 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
1002 		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
1003 		MLX5_SET(qpc, qpc, rwe, 1);
1004 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1005 		MLX5_SET(ads, pp, vhca_port_num, 1);
1006 		break;
1007 	case MLX5_CMD_OP_INIT2RTR_QP:
1008 		*inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in);
1009 		*outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out);
1010 		*in = kzalloc(*inlen, GFP_KERNEL);
1011 		*out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL);
1012 		if (!*in || !*out)
1013 			goto outerr;
1014 
1015 		MLX5_SET(init2rtr_qp_in, *in, opcode, cmd);
1016 		MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid);
1017 		MLX5_SET(init2rtr_qp_in, *in, qpn, qpn);
1018 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
1019 		MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
1020 		MLX5_SET(qpc, qpc, log_msg_max, 30);
1021 		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
1022 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1023 		MLX5_SET(ads, pp, fl, 1);
1024 		break;
1025 	case MLX5_CMD_OP_RTR2RTS_QP:
1026 		*inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in);
1027 		*outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out);
1028 		*in = kzalloc(*inlen, GFP_KERNEL);
1029 		*out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL);
1030 		if (!*in || !*out)
1031 			goto outerr;
1032 
1033 		MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd);
1034 		MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid);
1035 		MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn);
1036 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
1037 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1038 		MLX5_SET(ads, pp, ack_timeout, 14);
1039 		MLX5_SET(qpc, qpc, retry_count, 7);
1040 		MLX5_SET(qpc, qpc, rnr_retry, 7);
1041 		break;
1042 	default:
1043 		goto outerr_nullify;
1044 	}
1045 
1046 	return;
1047 
1048 outerr:
1049 	kfree(*in);
1050 	kfree(*out);
1051 outerr_nullify:
1052 	*in = NULL;
1053 	*out = NULL;
1054 }
1055 
1056 static void free_inout(void *in, void *out)
1057 {
1058 	kfree(in);
1059 	kfree(out);
1060 }
1061 
1062 /* Two QPs are used by each virtqueue. One is used by the driver and one by
1063  * firmware. The fw argument indicates whether the QP being modified is the one
1064  * used by the firmware.
1065  */
1066 static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd)
1067 {
1068 	int outlen;
1069 	int inlen;
1070 	void *out;
1071 	void *in;
1072 	int err;
1073 
1074 	alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw));
1075 	if (!in || !out)
1076 		return -ENOMEM;
1077 
1078 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen);
1079 	free_inout(in, out);
1080 	return err;
1081 }
1082 
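/* Walk both QPs of a virtqueue through the RST -> INIT -> RTR transitions
 * (and the firmware QP on to RTS) so the two ends of the RC notification
 * channel are connected to each other.
 */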
1083 static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1084 {
1085 	int err;
1086 
1087 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP);
1088 	if (err)
1089 		return err;
1090 
1091 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP);
1092 	if (err)
1093 		return err;
1094 
1095 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP);
1096 	if (err)
1097 		return err;
1098 
1099 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP);
1100 	if (err)
1101 		return err;
1102 
1103 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP);
1104 	if (err)
1105 		return err;
1106 
1107 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP);
1108 	if (err)
1109 		return err;
1110 
1111 	return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP);
1112 }
1113 
1114 struct mlx5_virtq_attr {
1115 	u8 state;
1116 	u16 available_index;
1117 	u16 used_index;
1118 };
1119 
1120 static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
1121 			   struct mlx5_virtq_attr *attr)
1122 {
1123 	int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out);
1124 	u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {};
1125 	void *out;
1126 	void *obj_context;
1127 	void *cmd_hdr;
1128 	int err;
1129 
1130 	out = kzalloc(outlen, GFP_KERNEL);
1131 	if (!out)
1132 		return -ENOMEM;
1133 
1134 	cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1135 
1136 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
1137 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1138 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1139 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1140 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen);
1141 	if (err)
1142 		goto err_cmd;
1143 
1144 	obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context);
1145 	memset(attr, 0, sizeof(*attr));
1146 	attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
1147 	attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);
1148 	attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index);
1149 	kfree(out);
1150 	return 0;
1151 
1152 err_cmd:
1153 	kfree(out);
1154 	return err;
1155 }
1156 
1157 static bool is_valid_state_change(int oldstate, int newstate)
1158 {
1159 	switch (oldstate) {
1160 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
1161 		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
1162 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
1163 		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
1164 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
1165 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
1166 	default:
1167 		return false;
1168 	}
1169 }
1170 
1171 static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
1172 {
1173 	int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
1174 	u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {};
1175 	void *obj_context;
1176 	void *cmd_hdr;
1177 	void *in;
1178 	int err;
1179 
1180 	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
1181 		return 0;
1182 
1183 	if (!is_valid_state_change(mvq->fw_state, state))
1184 		return -EINVAL;
1185 
1186 	in = kzalloc(inlen, GFP_KERNEL);
1187 	if (!in)
1188 		return -ENOMEM;
1189 
1190 	cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1191 
1192 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
1193 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1194 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1195 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1196 
1197 	obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context);
1198 	MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select,
1199 		   MLX5_VIRTQ_MODIFY_MASK_STATE);
1200 	MLX5_SET(virtio_net_q_object, obj_context, state, state);
1201 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
1202 	kfree(in);
1203 	if (!err)
1204 		mvq->fw_state = state;
1205 
1206 	return err;
1207 }
1208 
1209 static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1210 {
1211 	u32 in[MLX5_ST_SZ_DW(create_virtio_q_counters_in)] = {};
1212 	u32 out[MLX5_ST_SZ_DW(create_virtio_q_counters_out)] = {};
1213 	void *cmd_hdr;
1214 	int err;
1215 
1216 	if (!counters_supported(&ndev->mvdev))
1217 		return 0;
1218 
1219 	cmd_hdr = MLX5_ADDR_OF(create_virtio_q_counters_in, in, hdr);
1220 
1221 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
1222 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
1223 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1224 
1225 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
1226 	if (err)
1227 		return err;
1228 
1229 	mvq->counter_set_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
1230 
1231 	return 0;
1232 }
1233 
1234 static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1235 {
1236 	u32 in[MLX5_ST_SZ_DW(destroy_virtio_q_counters_in)] = {};
1237 	u32 out[MLX5_ST_SZ_DW(destroy_virtio_q_counters_out)] = {};
1238 
1239 	if (!counters_supported(&ndev->mvdev))
1240 		return;
1241 
1242 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
1243 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_id, mvq->counter_set_id);
1244 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.uid, ndev->mvdev.res.uid);
1245 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
1246 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
1247 		mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id);
1248 }
1249 
1250 static irqreturn_t mlx5_vdpa_int_handler(int irq, void *priv)
1251 {
1252 	struct vdpa_callback *cb = priv;
1253 
1254 	if (cb->callback)
1255 		return cb->callback(cb->private);
1256 
1257 	return IRQ_HANDLED;
1258 }
1259 
1260 static void alloc_vector(struct mlx5_vdpa_net *ndev,
1261 			 struct mlx5_vdpa_virtqueue *mvq)
1262 {
1263 	struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
1264 	struct mlx5_vdpa_irq_pool_entry *ent;
1265 	int err;
1266 	int i;
1267 
1268 	for (i = 0; i < irqp->num_ent; i++) {
1269 		ent = &irqp->entries[i];
1270 		if (!ent->used) {
1271 			snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
1272 				 dev_name(&ndev->mvdev.vdev.dev), mvq->index);
1273 			ent->dev_id = &ndev->event_cbs[mvq->index];
1274 			err = request_irq(ent->map.virq, mlx5_vdpa_int_handler, 0,
1275 					  ent->name, ent->dev_id);
1276 			if (err)
1277 				return;
1278 
1279 			ent->used = true;
1280 			mvq->map = ent->map;
1281 			return;
1282 		}
1283 	}
1284 }
1285 
1286 static void dealloc_vector(struct mlx5_vdpa_net *ndev,
1287 			   struct mlx5_vdpa_virtqueue *mvq)
1288 {
1289 	struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
1290 	int i;
1291 
1292 	for (i = 0; i < irqp->num_ent; i++)
1293 		if (mvq->map.virq == irqp->entries[i].map.virq) {
1294 			free_irq(mvq->map.virq, irqp->entries[i].dev_id);
1295 			irqp->entries[i].used = false;
1296 			return;
1297 		}
1298 }
1299 
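/* Create the hardware resources backing a virtqueue: the completion queue,
 * the pair of QPs forming the notification channel, the counter set, an
 * interrupt vector if one is available, and finally the virtqueue object
 * itself, which is moved to the ready state if requested.
 */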
1300 static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1301 {
1302 	u16 idx = mvq->index;
1303 	int err;
1304 
1305 	if (!mvq->num_ent)
1306 		return 0;
1307 
1308 	if (mvq->initialized)
1309 		return 0;
1310 
1311 	err = cq_create(ndev, idx, mvq->num_ent);
1312 	if (err)
1313 		return err;
1314 
1315 	err = qp_create(ndev, mvq, &mvq->fwqp);
1316 	if (err)
1317 		goto err_fwqp;
1318 
1319 	err = qp_create(ndev, mvq, &mvq->vqqp);
1320 	if (err)
1321 		goto err_vqqp;
1322 
1323 	err = connect_qps(ndev, mvq);
1324 	if (err)
1325 		goto err_connect;
1326 
1327 	err = counter_set_alloc(ndev, mvq);
1328 	if (err)
1329 		goto err_connect;
1330 
1331 	alloc_vector(ndev, mvq);
1332 	err = create_virtqueue(ndev, mvq);
1333 	if (err)
1334 		goto err_vq;
1335 
1336 	if (mvq->ready) {
1337 		err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
1338 		if (err) {
1339 			mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n",
1340 				       idx, err);
1341 			goto err_modify;
1342 		}
1343 	}
1344 
1345 	mvq->initialized = true;
1346 	return 0;
1347 
1348 err_modify:
1349 	destroy_virtqueue(ndev, mvq);
1350 err_vq:
1351 	dealloc_vector(ndev, mvq);
1352 	counter_set_dealloc(ndev, mvq);
1353 err_connect:
1354 	qp_destroy(ndev, &mvq->vqqp);
1355 err_vqqp:
1356 	qp_destroy(ndev, &mvq->fwqp);
1357 err_fwqp:
1358 	cq_destroy(ndev, idx);
1359 	return err;
1360 }
1361 
1362 static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1363 {
1364 	struct mlx5_virtq_attr attr;
1365 
1366 	if (!mvq->initialized)
1367 		return;
1368 
1369 	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
1370 		return;
1371 
1372 	if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND))
1373 		mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n");
1374 
1375 	if (query_virtqueue(ndev, mvq, &attr)) {
1376 		mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n");
1377 		return;
1378 	}
1379 	mvq->avail_idx = attr.available_index;
1380 	mvq->used_idx = attr.used_index;
1381 }
1382 
1383 static void suspend_vqs(struct mlx5_vdpa_net *ndev)
1384 {
1385 	int i;
1386 
1387 	for (i = 0; i < ndev->mvdev.max_vqs; i++)
1388 		suspend_vq(ndev, &ndev->vqs[i]);
1389 }
1390 
1391 static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1392 {
1393 	if (!mvq->initialized)
1394 		return;
1395 
1396 	suspend_vq(ndev, mvq);
1397 	destroy_virtqueue(ndev, mvq);
1398 	dealloc_vector(ndev, mvq);
1399 	counter_set_dealloc(ndev, mvq);
1400 	qp_destroy(ndev, &mvq->vqqp);
1401 	qp_destroy(ndev, &mvq->fwqp);
1402 	cq_destroy(ndev, mvq->index);
1403 	mvq->initialized = false;
1404 }
1405 
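/* Create the RQ table used to spread received traffic. Only receive
 * virtqueues (even indices) are listed; the actual size is derived from the
 * number of currently enabled queue pairs.
 */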
1406 static int create_rqt(struct mlx5_vdpa_net *ndev)
1407 {
1408 	int rqt_table_size = roundup_pow_of_two(ndev->rqt_size);
1409 	int act_sz = roundup_pow_of_two(ndev->cur_num_vqs / 2);
1410 	__be32 *list;
1411 	void *rqtc;
1412 	int inlen;
1413 	void *in;
1414 	int i, j;
1415 	int err;
1416 
1417 	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + rqt_table_size * MLX5_ST_SZ_BYTES(rq_num);
1418 	in = kzalloc(inlen, GFP_KERNEL);
1419 	if (!in)
1420 		return -ENOMEM;
1421 
1422 	MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid);
1423 	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
1424 
1425 	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1426 	MLX5_SET(rqtc, rqtc, rqt_max_size, rqt_table_size);
1427 	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1428 	for (i = 0, j = 0; i < act_sz; i++, j += 2)
1429 		list[i] = cpu_to_be32(ndev->vqs[j % ndev->cur_num_vqs].virtq_id);
1430 
1431 	MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
1432 	err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
1433 	kfree(in);
1434 	if (err)
1435 		return err;
1436 
1437 	return 0;
1438 }
1439 
1440 #define MLX5_MODIFY_RQT_NUM_RQS ((u64)1)
1441 
1442 static int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
1443 {
1444 	int act_sz = roundup_pow_of_two(num / 2);
1445 	__be32 *list;
1446 	void *rqtc;
1447 	int inlen;
1448 	void *in;
1449 	int i, j;
1450 	int err;
1451 
1452 	inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + act_sz * MLX5_ST_SZ_BYTES(rq_num);
1453 	in = kzalloc(inlen, GFP_KERNEL);
1454 	if (!in)
1455 		return -ENOMEM;
1456 
1457 	MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid);
1458 	MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS);
1459 	rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
1460 	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1461 
1462 	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1463 	for (i = 0, j = 0; i < act_sz; i++, j = j + 2)
1464 		list[i] = cpu_to_be32(ndev->vqs[j % num].virtq_id);
1465 
1466 	MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
1467 	err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn);
1468 	kfree(in);
1469 	if (err)
1470 		return err;
1471 
1472 	return 0;
1473 }
1474 
1475 static void destroy_rqt(struct mlx5_vdpa_net *ndev)
1476 {
1477 	mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
1478 }
1479 
1480 static int create_tir(struct mlx5_vdpa_net *ndev)
1481 {
1482 #define HASH_IP_L4PORTS                                                                            \
1483 	(MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT |  \
1484 	 MLX5_HASH_FIELD_SEL_L4_DPORT)
1485 	static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
1486 						   0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
1487 						   0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
1488 						   0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
1489 						   0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a };
1490 	void *rss_key;
1491 	void *outer;
1492 	void *tirc;
1493 	void *in;
1494 	int err;
1495 
1496 	in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL);
1497 	if (!in)
1498 		return -ENOMEM;
1499 
1500 	MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid);
1501 	tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
1502 	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
1503 
1504 	MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
1505 	MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
1506 	rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
1507 	memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key));
1508 
1509 	outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
1510 	MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4);
1511 	MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP);
1512 	MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS);
1513 
1514 	MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn);
1515 	MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn);
1516 
1517 	err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn);
1518 	kfree(in);
1519 	if (err)
1520 		return err;
1521 
1522 	mlx5_vdpa_add_tirn(ndev);
1523 	return err;
1524 }
1525 
1526 static void destroy_tir(struct mlx5_vdpa_net *ndev)
1527 {
1528 	mlx5_vdpa_remove_tirn(ndev);
1529 	mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn);
1530 }
1531 
1532 #define MAX_STEERING_ENT 0x8000
1533 #define MAX_STEERING_GROUPS 2
1534 
1535 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1536        #define NUM_DESTS 2
1537 #else
1538        #define NUM_DESTS 1
1539 #endif
1540 
1541 static int add_steering_counters(struct mlx5_vdpa_net *ndev,
1542 				 struct macvlan_node *node,
1543 				 struct mlx5_flow_act *flow_act,
1544 				 struct mlx5_flow_destination *dests)
1545 {
1546 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1547 	int err;
1548 
1549 	node->ucast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
1550 	if (IS_ERR(node->ucast_counter.counter))
1551 		return PTR_ERR(node->ucast_counter.counter);
1552 
1553 	node->mcast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
1554 	if (IS_ERR(node->mcast_counter.counter)) {
1555 		err = PTR_ERR(node->mcast_counter.counter);
1556 		goto err_mcast_counter;
1557 	}
1558 
1559 	dests[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1560 	flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
1561 	return 0;
1562 
1563 err_mcast_counter:
1564 	mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
1565 	return err;
1566 #else
1567 	return 0;
1568 #endif
1569 }
1570 
1571 static void remove_steering_counters(struct mlx5_vdpa_net *ndev,
1572 				     struct macvlan_node *node)
1573 {
1574 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1575 	mlx5_fc_destroy(ndev->mvdev.mdev, node->mcast_counter.counter);
1576 	mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
1577 #endif
1578 }
1579 
1580 static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac,
1581 					struct macvlan_node *node)
1582 {
1583 	struct mlx5_flow_destination dests[NUM_DESTS] = {};
1584 	struct mlx5_flow_act flow_act = {};
1585 	struct mlx5_flow_spec *spec;
1586 	void *headers_c;
1587 	void *headers_v;
1588 	u8 *dmac_c;
1589 	u8 *dmac_v;
1590 	int err;
1591 	u16 vid;
1592 
1593 	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1594 	if (!spec)
1595 		return -ENOMEM;
1596 
1597 	vid = key2vid(node->macvlan);
1598 	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
1599 	headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
1600 	headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
1601 	dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16);
1602 	dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16);
1603 	eth_broadcast_addr(dmac_c);
1604 	ether_addr_copy(dmac_v, mac);
1605 	if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)) {
1606 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
1607 		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid);
1608 	}
1609 	if (node->tagged) {
1610 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);
1611 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, vid);
1612 	}
1613 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1614 	dests[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1615 	dests[0].tir_num = ndev->res.tirn;
1616 	err = add_steering_counters(ndev, node, &flow_act, dests);
1617 	if (err)
1618 		goto out_free;
1619 
1620 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1621 	dests[1].counter_id = mlx5_fc_id(node->ucast_counter.counter);
1622 #endif
1623 	node->ucast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
1624 	if (IS_ERR(node->ucast_rule)) {
1625 		err = PTR_ERR(node->ucast_rule);
1626 		goto err_ucast;
1627 	}
1628 
1629 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1630 	dests[1].counter_id = mlx5_fc_id(node->mcast_counter.counter);
1631 #endif
1632 
1633 	memset(dmac_c, 0, ETH_ALEN);
1634 	memset(dmac_v, 0, ETH_ALEN);
1635 	dmac_c[0] = 1;
1636 	dmac_v[0] = 1;
1637 	node->mcast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
1638 	if (IS_ERR(node->mcast_rule)) {
1639 		err = PTR_ERR(node->mcast_rule);
1640 		goto err_mcast;
1641 	}
1642 	kvfree(spec);
1643 	mlx5_vdpa_add_rx_counters(ndev, node);
1644 	return 0;
1645 
1646 err_mcast:
1647 	mlx5_del_flow_rules(node->ucast_rule);
1648 err_ucast:
1649 	remove_steering_counters(ndev, node);
1650 out_free:
1651 	kvfree(spec);
1652 	return err;
1653 }
1654 
1655 static void mlx5_vdpa_del_mac_vlan_rules(struct mlx5_vdpa_net *ndev,
1656 					 struct macvlan_node *node)
1657 {
1658 	mlx5_vdpa_remove_rx_counters(ndev, node);
1659 	mlx5_del_flow_rules(node->ucast_rule);
1660 	mlx5_del_flow_rules(node->mcast_rule);
1661 }
1662 
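/* Pack a VLAN ID (or MLX5V_UNTAGGED for untagged traffic) and a MAC address
 * into the 64-bit key used by the MAC/VLAN hash table.
 */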
1663 static u64 search_val(u8 *mac, u16 vlan, bool tagged)
1664 {
1665 	u64 val;
1666 
1667 	if (!tagged)
1668 		vlan = MLX5V_UNTAGGED;
1669 
1670 	val = (u64)vlan << 48 |
1671 	      (u64)mac[0] << 40 |
1672 	      (u64)mac[1] << 32 |
1673 	      (u64)mac[2] << 24 |
1674 	      (u64)mac[3] << 16 |
1675 	      (u64)mac[4] << 8 |
1676 	      (u64)mac[5];
1677 
1678 	return val;
1679 }
1680 
1681 static struct macvlan_node *mac_vlan_lookup(struct mlx5_vdpa_net *ndev, u64 value)
1682 {
1683 	struct macvlan_node *pos;
1684 	u32 idx;
1685 
1686 	idx = hash_64(value, 8); // tbd 8
1687 	hlist_for_each_entry(pos, &ndev->macvlan_hash[idx], hlist) {
1688 		if (pos->macvlan == value)
1689 			return pos;
1690 	}
1691 	return NULL;
1692 }
1693 
1694 static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vid, bool tagged)
1695 {
1696 	struct macvlan_node *ptr;
1697 	u64 val;
1698 	u32 idx;
1699 	int err;
1700 
1701 	val = search_val(mac, vid, tagged);
1702 	if (mac_vlan_lookup(ndev, val))
1703 		return -EEXIST;
1704 
1705 	ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);
1706 	if (!ptr)
1707 		return -ENOMEM;
1708 
1709 	ptr->tagged = tagged;
1710 	ptr->macvlan = val;
1711 	ptr->ndev = ndev;
1712 	err = mlx5_vdpa_add_mac_vlan_rules(ndev, ndev->config.mac, ptr);
1713 	if (err)
1714 		goto err_add;
1715 
1716 	idx = hash_64(val, 8);
1717 	hlist_add_head(&ptr->hlist, &ndev->macvlan_hash[idx]);
1718 	return 0;
1719 
1720 err_add:
1721 	kfree(ptr);
1722 	return err;
1723 }
1724 
1725 static void mac_vlan_del(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged)
1726 {
1727 	struct macvlan_node *ptr;
1728 
1729 	ptr = mac_vlan_lookup(ndev, search_val(mac, vlan, tagged));
1730 	if (!ptr)
1731 		return;
1732 
1733 	hlist_del(&ptr->hlist);
1734 	mlx5_vdpa_del_mac_vlan_rules(ndev, ptr);
1735 	remove_steering_counters(ndev, ptr);
1736 	kfree(ptr);
1737 }
1738 
1739 static void clear_mac_vlan_table(struct mlx5_vdpa_net *ndev)
1740 {
1741 	struct macvlan_node *pos;
1742 	struct hlist_node *n;
1743 	int i;
1744 
1745 	for (i = 0; i < MLX5V_MACVLAN_SIZE; i++) {
1746 		hlist_for_each_entry_safe(pos, n, &ndev->macvlan_hash[i], hlist) {
1747 			hlist_del(&pos->hlist);
1748 			mlx5_vdpa_del_mac_vlan_rules(ndev, pos);
1749 			remove_steering_counters(ndev, pos);
1750 			kfree(pos);
1751 		}
1752 	}
1753 }
1754 
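/* Create the RX flow table in the bypass namespace and install the
 * initial untagged unicast rule for the configured MAC address.
 * Additional MAC/VLAN rules are added later through mac_vlan_add().
 */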
1755 static int setup_steering(struct mlx5_vdpa_net *ndev)
1756 {
1757 	struct mlx5_flow_table_attr ft_attr = {};
1758 	struct mlx5_flow_namespace *ns;
1759 	int err;
1760 
1761 	ft_attr.max_fte = MAX_STEERING_ENT;
1762 	ft_attr.autogroup.max_num_groups = MAX_STEERING_GROUPS;
1763 
1764 	ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS);
1765 	if (!ns) {
1766 		mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n");
1767 		return -EOPNOTSUPP;
1768 	}
1769 
1770 	ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
1771 	if (IS_ERR(ndev->rxft)) {
1772 		mlx5_vdpa_warn(&ndev->mvdev, "failed to create flow table\n");
1773 		return PTR_ERR(ndev->rxft);
1774 	}
1775 	mlx5_vdpa_add_rx_flow_table(ndev);
1776 
1777 	err = mac_vlan_add(ndev, ndev->config.mac, 0, false);
1778 	if (err)
1779 		goto err_add;
1780 
1781 	return 0;
1782 
1783 err_add:
1784 	mlx5_vdpa_remove_rx_flow_table(ndev);
1785 	mlx5_destroy_flow_table(ndev->rxft);
1786 	return err;
1787 }
1788 
1789 static void teardown_steering(struct mlx5_vdpa_net *ndev)
1790 {
1791 	clear_mac_vlan_table(ndev);
1792 	mlx5_vdpa_remove_rx_flow_table(ndev);
1793 	mlx5_destroy_flow_table(ndev->rxft);
1794 }
1795 
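/* Handle VIRTIO_NET_CTRL_MAC commands coming from the control VQ.
 * Changing the MAC address updates the MPFS table on the physical
 * function and recreates the steering rules; on failure the original
 * address is restored as far as possible.
 */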
1796 static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1797 {
1798 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1799 	struct mlx5_control_vq *cvq = &mvdev->cvq;
1800 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1801 	struct mlx5_core_dev *pfmdev;
1802 	size_t read;
1803 	u8 mac[ETH_ALEN], mac_back[ETH_ALEN];
1804 
1805 	pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
1806 	switch (cmd) {
1807 	case VIRTIO_NET_CTRL_MAC_ADDR_SET:
1808 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN);
1809 		if (read != ETH_ALEN)
1810 			break;
1811 
1812 		if (!memcmp(ndev->config.mac, mac, ETH_ALEN)) {
1813 			status = VIRTIO_NET_OK;
1814 			break;
1815 		}
1816 
1817 		if (is_zero_ether_addr(mac))
1818 			break;
1819 
1820 		if (!is_zero_ether_addr(ndev->config.mac)) {
1821 			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
1822 				mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
1823 					       ndev->config.mac);
1824 				break;
1825 			}
1826 		}
1827 
1828 		if (mlx5_mpfs_add_mac(pfmdev, mac)) {
1829 			mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
1830 				       mac);
1831 			break;
1832 		}
1833 
1834 		/* Back up the original MAC address so it can be restored if
1835 		 * adding the forward rules fails.
1836 		 */
1837 		memcpy(mac_back, ndev->config.mac, ETH_ALEN);
1838 
1839 		memcpy(ndev->config.mac, mac, ETH_ALEN);
1840 
1841 		/* Recreate the flow table entry so that packets are forwarded to the new MAC.
1842 		 */
1843 		mac_vlan_del(ndev, mac_back, 0, false);
1844 
1845 		if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) {
1846 			mlx5_vdpa_warn(mvdev, "failed to insert forward rules, try to restore\n");
1847 
1848 			/* This path is unlikely, but double check anyway. */
1849 			if (is_zero_ether_addr(mac_back)) {
1850 				mlx5_vdpa_warn(mvdev, "restore mac failed: Original MAC is zero\n");
1851 				break;
1852 			}
1853 
1854 			/* Try to restore the original MAC address to the MPFS table
1855 			 * and the forward rule entry.
1856 			 */
1857 			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
1858 				mlx5_vdpa_warn(mvdev, "restore mac failed: delete MAC %pM from MPFS table failed\n",
1859 					       ndev->config.mac);
1860 			}
1861 
1862 			if (mlx5_mpfs_add_mac(pfmdev, mac_back)) {
1863 				mlx5_vdpa_warn(mvdev, "restore mac failed: insert old MAC %pM into MPFS table failed\n",
1864 					       mac_back);
1865 			}
1866 
1867 			memcpy(ndev->config.mac, mac_back, ETH_ALEN);
1868 
1869 			if (mac_vlan_add(ndev, ndev->config.mac, 0, false))
1870 				mlx5_vdpa_warn(mvdev, "restore forward rules failed: insert forward rules failed\n");
1871 
1872 			break;
1873 		}
1874 
1875 		status = VIRTIO_NET_OK;
1876 		break;
1877 
1878 	default:
1879 		break;
1880 	}
1881 
1882 	return status;
1883 }
1884 
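/* Resize the set of active data virtqueues to 2 * newqps. When
 * shrinking, the RQT is modified first and the excess VQs are torn
 * down; when growing, the new VQs are set up before the RQT is
 * expanded to include them.
 */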
1885 static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps)
1886 {
1887 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1888 	int cur_qps = ndev->cur_num_vqs / 2;
1889 	int err;
1890 	int i;
1891 
1892 	if (cur_qps > newqps) {
1893 		err = modify_rqt(ndev, 2 * newqps);
1894 		if (err)
1895 			return err;
1896 
1897 		for (i = ndev->cur_num_vqs - 1; i >= 2 * newqps; i--)
1898 			teardown_vq(ndev, &ndev->vqs[i]);
1899 
1900 		ndev->cur_num_vqs = 2 * newqps;
1901 	} else {
1902 		ndev->cur_num_vqs = 2 * newqps;
1903 		for (i = cur_qps * 2; i < 2 * newqps; i++) {
1904 			err = setup_vq(ndev, &ndev->vqs[i]);
1905 			if (err)
1906 				goto clean_added;
1907 		}
1908 		err = modify_rqt(ndev, 2 * newqps);
1909 		if (err)
1910 			goto clean_added;
1911 	}
1912 	return 0;
1913 
1914 clean_added:
1915 	for (--i; i >= 2 * cur_qps; --i)
1916 		teardown_vq(ndev, &ndev->vqs[i]);
1917 
1918 	ndev->cur_num_vqs = 2 * cur_qps;
1919 
1920 	return err;
1921 }
1922 
1923 static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1924 {
1925 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1926 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1927 	struct mlx5_control_vq *cvq = &mvdev->cvq;
1928 	struct virtio_net_ctrl_mq mq;
1929 	size_t read;
1930 	u16 newqps;
1931 
1932 	switch (cmd) {
1933 	case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
1934 		/* This mq feature check aligns with the pre-existing userspace
1935 		 * implementation.
1936 		 *
1937 		 * Without it, an untrusted driver could send a multiqueue config
1938 		 * request down to a non-mq device, which may cause the kernel to
1939 		 * panic due to uninitialized resources for the extra vqs. Even with
1940 		 * a well-behaved guest driver, changing the number of vqs on a
1941 		 * non-mq device is not expected to be allowed.
1942 		 */
1943 		if (!MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ))
1944 			break;
1945 
1946 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq));
1947 		if (read != sizeof(mq))
1948 			break;
1949 
1950 		newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs);
1951 		if (newqps < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1952 		    newqps > ndev->rqt_size)
1953 			break;
1954 
1955 		if (ndev->cur_num_vqs == 2 * newqps) {
1956 			status = VIRTIO_NET_OK;
1957 			break;
1958 		}
1959 
1960 		if (!change_num_qps(mvdev, newqps))
1961 			status = VIRTIO_NET_OK;
1962 
1963 		break;
1964 	default:
1965 		break;
1966 	}
1967 
1968 	return status;
1969 }
1970 
1971 static virtio_net_ctrl_ack handle_ctrl_vlan(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1972 {
1973 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1974 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1975 	struct mlx5_control_vq *cvq = &mvdev->cvq;
1976 	__virtio16 vlan;
1977 	size_t read;
1978 	u16 id;
1979 
1980 	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)))
1981 		return status;
1982 
1983 	switch (cmd) {
1984 	case VIRTIO_NET_CTRL_VLAN_ADD:
1985 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
1986 		if (read != sizeof(vlan))
1987 			break;
1988 
1989 		id = mlx5vdpa16_to_cpu(mvdev, vlan);
1990 		if (mac_vlan_add(ndev, ndev->config.mac, id, true))
1991 			break;
1992 
1993 		status = VIRTIO_NET_OK;
1994 		break;
1995 	case VIRTIO_NET_CTRL_VLAN_DEL:
1996 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
1997 		if (read != sizeof(vlan))
1998 			break;
1999 
2000 		id = mlx5vdpa16_to_cpu(mvdev, vlan);
2001 		mac_vlan_del(ndev, ndev->config.mac, id, true);
2002 		status = VIRTIO_NET_OK;
2003 		break;
2004 	default:
2005 		break;
2006 	}
2007 
2008 	return status;
2009 }
2010 
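/* Work handler for control VQ kicks. It pulls one control command,
 * dispatches it by class, pushes the status back to the driver and
 * requeues itself to process any remaining commands. All of this runs
 * under the write side of reslock.
 */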
2011 static void mlx5_cvq_kick_handler(struct work_struct *work)
2012 {
2013 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
2014 	struct virtio_net_ctrl_hdr ctrl;
2015 	struct mlx5_vdpa_wq_ent *wqent;
2016 	struct mlx5_vdpa_dev *mvdev;
2017 	struct mlx5_control_vq *cvq;
2018 	struct mlx5_vdpa_net *ndev;
2019 	size_t read, write;
2020 	int err;
2021 
2022 	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
2023 	mvdev = wqent->mvdev;
2024 	ndev = to_mlx5_vdpa_ndev(mvdev);
2025 	cvq = &mvdev->cvq;
2026 
2027 	down_write(&ndev->reslock);
2028 
2029 	if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
2030 		goto out;
2031 
2032 	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
2033 		goto out;
2034 
2035 	if (!cvq->ready)
2036 		goto out;
2037 
2038 	while (true) {
2039 		err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head,
2040 					   GFP_ATOMIC);
2041 		if (err <= 0)
2042 			break;
2043 
2044 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl));
2045 		if (read != sizeof(ctrl))
2046 			break;
2047 
2048 		cvq->received_desc++;
2049 		switch (ctrl.class) {
2050 		case VIRTIO_NET_CTRL_MAC:
2051 			status = handle_ctrl_mac(mvdev, ctrl.cmd);
2052 			break;
2053 		case VIRTIO_NET_CTRL_MQ:
2054 			status = handle_ctrl_mq(mvdev, ctrl.cmd);
2055 			break;
2056 		case VIRTIO_NET_CTRL_VLAN:
2057 			status = handle_ctrl_vlan(mvdev, ctrl.cmd);
2058 			break;
2059 		default:
2060 			break;
2061 		}
2062 
2063 		/* Make sure data is written before advancing index */
2064 		smp_wmb();
2065 
2066 		write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status));
2067 		vringh_complete_iotlb(&cvq->vring, cvq->head, write);
2068 		vringh_kiov_cleanup(&cvq->riov);
2069 		vringh_kiov_cleanup(&cvq->wiov);
2070 
2071 		if (vringh_need_notify_iotlb(&cvq->vring))
2072 			vringh_notify(&cvq->vring);
2073 
2074 		cvq->completed_desc++;
2075 		queue_work(mvdev->wq, &wqent->work);
2076 		break;
2077 	}
2078 
2079 out:
2080 	up_write(&ndev->reslock);
2081 }
2082 
2083 static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
2084 {
2085 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2086 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2087 	struct mlx5_vdpa_virtqueue *mvq;
2088 
2089 	if (!is_index_valid(mvdev, idx))
2090 		return;
2091 
2092 	if (unlikely(is_ctrl_vq_idx(mvdev, idx))) {
2093 		if (!mvdev->wq || !mvdev->cvq.ready)
2094 			return;
2095 
2096 		queue_work(mvdev->wq, &ndev->cvq_ent.work);
2097 		return;
2098 	}
2099 
2100 	mvq = &ndev->vqs[idx];
2101 	if (unlikely(!mvq->ready))
2102 		return;
2103 
2104 	iowrite16(idx, ndev->mvdev.res.kick_addr);
2105 }
2106 
2107 static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area,
2108 				    u64 driver_area, u64 device_area)
2109 {
2110 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2111 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2112 	struct mlx5_vdpa_virtqueue *mvq;
2113 
2114 	if (!is_index_valid(mvdev, idx))
2115 		return -EINVAL;
2116 
2117 	if (is_ctrl_vq_idx(mvdev, idx)) {
2118 		mvdev->cvq.desc_addr = desc_area;
2119 		mvdev->cvq.device_addr = device_area;
2120 		mvdev->cvq.driver_addr = driver_area;
2121 		return 0;
2122 	}
2123 
2124 	mvq = &ndev->vqs[idx];
2125 	mvq->desc_addr = desc_area;
2126 	mvq->device_addr = device_area;
2127 	mvq->driver_addr = driver_area;
2128 	return 0;
2129 }
2130 
2131 static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
2132 {
2133 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2134 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2135 	struct mlx5_vdpa_virtqueue *mvq;
2136 
2137 	if (!is_index_valid(mvdev, idx))
2138 		return;
2139 
2140 	if (is_ctrl_vq_idx(mvdev, idx)) {
2141 		struct mlx5_control_vq *cvq = &mvdev->cvq;
2142 
2143 		cvq->vring.vring.num = num;
2144 		return;
2145 	}
2146 
2147 	mvq = &ndev->vqs[idx];
2148 	mvq->num_ent = num;
2149 }
2150 
2151 static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb)
2152 {
2153 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2154 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2155 
2156 	ndev->event_cbs[idx] = *cb;
2157 	if (is_ctrl_vq_idx(mvdev, idx))
2158 		mvdev->cvq.event_cb = *cb;
2159 }
2160 
2161 static void mlx5_cvq_notify(struct vringh *vring)
2162 {
2163 	struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring);
2164 
2165 	if (!cvq->event_cb.callback)
2166 		return;
2167 
2168 	cvq->event_cb.callback(cvq->event_cb.private);
2169 }
2170 
2171 static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready)
2172 {
2173 	struct mlx5_control_vq *cvq = &mvdev->cvq;
2174 
2175 	cvq->ready = ready;
2176 	if (!ready)
2177 		return;
2178 
2179 	cvq->vring.notify = mlx5_cvq_notify;
2180 }
2181 
2182 static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
2183 {
2184 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2185 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2186 	struct mlx5_vdpa_virtqueue *mvq;
2187 	int err;
2188 
2189 	if (!mvdev->actual_features)
2190 		return;
2191 
2192 	if (!is_index_valid(mvdev, idx))
2193 		return;
2194 
2195 	if (is_ctrl_vq_idx(mvdev, idx)) {
2196 		set_cvq_ready(mvdev, ready);
2197 		return;
2198 	}
2199 
2200 	mvq = &ndev->vqs[idx];
2201 	if (!ready) {
2202 		suspend_vq(ndev, mvq);
2203 	} else {
2204 		err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
2205 		if (err) {
2206 			mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
2207 			ready = false;
2208 		}
2209 	}
2211 
2212 	mvq->ready = ready;
2213 }
2214 
2215 static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
2216 {
2217 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2218 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2219 
2220 	if (!is_index_valid(mvdev, idx))
2221 		return false;
2222 
2223 	if (is_ctrl_vq_idx(mvdev, idx))
2224 		return mvdev->cvq.ready;
2225 
2226 	return ndev->vqs[idx].ready;
2227 }
2228 
2229 static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
2230 				  const struct vdpa_vq_state *state)
2231 {
2232 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2233 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2234 	struct mlx5_vdpa_virtqueue *mvq;
2235 
2236 	if (!is_index_valid(mvdev, idx))
2237 		return -EINVAL;
2238 
2239 	if (is_ctrl_vq_idx(mvdev, idx)) {
2240 		mvdev->cvq.vring.last_avail_idx = state->split.avail_index;
2241 		return 0;
2242 	}
2243 
2244 	mvq = &ndev->vqs[idx];
2245 	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
2246 		mlx5_vdpa_warn(mvdev, "can't modify available index\n");
2247 		return -EINVAL;
2248 	}
2249 
2250 	mvq->used_idx = state->split.avail_index;
2251 	mvq->avail_idx = state->split.avail_index;
2252 	return 0;
2253 }
2254 
2255 static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state)
2256 {
2257 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2258 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2259 	struct mlx5_vdpa_virtqueue *mvq;
2260 	struct mlx5_virtq_attr attr;
2261 	int err;
2262 
2263 	if (!is_index_valid(mvdev, idx))
2264 		return -EINVAL;
2265 
2266 	if (is_ctrl_vq_idx(mvdev, idx)) {
2267 		state->split.avail_index = mvdev->cvq.vring.last_avail_idx;
2268 		return 0;
2269 	}
2270 
2271 	mvq = &ndev->vqs[idx];
2272 	/* If the virtq object was destroyed, use the value saved at
2273 	 * the last minute of suspend_vq. This caters for userspace
2274 	 * that cares about emulating the index after vq is stopped.
2275 	 */
2276 	if (!mvq->initialized) {
2277 		/* Firmware returns a wrong value for the available index.
2278 		 * Since both values should be identical, we take the value of
2279 		 * used_idx which is reported correctly.
2280 		 */
2281 		state->split.avail_index = mvq->used_idx;
2282 		return 0;
2283 	}
2284 
2285 	err = query_virtqueue(ndev, mvq, &attr);
2286 	if (err) {
2287 		mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n");
2288 		return err;
2289 	}
2290 	state->split.avail_index = attr.used_index;
2291 	return 0;
2292 }
2293 
2294 static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
2295 {
2296 	return PAGE_SIZE;
2297 }
2298 
2299 static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx)
2300 {
2301 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2302 
2303 	if (is_ctrl_vq_idx(mvdev, idx))
2304 		return MLX5_VDPA_CVQ_GROUP;
2305 
2306 	return MLX5_VDPA_DATAVQ_GROUP;
2307 }
2308 
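/* Translate the device_features_bits_mask capability bits reported by
 * the device into the corresponding VIRTIO_NET_F_* feature bits.
 */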
2309 static u64 mlx_to_virtio_features(u16 dev_features)
2310 {
2311 	u64 result = 0;
2312 
2313 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_MRG_RXBUF))
2314 		result |= BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
2315 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_ECN))
2316 		result |= BIT_ULL(VIRTIO_NET_F_HOST_ECN);
2317 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_ECN))
2318 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_ECN);
2319 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO6))
2320 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO6);
2321 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO4))
2322 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO4);
2323 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_CSUM))
2324 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM);
2325 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_CSUM))
2326 		result |= BIT_ULL(VIRTIO_NET_F_CSUM);
2327 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO6))
2328 		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6);
2329 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO4))
2330 		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4);
2331 
2332 	return result;
2333 }
2334 
2335 static u64 get_supported_features(struct mlx5_core_dev *mdev)
2336 {
2337 	u64 mlx_vdpa_features = 0;
2338 	u16 dev_features;
2339 
2340 	dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mdev, device_features_bits_mask);
2341 	mlx_vdpa_features |= mlx_to_virtio_features(dev_features);
2342 	if (MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_version_1_0))
2343 		mlx_vdpa_features |= BIT_ULL(VIRTIO_F_VERSION_1);
2344 	mlx_vdpa_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
2345 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
2346 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
2347 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ);
2348 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS);
2349 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU);
2350 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VLAN);
2351 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MAC);
2352 
2353 	return mlx_vdpa_features;
2354 }
2355 
2356 static u64 mlx5_vdpa_get_device_features(struct vdpa_device *vdev)
2357 {
2358 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2359 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2360 
2361 	print_features(mvdev, ndev->mvdev.mlx_features, false);
2362 	return ndev->mvdev.mlx_features;
2363 }
2364 
2365 static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features)
2366 {
2367 	/* Minimum features to expect */
2368 	if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
2369 		return -EOPNOTSUPP;
2370 
2371 	/* Double check features combination sent down by the driver.
2372 	 * Fail invalid features due to absence of the depended feature.
2373 	 *
2374 	 * Per VIRTIO v1.1 specification, section 5.1.3.1 Feature bit
2375 	 * requirements: "VIRTIO_NET_F_MQ Requires VIRTIO_NET_F_CTRL_VQ".
2376 	 * By failing the invalid features sent down by untrusted drivers,
2377 	 * we're assured the assumption made upon is_index_valid() and
2378 	 * is_ctrl_vq_idx() will not be compromised.
2379 	 */
2380 	if ((features & (BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) ==
2381 	    BIT_ULL(VIRTIO_NET_F_MQ))
2382 		return -EINVAL;
2383 
2384 	return 0;
2385 }
2386 
2387 static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
2388 {
2389 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2390 	int err;
2391 	int i;
2392 
2393 	for (i = 0; i < mvdev->max_vqs; i++) {
2394 		err = setup_vq(ndev, &ndev->vqs[i]);
2395 		if (err)
2396 			goto err_vq;
2397 	}
2398 
2399 	return 0;
2400 
2401 err_vq:
2402 	for (--i; i >= 0; i--)
2403 		teardown_vq(ndev, &ndev->vqs[i]);
2404 
2405 	return err;
2406 }
2407 
2408 static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
2409 {
2410 	struct mlx5_vdpa_virtqueue *mvq;
2411 	int i;
2412 
2413 	for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) {
2414 		mvq = &ndev->vqs[i];
2415 		if (!mvq->initialized)
2416 			continue;
2417 
2418 		teardown_vq(ndev, mvq);
2419 	}
2420 }
2421 
2422 static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
2423 {
2424 	if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) {
2425 		if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) {
2426 			/* MQ supported. CVQ index is right above the last data virtqueue's */
2427 			mvdev->max_idx = mvdev->max_vqs;
2428 		} else {
2429 			/* Only CVQ is supported. Data virtqueues occupy indices 0 and 1;
2430 			 * the CVQ gets index 2.
2431 			 */
2432 			mvdev->max_idx = 2;
2433 		}
2434 	} else {
2435 		/* Two data virtqueues only: one for rx and one for tx */
2436 		mvdev->max_idx = 1;
2437 	}
2438 }
2439 
2440 static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
2441 {
2442 	u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
2443 	u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
2444 	int err;
2445 
2446 	MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE);
2447 	MLX5_SET(query_vport_state_in, in, op_mod, opmod);
2448 	MLX5_SET(query_vport_state_in, in, vport_number, vport);
2449 	if (vport)
2450 		MLX5_SET(query_vport_state_in, in, other_vport, 1);
2451 
2452 	err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
2453 	if (err)
2454 		return 0;
2455 
2456 	return MLX5_GET(query_vport_state_out, out, state);
2457 }
2458 
2459 static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
2460 {
2461 	if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
2462 	    VPORT_STATE_UP)
2463 		return true;
2464 
2465 	return false;
2466 }
2467 
2468 static void update_carrier(struct work_struct *work)
2469 {
2470 	struct mlx5_vdpa_wq_ent *wqent;
2471 	struct mlx5_vdpa_dev *mvdev;
2472 	struct mlx5_vdpa_net *ndev;
2473 
2474 	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
2475 	mvdev = wqent->mvdev;
2476 	ndev = to_mlx5_vdpa_ndev(mvdev);
2477 	if (get_link_state(mvdev))
2478 		ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
2479 	else
2480 		ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
2481 
2482 	if (ndev->config_cb.callback)
2483 		ndev->config_cb.callback(ndev->config_cb.private);
2484 
2485 	kfree(wqent);
2486 }
2487 
2488 static int queue_link_work(struct mlx5_vdpa_net *ndev)
2489 {
2490 	struct mlx5_vdpa_wq_ent *wqent;
2491 
2492 	wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
2493 	if (!wqent)
2494 		return -ENOMEM;
2495 
2496 	wqent->mvdev = &ndev->mvdev;
2497 	INIT_WORK(&wqent->work, update_carrier);
2498 	queue_work(ndev->mvdev.wq, &wqent->work);
2499 	return 0;
2500 }
2501 
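/* mlx5 core event notifier. On port state change events, queue a work
 * item that re-reads the vport state and updates the link status bit
 * in the virtio config space.
 */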
2502 static int event_handler(struct notifier_block *nb, unsigned long event, void *param)
2503 {
2504 	struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb);
2505 	struct mlx5_eqe *eqe = param;
2506 	int ret = NOTIFY_DONE;
2507 
2508 	if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
2509 		switch (eqe->sub_type) {
2510 		case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
2511 		case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
2512 			if (queue_link_work(ndev))
2513 				return NOTIFY_DONE;
2514 
2515 			ret = NOTIFY_OK;
2516 			break;
2517 		default:
2518 			return NOTIFY_DONE;
2519 		}
2520 		return ret;
2521 	}
2522 	return ret;
2523 }
2524 
2525 static void register_link_notifier(struct mlx5_vdpa_net *ndev)
2526 {
2527 	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_STATUS)))
2528 		return;
2529 
2530 	ndev->nb.notifier_call = event_handler;
2531 	mlx5_notifier_register(ndev->mvdev.mdev, &ndev->nb);
2532 	ndev->nb_registered = true;
2533 	queue_link_work(ndev);
2534 }
2535 
2536 static void unregister_link_notifier(struct mlx5_vdpa_net *ndev)
2537 {
2538 	if (!ndev->nb_registered)
2539 		return;
2540 
2541 	ndev->nb_registered = false;
2542 	mlx5_notifier_unregister(ndev->mvdev.mdev, &ndev->nb);
2543 	if (ndev->mvdev.wq)
2544 		flush_workqueue(ndev->mvdev.wq);
2545 }
2546 
2547 static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features)
2548 {
2549 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2550 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2551 	int err;
2552 
2553 	print_features(mvdev, features, true);
2554 
2555 	err = verify_driver_features(mvdev, features);
2556 	if (err)
2557 		return err;
2558 
2559 	ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
2560 	if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ))
2561 		ndev->rqt_size = mlx5vdpa16_to_cpu(mvdev, ndev->config.max_virtqueue_pairs);
2562 	else
2563 		ndev->rqt_size = 1;
2564 
2565 	/* Device must start with 1 queue pair, as per VIRTIO v1.2 spec, section
2566 	 * 5.1.6.5.5 "Device operation in multiqueue mode":
2567 	 *
2568 	 * Multiqueue is disabled by default.
2569 	 * The driver enables multiqueue by sending a command using class
2570 	 * VIRTIO_NET_CTRL_MQ. The command selects the mode of multiqueue
2571 	 * operation, as follows: ...
2572 	 */
2573 	ndev->cur_num_vqs = 2;
2574 
2575 	update_cvq_info(mvdev);
2576 	return err;
2577 }
2578 
2579 static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb)
2580 {
2581 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2582 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2583 
2584 	ndev->config_cb = *cb;
2585 }
2586 
2587 #define MLX5_VDPA_MAX_VQ_ENTRIES 256
2588 static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev)
2589 {
2590 	return MLX5_VDPA_MAX_VQ_ENTRIES;
2591 }
2592 
2593 static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev)
2594 {
2595 	return VIRTIO_ID_NET;
2596 }
2597 
2598 static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev)
2599 {
2600 	return PCI_VENDOR_ID_MELLANOX;
2601 }
2602 
2603 static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev)
2604 {
2605 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2606 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2607 
2608 	print_status(mvdev, ndev->mvdev.status, false);
2609 	return ndev->mvdev.status;
2610 }
2611 
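/* Snapshot a virtqueue's state (indices, ring addresses, size and
 * interrupt mapping) into its restore info so it can be recreated
 * after a memory map change.
 */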
2612 static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
2613 {
2614 	struct mlx5_vq_restore_info *ri = &mvq->ri;
2615 	struct mlx5_virtq_attr attr = {};
2616 	int err;
2617 
2618 	if (mvq->initialized) {
2619 		err = query_virtqueue(ndev, mvq, &attr);
2620 		if (err)
2621 			return err;
2622 	}
2623 
2624 	ri->avail_index = attr.available_index;
2625 	ri->used_index = attr.used_index;
2626 	ri->ready = mvq->ready;
2627 	ri->num_ent = mvq->num_ent;
2628 	ri->desc_addr = mvq->desc_addr;
2629 	ri->device_addr = mvq->device_addr;
2630 	ri->driver_addr = mvq->driver_addr;
2631 	ri->map = mvq->map;
2632 	ri->restore = true;
2633 	return 0;
2634 }
2635 
2636 static int save_channels_info(struct mlx5_vdpa_net *ndev)
2637 {
2638 	int i;
2639 
2640 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
2641 		memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri));
2642 		save_channel_info(ndev, &ndev->vqs[i]);
2643 	}
2644 	return 0;
2645 }
2646 
2647 static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev)
2648 {
2649 	int i;
2650 
2651 	for (i = 0; i < ndev->mvdev.max_vqs; i++)
2652 		memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
2653 }
2654 
2655 static void restore_channels_info(struct mlx5_vdpa_net *ndev)
2656 {
2657 	struct mlx5_vdpa_virtqueue *mvq;
2658 	struct mlx5_vq_restore_info *ri;
2659 	int i;
2660 
2661 	mlx5_clear_vqs(ndev);
2662 	init_mvqs(ndev);
2663 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
2664 		mvq = &ndev->vqs[i];
2665 		ri = &mvq->ri;
2666 		if (!ri->restore)
2667 			continue;
2668 
2669 		mvq->avail_idx = ri->avail_index;
2670 		mvq->used_idx = ri->used_index;
2671 		mvq->ready = ri->ready;
2672 		mvq->num_ent = ri->num_ent;
2673 		mvq->desc_addr = ri->desc_addr;
2674 		mvq->device_addr = ri->device_addr;
2675 		mvq->driver_addr = ri->driver_addr;
2676 		mvq->map = ri->map;
2677 	}
2678 }
2679 
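/* Apply a new IOTLB mapping for an address space: suspend the
 * virtqueues, save their state, tear down the driver objects and
 * recreate the memory key. If the device is running and not suspended,
 * restore the virtqueues and set the driver up again.
 */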
2680 static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev,
2681 				struct vhost_iotlb *iotlb, unsigned int asid)
2682 {
2683 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2684 	int err;
2685 
2686 	suspend_vqs(ndev);
2687 	err = save_channels_info(ndev);
2688 	if (err)
2689 		goto err_mr;
2690 
2691 	teardown_driver(ndev);
2692 	mlx5_vdpa_destroy_mr_asid(mvdev, asid);
2693 	err = mlx5_vdpa_create_mr(mvdev, iotlb, asid);
2694 	if (err)
2695 		goto err_mr;
2696 
2697 	if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) || mvdev->suspended)
2698 		goto err_mr;
2699 
2700 	restore_channels_info(ndev);
2701 	err = setup_driver(mvdev);
2702 	if (err)
2703 		goto err_setup;
2704 
2705 	return 0;
2706 
2707 err_setup:
2708 	mlx5_vdpa_destroy_mr_asid(mvdev, asid);
2709 err_mr:
2710 	return err;
2711 }
2712 
2713 /* reslock must be held for this function */
2714 static int setup_driver(struct mlx5_vdpa_dev *mvdev)
2715 {
2716 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2717 	int err;
2718 
2719 	WARN_ON(!rwsem_is_locked(&ndev->reslock));
2720 
2721 	if (ndev->setup) {
2722 		mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n");
2723 		err = 0;
2724 		goto out;
2725 	}
2726 	mlx5_vdpa_add_debugfs(ndev);
2727 
2728 	err = read_umem_params(ndev);
2729 	if (err)
2730 		goto err_setup;
2731 
2732 	err = setup_virtqueues(mvdev);
2733 	if (err) {
2734 		mlx5_vdpa_warn(mvdev, "setup_virtqueues\n");
2735 		goto err_setup;
2736 	}
2737 
2738 	err = create_rqt(ndev);
2739 	if (err) {
2740 		mlx5_vdpa_warn(mvdev, "create_rqt\n");
2741 		goto err_rqt;
2742 	}
2743 
2744 	err = create_tir(ndev);
2745 	if (err) {
2746 		mlx5_vdpa_warn(mvdev, "create_tir\n");
2747 		goto err_tir;
2748 	}
2749 
2750 	err = setup_steering(ndev);
2751 	if (err) {
2752 		mlx5_vdpa_warn(mvdev, "setup_steering\n");
2753 		goto err_fwd;
2754 	}
2755 	ndev->setup = true;
2756 
2757 	return 0;
2758 
2759 err_fwd:
2760 	destroy_tir(ndev);
2761 err_tir:
2762 	destroy_rqt(ndev);
2763 err_rqt:
2764 	teardown_virtqueues(ndev);
2765 err_setup:
2766 	mlx5_vdpa_remove_debugfs(ndev);
2767 out:
2768 	return err;
2769 }
2770 
2771 /* reslock must be held for this function */
2772 static void teardown_driver(struct mlx5_vdpa_net *ndev)
2773 {
2774 
2775 	WARN_ON(!rwsem_is_locked(&ndev->reslock));
2776 
2777 	if (!ndev->setup)
2778 		return;
2779 
2780 	mlx5_vdpa_remove_debugfs(ndev);
2781 	teardown_steering(ndev);
2782 	destroy_tir(ndev);
2783 	destroy_rqt(ndev);
2784 	teardown_virtqueues(ndev);
2785 	ndev->setup = false;
2786 }
2787 
2788 static void clear_vqs_ready(struct mlx5_vdpa_net *ndev)
2789 {
2790 	int i;
2791 
2792 	for (i = 0; i < ndev->mvdev.max_vqs; i++)
2793 		ndev->vqs[i].ready = false;
2794 
2795 	ndev->mvdev.cvq.ready = false;
2796 }
2797 
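/* Initialize the vringh instance that emulates the control VQ in
 * software when VIRTIO_NET_F_CTRL_VQ is negotiated, preserving
 * last_avail_idx across re-initialization.
 */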
2798 static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
2799 {
2800 	struct mlx5_control_vq *cvq = &mvdev->cvq;
2801 	int err = 0;
2802 
2803 	if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) {
2804 		u16 idx = cvq->vring.last_avail_idx;
2805 
2806 		err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
2807 					cvq->vring.vring.num, false,
2808 					(struct vring_desc *)(uintptr_t)cvq->desc_addr,
2809 					(struct vring_avail *)(uintptr_t)cvq->driver_addr,
2810 					(struct vring_used *)(uintptr_t)cvq->device_addr);
2811 
2812 		if (!err)
2813 			cvq->vring.last_avail_idx = cvq->vring.last_used_idx = idx;
2814 	}
2815 	return err;
2816 }
2817 
2818 static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
2819 {
2820 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2821 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2822 	int err;
2823 
2824 	print_status(mvdev, status, true);
2825 
2826 	down_write(&ndev->reslock);
2827 
2828 	if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
2829 		if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
2830 			err = setup_cvq_vring(mvdev);
2831 			if (err) {
2832 				mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n");
2833 				goto err_setup;
2834 			}
2835 			register_link_notifier(ndev);
2836 			err = setup_driver(mvdev);
2837 			if (err) {
2838 				mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
2839 				goto err_driver;
2840 			}
2841 		} else {
2842 			mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
2843 			goto err_clear;
2844 		}
2845 	}
2846 
2847 	ndev->mvdev.status = status;
2848 	up_write(&ndev->reslock);
2849 	return;
2850 
2851 err_driver:
2852 	unregister_link_notifier(ndev);
2853 err_setup:
2854 	mlx5_vdpa_destroy_mr(&ndev->mvdev);
2855 	ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
2856 err_clear:
2857 	up_write(&ndev->reslock);
2858 }
2859 
2860 static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev)
2861 {
2862 	int i;
2863 
2864 	/* By default, all groups are mapped to ASID 0 */
2865 	for (i = 0; i < MLX5_VDPA_NUMVQ_GROUPS; i++)
2866 		mvdev->group2asid[i] = 0;
2867 }
2868 
2869 static int mlx5_vdpa_reset(struct vdpa_device *vdev)
2870 {
2871 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2872 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2873 
2874 	print_status(mvdev, 0, true);
2875 	mlx5_vdpa_info(mvdev, "performing device reset\n");
2876 
2877 	down_write(&ndev->reslock);
2878 	unregister_link_notifier(ndev);
2879 	teardown_driver(ndev);
2880 	clear_vqs_ready(ndev);
2881 	mlx5_vdpa_destroy_mr(&ndev->mvdev);
2882 	ndev->mvdev.status = 0;
2883 	ndev->mvdev.suspended = false;
2884 	ndev->cur_num_vqs = 0;
2885 	ndev->mvdev.cvq.received_desc = 0;
2886 	ndev->mvdev.cvq.completed_desc = 0;
2887 	memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1));
2888 	ndev->mvdev.actual_features = 0;
2889 	init_group_to_asid_map(mvdev);
2890 	++mvdev->generation;
2891 
2892 	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
2893 		if (mlx5_vdpa_create_mr(mvdev, NULL, 0))
2894 			mlx5_vdpa_warn(mvdev, "create MR failed\n");
2895 	}
2896 	up_write(&ndev->reslock);
2897 
2898 	return 0;
2899 }
2900 
2901 static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev)
2902 {
2903 	return sizeof(struct virtio_net_config);
2904 }
2905 
2906 static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf,
2907 				 unsigned int len)
2908 {
2909 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2910 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2911 
2912 	if (offset + len <= sizeof(struct virtio_net_config))
2913 		memcpy(buf, (u8 *)&ndev->config + offset, len);
2914 }
2915 
2916 static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf,
2917 				 unsigned int len)
2918 {
2919 	/* not supported */
2920 }
2921 
2922 static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
2923 {
2924 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2925 
2926 	return mvdev->generation;
2927 }
2928 
2929 static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
2930 			unsigned int asid)
2931 {
2932 	bool change_map;
2933 	int err;
2934 
2935 	err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map, asid);
2936 	if (err) {
2937 		mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err);
2938 		return err;
2939 	}
2940 
2941 	if (change_map)
2942 		err = mlx5_vdpa_change_map(mvdev, iotlb, asid);
2943 
2944 	return err;
2945 }
2946 
2947 static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
2948 			     struct vhost_iotlb *iotlb)
2949 {
2950 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2951 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2952 	int err = -EINVAL;
2953 
2954 	down_write(&ndev->reslock);
2955 	err = set_map_data(mvdev, iotlb, asid);
2956 	up_write(&ndev->reslock);
2957 	return err;
2958 }
2959 
2960 static struct device *mlx5_get_vq_dma_dev(struct vdpa_device *vdev, u16 idx)
2961 {
2962 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2963 
2964 	if (is_ctrl_vq_idx(mvdev, idx))
2965 		return &vdev->dev;
2966 
2967 	return mvdev->vdev.dma_dev;
2968 }
2969 
2970 static void free_irqs(struct mlx5_vdpa_net *ndev)
2971 {
2972 	struct mlx5_vdpa_irq_pool_entry *ent;
2973 	int i;
2974 
2975 	if (!msix_mode_supported(&ndev->mvdev))
2976 		return;
2977 
2978 	if (!ndev->irqp.entries)
2979 		return;
2980 
2981 	for (i = ndev->irqp.num_ent - 1; i >= 0; i--) {
2982 		ent = ndev->irqp.entries + i;
2983 		if (ent->map.virq)
2984 			pci_msix_free_irq(ndev->mvdev.mdev->pdev, ent->map);
2985 	}
2986 	kfree(ndev->irqp.entries);
2987 }
2988 
2989 static void mlx5_vdpa_free(struct vdpa_device *vdev)
2990 {
2991 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2992 	struct mlx5_core_dev *pfmdev;
2993 	struct mlx5_vdpa_net *ndev;
2994 
2995 	ndev = to_mlx5_vdpa_ndev(mvdev);
2996 
2997 	free_resources(ndev);
2998 	mlx5_vdpa_destroy_mr(mvdev);
2999 	if (!is_zero_ether_addr(ndev->config.mac)) {
3000 		pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
3001 		mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
3002 	}
3003 	mlx5_vdpa_free_resources(&ndev->mvdev);
3004 	free_irqs(ndev);
3005 	kfree(ndev->event_cbs);
3006 	kfree(ndev->vqs);
3007 }
3008 
3009 static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
3010 {
3011 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3012 	struct vdpa_notification_area ret = {};
3013 	struct mlx5_vdpa_net *ndev;
3014 	phys_addr_t addr;
3015 
3016 	if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
3017 		return ret;
3018 
3019 	/* If the SF BAR size is smaller than PAGE_SIZE, do not use direct
3020 	 * notification, to avoid the risk of mapping pages that contain the
3021 	 * BAR of more than one SF.
3022 	 */
3023 	if (MLX5_CAP_GEN(mvdev->mdev, log_min_sf_size) + 12 < PAGE_SHIFT)
3024 		return ret;
3025 
3026 	ndev = to_mlx5_vdpa_ndev(mvdev);
3027 	addr = (phys_addr_t)ndev->mvdev.res.phys_kick_addr;
3028 	ret.addr = addr;
3029 	ret.size = PAGE_SIZE;
3030 	return ret;
3031 }
3032 
3033 static int mlx5_get_vq_irq(struct vdpa_device *vdev, u16 idx)
3034 {
3035 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3036 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3037 	struct mlx5_vdpa_virtqueue *mvq;
3038 
3039 	if (!is_index_valid(mvdev, idx))
3040 		return -EINVAL;
3041 
3042 	if (is_ctrl_vq_idx(mvdev, idx))
3043 		return -EOPNOTSUPP;
3044 
3045 	mvq = &ndev->vqs[idx];
3046 	if (!mvq->map.virq)
3047 		return -EOPNOTSUPP;
3048 
3049 	return mvq->map.virq;
3050 }
3051 
3052 static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev)
3053 {
3054 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3055 
3056 	return mvdev->actual_features;
3057 }
3058 
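/* Query the virtio_q_counters object attached to a virtqueue and
 * return the number of received and completed descriptors. Only valid
 * while the queue object is in the RDY state.
 */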
3059 static int counter_set_query(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
3060 			     u64 *received_desc, u64 *completed_desc)
3061 {
3062 	u32 in[MLX5_ST_SZ_DW(query_virtio_q_counters_in)] = {};
3063 	u32 out[MLX5_ST_SZ_DW(query_virtio_q_counters_out)] = {};
3064 	void *cmd_hdr;
3065 	void *ctx;
3066 	int err;
3067 
3068 	if (!counters_supported(&ndev->mvdev))
3069 		return -EOPNOTSUPP;
3070 
3071 	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
3072 		return -EAGAIN;
3073 
3074 	cmd_hdr = MLX5_ADDR_OF(query_virtio_q_counters_in, in, hdr);
3075 
3076 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
3077 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
3078 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
3079 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->counter_set_id);
3080 
3081 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
3082 	if (err)
3083 		return err;
3084 
3085 	ctx = MLX5_ADDR_OF(query_virtio_q_counters_out, out, counters);
3086 	*received_desc = MLX5_GET64(virtio_q_counters, ctx, received_desc);
3087 	*completed_desc = MLX5_GET64(virtio_q_counters, ctx, completed_desc);
3088 	return 0;
3089 }
3090 
3091 static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
3092 					 struct sk_buff *msg,
3093 					 struct netlink_ext_ack *extack)
3094 {
3095 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3096 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3097 	struct mlx5_vdpa_virtqueue *mvq;
3098 	struct mlx5_control_vq *cvq;
3099 	u64 received_desc;
3100 	u64 completed_desc;
3101 	int err = 0;
3102 
3103 	down_read(&ndev->reslock);
3104 	if (!is_index_valid(mvdev, idx)) {
3105 		NL_SET_ERR_MSG_MOD(extack, "virtqueue index is not valid");
3106 		err = -EINVAL;
3107 		goto out_err;
3108 	}
3109 
3110 	if (idx == ctrl_vq_idx(mvdev)) {
3111 		cvq = &mvdev->cvq;
3112 		received_desc = cvq->received_desc;
3113 		completed_desc = cvq->completed_desc;
3114 		goto out;
3115 	}
3116 
3117 	mvq = &ndev->vqs[idx];
3118 	err = counter_set_query(ndev, mvq, &received_desc, &completed_desc);
3119 	if (err) {
3120 		NL_SET_ERR_MSG_MOD(extack, "failed to query hardware");
3121 		goto out_err;
3122 	}
3123 
3124 out:
3125 	err = -EMSGSIZE;
3126 	if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "received_desc"))
3127 		goto out_err;
3128 
3129 	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, received_desc,
3130 			      VDPA_ATTR_PAD))
3131 		goto out_err;
3132 
3133 	if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "completed_desc"))
3134 		goto out_err;
3135 
3136 	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, completed_desc,
3137 			      VDPA_ATTR_PAD))
3138 		goto out_err;
3139 
3140 	err = 0;
3141 out_err:
3142 	up_read(&ndev->reslock);
3143 	return err;
3144 }
3145 
3146 static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev)
3147 {
3148 	struct mlx5_control_vq *cvq;
3149 
3150 	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
3151 		return;
3152 
3153 	cvq = &mvdev->cvq;
3154 	cvq->ready = false;
3155 }
3156 
3157 static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
3158 {
3159 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3160 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3161 	struct mlx5_vdpa_virtqueue *mvq;
3162 	int i;
3163 
3164 	mlx5_vdpa_info(mvdev, "suspending device\n");
3165 
3166 	down_write(&ndev->reslock);
3167 	unregister_link_notifier(ndev);
3168 	for (i = 0; i < ndev->cur_num_vqs; i++) {
3169 		mvq = &ndev->vqs[i];
3170 		suspend_vq(ndev, mvq);
3171 	}
3172 	mlx5_vdpa_cvq_suspend(mvdev);
3173 	mvdev->suspended = true;
3174 	up_write(&ndev->reslock);
3175 	return 0;
3176 }
3177 
3178 static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group,
3179 			       unsigned int asid)
3180 {
3181 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3182 
3183 	if (group >= MLX5_VDPA_NUMVQ_GROUPS)
3184 		return -EINVAL;
3185 
3186 	mvdev->group2asid[group] = asid;
3187 	return 0;
3188 }
3189 
3190 static const struct vdpa_config_ops mlx5_vdpa_ops = {
3191 	.set_vq_address = mlx5_vdpa_set_vq_address,
3192 	.set_vq_num = mlx5_vdpa_set_vq_num,
3193 	.kick_vq = mlx5_vdpa_kick_vq,
3194 	.set_vq_cb = mlx5_vdpa_set_vq_cb,
3195 	.set_vq_ready = mlx5_vdpa_set_vq_ready,
3196 	.get_vq_ready = mlx5_vdpa_get_vq_ready,
3197 	.set_vq_state = mlx5_vdpa_set_vq_state,
3198 	.get_vq_state = mlx5_vdpa_get_vq_state,
3199 	.get_vendor_vq_stats = mlx5_vdpa_get_vendor_vq_stats,
3200 	.get_vq_notification = mlx5_get_vq_notification,
3201 	.get_vq_irq = mlx5_get_vq_irq,
3202 	.get_vq_align = mlx5_vdpa_get_vq_align,
3203 	.get_vq_group = mlx5_vdpa_get_vq_group,
3204 	.get_device_features = mlx5_vdpa_get_device_features,
3205 	.set_driver_features = mlx5_vdpa_set_driver_features,
3206 	.get_driver_features = mlx5_vdpa_get_driver_features,
3207 	.set_config_cb = mlx5_vdpa_set_config_cb,
3208 	.get_vq_num_max = mlx5_vdpa_get_vq_num_max,
3209 	.get_device_id = mlx5_vdpa_get_device_id,
3210 	.get_vendor_id = mlx5_vdpa_get_vendor_id,
3211 	.get_status = mlx5_vdpa_get_status,
3212 	.set_status = mlx5_vdpa_set_status,
3213 	.reset = mlx5_vdpa_reset,
3214 	.get_config_size = mlx5_vdpa_get_config_size,
3215 	.get_config = mlx5_vdpa_get_config,
3216 	.set_config = mlx5_vdpa_set_config,
3217 	.get_generation = mlx5_vdpa_get_generation,
3218 	.set_map = mlx5_vdpa_set_map,
3219 	.set_group_asid = mlx5_set_group_asid,
3220 	.get_vq_dma_dev = mlx5_get_vq_dma_dev,
3221 	.free = mlx5_vdpa_free,
3222 	.suspend = mlx5_vdpa_suspend,
3223 };
3224 
3225 static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
3226 {
3227 	u16 hw_mtu;
3228 	int err;
3229 
3230 	err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu);
3231 	if (err)
3232 		return err;
3233 
3234 	*mtu = hw_mtu - MLX5V_ETH_HARD_MTU;
3235 	return 0;
3236 }
3237 
3238 static int alloc_resources(struct mlx5_vdpa_net *ndev)
3239 {
3240 	struct mlx5_vdpa_net_resources *res = &ndev->res;
3241 	int err;
3242 
3243 	if (res->valid) {
3244 		mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n");
3245 		return -EEXIST;
3246 	}
3247 
3248 	err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn);
3249 	if (err)
3250 		return err;
3251 
3252 	err = create_tis(ndev);
3253 	if (err)
3254 		goto err_tis;
3255 
3256 	res->valid = true;
3257 
3258 	return 0;
3259 
3260 err_tis:
3261 	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
3262 	return err;
3263 }
3264 
3265 static void free_resources(struct mlx5_vdpa_net *ndev)
3266 {
3267 	struct mlx5_vdpa_net_resources *res = &ndev->res;
3268 
3269 	if (!res->valid)
3270 		return;
3271 
3272 	destroy_tis(ndev);
3273 	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
3274 	res->valid = false;
3275 }
3276 
3277 static void init_mvqs(struct mlx5_vdpa_net *ndev)
3278 {
3279 	struct mlx5_vdpa_virtqueue *mvq;
3280 	int i;
3281 
3282 	for (i = 0; i < ndev->mvdev.max_vqs; ++i) {
3283 		mvq = &ndev->vqs[i];
3284 		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
3285 		mvq->index = i;
3286 		mvq->ndev = ndev;
3287 		mvq->fwqp.fw = true;
3288 		mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
3289 	}
3290 	for (; i < ndev->mvdev.max_vqs; i++) {
3291 		mvq = &ndev->vqs[i];
3292 		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
3293 		mvq->index = i;
3294 		mvq->ndev = ndev;
3295 	}
3296 }
3297 
3298 struct mlx5_vdpa_mgmtdev {
3299 	struct vdpa_mgmt_dev mgtdev;
3300 	struct mlx5_adev *madev;
3301 	struct mlx5_vdpa_net *ndev;
3302 };
3303 
3304 static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu)
3305 {
3306 	int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
3307 	void *in;
3308 	int err;
3309 
3310 	in = kvzalloc(inlen, GFP_KERNEL);
3311 	if (!in)
3312 		return -ENOMEM;
3313 
3314 	MLX5_SET(modify_nic_vport_context_in, in, field_select.mtu, 1);
3315 	MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.mtu,
3316 		 mtu + MLX5V_ETH_HARD_MTU);
3317 	MLX5_SET(modify_nic_vport_context_in, in, opcode,
3318 		 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
3319 
3320 	err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);
3321 
3322 	kvfree(in);
3323 	return err;
3324 }
3325 
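/* Pre-allocate one dynamic MSI-X vector per data virtqueue so that a
 * virtqueue can later be bound to its own interrupt. Allocation
 * failures are not fatal; the device simply ends up with fewer (or no)
 * dedicated vectors.
 */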
3326 static void allocate_irqs(struct mlx5_vdpa_net *ndev)
3327 {
3328 	struct mlx5_vdpa_irq_pool_entry *ent;
3329 	int i;
3330 
3331 	if (!msix_mode_supported(&ndev->mvdev))
3332 		return;
3333 
3334 	if (!ndev->mvdev.mdev->pdev)
3335 		return;
3336 
3337 	ndev->irqp.entries = kcalloc(ndev->mvdev.max_vqs, sizeof(*ndev->irqp.entries), GFP_KERNEL);
3338 	if (!ndev->irqp.entries)
3339 		return;
3341 
3342 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
3343 		ent = ndev->irqp.entries + i;
3344 		snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
3345 			 dev_name(&ndev->mvdev.vdev.dev), i);
3346 		ent->map = pci_msix_alloc_irq_at(ndev->mvdev.mdev->pdev, MSI_ANY_INDEX, NULL);
3347 		if (!ent->map.virq)
3348 			return;
3349 
3350 		ndev->irqp.num_ent++;
3351 	}
3352 }
3353 
3354 static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
3355 			     const struct vdpa_dev_set_config *add_config)
3356 {
3357 	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
3358 	struct virtio_net_config *config;
3359 	struct mlx5_core_dev *pfmdev;
3360 	struct mlx5_vdpa_dev *mvdev;
3361 	struct mlx5_vdpa_net *ndev;
3362 	struct mlx5_core_dev *mdev;
3363 	u64 device_features;
3364 	u32 max_vqs;
3365 	u16 mtu;
3366 	int err;
3367 
3368 	if (mgtdev->ndev)
3369 		return -ENOSPC;
3370 
3371 	mdev = mgtdev->madev->mdev;
3372 	device_features = mgtdev->mgtdev.supported_features;
3373 	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) {
3374 		if (add_config->device_features & ~device_features) {
3375 			dev_warn(mdev->device,
3376 				 "The provisioned features 0x%llx are not supported by this device with features 0x%llx\n",
3377 				 add_config->device_features, device_features);
3378 			return -EINVAL;
3379 		}
3380 		device_features &= add_config->device_features;
3381 	} else {
3382 		device_features &= ~BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
3383 	}
3384 	if (!(device_features & BIT_ULL(VIRTIO_F_VERSION_1) &&
3385 	      device_features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) {
3386 		dev_warn(mdev->device,
3387 			 "Must provision minimum features 0x%llx for this device",
3388 			 BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM));
3389 		return -EOPNOTSUPP;
3390 	}
3391 
3392 	if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) &
3393 	    MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) {
3394 		dev_warn(mdev->device, "missing support for split virtqueues\n");
3395 		return -EOPNOTSUPP;
3396 	}
3397 
3398 	max_vqs = min_t(int, MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues),
3399 			1 << MLX5_CAP_GEN(mdev, log_max_rqt_size));
3400 	if (max_vqs < 2) {
3401 		dev_warn(mdev->device,
3402 			 "%d virtqueues are supported. At least 2 are required\n",
3403 			 max_vqs);
3404 		return -EAGAIN;
3405 	}
3406 
3407 	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) {
3408 		if (add_config->net.max_vq_pairs > max_vqs / 2)
3409 			return -EINVAL;
3410 		max_vqs = min_t(u32, max_vqs, 2 * add_config->net.max_vq_pairs);
3411 	} else {
3412 		max_vqs = 2;
3413 	}
3414 
3415 	ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
3416 				 MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false);
3417 	if (IS_ERR(ndev))
3418 		return PTR_ERR(ndev);
3419 
3420 	ndev->mvdev.max_vqs = max_vqs;
3421 	mvdev = &ndev->mvdev;
3422 	mvdev->mdev = mdev;
3423 
3424 	ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
3425 	ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), GFP_KERNEL);
3426 	if (!ndev->vqs || !ndev->event_cbs) {
3427 		err = -ENOMEM;
3428 		goto err_alloc;
3429 	}
3430 
3431 	init_mvqs(ndev);
3432 	allocate_irqs(ndev);
3433 	init_rwsem(&ndev->reslock);
3434 	config = &ndev->config;
3435 
3436 	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU)) {
3437 		err = config_func_mtu(mdev, add_config->net.mtu);
3438 		if (err)
3439 			goto err_alloc;
3440 	}
3441 
3442 	if (device_features & BIT_ULL(VIRTIO_NET_F_MTU)) {
3443 		err = query_mtu(mdev, &mtu);
3444 		if (err)
3445 			goto err_alloc;
3446 
3447 		ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu);
3448 	}
3449 
3450 	if (device_features & BIT_ULL(VIRTIO_NET_F_STATUS)) {
3451 		if (get_link_state(mvdev))
3452 			ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
3453 		else
3454 			ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
3455 	}
3456 
3457 	if (add_config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
3458 		memcpy(ndev->config.mac, add_config->net.mac, ETH_ALEN);
3459 	/* Don't bother setting the MAC address in config if _F_MAC is not going to be provisioned */
3460 	} else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0 ||
3461 		   device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
3462 		err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
3463 		if (err)
3464 			goto err_alloc;
3465 	}
3466 
3467 	if (!is_zero_ether_addr(config->mac)) {
3468 		pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
3469 		err = mlx5_mpfs_add_mac(pfmdev, config->mac);
3470 		if (err)
3471 			goto err_alloc;
3472 	} else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0) {
3473 		/*
3474 		 * We used to clear _F_MAC feature bit if seeing
3475 		 * zero mac address when device features are not
3476 		 * specifically provisioned. Keep the behaviour
3477 		 * so old scripts do not break.
3478 		 */
3479 		device_features &= ~BIT_ULL(VIRTIO_NET_F_MAC);
3480 	} else if (device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
3481 		/* Don't provision zero mac address for _F_MAC */
3482 		mlx5_vdpa_warn(&ndev->mvdev,
3483 			       "No mac address provisioned?\n");
3484 		err = -EINVAL;
3485 		goto err_alloc;
3486 	}
3487 
3488 	if (device_features & BIT_ULL(VIRTIO_NET_F_MQ))
3489 		config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, max_vqs / 2);
3490 
3491 	ndev->mvdev.mlx_features = device_features;
3492 	mvdev->vdev.dma_dev = &mdev->pdev->dev;
3493 	err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
3494 	if (err)
3495 		goto err_mpfs;
3496 
3497 	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
3498 		err = mlx5_vdpa_create_mr(mvdev, NULL, 0);
3499 		if (err)
3500 			goto err_res;
3501 	}
3502 
3503 	err = alloc_resources(ndev);
3504 	if (err)
3505 		goto err_mr;
3506 
3507 	ndev->cvq_ent.mvdev = mvdev;
3508 	INIT_WORK(&ndev->cvq_ent.work, mlx5_cvq_kick_handler);
3509 	mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq");
3510 	if (!mvdev->wq) {
3511 		err = -ENOMEM;
3512 		goto err_res2;
3513 	}
3514 
3515 	mvdev->vdev.mdev = &mgtdev->mgtdev;
3516 	err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1);
3517 	if (err)
3518 		goto err_reg;
3519 
3520 	mgtdev->ndev = ndev;
3521 	return 0;
3522 
3523 err_reg:
3524 	destroy_workqueue(mvdev->wq);
3525 err_res2:
3526 	free_resources(ndev);
3527 err_mr:
3528 	mlx5_vdpa_destroy_mr(mvdev);
3529 err_res:
3530 	mlx5_vdpa_free_resources(&ndev->mvdev);
3531 err_mpfs:
3532 	if (!is_zero_ether_addr(config->mac))
3533 		mlx5_mpfs_del_mac(pfmdev, config->mac);
3534 err_alloc:
3535 	put_device(&mvdev->vdev.dev);
3536 	return err;
3537 }
3538 
3539 static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
3540 {
3541 	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
3542 	struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
3543 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3544 	struct workqueue_struct *wq;
3545 
3546 	unregister_link_notifier(ndev);
3547 	_vdpa_unregister_device(dev);
3548 	wq = mvdev->wq;
3549 	mvdev->wq = NULL;
3550 	destroy_workqueue(wq);
3551 	mgtdev->ndev = NULL;
3552 }
3553 
3554 static const struct vdpa_mgmtdev_ops mdev_ops = {
3555 	.dev_add = mlx5_vdpa_dev_add,
3556 	.dev_del = mlx5_vdpa_dev_del,
3557 };
3558 
3559 static struct virtio_device_id id_table[] = {
3560 	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
3561 	{ 0 },
3562 };
3563 
3564 static int mlx5v_probe(struct auxiliary_device *adev,
3565 		       const struct auxiliary_device_id *id)
3567 {
3568 	struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
3569 	struct mlx5_core_dev *mdev = madev->mdev;
3570 	struct mlx5_vdpa_mgmtdev *mgtdev;
3571 	int err;
3572 
3573 	mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL);
3574 	if (!mgtdev)
3575 		return -ENOMEM;
3576 
3577 	mgtdev->mgtdev.ops = &mdev_ops;
3578 	mgtdev->mgtdev.device = mdev->device;
3579 	mgtdev->mgtdev.id_table = id_table;
3580 	mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
3581 					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP) |
3582 					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU) |
3583 					  BIT_ULL(VDPA_ATTR_DEV_FEATURES);
3584 	mgtdev->mgtdev.max_supported_vqs =
3585 		MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues) + 1;
3586 	mgtdev->mgtdev.supported_features = get_supported_features(mdev);
3587 	mgtdev->madev = madev;
3588 
3589 	err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
3590 	if (err)
3591 		goto reg_err;
3592 
3593 	auxiliary_set_drvdata(adev, mgtdev);
3594 
3595 	return 0;
3596 
3597 reg_err:
3598 	kfree(mgtdev);
3599 	return err;
3600 }
3601 
3602 static void mlx5v_remove(struct auxiliary_device *adev)
3603 {
3604 	struct mlx5_vdpa_mgmtdev *mgtdev;
3605 
3606 	mgtdev = auxiliary_get_drvdata(adev);
3607 	vdpa_mgmtdev_unregister(&mgtdev->mgtdev);
3608 	kfree(mgtdev);
3609 }
3610 
3611 static const struct auxiliary_device_id mlx5v_id_table[] = {
3612 	{ .name = MLX5_ADEV_NAME ".vnet", },
3613 	{},
3614 };
3615 
3616 MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table);
3617 
3618 static struct auxiliary_driver mlx5v_driver = {
3619 	.name = "vnet",
3620 	.probe = mlx5v_probe,
3621 	.remove = mlx5v_remove,
3622 	.id_table = mlx5v_id_table,
3623 };
3624 
3625 module_auxiliary_driver(mlx5v_driver);
3626