xref: /openbmc/linux/drivers/vdpa/mlx5/net/mlx5_vnet.c (revision e2ad626f)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2020 Mellanox Technologies Ltd. */
3 
4 #include <linux/module.h>
5 #include <linux/vdpa.h>
6 #include <linux/vringh.h>
7 #include <uapi/linux/virtio_net.h>
8 #include <uapi/linux/virtio_ids.h>
9 #include <uapi/linux/vdpa.h>
10 #include <linux/virtio_config.h>
11 #include <linux/auxiliary_bus.h>
12 #include <linux/mlx5/cq.h>
13 #include <linux/mlx5/qp.h>
14 #include <linux/mlx5/device.h>
15 #include <linux/mlx5/driver.h>
16 #include <linux/mlx5/vport.h>
17 #include <linux/mlx5/fs.h>
18 #include <linux/mlx5/mlx5_ifc_vdpa.h>
19 #include <linux/mlx5/mpfs.h>
20 #include "mlx5_vdpa.h"
21 #include "mlx5_vnet.h"
22 
23 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
24 MODULE_DESCRIPTION("Mellanox VDPA driver");
25 MODULE_LICENSE("Dual BSD/GPL");
26 
27 #define VALID_FEATURES_MASK                                                                        \
28 	(BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |                                   \
29 	 BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) |   \
30 	 BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |                             \
31 	 BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \
32 	 BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) |   \
33 	 BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |      \
34 	 BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) |                                 \
35 	 BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) |                      \
36 	 BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) |  \
37 	 BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) |           \
38 	 BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) |                          \
39 	 BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) |      \
40 	 BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV))
41 
42 #define VALID_STATUS_MASK                                                                          \
43 	(VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK |        \
44 	 VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)
45 
46 #define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature)))
47 
48 #define MLX5V_UNTAGGED 0x1000
49 
50 struct mlx5_vdpa_cq_buf {
51 	struct mlx5_frag_buf_ctrl fbc;
52 	struct mlx5_frag_buf frag_buf;
53 	int cqe_size;
54 	int nent;
55 };
56 
57 struct mlx5_vdpa_cq {
58 	struct mlx5_core_cq mcq;
59 	struct mlx5_vdpa_cq_buf buf;
60 	struct mlx5_db db;
61 	int cqe;
62 };
63 
64 struct mlx5_vdpa_umem {
65 	struct mlx5_frag_buf_ctrl fbc;
66 	struct mlx5_frag_buf frag_buf;
67 	int size;
68 	u32 id;
69 };
70 
71 struct mlx5_vdpa_qp {
72 	struct mlx5_core_qp mqp;
73 	struct mlx5_frag_buf frag_buf;
74 	struct mlx5_db db;
75 	u16 head;
76 	bool fw;
77 };
78 
79 struct mlx5_vq_restore_info {
80 	u32 num_ent;
81 	u64 desc_addr;
82 	u64 device_addr;
83 	u64 driver_addr;
84 	u16 avail_index;
85 	u16 used_index;
86 	struct msi_map map;
87 	bool ready;
88 	bool restore;
89 };
90 
91 struct mlx5_vdpa_virtqueue {
92 	bool ready;
93 	u64 desc_addr;
94 	u64 device_addr;
95 	u64 driver_addr;
96 	u32 num_ent;
97 
98 	/* Resources for implementing the notification channel from the device
99 	 * to the driver. fwqp is the firmware end of an RC connection; the
100 	 * other end is vqqp used by the driver. cq is where completions are
101 	 * reported.
102 	 */
103 	struct mlx5_vdpa_cq cq;
104 	struct mlx5_vdpa_qp fwqp;
105 	struct mlx5_vdpa_qp vqqp;
106 
107 	/* umem resources are required for the virtqueue operation. They're use
108 	 * is internal and they must be provided by the driver.
109 	 */
110 	struct mlx5_vdpa_umem umem1;
111 	struct mlx5_vdpa_umem umem2;
112 	struct mlx5_vdpa_umem umem3;
113 
114 	u32 counter_set_id;
115 	bool initialized;
116 	int index;
117 	u32 virtq_id;
118 	struct mlx5_vdpa_net *ndev;
119 	u16 avail_idx;
120 	u16 used_idx;
121 	int fw_state;
122 	struct msi_map map;
123 
124 	/* keep last in the struct */
125 	struct mlx5_vq_restore_info ri;
126 };
127 
128 static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
129 {
130 	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) {
131 		if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
132 			return idx < 2;
133 		else
134 			return idx < 3;
135 	}
136 
137 	return idx <= mvdev->max_idx;
138 }
139 
140 static void free_resources(struct mlx5_vdpa_net *ndev);
141 static void init_mvqs(struct mlx5_vdpa_net *ndev);
142 static int setup_driver(struct mlx5_vdpa_dev *mvdev);
143 static void teardown_driver(struct mlx5_vdpa_net *ndev);
144 
145 static bool mlx5_vdpa_debug;
146 
147 #define MLX5_CVQ_MAX_ENT 16
148 
149 #define MLX5_LOG_VIO_FLAG(_feature)                                                                \
150 	do {                                                                                       \
151 		if (features & BIT_ULL(_feature))                                                  \
152 			mlx5_vdpa_info(mvdev, "%s\n", #_feature);                                  \
153 	} while (0)
154 
155 #define MLX5_LOG_VIO_STAT(_status)                                                                 \
156 	do {                                                                                       \
157 		if (status & (_status))                                                            \
158 			mlx5_vdpa_info(mvdev, "%s\n", #_status);                                   \
159 	} while (0)
160 
161 /* TODO: cross-endian support */
162 static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
163 {
164 	return virtio_legacy_is_little_endian() ||
165 		(mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
166 }
167 
168 static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val)
169 {
170 	return __virtio16_to_cpu(mlx5_vdpa_is_little_endian(mvdev), val);
171 }
172 
173 static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
174 {
175 	return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
176 }
177 
178 static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
179 {
180 	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
181 		return 2;
182 
183 	return mvdev->max_vqs;
184 }
185 
186 static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
187 {
188 	return idx == ctrl_vq_idx(mvdev);
189 }
190 
191 static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
192 {
193 	if (status & ~VALID_STATUS_MASK)
194 		mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n",
195 			       status & ~VALID_STATUS_MASK);
196 
197 	if (!mlx5_vdpa_debug)
198 		return;
199 
200 	mlx5_vdpa_info(mvdev, "driver status %s", set ? "set" : "get");
201 	if (set && !status) {
202 		mlx5_vdpa_info(mvdev, "driver resets the device\n");
203 		return;
204 	}
205 
206 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE);
207 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER);
208 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK);
209 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK);
210 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET);
211 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED);
212 }
213 
214 static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set)
215 {
216 	if (features & ~VALID_FEATURES_MASK)
217 		mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n",
218 			       features & ~VALID_FEATURES_MASK);
219 
220 	if (!mlx5_vdpa_debug)
221 		return;
222 
223 	mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads");
224 	if (!features)
225 		mlx5_vdpa_info(mvdev, "all feature bits are cleared\n");
226 
227 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM);
228 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM);
229 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
230 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU);
231 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC);
232 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4);
233 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6);
234 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN);
235 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO);
236 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4);
237 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6);
238 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN);
239 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO);
240 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF);
241 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS);
242 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ);
243 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX);
244 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN);
245 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA);
246 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE);
247 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ);
248 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR);
249 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT);
250 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS);
251 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT);
252 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY);
253 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX);
254 	MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY);
255 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT);
256 	MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1);
257 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM);
258 	MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED);
259 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM);
260 	MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV);
261 }
262 
263 static int create_tis(struct mlx5_vdpa_net *ndev)
264 {
265 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
266 	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
267 	void *tisc;
268 	int err;
269 
270 	tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
271 	MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn);
272 	err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn);
273 	if (err)
274 		mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err);
275 
276 	return err;
277 }
278 
279 static void destroy_tis(struct mlx5_vdpa_net *ndev)
280 {
281 	mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn);
282 }
283 
284 #define MLX5_VDPA_CQE_SIZE 64
285 #define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE)
286 
287 static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent)
288 {
289 	struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
290 	u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE;
291 	u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE;
292 	int err;
293 
294 	err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf,
295 				       ndev->mvdev.mdev->priv.numa_node);
296 	if (err)
297 		return err;
298 
299 	mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);
300 
301 	buf->cqe_size = MLX5_VDPA_CQE_SIZE;
302 	buf->nent = nent;
303 
304 	return 0;
305 }
306 
307 static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size)
308 {
309 	struct mlx5_frag_buf *frag_buf = &umem->frag_buf;
310 
311 	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf,
312 					ndev->mvdev.mdev->priv.numa_node);
313 }
314 
315 static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf)
316 {
317 	mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf);
318 }
319 
320 static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n)
321 {
322 	return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n);
323 }
324 
325 static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf)
326 {
327 	struct mlx5_cqe64 *cqe64;
328 	void *cqe;
329 	int i;
330 
331 	for (i = 0; i < buf->nent; i++) {
332 		cqe = get_cqe(vcq, i);
333 		cqe64 = cqe;
334 		cqe64->op_own = MLX5_CQE_INVALID << 4;
335 	}
336 }
337 
338 static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n)
339 {
340 	struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1));
341 
342 	if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
343 	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe)))
344 		return cqe64;
345 
346 	return NULL;
347 }
348 
349 static void rx_post(struct mlx5_vdpa_qp *vqp, int n)
350 {
351 	vqp->head += n;
352 	vqp->db.db[0] = cpu_to_be32(vqp->head);
353 }
354 
355 static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in,
356 		       struct mlx5_vdpa_virtqueue *mvq, u32 num_ent)
357 {
358 	struct mlx5_vdpa_qp *vqp;
359 	__be64 *pas;
360 	void *qpc;
361 
362 	vqp = fw ? &mvq->fwqp : &mvq->vqqp;
363 	MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid);
364 	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
365 	if (vqp->fw) {
366 		/* Firmware QP is allocated by the driver for the firmware's
367 		 * use so we can skip part of the params as they will be chosen by firmware
368 		 */
369 		qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
370 		MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
371 		MLX5_SET(qpc, qpc, no_sq, 1);
372 		return;
373 	}
374 
375 	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
376 	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
377 	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
378 	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
379 	MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index);
380 	MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
381 	MLX5_SET(qpc, qpc, no_sq, 1);
382 	MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn);
383 	MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent));
384 	MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
385 	pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas);
386 	mlx5_fill_page_frag_array(&vqp->frag_buf, pas);
387 }
388 
389 static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent)
390 {
391 	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev,
392 					num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf,
393 					ndev->mvdev.mdev->priv.numa_node);
394 }
395 
396 static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
397 {
398 	mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf);
399 }
400 
401 static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
402 		     struct mlx5_vdpa_qp *vqp)
403 {
404 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
405 	int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
406 	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
407 	void *qpc;
408 	void *in;
409 	int err;
410 
411 	if (!vqp->fw) {
412 		vqp = &mvq->vqqp;
413 		err = rq_buf_alloc(ndev, vqp, mvq->num_ent);
414 		if (err)
415 			return err;
416 
417 		err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db);
418 		if (err)
419 			goto err_db;
420 		inlen += vqp->frag_buf.npages * sizeof(__be64);
421 	}
422 
423 	in = kzalloc(inlen, GFP_KERNEL);
424 	if (!in) {
425 		err = -ENOMEM;
426 		goto err_kzalloc;
427 	}
428 
429 	qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent);
430 	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
431 	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
432 	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
433 	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
434 	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
435 	if (!vqp->fw)
436 		MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma);
437 	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
438 	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
439 	kfree(in);
440 	if (err)
441 		goto err_kzalloc;
442 
443 	vqp->mqp.uid = ndev->mvdev.res.uid;
444 	vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);
445 
446 	if (!vqp->fw)
447 		rx_post(vqp, mvq->num_ent);
448 
449 	return 0;
450 
451 err_kzalloc:
452 	if (!vqp->fw)
453 		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
454 err_db:
455 	if (!vqp->fw)
456 		rq_buf_free(ndev, vqp);
457 
458 	return err;
459 }
460 
461 static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
462 {
463 	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
464 
465 	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
466 	MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
467 	MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
468 	if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
469 		mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn);
470 	if (!vqp->fw) {
471 		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
472 		rq_buf_free(ndev, vqp);
473 	}
474 }
475 
476 static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
477 {
478 	return get_sw_cqe(cq, cq->mcq.cons_index);
479 }
480 
481 static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
482 {
483 	struct mlx5_cqe64 *cqe64;
484 
485 	cqe64 = next_cqe_sw(vcq);
486 	if (!cqe64)
487 		return -EAGAIN;
488 
489 	vcq->mcq.cons_index++;
490 	return 0;
491 }
492 
493 static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
494 {
495 	struct mlx5_vdpa_net *ndev = mvq->ndev;
496 	struct vdpa_callback *event_cb;
497 
498 	event_cb = &ndev->event_cbs[mvq->index];
499 	mlx5_cq_set_ci(&mvq->cq.mcq);
500 
501 	/* make sure CQ cosumer update is visible to the hardware before updating
502 	 * RX doorbell record.
503 	 */
504 	dma_wmb();
505 	rx_post(&mvq->vqqp, num);
506 	if (event_cb->callback)
507 		event_cb->callback(event_cb->private);
508 }
509 
510 static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
511 {
512 	struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
513 	struct mlx5_vdpa_net *ndev = mvq->ndev;
514 	void __iomem *uar_page = ndev->mvdev.res.uar->map;
515 	int num = 0;
516 
517 	while (!mlx5_vdpa_poll_one(&mvq->cq)) {
518 		num++;
519 		if (num > mvq->num_ent / 2) {
520 			/* If completions keep coming while we poll, we want to
521 			 * let the hardware know that we consumed them by
522 			 * updating the doorbell record.  We also let vdpa core
523 			 * know about this so it passes it on the virtio driver
524 			 * on the guest.
525 			 */
526 			mlx5_vdpa_handle_completions(mvq, num);
527 			num = 0;
528 		}
529 	}
530 
531 	if (num)
532 		mlx5_vdpa_handle_completions(mvq, num);
533 
534 	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
535 }
536 
537 static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
538 {
539 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
540 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
541 	void __iomem *uar_page = ndev->mvdev.res.uar->map;
542 	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
543 	struct mlx5_vdpa_cq *vcq = &mvq->cq;
544 	__be64 *pas;
545 	int inlen;
546 	void *cqc;
547 	void *in;
548 	int err;
549 	int eqn;
550 
551 	err = mlx5_db_alloc(mdev, &vcq->db);
552 	if (err)
553 		return err;
554 
555 	vcq->mcq.set_ci_db = vcq->db.db;
556 	vcq->mcq.arm_db = vcq->db.db + 1;
557 	vcq->mcq.cqe_sz = 64;
558 
559 	err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
560 	if (err)
561 		goto err_db;
562 
563 	cq_frag_buf_init(vcq, &vcq->buf);
564 
565 	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
566 		MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages;
567 	in = kzalloc(inlen, GFP_KERNEL);
568 	if (!in) {
569 		err = -ENOMEM;
570 		goto err_vzalloc;
571 	}
572 
573 	MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid);
574 	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
575 	mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas);
576 
577 	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
578 	MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
579 
580 	/* Use vector 0 by default. Consider adding code to choose least used
581 	 * vector.
582 	 */
583 	err = mlx5_comp_eqn_get(mdev, 0, &eqn);
584 	if (err)
585 		goto err_vec;
586 
587 	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
588 	MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
589 	MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
590 	MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
591 	MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);
592 
593 	err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
594 	if (err)
595 		goto err_vec;
596 
597 	vcq->mcq.comp = mlx5_vdpa_cq_comp;
598 	vcq->cqe = num_ent;
599 	vcq->mcq.set_ci_db = vcq->db.db;
600 	vcq->mcq.arm_db = vcq->db.db + 1;
601 	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
602 	kfree(in);
603 	return 0;
604 
605 err_vec:
606 	kfree(in);
607 err_vzalloc:
608 	cq_frag_buf_free(ndev, &vcq->buf);
609 err_db:
610 	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
611 	return err;
612 }
613 
614 static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
615 {
616 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
617 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
618 	struct mlx5_vdpa_cq *vcq = &mvq->cq;
619 
620 	if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) {
621 		mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn);
622 		return;
623 	}
624 	cq_frag_buf_free(ndev, &vcq->buf);
625 	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
626 }
627 
628 static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
629 			  struct mlx5_vdpa_umem **umemp)
630 {
631 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
632 	int p_a;
633 	int p_b;
634 
635 	switch (num) {
636 	case 1:
637 		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_a);
638 		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_b);
639 		*umemp = &mvq->umem1;
640 		break;
641 	case 2:
642 		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_a);
643 		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_b);
644 		*umemp = &mvq->umem2;
645 		break;
646 	case 3:
647 		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_a);
648 		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_b);
649 		*umemp = &mvq->umem3;
650 		break;
651 	}
652 	(*umemp)->size = p_a * mvq->num_ent + p_b;
653 }
654 
655 static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem)
656 {
657 	mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf);
658 }
659 
660 static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
661 {
662 	int inlen;
663 	u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {};
664 	void *um;
665 	void *in;
666 	int err;
667 	__be64 *pas;
668 	struct mlx5_vdpa_umem *umem;
669 
670 	set_umem_size(ndev, mvq, num, &umem);
671 	err = umem_frag_buf_alloc(ndev, umem, umem->size);
672 	if (err)
673 		return err;
674 
675 	inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages;
676 
677 	in = kzalloc(inlen, GFP_KERNEL);
678 	if (!in) {
679 		err = -ENOMEM;
680 		goto err_in;
681 	}
682 
683 	MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM);
684 	MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid);
685 	um = MLX5_ADDR_OF(create_umem_in, in, umem);
686 	MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
687 	MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages);
688 
689 	pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]);
690 	mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW);
691 
692 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
693 	if (err) {
694 		mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err);
695 		goto err_cmd;
696 	}
697 
698 	kfree(in);
699 	umem->id = MLX5_GET(create_umem_out, out, umem_id);
700 
701 	return 0;
702 
703 err_cmd:
704 	kfree(in);
705 err_in:
706 	umem_frag_buf_free(ndev, umem);
707 	return err;
708 }
709 
710 static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
711 {
712 	u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {};
713 	u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {};
714 	struct mlx5_vdpa_umem *umem;
715 
716 	switch (num) {
717 	case 1:
718 		umem = &mvq->umem1;
719 		break;
720 	case 2:
721 		umem = &mvq->umem2;
722 		break;
723 	case 3:
724 		umem = &mvq->umem3;
725 		break;
726 	}
727 
728 	MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM);
729 	MLX5_SET(destroy_umem_in, in, umem_id, umem->id);
730 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
731 		return;
732 
733 	umem_frag_buf_free(ndev, umem);
734 }
735 
736 static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
737 {
738 	int num;
739 	int err;
740 
741 	for (num = 1; num <= 3; num++) {
742 		err = create_umem(ndev, mvq, num);
743 		if (err)
744 			goto err_umem;
745 	}
746 	return 0;
747 
748 err_umem:
749 	for (num--; num > 0; num--)
750 		umem_destroy(ndev, mvq, num);
751 
752 	return err;
753 }
754 
755 static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
756 {
757 	int num;
758 
759 	for (num = 3; num > 0; num--)
760 		umem_destroy(ndev, mvq, num);
761 }
762 
763 static int get_queue_type(struct mlx5_vdpa_net *ndev)
764 {
765 	u32 type_mask;
766 
767 	type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type);
768 
769 	/* prefer split queue */
770 	if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)
771 		return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT;
772 
773 	WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED));
774 
775 	return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED;
776 }
777 
778 static bool vq_is_tx(u16 idx)
779 {
780 	return idx % 2;
781 }
782 
783 enum {
784 	MLX5_VIRTIO_NET_F_MRG_RXBUF = 2,
785 	MLX5_VIRTIO_NET_F_HOST_ECN = 4,
786 	MLX5_VIRTIO_NET_F_GUEST_ECN = 6,
787 	MLX5_VIRTIO_NET_F_GUEST_TSO6 = 7,
788 	MLX5_VIRTIO_NET_F_GUEST_TSO4 = 8,
789 	MLX5_VIRTIO_NET_F_GUEST_CSUM = 9,
790 	MLX5_VIRTIO_NET_F_CSUM = 10,
791 	MLX5_VIRTIO_NET_F_HOST_TSO6 = 11,
792 	MLX5_VIRTIO_NET_F_HOST_TSO4 = 12,
793 };
794 
795 static u16 get_features(u64 features)
796 {
797 	return (!!(features & BIT_ULL(VIRTIO_NET_F_MRG_RXBUF)) << MLX5_VIRTIO_NET_F_MRG_RXBUF) |
798 	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_ECN)) << MLX5_VIRTIO_NET_F_HOST_ECN) |
799 	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_ECN)) << MLX5_VIRTIO_NET_F_GUEST_ECN) |
800 	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO6)) << MLX5_VIRTIO_NET_F_GUEST_TSO6) |
801 	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO4)) << MLX5_VIRTIO_NET_F_GUEST_TSO4) |
802 	       (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << MLX5_VIRTIO_NET_F_CSUM) |
803 	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << MLX5_VIRTIO_NET_F_HOST_TSO6) |
804 	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << MLX5_VIRTIO_NET_F_HOST_TSO4);
805 }
806 
807 static bool counters_supported(const struct mlx5_vdpa_dev *mvdev)
808 {
809 	return MLX5_CAP_GEN_64(mvdev->mdev, general_obj_types) &
810 	       BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
811 }
812 
813 static bool msix_mode_supported(struct mlx5_vdpa_dev *mvdev)
814 {
815 	return MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, event_mode) &
816 		(1 << MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE) &&
817 		pci_msix_can_alloc_dyn(mvdev->mdev->pdev);
818 }
819 
820 static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
821 {
822 	int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
823 	u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
824 	void *obj_context;
825 	u16 mlx_features;
826 	void *cmd_hdr;
827 	void *vq_ctx;
828 	void *in;
829 	int err;
830 
831 	err = umems_create(ndev, mvq);
832 	if (err)
833 		return err;
834 
835 	in = kzalloc(inlen, GFP_KERNEL);
836 	if (!in) {
837 		err = -ENOMEM;
838 		goto err_alloc;
839 	}
840 
841 	mlx_features = get_features(ndev->mvdev.actual_features);
842 	cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);
843 
844 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
845 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
846 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
847 
848 	obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);
849 	MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
850 	MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
851 	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
852 		 mlx_features >> 3);
853 	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0,
854 		 mlx_features & 7);
855 	vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
856 	MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));
857 
858 	if (vq_is_tx(mvq->index))
859 		MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);
860 
861 	if (mvq->map.virq) {
862 		MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE);
863 		MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->map.index);
864 	} else {
865 		MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
866 		MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
867 	}
868 
869 	MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
870 	MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
871 	MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
872 		 !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
873 	MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
874 	MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
875 	MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
876 	MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, ndev->mvdev.mr.mkey);
877 	MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
878 	MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
879 	MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
880 	MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size);
881 	MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
882 	MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
883 	MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
884 	if (counters_supported(&ndev->mvdev))
885 		MLX5_SET(virtio_q, vq_ctx, counter_set_id, mvq->counter_set_id);
886 
887 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
888 	if (err)
889 		goto err_cmd;
890 
891 	mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
892 	kfree(in);
893 	mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
894 
895 	return 0;
896 
897 err_cmd:
898 	kfree(in);
899 err_alloc:
900 	umems_destroy(ndev, mvq);
901 	return err;
902 }
903 
904 static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
905 {
906 	u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {};
907 	u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {};
908 
909 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode,
910 		 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
911 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id);
912 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid);
913 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type,
914 		 MLX5_OBJ_TYPE_VIRTIO_NET_Q);
915 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) {
916 		mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
917 		return;
918 	}
919 	mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
920 	umems_destroy(ndev, mvq);
921 }
922 
923 static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
924 {
925 	return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn;
926 }
927 
928 static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
929 {
930 	return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn;
931 }
932 
933 static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out,
934 			int *outlen, u32 qpn, u32 rqpn)
935 {
936 	void *qpc;
937 	void *pp;
938 
939 	switch (cmd) {
940 	case MLX5_CMD_OP_2RST_QP:
941 		*inlen = MLX5_ST_SZ_BYTES(qp_2rst_in);
942 		*outlen = MLX5_ST_SZ_BYTES(qp_2rst_out);
943 		*in = kzalloc(*inlen, GFP_KERNEL);
944 		*out = kzalloc(*outlen, GFP_KERNEL);
945 		if (!*in || !*out)
946 			goto outerr;
947 
948 		MLX5_SET(qp_2rst_in, *in, opcode, cmd);
949 		MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid);
950 		MLX5_SET(qp_2rst_in, *in, qpn, qpn);
951 		break;
952 	case MLX5_CMD_OP_RST2INIT_QP:
953 		*inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in);
954 		*outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out);
955 		*in = kzalloc(*inlen, GFP_KERNEL);
956 		*out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL);
957 		if (!*in || !*out)
958 			goto outerr;
959 
960 		MLX5_SET(rst2init_qp_in, *in, opcode, cmd);
961 		MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid);
962 		MLX5_SET(rst2init_qp_in, *in, qpn, qpn);
963 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
964 		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
965 		MLX5_SET(qpc, qpc, rwe, 1);
966 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
967 		MLX5_SET(ads, pp, vhca_port_num, 1);
968 		break;
969 	case MLX5_CMD_OP_INIT2RTR_QP:
970 		*inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in);
971 		*outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out);
972 		*in = kzalloc(*inlen, GFP_KERNEL);
973 		*out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL);
974 		if (!*in || !*out)
975 			goto outerr;
976 
977 		MLX5_SET(init2rtr_qp_in, *in, opcode, cmd);
978 		MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid);
979 		MLX5_SET(init2rtr_qp_in, *in, qpn, qpn);
980 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
981 		MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
982 		MLX5_SET(qpc, qpc, log_msg_max, 30);
983 		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
984 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
985 		MLX5_SET(ads, pp, fl, 1);
986 		break;
987 	case MLX5_CMD_OP_RTR2RTS_QP:
988 		*inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in);
989 		*outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out);
990 		*in = kzalloc(*inlen, GFP_KERNEL);
991 		*out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL);
992 		if (!*in || !*out)
993 			goto outerr;
994 
995 		MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd);
996 		MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid);
997 		MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn);
998 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
999 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1000 		MLX5_SET(ads, pp, ack_timeout, 14);
1001 		MLX5_SET(qpc, qpc, retry_count, 7);
1002 		MLX5_SET(qpc, qpc, rnr_retry, 7);
1003 		break;
1004 	default:
1005 		goto outerr_nullify;
1006 	}
1007 
1008 	return;
1009 
1010 outerr:
1011 	kfree(*in);
1012 	kfree(*out);
1013 outerr_nullify:
1014 	*in = NULL;
1015 	*out = NULL;
1016 }
1017 
1018 static void free_inout(void *in, void *out)
1019 {
1020 	kfree(in);
1021 	kfree(out);
1022 }
1023 
1024 /* Two QPs are used by each virtqueue. One is used by the driver and one by
1025  * firmware. The fw argument indicates whether the subjected QP is the one used
1026  * by firmware.
1027  */
1028 static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd)
1029 {
1030 	int outlen;
1031 	int inlen;
1032 	void *out;
1033 	void *in;
1034 	int err;
1035 
1036 	alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw));
1037 	if (!in || !out)
1038 		return -ENOMEM;
1039 
1040 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen);
1041 	free_inout(in, out);
1042 	return err;
1043 }
1044 
1045 static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1046 {
1047 	int err;
1048 
1049 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP);
1050 	if (err)
1051 		return err;
1052 
1053 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP);
1054 	if (err)
1055 		return err;
1056 
1057 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP);
1058 	if (err)
1059 		return err;
1060 
1061 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP);
1062 	if (err)
1063 		return err;
1064 
1065 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP);
1066 	if (err)
1067 		return err;
1068 
1069 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP);
1070 	if (err)
1071 		return err;
1072 
1073 	return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP);
1074 }
1075 
1076 struct mlx5_virtq_attr {
1077 	u8 state;
1078 	u16 available_index;
1079 	u16 used_index;
1080 };
1081 
1082 static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
1083 			   struct mlx5_virtq_attr *attr)
1084 {
1085 	int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out);
1086 	u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {};
1087 	void *out;
1088 	void *obj_context;
1089 	void *cmd_hdr;
1090 	int err;
1091 
1092 	out = kzalloc(outlen, GFP_KERNEL);
1093 	if (!out)
1094 		return -ENOMEM;
1095 
1096 	cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1097 
1098 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
1099 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1100 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1101 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1102 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen);
1103 	if (err)
1104 		goto err_cmd;
1105 
1106 	obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context);
1107 	memset(attr, 0, sizeof(*attr));
1108 	attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
1109 	attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);
1110 	attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index);
1111 	kfree(out);
1112 	return 0;
1113 
1114 err_cmd:
1115 	kfree(out);
1116 	return err;
1117 }
1118 
1119 static bool is_valid_state_change(int oldstate, int newstate)
1120 {
1121 	switch (oldstate) {
1122 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
1123 		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
1124 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
1125 		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
1126 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
1127 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
1128 	default:
1129 		return false;
1130 	}
1131 }
1132 
1133 static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
1134 {
1135 	int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
1136 	u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {};
1137 	void *obj_context;
1138 	void *cmd_hdr;
1139 	void *in;
1140 	int err;
1141 
1142 	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
1143 		return 0;
1144 
1145 	if (!is_valid_state_change(mvq->fw_state, state))
1146 		return -EINVAL;
1147 
1148 	in = kzalloc(inlen, GFP_KERNEL);
1149 	if (!in)
1150 		return -ENOMEM;
1151 
1152 	cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1153 
1154 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
1155 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1156 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1157 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1158 
1159 	obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context);
1160 	MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select,
1161 		   MLX5_VIRTQ_MODIFY_MASK_STATE);
1162 	MLX5_SET(virtio_net_q_object, obj_context, state, state);
1163 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
1164 	kfree(in);
1165 	if (!err)
1166 		mvq->fw_state = state;
1167 
1168 	return err;
1169 }
1170 
1171 static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1172 {
1173 	u32 in[MLX5_ST_SZ_DW(create_virtio_q_counters_in)] = {};
1174 	u32 out[MLX5_ST_SZ_DW(create_virtio_q_counters_out)] = {};
1175 	void *cmd_hdr;
1176 	int err;
1177 
1178 	if (!counters_supported(&ndev->mvdev))
1179 		return 0;
1180 
1181 	cmd_hdr = MLX5_ADDR_OF(create_virtio_q_counters_in, in, hdr);
1182 
1183 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
1184 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
1185 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1186 
1187 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
1188 	if (err)
1189 		return err;
1190 
1191 	mvq->counter_set_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
1192 
1193 	return 0;
1194 }
1195 
1196 static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1197 {
1198 	u32 in[MLX5_ST_SZ_DW(destroy_virtio_q_counters_in)] = {};
1199 	u32 out[MLX5_ST_SZ_DW(destroy_virtio_q_counters_out)] = {};
1200 
1201 	if (!counters_supported(&ndev->mvdev))
1202 		return;
1203 
1204 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
1205 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_id, mvq->counter_set_id);
1206 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.uid, ndev->mvdev.res.uid);
1207 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
1208 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
1209 		mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id);
1210 }
1211 
1212 static irqreturn_t mlx5_vdpa_int_handler(int irq, void *priv)
1213 {
1214 	struct vdpa_callback *cb = priv;
1215 
1216 	if (cb->callback)
1217 		return cb->callback(cb->private);
1218 
1219 	return IRQ_HANDLED;
1220 }
1221 
1222 static void alloc_vector(struct mlx5_vdpa_net *ndev,
1223 			 struct mlx5_vdpa_virtqueue *mvq)
1224 {
1225 	struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
1226 	struct mlx5_vdpa_irq_pool_entry *ent;
1227 	int err;
1228 	int i;
1229 
1230 	for (i = 0; i < irqp->num_ent; i++) {
1231 		ent = &irqp->entries[i];
1232 		if (!ent->used) {
1233 			snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
1234 				 dev_name(&ndev->mvdev.vdev.dev), mvq->index);
1235 			ent->dev_id = &ndev->event_cbs[mvq->index];
1236 			err = request_irq(ent->map.virq, mlx5_vdpa_int_handler, 0,
1237 					  ent->name, ent->dev_id);
1238 			if (err)
1239 				return;
1240 
1241 			ent->used = true;
1242 			mvq->map = ent->map;
1243 			return;
1244 		}
1245 	}
1246 }
1247 
1248 static void dealloc_vector(struct mlx5_vdpa_net *ndev,
1249 			   struct mlx5_vdpa_virtqueue *mvq)
1250 {
1251 	struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
1252 	int i;
1253 
1254 	for (i = 0; i < irqp->num_ent; i++)
1255 		if (mvq->map.virq == irqp->entries[i].map.virq) {
1256 			free_irq(mvq->map.virq, irqp->entries[i].dev_id);
1257 			irqp->entries[i].used = false;
1258 			return;
1259 		}
1260 }
1261 
1262 static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1263 {
1264 	u16 idx = mvq->index;
1265 	int err;
1266 
1267 	if (!mvq->num_ent)
1268 		return 0;
1269 
1270 	if (mvq->initialized)
1271 		return 0;
1272 
1273 	err = cq_create(ndev, idx, mvq->num_ent);
1274 	if (err)
1275 		return err;
1276 
1277 	err = qp_create(ndev, mvq, &mvq->fwqp);
1278 	if (err)
1279 		goto err_fwqp;
1280 
1281 	err = qp_create(ndev, mvq, &mvq->vqqp);
1282 	if (err)
1283 		goto err_vqqp;
1284 
1285 	err = connect_qps(ndev, mvq);
1286 	if (err)
1287 		goto err_connect;
1288 
1289 	err = counter_set_alloc(ndev, mvq);
1290 	if (err)
1291 		goto err_connect;
1292 
1293 	alloc_vector(ndev, mvq);
1294 	err = create_virtqueue(ndev, mvq);
1295 	if (err)
1296 		goto err_vq;
1297 
1298 	if (mvq->ready) {
1299 		err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
1300 		if (err) {
1301 			mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n",
1302 				       idx, err);
1303 			goto err_modify;
1304 		}
1305 	}
1306 
1307 	mvq->initialized = true;
1308 	return 0;
1309 
1310 err_modify:
1311 	destroy_virtqueue(ndev, mvq);
1312 err_vq:
1313 	dealloc_vector(ndev, mvq);
1314 	counter_set_dealloc(ndev, mvq);
1315 err_connect:
1316 	qp_destroy(ndev, &mvq->vqqp);
1317 err_vqqp:
1318 	qp_destroy(ndev, &mvq->fwqp);
1319 err_fwqp:
1320 	cq_destroy(ndev, idx);
1321 	return err;
1322 }
1323 
1324 static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1325 {
1326 	struct mlx5_virtq_attr attr;
1327 
1328 	if (!mvq->initialized)
1329 		return;
1330 
1331 	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
1332 		return;
1333 
1334 	if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND))
1335 		mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n");
1336 
1337 	if (query_virtqueue(ndev, mvq, &attr)) {
1338 		mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n");
1339 		return;
1340 	}
1341 	mvq->avail_idx = attr.available_index;
1342 	mvq->used_idx = attr.used_index;
1343 }
1344 
1345 static void suspend_vqs(struct mlx5_vdpa_net *ndev)
1346 {
1347 	int i;
1348 
1349 	for (i = 0; i < ndev->mvdev.max_vqs; i++)
1350 		suspend_vq(ndev, &ndev->vqs[i]);
1351 }
1352 
1353 static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1354 {
1355 	if (!mvq->initialized)
1356 		return;
1357 
1358 	suspend_vq(ndev, mvq);
1359 	destroy_virtqueue(ndev, mvq);
1360 	dealloc_vector(ndev, mvq);
1361 	counter_set_dealloc(ndev, mvq);
1362 	qp_destroy(ndev, &mvq->vqqp);
1363 	qp_destroy(ndev, &mvq->fwqp);
1364 	cq_destroy(ndev, mvq->index);
1365 	mvq->initialized = false;
1366 }
1367 
1368 static int create_rqt(struct mlx5_vdpa_net *ndev)
1369 {
1370 	int rqt_table_size = roundup_pow_of_two(ndev->rqt_size);
1371 	int act_sz = roundup_pow_of_two(ndev->cur_num_vqs / 2);
1372 	__be32 *list;
1373 	void *rqtc;
1374 	int inlen;
1375 	void *in;
1376 	int i, j;
1377 	int err;
1378 
1379 	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + rqt_table_size * MLX5_ST_SZ_BYTES(rq_num);
1380 	in = kzalloc(inlen, GFP_KERNEL);
1381 	if (!in)
1382 		return -ENOMEM;
1383 
1384 	MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid);
1385 	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
1386 
1387 	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1388 	MLX5_SET(rqtc, rqtc, rqt_max_size, rqt_table_size);
1389 	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1390 	for (i = 0, j = 0; i < act_sz; i++, j += 2)
1391 		list[i] = cpu_to_be32(ndev->vqs[j % ndev->cur_num_vqs].virtq_id);
1392 
1393 	MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
1394 	err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
1395 	kfree(in);
1396 	if (err)
1397 		return err;
1398 
1399 	return 0;
1400 }
1401 
1402 #define MLX5_MODIFY_RQT_NUM_RQS ((u64)1)
1403 
1404 static int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
1405 {
1406 	int act_sz = roundup_pow_of_two(num / 2);
1407 	__be32 *list;
1408 	void *rqtc;
1409 	int inlen;
1410 	void *in;
1411 	int i, j;
1412 	int err;
1413 
1414 	inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + act_sz * MLX5_ST_SZ_BYTES(rq_num);
1415 	in = kzalloc(inlen, GFP_KERNEL);
1416 	if (!in)
1417 		return -ENOMEM;
1418 
1419 	MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid);
1420 	MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS);
1421 	rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
1422 	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1423 
1424 	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1425 	for (i = 0, j = 0; i < act_sz; i++, j = j + 2)
1426 		list[i] = cpu_to_be32(ndev->vqs[j % num].virtq_id);
1427 
1428 	MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
1429 	err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn);
1430 	kfree(in);
1431 	if (err)
1432 		return err;
1433 
1434 	return 0;
1435 }
1436 
1437 static void destroy_rqt(struct mlx5_vdpa_net *ndev)
1438 {
1439 	mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
1440 }
1441 
1442 static int create_tir(struct mlx5_vdpa_net *ndev)
1443 {
1444 #define HASH_IP_L4PORTS                                                                            \
1445 	(MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT |  \
1446 	 MLX5_HASH_FIELD_SEL_L4_DPORT)
1447 	static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
1448 						   0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
1449 						   0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
1450 						   0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
1451 						   0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a };
1452 	void *rss_key;
1453 	void *outer;
1454 	void *tirc;
1455 	void *in;
1456 	int err;
1457 
1458 	in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL);
1459 	if (!in)
1460 		return -ENOMEM;
1461 
1462 	MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid);
1463 	tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
1464 	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
1465 
1466 	MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
1467 	MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
1468 	rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
1469 	memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key));
1470 
1471 	outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
1472 	MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4);
1473 	MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP);
1474 	MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS);
1475 
1476 	MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn);
1477 	MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn);
1478 
1479 	err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn);
1480 	kfree(in);
1481 	if (err)
1482 		return err;
1483 
1484 	mlx5_vdpa_add_tirn(ndev);
1485 	return err;
1486 }
1487 
1488 static void destroy_tir(struct mlx5_vdpa_net *ndev)
1489 {
1490 	mlx5_vdpa_remove_tirn(ndev);
1491 	mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn);
1492 }
1493 
1494 #define MAX_STEERING_ENT 0x8000
1495 #define MAX_STEERING_GROUPS 2
1496 
1497 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1498        #define NUM_DESTS 2
1499 #else
1500        #define NUM_DESTS 1
1501 #endif
1502 
1503 static int add_steering_counters(struct mlx5_vdpa_net *ndev,
1504 				 struct macvlan_node *node,
1505 				 struct mlx5_flow_act *flow_act,
1506 				 struct mlx5_flow_destination *dests)
1507 {
1508 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1509 	int err;
1510 
1511 	node->ucast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
1512 	if (IS_ERR(node->ucast_counter.counter))
1513 		return PTR_ERR(node->ucast_counter.counter);
1514 
1515 	node->mcast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
1516 	if (IS_ERR(node->mcast_counter.counter)) {
1517 		err = PTR_ERR(node->mcast_counter.counter);
1518 		goto err_mcast_counter;
1519 	}
1520 
1521 	dests[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1522 	flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
1523 	return 0;
1524 
1525 err_mcast_counter:
1526 	mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
1527 	return err;
1528 #else
1529 	return 0;
1530 #endif
1531 }
1532 
1533 static void remove_steering_counters(struct mlx5_vdpa_net *ndev,
1534 				     struct macvlan_node *node)
1535 {
1536 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1537 	mlx5_fc_destroy(ndev->mvdev.mdev, node->mcast_counter.counter);
1538 	mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
1539 #endif
1540 }
1541 
1542 static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac,
1543 					struct macvlan_node *node)
1544 {
1545 	struct mlx5_flow_destination dests[NUM_DESTS] = {};
1546 	struct mlx5_flow_act flow_act = {};
1547 	struct mlx5_flow_spec *spec;
1548 	void *headers_c;
1549 	void *headers_v;
1550 	u8 *dmac_c;
1551 	u8 *dmac_v;
1552 	int err;
1553 	u16 vid;
1554 
1555 	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1556 	if (!spec)
1557 		return -ENOMEM;
1558 
1559 	vid = key2vid(node->macvlan);
1560 	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
1561 	headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
1562 	headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
1563 	dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16);
1564 	dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16);
1565 	eth_broadcast_addr(dmac_c);
1566 	ether_addr_copy(dmac_v, mac);
1567 	if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)) {
1568 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
1569 		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid);
1570 	}
1571 	if (node->tagged) {
1572 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);
1573 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, vid);
1574 	}
1575 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1576 	dests[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1577 	dests[0].tir_num = ndev->res.tirn;
1578 	err = add_steering_counters(ndev, node, &flow_act, dests);
1579 	if (err)
1580 		goto out_free;
1581 
1582 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1583 	dests[1].counter_id = mlx5_fc_id(node->ucast_counter.counter);
1584 #endif
1585 	node->ucast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
1586 	if (IS_ERR(node->ucast_rule)) {
1587 		err = PTR_ERR(node->ucast_rule);
1588 		goto err_ucast;
1589 	}
1590 
1591 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1592 	dests[1].counter_id = mlx5_fc_id(node->mcast_counter.counter);
1593 #endif
1594 
1595 	memset(dmac_c, 0, ETH_ALEN);
1596 	memset(dmac_v, 0, ETH_ALEN);
1597 	dmac_c[0] = 1;
1598 	dmac_v[0] = 1;
1599 	node->mcast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
1600 	if (IS_ERR(node->mcast_rule)) {
1601 		err = PTR_ERR(node->mcast_rule);
1602 		goto err_mcast;
1603 	}
1604 	kvfree(spec);
1605 	mlx5_vdpa_add_rx_counters(ndev, node);
1606 	return 0;
1607 
1608 err_mcast:
1609 	mlx5_del_flow_rules(node->ucast_rule);
1610 err_ucast:
1611 	remove_steering_counters(ndev, node);
1612 out_free:
1613 	kvfree(spec);
1614 	return err;
1615 }
1616 
1617 static void mlx5_vdpa_del_mac_vlan_rules(struct mlx5_vdpa_net *ndev,
1618 					 struct macvlan_node *node)
1619 {
1620 	mlx5_vdpa_remove_rx_counters(ndev, node);
1621 	mlx5_del_flow_rules(node->ucast_rule);
1622 	mlx5_del_flow_rules(node->mcast_rule);
1623 }
1624 
1625 static u64 search_val(u8 *mac, u16 vlan, bool tagged)
1626 {
1627 	u64 val;
1628 
1629 	if (!tagged)
1630 		vlan = MLX5V_UNTAGGED;
1631 
1632 	val = (u64)vlan << 48 |
1633 	      (u64)mac[0] << 40 |
1634 	      (u64)mac[1] << 32 |
1635 	      (u64)mac[2] << 24 |
1636 	      (u64)mac[3] << 16 |
1637 	      (u64)mac[4] << 8 |
1638 	      (u64)mac[5];
1639 
1640 	return val;
1641 }
1642 
1643 static struct macvlan_node *mac_vlan_lookup(struct mlx5_vdpa_net *ndev, u64 value)
1644 {
1645 	struct macvlan_node *pos;
1646 	u32 idx;
1647 
1648 	idx = hash_64(value, 8); // tbd 8
1649 	hlist_for_each_entry(pos, &ndev->macvlan_hash[idx], hlist) {
1650 		if (pos->macvlan == value)
1651 			return pos;
1652 	}
1653 	return NULL;
1654 }
1655 
1656 static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vid, bool tagged)
1657 {
1658 	struct macvlan_node *ptr;
1659 	u64 val;
1660 	u32 idx;
1661 	int err;
1662 
1663 	val = search_val(mac, vid, tagged);
1664 	if (mac_vlan_lookup(ndev, val))
1665 		return -EEXIST;
1666 
1667 	ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);
1668 	if (!ptr)
1669 		return -ENOMEM;
1670 
1671 	ptr->tagged = tagged;
1672 	ptr->macvlan = val;
1673 	ptr->ndev = ndev;
1674 	err = mlx5_vdpa_add_mac_vlan_rules(ndev, ndev->config.mac, ptr);
1675 	if (err)
1676 		goto err_add;
1677 
1678 	idx = hash_64(val, 8);
1679 	hlist_add_head(&ptr->hlist, &ndev->macvlan_hash[idx]);
1680 	return 0;
1681 
1682 err_add:
1683 	kfree(ptr);
1684 	return err;
1685 }
1686 
1687 static void mac_vlan_del(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged)
1688 {
1689 	struct macvlan_node *ptr;
1690 
1691 	ptr = mac_vlan_lookup(ndev, search_val(mac, vlan, tagged));
1692 	if (!ptr)
1693 		return;
1694 
1695 	hlist_del(&ptr->hlist);
1696 	mlx5_vdpa_del_mac_vlan_rules(ndev, ptr);
1697 	remove_steering_counters(ndev, ptr);
1698 	kfree(ptr);
1699 }
1700 
1701 static void clear_mac_vlan_table(struct mlx5_vdpa_net *ndev)
1702 {
1703 	struct macvlan_node *pos;
1704 	struct hlist_node *n;
1705 	int i;
1706 
1707 	for (i = 0; i < MLX5V_MACVLAN_SIZE; i++) {
1708 		hlist_for_each_entry_safe(pos, n, &ndev->macvlan_hash[i], hlist) {
1709 			hlist_del(&pos->hlist);
1710 			mlx5_vdpa_del_mac_vlan_rules(ndev, pos);
1711 			remove_steering_counters(ndev, pos);
1712 			kfree(pos);
1713 		}
1714 	}
1715 }
1716 
1717 static int setup_steering(struct mlx5_vdpa_net *ndev)
1718 {
1719 	struct mlx5_flow_table_attr ft_attr = {};
1720 	struct mlx5_flow_namespace *ns;
1721 	int err;
1722 
1723 	ft_attr.max_fte = MAX_STEERING_ENT;
1724 	ft_attr.autogroup.max_num_groups = MAX_STEERING_GROUPS;
1725 
1726 	ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS);
1727 	if (!ns) {
1728 		mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n");
1729 		return -EOPNOTSUPP;
1730 	}
1731 
1732 	ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
1733 	if (IS_ERR(ndev->rxft)) {
1734 		mlx5_vdpa_warn(&ndev->mvdev, "failed to create flow table\n");
1735 		return PTR_ERR(ndev->rxft);
1736 	}
1737 	mlx5_vdpa_add_rx_flow_table(ndev);
1738 
1739 	err = mac_vlan_add(ndev, ndev->config.mac, 0, false);
1740 	if (err)
1741 		goto err_add;
1742 
1743 	return 0;
1744 
1745 err_add:
1746 	mlx5_vdpa_remove_rx_flow_table(ndev);
1747 	mlx5_destroy_flow_table(ndev->rxft);
1748 	return err;
1749 }
1750 
1751 static void teardown_steering(struct mlx5_vdpa_net *ndev)
1752 {
1753 	clear_mac_vlan_table(ndev);
1754 	mlx5_vdpa_remove_rx_flow_table(ndev);
1755 	mlx5_destroy_flow_table(ndev->rxft);
1756 }
1757 
1758 static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1759 {
1760 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1761 	struct mlx5_control_vq *cvq = &mvdev->cvq;
1762 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1763 	struct mlx5_core_dev *pfmdev;
1764 	size_t read;
1765 	u8 mac[ETH_ALEN], mac_back[ETH_ALEN];
1766 
1767 	pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
1768 	switch (cmd) {
1769 	case VIRTIO_NET_CTRL_MAC_ADDR_SET:
1770 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN);
1771 		if (read != ETH_ALEN)
1772 			break;
1773 
1774 		if (!memcmp(ndev->config.mac, mac, 6)) {
1775 			status = VIRTIO_NET_OK;
1776 			break;
1777 		}
1778 
1779 		if (is_zero_ether_addr(mac))
1780 			break;
1781 
1782 		if (!is_zero_ether_addr(ndev->config.mac)) {
1783 			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
1784 				mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
1785 					       ndev->config.mac);
1786 				break;
1787 			}
1788 		}
1789 
1790 		if (mlx5_mpfs_add_mac(pfmdev, mac)) {
1791 			mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
1792 				       mac);
1793 			break;
1794 		}
1795 
1796 		/* backup the original mac address so that if failed to add the forward rules
1797 		 * we could restore it
1798 		 */
1799 		memcpy(mac_back, ndev->config.mac, ETH_ALEN);
1800 
1801 		memcpy(ndev->config.mac, mac, ETH_ALEN);
1802 
1803 		/* Need recreate the flow table entry, so that the packet could forward back
1804 		 */
1805 		mac_vlan_del(ndev, mac_back, 0, false);
1806 
1807 		if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) {
1808 			mlx5_vdpa_warn(mvdev, "failed to insert forward rules, try to restore\n");
1809 
1810 			/* Although it hardly run here, we still need double check */
1811 			if (is_zero_ether_addr(mac_back)) {
1812 				mlx5_vdpa_warn(mvdev, "restore mac failed: Original MAC is zero\n");
1813 				break;
1814 			}
1815 
1816 			/* Try to restore original mac address to MFPS table, and try to restore
1817 			 * the forward rule entry.
1818 			 */
1819 			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
1820 				mlx5_vdpa_warn(mvdev, "restore mac failed: delete MAC %pM from MPFS table failed\n",
1821 					       ndev->config.mac);
1822 			}
1823 
1824 			if (mlx5_mpfs_add_mac(pfmdev, mac_back)) {
1825 				mlx5_vdpa_warn(mvdev, "restore mac failed: insert old MAC %pM into MPFS table failed\n",
1826 					       mac_back);
1827 			}
1828 
1829 			memcpy(ndev->config.mac, mac_back, ETH_ALEN);
1830 
1831 			if (mac_vlan_add(ndev, ndev->config.mac, 0, false))
1832 				mlx5_vdpa_warn(mvdev, "restore forward rules failed: insert forward rules failed\n");
1833 
1834 			break;
1835 		}
1836 
1837 		status = VIRTIO_NET_OK;
1838 		break;
1839 
1840 	default:
1841 		break;
1842 	}
1843 
1844 	return status;
1845 }
1846 
1847 static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps)
1848 {
1849 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1850 	int cur_qps = ndev->cur_num_vqs / 2;
1851 	int err;
1852 	int i;
1853 
1854 	if (cur_qps > newqps) {
1855 		err = modify_rqt(ndev, 2 * newqps);
1856 		if (err)
1857 			return err;
1858 
1859 		for (i = ndev->cur_num_vqs - 1; i >= 2 * newqps; i--)
1860 			teardown_vq(ndev, &ndev->vqs[i]);
1861 
1862 		ndev->cur_num_vqs = 2 * newqps;
1863 	} else {
1864 		ndev->cur_num_vqs = 2 * newqps;
1865 		for (i = cur_qps * 2; i < 2 * newqps; i++) {
1866 			err = setup_vq(ndev, &ndev->vqs[i]);
1867 			if (err)
1868 				goto clean_added;
1869 		}
1870 		err = modify_rqt(ndev, 2 * newqps);
1871 		if (err)
1872 			goto clean_added;
1873 	}
1874 	return 0;
1875 
1876 clean_added:
1877 	for (--i; i >= 2 * cur_qps; --i)
1878 		teardown_vq(ndev, &ndev->vqs[i]);
1879 
1880 	ndev->cur_num_vqs = 2 * cur_qps;
1881 
1882 	return err;
1883 }
1884 
1885 static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1886 {
1887 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1888 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1889 	struct mlx5_control_vq *cvq = &mvdev->cvq;
1890 	struct virtio_net_ctrl_mq mq;
1891 	size_t read;
1892 	u16 newqps;
1893 
1894 	switch (cmd) {
1895 	case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
1896 		/* This mq feature check aligns with pre-existing userspace
1897 		 * implementation.
1898 		 *
1899 		 * Without it, an untrusted driver could fake a multiqueue config
1900 		 * request down to a non-mq device that may cause kernel to
1901 		 * panic due to uninitialized resources for extra vqs. Even with
1902 		 * a well behaving guest driver, it is not expected to allow
1903 		 * changing the number of vqs on a non-mq device.
1904 		 */
1905 		if (!MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ))
1906 			break;
1907 
1908 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq));
1909 		if (read != sizeof(mq))
1910 			break;
1911 
1912 		newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs);
1913 		if (newqps < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1914 		    newqps > ndev->rqt_size)
1915 			break;
1916 
1917 		if (ndev->cur_num_vqs == 2 * newqps) {
1918 			status = VIRTIO_NET_OK;
1919 			break;
1920 		}
1921 
1922 		if (!change_num_qps(mvdev, newqps))
1923 			status = VIRTIO_NET_OK;
1924 
1925 		break;
1926 	default:
1927 		break;
1928 	}
1929 
1930 	return status;
1931 }
1932 
1933 static virtio_net_ctrl_ack handle_ctrl_vlan(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1934 {
1935 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1936 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1937 	struct mlx5_control_vq *cvq = &mvdev->cvq;
1938 	__virtio16 vlan;
1939 	size_t read;
1940 	u16 id;
1941 
1942 	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)))
1943 		return status;
1944 
1945 	switch (cmd) {
1946 	case VIRTIO_NET_CTRL_VLAN_ADD:
1947 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
1948 		if (read != sizeof(vlan))
1949 			break;
1950 
1951 		id = mlx5vdpa16_to_cpu(mvdev, vlan);
1952 		if (mac_vlan_add(ndev, ndev->config.mac, id, true))
1953 			break;
1954 
1955 		status = VIRTIO_NET_OK;
1956 		break;
1957 	case VIRTIO_NET_CTRL_VLAN_DEL:
1958 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
1959 		if (read != sizeof(vlan))
1960 			break;
1961 
1962 		id = mlx5vdpa16_to_cpu(mvdev, vlan);
1963 		mac_vlan_del(ndev, ndev->config.mac, id, true);
1964 		status = VIRTIO_NET_OK;
1965 		break;
1966 	default:
1967 		break;
1968 	}
1969 
1970 	return status;
1971 }
1972 
1973 static void mlx5_cvq_kick_handler(struct work_struct *work)
1974 {
1975 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1976 	struct virtio_net_ctrl_hdr ctrl;
1977 	struct mlx5_vdpa_wq_ent *wqent;
1978 	struct mlx5_vdpa_dev *mvdev;
1979 	struct mlx5_control_vq *cvq;
1980 	struct mlx5_vdpa_net *ndev;
1981 	size_t read, write;
1982 	int err;
1983 
1984 	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
1985 	mvdev = wqent->mvdev;
1986 	ndev = to_mlx5_vdpa_ndev(mvdev);
1987 	cvq = &mvdev->cvq;
1988 
1989 	down_write(&ndev->reslock);
1990 
1991 	if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
1992 		goto out;
1993 
1994 	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
1995 		goto out;
1996 
1997 	if (!cvq->ready)
1998 		goto out;
1999 
2000 	while (true) {
2001 		err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head,
2002 					   GFP_ATOMIC);
2003 		if (err <= 0)
2004 			break;
2005 
2006 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl));
2007 		if (read != sizeof(ctrl))
2008 			break;
2009 
2010 		cvq->received_desc++;
2011 		switch (ctrl.class) {
2012 		case VIRTIO_NET_CTRL_MAC:
2013 			status = handle_ctrl_mac(mvdev, ctrl.cmd);
2014 			break;
2015 		case VIRTIO_NET_CTRL_MQ:
2016 			status = handle_ctrl_mq(mvdev, ctrl.cmd);
2017 			break;
2018 		case VIRTIO_NET_CTRL_VLAN:
2019 			status = handle_ctrl_vlan(mvdev, ctrl.cmd);
2020 			break;
2021 		default:
2022 			break;
2023 		}
2024 
2025 		/* Make sure data is written before advancing index */
2026 		smp_wmb();
2027 
2028 		write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status));
2029 		vringh_complete_iotlb(&cvq->vring, cvq->head, write);
2030 		vringh_kiov_cleanup(&cvq->riov);
2031 		vringh_kiov_cleanup(&cvq->wiov);
2032 
2033 		if (vringh_need_notify_iotlb(&cvq->vring))
2034 			vringh_notify(&cvq->vring);
2035 
2036 		cvq->completed_desc++;
2037 		queue_work(mvdev->wq, &wqent->work);
2038 		break;
2039 	}
2040 
2041 out:
2042 	up_write(&ndev->reslock);
2043 }
2044 
2045 static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
2046 {
2047 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2048 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2049 	struct mlx5_vdpa_virtqueue *mvq;
2050 
2051 	if (!is_index_valid(mvdev, idx))
2052 		return;
2053 
2054 	if (unlikely(is_ctrl_vq_idx(mvdev, idx))) {
2055 		if (!mvdev->wq || !mvdev->cvq.ready)
2056 			return;
2057 
2058 		queue_work(mvdev->wq, &ndev->cvq_ent.work);
2059 		return;
2060 	}
2061 
2062 	mvq = &ndev->vqs[idx];
2063 	if (unlikely(!mvq->ready))
2064 		return;
2065 
2066 	iowrite16(idx, ndev->mvdev.res.kick_addr);
2067 }
2068 
2069 static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area,
2070 				    u64 driver_area, u64 device_area)
2071 {
2072 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2073 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2074 	struct mlx5_vdpa_virtqueue *mvq;
2075 
2076 	if (!is_index_valid(mvdev, idx))
2077 		return -EINVAL;
2078 
2079 	if (is_ctrl_vq_idx(mvdev, idx)) {
2080 		mvdev->cvq.desc_addr = desc_area;
2081 		mvdev->cvq.device_addr = device_area;
2082 		mvdev->cvq.driver_addr = driver_area;
2083 		return 0;
2084 	}
2085 
2086 	mvq = &ndev->vqs[idx];
2087 	mvq->desc_addr = desc_area;
2088 	mvq->device_addr = device_area;
2089 	mvq->driver_addr = driver_area;
2090 	return 0;
2091 }
2092 
2093 static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
2094 {
2095 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2096 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2097 	struct mlx5_vdpa_virtqueue *mvq;
2098 
2099 	if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
2100 		return;
2101 
2102 	mvq = &ndev->vqs[idx];
2103 	mvq->num_ent = num;
2104 }
2105 
2106 static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb)
2107 {
2108 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2109 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2110 
2111 	ndev->event_cbs[idx] = *cb;
2112 	if (is_ctrl_vq_idx(mvdev, idx))
2113 		mvdev->cvq.event_cb = *cb;
2114 }
2115 
2116 static void mlx5_cvq_notify(struct vringh *vring)
2117 {
2118 	struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring);
2119 
2120 	if (!cvq->event_cb.callback)
2121 		return;
2122 
2123 	cvq->event_cb.callback(cvq->event_cb.private);
2124 }
2125 
2126 static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready)
2127 {
2128 	struct mlx5_control_vq *cvq = &mvdev->cvq;
2129 
2130 	cvq->ready = ready;
2131 	if (!ready)
2132 		return;
2133 
2134 	cvq->vring.notify = mlx5_cvq_notify;
2135 }
2136 
2137 static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
2138 {
2139 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2140 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2141 	struct mlx5_vdpa_virtqueue *mvq;
2142 	int err;
2143 
2144 	if (!mvdev->actual_features)
2145 		return;
2146 
2147 	if (!is_index_valid(mvdev, idx))
2148 		return;
2149 
2150 	if (is_ctrl_vq_idx(mvdev, idx)) {
2151 		set_cvq_ready(mvdev, ready);
2152 		return;
2153 	}
2154 
2155 	mvq = &ndev->vqs[idx];
2156 	if (!ready) {
2157 		suspend_vq(ndev, mvq);
2158 	} else {
2159 		err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
2160 		if (err) {
2161 			mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
2162 			ready = false;
2163 		}
2164 	}
2165 
2166 
2167 	mvq->ready = ready;
2168 }
2169 
2170 static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
2171 {
2172 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2173 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2174 
2175 	if (!is_index_valid(mvdev, idx))
2176 		return false;
2177 
2178 	if (is_ctrl_vq_idx(mvdev, idx))
2179 		return mvdev->cvq.ready;
2180 
2181 	return ndev->vqs[idx].ready;
2182 }
2183 
2184 static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
2185 				  const struct vdpa_vq_state *state)
2186 {
2187 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2188 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2189 	struct mlx5_vdpa_virtqueue *mvq;
2190 
2191 	if (!is_index_valid(mvdev, idx))
2192 		return -EINVAL;
2193 
2194 	if (is_ctrl_vq_idx(mvdev, idx)) {
2195 		mvdev->cvq.vring.last_avail_idx = state->split.avail_index;
2196 		return 0;
2197 	}
2198 
2199 	mvq = &ndev->vqs[idx];
2200 	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
2201 		mlx5_vdpa_warn(mvdev, "can't modify available index\n");
2202 		return -EINVAL;
2203 	}
2204 
2205 	mvq->used_idx = state->split.avail_index;
2206 	mvq->avail_idx = state->split.avail_index;
2207 	return 0;
2208 }
2209 
2210 static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state)
2211 {
2212 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2213 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2214 	struct mlx5_vdpa_virtqueue *mvq;
2215 	struct mlx5_virtq_attr attr;
2216 	int err;
2217 
2218 	if (!is_index_valid(mvdev, idx))
2219 		return -EINVAL;
2220 
2221 	if (is_ctrl_vq_idx(mvdev, idx)) {
2222 		state->split.avail_index = mvdev->cvq.vring.last_avail_idx;
2223 		return 0;
2224 	}
2225 
2226 	mvq = &ndev->vqs[idx];
2227 	/* If the virtq object was destroyed, use the value saved at
2228 	 * the last minute of suspend_vq. This caters for userspace
2229 	 * that cares about emulating the index after vq is stopped.
2230 	 */
2231 	if (!mvq->initialized) {
2232 		/* Firmware returns a wrong value for the available index.
2233 		 * Since both values should be identical, we take the value of
2234 		 * used_idx which is reported correctly.
2235 		 */
2236 		state->split.avail_index = mvq->used_idx;
2237 		return 0;
2238 	}
2239 
2240 	err = query_virtqueue(ndev, mvq, &attr);
2241 	if (err) {
2242 		mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n");
2243 		return err;
2244 	}
2245 	state->split.avail_index = attr.used_index;
2246 	return 0;
2247 }
2248 
2249 static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
2250 {
2251 	return PAGE_SIZE;
2252 }
2253 
2254 static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx)
2255 {
2256 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2257 
2258 	if (is_ctrl_vq_idx(mvdev, idx))
2259 		return MLX5_VDPA_CVQ_GROUP;
2260 
2261 	return MLX5_VDPA_DATAVQ_GROUP;
2262 }
2263 
2264 static u64 mlx_to_vritio_features(u16 dev_features)
2265 {
2266 	u64 result = 0;
2267 
2268 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_MRG_RXBUF))
2269 		result |= BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
2270 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_ECN))
2271 		result |= BIT_ULL(VIRTIO_NET_F_HOST_ECN);
2272 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_ECN))
2273 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_ECN);
2274 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO6))
2275 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO6);
2276 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO4))
2277 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO4);
2278 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_CSUM))
2279 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM);
2280 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_CSUM))
2281 		result |= BIT_ULL(VIRTIO_NET_F_CSUM);
2282 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO6))
2283 		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6);
2284 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO4))
2285 		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4);
2286 
2287 	return result;
2288 }
2289 
2290 static u64 get_supported_features(struct mlx5_core_dev *mdev)
2291 {
2292 	u64 mlx_vdpa_features = 0;
2293 	u16 dev_features;
2294 
2295 	dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mdev, device_features_bits_mask);
2296 	mlx_vdpa_features |= mlx_to_vritio_features(dev_features);
2297 	if (MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_version_1_0))
2298 		mlx_vdpa_features |= BIT_ULL(VIRTIO_F_VERSION_1);
2299 	mlx_vdpa_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
2300 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
2301 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
2302 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ);
2303 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS);
2304 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU);
2305 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VLAN);
2306 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MAC);
2307 
2308 	return mlx_vdpa_features;
2309 }
2310 
2311 static u64 mlx5_vdpa_get_device_features(struct vdpa_device *vdev)
2312 {
2313 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2314 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2315 
2316 	print_features(mvdev, ndev->mvdev.mlx_features, false);
2317 	return ndev->mvdev.mlx_features;
2318 }
2319 
2320 static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features)
2321 {
2322 	/* Minimum features to expect */
2323 	if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
2324 		return -EOPNOTSUPP;
2325 
2326 	/* Double check features combination sent down by the driver.
2327 	 * Fail invalid features due to absence of the depended feature.
2328 	 *
2329 	 * Per VIRTIO v1.1 specification, section 5.1.3.1 Feature bit
2330 	 * requirements: "VIRTIO_NET_F_MQ Requires VIRTIO_NET_F_CTRL_VQ".
2331 	 * By failing the invalid features sent down by untrusted drivers,
2332 	 * we're assured the assumption made upon is_index_valid() and
2333 	 * is_ctrl_vq_idx() will not be compromised.
2334 	 */
2335 	if ((features & (BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) ==
2336             BIT_ULL(VIRTIO_NET_F_MQ))
2337 		return -EINVAL;
2338 
2339 	return 0;
2340 }
2341 
2342 static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
2343 {
2344 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2345 	int err;
2346 	int i;
2347 
2348 	for (i = 0; i < mvdev->max_vqs; i++) {
2349 		err = setup_vq(ndev, &ndev->vqs[i]);
2350 		if (err)
2351 			goto err_vq;
2352 	}
2353 
2354 	return 0;
2355 
2356 err_vq:
2357 	for (--i; i >= 0; i--)
2358 		teardown_vq(ndev, &ndev->vqs[i]);
2359 
2360 	return err;
2361 }
2362 
2363 static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
2364 {
2365 	struct mlx5_vdpa_virtqueue *mvq;
2366 	int i;
2367 
2368 	for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) {
2369 		mvq = &ndev->vqs[i];
2370 		if (!mvq->initialized)
2371 			continue;
2372 
2373 		teardown_vq(ndev, mvq);
2374 	}
2375 }
2376 
2377 static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
2378 {
2379 	if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) {
2380 		if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) {
2381 			/* MQ supported. CVQ index is right above the last data virtqueue's */
2382 			mvdev->max_idx = mvdev->max_vqs;
2383 		} else {
2384 			/* Only CVQ supportted. data virtqueues occupy indices 0 and 1.
2385 			 * CVQ gets index 2
2386 			 */
2387 			mvdev->max_idx = 2;
2388 		}
2389 	} else {
2390 		/* Two data virtqueues only: one for rx and one for tx */
2391 		mvdev->max_idx = 1;
2392 	}
2393 }
2394 
2395 static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
2396 {
2397 	u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
2398 	u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
2399 	int err;
2400 
2401 	MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE);
2402 	MLX5_SET(query_vport_state_in, in, op_mod, opmod);
2403 	MLX5_SET(query_vport_state_in, in, vport_number, vport);
2404 	if (vport)
2405 		MLX5_SET(query_vport_state_in, in, other_vport, 1);
2406 
2407 	err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
2408 	if (err)
2409 		return 0;
2410 
2411 	return MLX5_GET(query_vport_state_out, out, state);
2412 }
2413 
2414 static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
2415 {
2416 	if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
2417 	    VPORT_STATE_UP)
2418 		return true;
2419 
2420 	return false;
2421 }
2422 
2423 static void update_carrier(struct work_struct *work)
2424 {
2425 	struct mlx5_vdpa_wq_ent *wqent;
2426 	struct mlx5_vdpa_dev *mvdev;
2427 	struct mlx5_vdpa_net *ndev;
2428 
2429 	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
2430 	mvdev = wqent->mvdev;
2431 	ndev = to_mlx5_vdpa_ndev(mvdev);
2432 	if (get_link_state(mvdev))
2433 		ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
2434 	else
2435 		ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
2436 
2437 	if (ndev->config_cb.callback)
2438 		ndev->config_cb.callback(ndev->config_cb.private);
2439 
2440 	kfree(wqent);
2441 }
2442 
2443 static int queue_link_work(struct mlx5_vdpa_net *ndev)
2444 {
2445 	struct mlx5_vdpa_wq_ent *wqent;
2446 
2447 	wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
2448 	if (!wqent)
2449 		return -ENOMEM;
2450 
2451 	wqent->mvdev = &ndev->mvdev;
2452 	INIT_WORK(&wqent->work, update_carrier);
2453 	queue_work(ndev->mvdev.wq, &wqent->work);
2454 	return 0;
2455 }
2456 
2457 static int event_handler(struct notifier_block *nb, unsigned long event, void *param)
2458 {
2459 	struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb);
2460 	struct mlx5_eqe *eqe = param;
2461 	int ret = NOTIFY_DONE;
2462 
2463 	if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
2464 		switch (eqe->sub_type) {
2465 		case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
2466 		case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
2467 			if (queue_link_work(ndev))
2468 				return NOTIFY_DONE;
2469 
2470 			ret = NOTIFY_OK;
2471 			break;
2472 		default:
2473 			return NOTIFY_DONE;
2474 		}
2475 		return ret;
2476 	}
2477 	return ret;
2478 }
2479 
2480 static void register_link_notifier(struct mlx5_vdpa_net *ndev)
2481 {
2482 	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_STATUS)))
2483 		return;
2484 
2485 	ndev->nb.notifier_call = event_handler;
2486 	mlx5_notifier_register(ndev->mvdev.mdev, &ndev->nb);
2487 	ndev->nb_registered = true;
2488 	queue_link_work(ndev);
2489 }
2490 
2491 static void unregister_link_notifier(struct mlx5_vdpa_net *ndev)
2492 {
2493 	if (!ndev->nb_registered)
2494 		return;
2495 
2496 	ndev->nb_registered = false;
2497 	mlx5_notifier_unregister(ndev->mvdev.mdev, &ndev->nb);
2498 	if (ndev->mvdev.wq)
2499 		flush_workqueue(ndev->mvdev.wq);
2500 }
2501 
2502 static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features)
2503 {
2504 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2505 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2506 	int err;
2507 
2508 	print_features(mvdev, features, true);
2509 
2510 	err = verify_driver_features(mvdev, features);
2511 	if (err)
2512 		return err;
2513 
2514 	ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
2515 	if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ))
2516 		ndev->rqt_size = mlx5vdpa16_to_cpu(mvdev, ndev->config.max_virtqueue_pairs);
2517 	else
2518 		ndev->rqt_size = 1;
2519 
2520 	/* Device must start with 1 queue pair, as per VIRTIO v1.2 spec, section
2521 	 * 5.1.6.5.5 "Device operation in multiqueue mode":
2522 	 *
2523 	 * Multiqueue is disabled by default.
2524 	 * The driver enables multiqueue by sending a command using class
2525 	 * VIRTIO_NET_CTRL_MQ. The command selects the mode of multiqueue
2526 	 * operation, as follows: ...
2527 	 */
2528 	ndev->cur_num_vqs = 2;
2529 
2530 	update_cvq_info(mvdev);
2531 	return err;
2532 }
2533 
2534 static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb)
2535 {
2536 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2537 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2538 
2539 	ndev->config_cb = *cb;
2540 }
2541 
2542 #define MLX5_VDPA_MAX_VQ_ENTRIES 256
2543 static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev)
2544 {
2545 	return MLX5_VDPA_MAX_VQ_ENTRIES;
2546 }
2547 
2548 static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev)
2549 {
2550 	return VIRTIO_ID_NET;
2551 }
2552 
2553 static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev)
2554 {
2555 	return PCI_VENDOR_ID_MELLANOX;
2556 }
2557 
2558 static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev)
2559 {
2560 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2561 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2562 
2563 	print_status(mvdev, ndev->mvdev.status, false);
2564 	return ndev->mvdev.status;
2565 }
2566 
2567 static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
2568 {
2569 	struct mlx5_vq_restore_info *ri = &mvq->ri;
2570 	struct mlx5_virtq_attr attr = {};
2571 	int err;
2572 
2573 	if (mvq->initialized) {
2574 		err = query_virtqueue(ndev, mvq, &attr);
2575 		if (err)
2576 			return err;
2577 	}
2578 
2579 	ri->avail_index = attr.available_index;
2580 	ri->used_index = attr.used_index;
2581 	ri->ready = mvq->ready;
2582 	ri->num_ent = mvq->num_ent;
2583 	ri->desc_addr = mvq->desc_addr;
2584 	ri->device_addr = mvq->device_addr;
2585 	ri->driver_addr = mvq->driver_addr;
2586 	ri->map = mvq->map;
2587 	ri->restore = true;
2588 	return 0;
2589 }
2590 
2591 static int save_channels_info(struct mlx5_vdpa_net *ndev)
2592 {
2593 	int i;
2594 
2595 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
2596 		memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri));
2597 		save_channel_info(ndev, &ndev->vqs[i]);
2598 	}
2599 	return 0;
2600 }
2601 
2602 static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev)
2603 {
2604 	int i;
2605 
2606 	for (i = 0; i < ndev->mvdev.max_vqs; i++)
2607 		memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
2608 }
2609 
2610 static void restore_channels_info(struct mlx5_vdpa_net *ndev)
2611 {
2612 	struct mlx5_vdpa_virtqueue *mvq;
2613 	struct mlx5_vq_restore_info *ri;
2614 	int i;
2615 
2616 	mlx5_clear_vqs(ndev);
2617 	init_mvqs(ndev);
2618 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
2619 		mvq = &ndev->vqs[i];
2620 		ri = &mvq->ri;
2621 		if (!ri->restore)
2622 			continue;
2623 
2624 		mvq->avail_idx = ri->avail_index;
2625 		mvq->used_idx = ri->used_index;
2626 		mvq->ready = ri->ready;
2627 		mvq->num_ent = ri->num_ent;
2628 		mvq->desc_addr = ri->desc_addr;
2629 		mvq->device_addr = ri->device_addr;
2630 		mvq->driver_addr = ri->driver_addr;
2631 		mvq->map = ri->map;
2632 	}
2633 }
2634 
2635 static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev,
2636 				struct vhost_iotlb *iotlb, unsigned int asid)
2637 {
2638 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2639 	int err;
2640 
2641 	suspend_vqs(ndev);
2642 	err = save_channels_info(ndev);
2643 	if (err)
2644 		goto err_mr;
2645 
2646 	teardown_driver(ndev);
2647 	mlx5_vdpa_destroy_mr_asid(mvdev, asid);
2648 	err = mlx5_vdpa_create_mr(mvdev, iotlb, asid);
2649 	if (err)
2650 		goto err_mr;
2651 
2652 	if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) || mvdev->suspended)
2653 		goto err_mr;
2654 
2655 	restore_channels_info(ndev);
2656 	err = setup_driver(mvdev);
2657 	if (err)
2658 		goto err_setup;
2659 
2660 	return 0;
2661 
2662 err_setup:
2663 	mlx5_vdpa_destroy_mr_asid(mvdev, asid);
2664 err_mr:
2665 	return err;
2666 }
2667 
2668 /* reslock must be held for this function */
2669 static int setup_driver(struct mlx5_vdpa_dev *mvdev)
2670 {
2671 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2672 	int err;
2673 
2674 	WARN_ON(!rwsem_is_locked(&ndev->reslock));
2675 
2676 	if (ndev->setup) {
2677 		mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n");
2678 		err = 0;
2679 		goto out;
2680 	}
2681 	mlx5_vdpa_add_debugfs(ndev);
2682 	err = setup_virtqueues(mvdev);
2683 	if (err) {
2684 		mlx5_vdpa_warn(mvdev, "setup_virtqueues\n");
2685 		goto err_setup;
2686 	}
2687 
2688 	err = create_rqt(ndev);
2689 	if (err) {
2690 		mlx5_vdpa_warn(mvdev, "create_rqt\n");
2691 		goto err_rqt;
2692 	}
2693 
2694 	err = create_tir(ndev);
2695 	if (err) {
2696 		mlx5_vdpa_warn(mvdev, "create_tir\n");
2697 		goto err_tir;
2698 	}
2699 
2700 	err = setup_steering(ndev);
2701 	if (err) {
2702 		mlx5_vdpa_warn(mvdev, "setup_steering\n");
2703 		goto err_fwd;
2704 	}
2705 	ndev->setup = true;
2706 
2707 	return 0;
2708 
2709 err_fwd:
2710 	destroy_tir(ndev);
2711 err_tir:
2712 	destroy_rqt(ndev);
2713 err_rqt:
2714 	teardown_virtqueues(ndev);
2715 err_setup:
2716 	mlx5_vdpa_remove_debugfs(ndev->debugfs);
2717 out:
2718 	return err;
2719 }
2720 
2721 /* reslock must be held for this function */
2722 static void teardown_driver(struct mlx5_vdpa_net *ndev)
2723 {
2724 
2725 	WARN_ON(!rwsem_is_locked(&ndev->reslock));
2726 
2727 	if (!ndev->setup)
2728 		return;
2729 
2730 	mlx5_vdpa_remove_debugfs(ndev->debugfs);
2731 	ndev->debugfs = NULL;
2732 	teardown_steering(ndev);
2733 	destroy_tir(ndev);
2734 	destroy_rqt(ndev);
2735 	teardown_virtqueues(ndev);
2736 	ndev->setup = false;
2737 }
2738 
2739 static void clear_vqs_ready(struct mlx5_vdpa_net *ndev)
2740 {
2741 	int i;
2742 
2743 	for (i = 0; i < ndev->mvdev.max_vqs; i++)
2744 		ndev->vqs[i].ready = false;
2745 
2746 	ndev->mvdev.cvq.ready = false;
2747 }
2748 
2749 static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
2750 {
2751 	struct mlx5_control_vq *cvq = &mvdev->cvq;
2752 	int err = 0;
2753 
2754 	if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))
2755 		err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
2756 					MLX5_CVQ_MAX_ENT, false,
2757 					(struct vring_desc *)(uintptr_t)cvq->desc_addr,
2758 					(struct vring_avail *)(uintptr_t)cvq->driver_addr,
2759 					(struct vring_used *)(uintptr_t)cvq->device_addr);
2760 
2761 	return err;
2762 }
2763 
2764 static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
2765 {
2766 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2767 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2768 	int err;
2769 
2770 	print_status(mvdev, status, true);
2771 
2772 	down_write(&ndev->reslock);
2773 
2774 	if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
2775 		if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
2776 			err = setup_cvq_vring(mvdev);
2777 			if (err) {
2778 				mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n");
2779 				goto err_setup;
2780 			}
2781 			register_link_notifier(ndev);
2782 			err = setup_driver(mvdev);
2783 			if (err) {
2784 				mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
2785 				goto err_driver;
2786 			}
2787 		} else {
2788 			mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
2789 			goto err_clear;
2790 		}
2791 	}
2792 
2793 	ndev->mvdev.status = status;
2794 	up_write(&ndev->reslock);
2795 	return;
2796 
2797 err_driver:
2798 	unregister_link_notifier(ndev);
2799 err_setup:
2800 	mlx5_vdpa_destroy_mr(&ndev->mvdev);
2801 	ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
2802 err_clear:
2803 	up_write(&ndev->reslock);
2804 }
2805 
2806 static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev)
2807 {
2808 	int i;
2809 
2810 	/* default mapping all groups are mapped to asid 0 */
2811 	for (i = 0; i < MLX5_VDPA_NUMVQ_GROUPS; i++)
2812 		mvdev->group2asid[i] = 0;
2813 }
2814 
2815 static int mlx5_vdpa_reset(struct vdpa_device *vdev)
2816 {
2817 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2818 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2819 
2820 	print_status(mvdev, 0, true);
2821 	mlx5_vdpa_info(mvdev, "performing device reset\n");
2822 
2823 	down_write(&ndev->reslock);
2824 	unregister_link_notifier(ndev);
2825 	teardown_driver(ndev);
2826 	clear_vqs_ready(ndev);
2827 	mlx5_vdpa_destroy_mr(&ndev->mvdev);
2828 	ndev->mvdev.status = 0;
2829 	ndev->mvdev.suspended = false;
2830 	ndev->cur_num_vqs = 0;
2831 	ndev->mvdev.cvq.received_desc = 0;
2832 	ndev->mvdev.cvq.completed_desc = 0;
2833 	memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1));
2834 	ndev->mvdev.actual_features = 0;
2835 	init_group_to_asid_map(mvdev);
2836 	++mvdev->generation;
2837 
2838 	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
2839 		if (mlx5_vdpa_create_mr(mvdev, NULL, 0))
2840 			mlx5_vdpa_warn(mvdev, "create MR failed\n");
2841 	}
2842 	up_write(&ndev->reslock);
2843 
2844 	return 0;
2845 }
2846 
2847 static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev)
2848 {
2849 	return sizeof(struct virtio_net_config);
2850 }
2851 
2852 static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf,
2853 				 unsigned int len)
2854 {
2855 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2856 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2857 
2858 	if (offset + len <= sizeof(struct virtio_net_config))
2859 		memcpy(buf, (u8 *)&ndev->config + offset, len);
2860 }
2861 
2862 static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf,
2863 				 unsigned int len)
2864 {
2865 	/* not supported */
2866 }
2867 
2868 static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
2869 {
2870 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2871 
2872 	return mvdev->generation;
2873 }
2874 
2875 static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
2876 			unsigned int asid)
2877 {
2878 	bool change_map;
2879 	int err;
2880 
2881 	err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map, asid);
2882 	if (err) {
2883 		mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err);
2884 		return err;
2885 	}
2886 
2887 	if (change_map)
2888 		err = mlx5_vdpa_change_map(mvdev, iotlb, asid);
2889 
2890 	return err;
2891 }
2892 
2893 static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
2894 			     struct vhost_iotlb *iotlb)
2895 {
2896 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2897 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2898 	int err = -EINVAL;
2899 
2900 	down_write(&ndev->reslock);
2901 	err = set_map_data(mvdev, iotlb, asid);
2902 	up_write(&ndev->reslock);
2903 	return err;
2904 }
2905 
2906 static struct device *mlx5_get_vq_dma_dev(struct vdpa_device *vdev, u16 idx)
2907 {
2908 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2909 
2910 	if (is_ctrl_vq_idx(mvdev, idx))
2911 		return &vdev->dev;
2912 
2913 	return mvdev->vdev.dma_dev;
2914 }
2915 
2916 static void free_irqs(struct mlx5_vdpa_net *ndev)
2917 {
2918 	struct mlx5_vdpa_irq_pool_entry *ent;
2919 	int i;
2920 
2921 	if (!msix_mode_supported(&ndev->mvdev))
2922 		return;
2923 
2924 	if (!ndev->irqp.entries)
2925 		return;
2926 
2927 	for (i = ndev->irqp.num_ent - 1; i >= 0; i--) {
2928 		ent = ndev->irqp.entries + i;
2929 		if (ent->map.virq)
2930 			pci_msix_free_irq(ndev->mvdev.mdev->pdev, ent->map);
2931 	}
2932 	kfree(ndev->irqp.entries);
2933 }
2934 
2935 static void mlx5_vdpa_free(struct vdpa_device *vdev)
2936 {
2937 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2938 	struct mlx5_core_dev *pfmdev;
2939 	struct mlx5_vdpa_net *ndev;
2940 
2941 	ndev = to_mlx5_vdpa_ndev(mvdev);
2942 
2943 	free_resources(ndev);
2944 	mlx5_vdpa_destroy_mr(mvdev);
2945 	if (!is_zero_ether_addr(ndev->config.mac)) {
2946 		pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
2947 		mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
2948 	}
2949 	mlx5_vdpa_free_resources(&ndev->mvdev);
2950 	free_irqs(ndev);
2951 	kfree(ndev->event_cbs);
2952 	kfree(ndev->vqs);
2953 }
2954 
2955 static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
2956 {
2957 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2958 	struct vdpa_notification_area ret = {};
2959 	struct mlx5_vdpa_net *ndev;
2960 	phys_addr_t addr;
2961 
2962 	if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
2963 		return ret;
2964 
2965 	/* If SF BAR size is smaller than PAGE_SIZE, do not use direct
2966 	 * notification to avoid the risk of mapping pages that contain BAR of more
2967 	 * than one SF
2968 	 */
2969 	if (MLX5_CAP_GEN(mvdev->mdev, log_min_sf_size) + 12 < PAGE_SHIFT)
2970 		return ret;
2971 
2972 	ndev = to_mlx5_vdpa_ndev(mvdev);
2973 	addr = (phys_addr_t)ndev->mvdev.res.phys_kick_addr;
2974 	ret.addr = addr;
2975 	ret.size = PAGE_SIZE;
2976 	return ret;
2977 }
2978 
2979 static int mlx5_get_vq_irq(struct vdpa_device *vdev, u16 idx)
2980 {
2981 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2982 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2983 	struct mlx5_vdpa_virtqueue *mvq;
2984 
2985 	if (!is_index_valid(mvdev, idx))
2986 		return -EINVAL;
2987 
2988 	if (is_ctrl_vq_idx(mvdev, idx))
2989 		return -EOPNOTSUPP;
2990 
2991 	mvq = &ndev->vqs[idx];
2992 	if (!mvq->map.virq)
2993 		return -EOPNOTSUPP;
2994 
2995 	return mvq->map.virq;
2996 }
2997 
2998 static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev)
2999 {
3000 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3001 
3002 	return mvdev->actual_features;
3003 }
3004 
3005 static int counter_set_query(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
3006 			     u64 *received_desc, u64 *completed_desc)
3007 {
3008 	u32 in[MLX5_ST_SZ_DW(query_virtio_q_counters_in)] = {};
3009 	u32 out[MLX5_ST_SZ_DW(query_virtio_q_counters_out)] = {};
3010 	void *cmd_hdr;
3011 	void *ctx;
3012 	int err;
3013 
3014 	if (!counters_supported(&ndev->mvdev))
3015 		return -EOPNOTSUPP;
3016 
3017 	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
3018 		return -EAGAIN;
3019 
3020 	cmd_hdr = MLX5_ADDR_OF(query_virtio_q_counters_in, in, hdr);
3021 
3022 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
3023 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
3024 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
3025 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->counter_set_id);
3026 
3027 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
3028 	if (err)
3029 		return err;
3030 
3031 	ctx = MLX5_ADDR_OF(query_virtio_q_counters_out, out, counters);
3032 	*received_desc = MLX5_GET64(virtio_q_counters, ctx, received_desc);
3033 	*completed_desc = MLX5_GET64(virtio_q_counters, ctx, completed_desc);
3034 	return 0;
3035 }
3036 
3037 static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
3038 					 struct sk_buff *msg,
3039 					 struct netlink_ext_ack *extack)
3040 {
3041 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3042 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3043 	struct mlx5_vdpa_virtqueue *mvq;
3044 	struct mlx5_control_vq *cvq;
3045 	u64 received_desc;
3046 	u64 completed_desc;
3047 	int err = 0;
3048 
3049 	down_read(&ndev->reslock);
3050 	if (!is_index_valid(mvdev, idx)) {
3051 		NL_SET_ERR_MSG_MOD(extack, "virtqueue index is not valid");
3052 		err = -EINVAL;
3053 		goto out_err;
3054 	}
3055 
3056 	if (idx == ctrl_vq_idx(mvdev)) {
3057 		cvq = &mvdev->cvq;
3058 		received_desc = cvq->received_desc;
3059 		completed_desc = cvq->completed_desc;
3060 		goto out;
3061 	}
3062 
3063 	mvq = &ndev->vqs[idx];
3064 	err = counter_set_query(ndev, mvq, &received_desc, &completed_desc);
3065 	if (err) {
3066 		NL_SET_ERR_MSG_MOD(extack, "failed to query hardware");
3067 		goto out_err;
3068 	}
3069 
3070 out:
3071 	err = -EMSGSIZE;
3072 	if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "received_desc"))
3073 		goto out_err;
3074 
3075 	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, received_desc,
3076 			      VDPA_ATTR_PAD))
3077 		goto out_err;
3078 
3079 	if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "completed_desc"))
3080 		goto out_err;
3081 
3082 	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, completed_desc,
3083 			      VDPA_ATTR_PAD))
3084 		goto out_err;
3085 
3086 	err = 0;
3087 out_err:
3088 	up_read(&ndev->reslock);
3089 	return err;
3090 }
3091 
3092 static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev)
3093 {
3094 	struct mlx5_control_vq *cvq;
3095 
3096 	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
3097 		return;
3098 
3099 	cvq = &mvdev->cvq;
3100 	cvq->ready = false;
3101 }
3102 
3103 static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
3104 {
3105 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3106 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3107 	struct mlx5_vdpa_virtqueue *mvq;
3108 	int i;
3109 
3110 	mlx5_vdpa_info(mvdev, "suspending device\n");
3111 
3112 	down_write(&ndev->reslock);
3113 	unregister_link_notifier(ndev);
3114 	for (i = 0; i < ndev->cur_num_vqs; i++) {
3115 		mvq = &ndev->vqs[i];
3116 		suspend_vq(ndev, mvq);
3117 	}
3118 	mlx5_vdpa_cvq_suspend(mvdev);
3119 	mvdev->suspended = true;
3120 	up_write(&ndev->reslock);
3121 	return 0;
3122 }
3123 
3124 static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group,
3125 			       unsigned int asid)
3126 {
3127 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3128 
3129 	if (group >= MLX5_VDPA_NUMVQ_GROUPS)
3130 		return -EINVAL;
3131 
3132 	mvdev->group2asid[group] = asid;
3133 	return 0;
3134 }
3135 
3136 static const struct vdpa_config_ops mlx5_vdpa_ops = {
3137 	.set_vq_address = mlx5_vdpa_set_vq_address,
3138 	.set_vq_num = mlx5_vdpa_set_vq_num,
3139 	.kick_vq = mlx5_vdpa_kick_vq,
3140 	.set_vq_cb = mlx5_vdpa_set_vq_cb,
3141 	.set_vq_ready = mlx5_vdpa_set_vq_ready,
3142 	.get_vq_ready = mlx5_vdpa_get_vq_ready,
3143 	.set_vq_state = mlx5_vdpa_set_vq_state,
3144 	.get_vq_state = mlx5_vdpa_get_vq_state,
3145 	.get_vendor_vq_stats = mlx5_vdpa_get_vendor_vq_stats,
3146 	.get_vq_notification = mlx5_get_vq_notification,
3147 	.get_vq_irq = mlx5_get_vq_irq,
3148 	.get_vq_align = mlx5_vdpa_get_vq_align,
3149 	.get_vq_group = mlx5_vdpa_get_vq_group,
3150 	.get_device_features = mlx5_vdpa_get_device_features,
3151 	.set_driver_features = mlx5_vdpa_set_driver_features,
3152 	.get_driver_features = mlx5_vdpa_get_driver_features,
3153 	.set_config_cb = mlx5_vdpa_set_config_cb,
3154 	.get_vq_num_max = mlx5_vdpa_get_vq_num_max,
3155 	.get_device_id = mlx5_vdpa_get_device_id,
3156 	.get_vendor_id = mlx5_vdpa_get_vendor_id,
3157 	.get_status = mlx5_vdpa_get_status,
3158 	.set_status = mlx5_vdpa_set_status,
3159 	.reset = mlx5_vdpa_reset,
3160 	.get_config_size = mlx5_vdpa_get_config_size,
3161 	.get_config = mlx5_vdpa_get_config,
3162 	.set_config = mlx5_vdpa_set_config,
3163 	.get_generation = mlx5_vdpa_get_generation,
3164 	.set_map = mlx5_vdpa_set_map,
3165 	.set_group_asid = mlx5_set_group_asid,
3166 	.get_vq_dma_dev = mlx5_get_vq_dma_dev,
3167 	.free = mlx5_vdpa_free,
3168 	.suspend = mlx5_vdpa_suspend,
3169 };
3170 
3171 static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
3172 {
3173 	u16 hw_mtu;
3174 	int err;
3175 
3176 	err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu);
3177 	if (err)
3178 		return err;
3179 
3180 	*mtu = hw_mtu - MLX5V_ETH_HARD_MTU;
3181 	return 0;
3182 }
3183 
3184 static int alloc_resources(struct mlx5_vdpa_net *ndev)
3185 {
3186 	struct mlx5_vdpa_net_resources *res = &ndev->res;
3187 	int err;
3188 
3189 	if (res->valid) {
3190 		mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n");
3191 		return -EEXIST;
3192 	}
3193 
3194 	err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn);
3195 	if (err)
3196 		return err;
3197 
3198 	err = create_tis(ndev);
3199 	if (err)
3200 		goto err_tis;
3201 
3202 	res->valid = true;
3203 
3204 	return 0;
3205 
3206 err_tis:
3207 	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
3208 	return err;
3209 }
3210 
3211 static void free_resources(struct mlx5_vdpa_net *ndev)
3212 {
3213 	struct mlx5_vdpa_net_resources *res = &ndev->res;
3214 
3215 	if (!res->valid)
3216 		return;
3217 
3218 	destroy_tis(ndev);
3219 	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
3220 	res->valid = false;
3221 }
3222 
3223 static void init_mvqs(struct mlx5_vdpa_net *ndev)
3224 {
3225 	struct mlx5_vdpa_virtqueue *mvq;
3226 	int i;
3227 
3228 	for (i = 0; i < ndev->mvdev.max_vqs; ++i) {
3229 		mvq = &ndev->vqs[i];
3230 		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
3231 		mvq->index = i;
3232 		mvq->ndev = ndev;
3233 		mvq->fwqp.fw = true;
3234 		mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
3235 	}
3236 	for (; i < ndev->mvdev.max_vqs; i++) {
3237 		mvq = &ndev->vqs[i];
3238 		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
3239 		mvq->index = i;
3240 		mvq->ndev = ndev;
3241 	}
3242 }
3243 
3244 struct mlx5_vdpa_mgmtdev {
3245 	struct vdpa_mgmt_dev mgtdev;
3246 	struct mlx5_adev *madev;
3247 	struct mlx5_vdpa_net *ndev;
3248 };
3249 
3250 static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu)
3251 {
3252 	int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
3253 	void *in;
3254 	int err;
3255 
3256 	in = kvzalloc(inlen, GFP_KERNEL);
3257 	if (!in)
3258 		return -ENOMEM;
3259 
3260 	MLX5_SET(modify_nic_vport_context_in, in, field_select.mtu, 1);
3261 	MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.mtu,
3262 		 mtu + MLX5V_ETH_HARD_MTU);
3263 	MLX5_SET(modify_nic_vport_context_in, in, opcode,
3264 		 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
3265 
3266 	err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);
3267 
3268 	kvfree(in);
3269 	return err;
3270 }
3271 
3272 static void allocate_irqs(struct mlx5_vdpa_net *ndev)
3273 {
3274 	struct mlx5_vdpa_irq_pool_entry *ent;
3275 	int i;
3276 
3277 	if (!msix_mode_supported(&ndev->mvdev))
3278 		return;
3279 
3280 	if (!ndev->mvdev.mdev->pdev)
3281 		return;
3282 
3283 	ndev->irqp.entries = kcalloc(ndev->mvdev.max_vqs, sizeof(*ndev->irqp.entries), GFP_KERNEL);
3284 	if (!ndev->irqp.entries)
3285 		return;
3286 
3287 
3288 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
3289 		ent = ndev->irqp.entries + i;
3290 		snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
3291 			 dev_name(&ndev->mvdev.vdev.dev), i);
3292 		ent->map = pci_msix_alloc_irq_at(ndev->mvdev.mdev->pdev, MSI_ANY_INDEX, NULL);
3293 		if (!ent->map.virq)
3294 			return;
3295 
3296 		ndev->irqp.num_ent++;
3297 	}
3298 }
3299 
3300 static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
3301 			     const struct vdpa_dev_set_config *add_config)
3302 {
3303 	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
3304 	struct virtio_net_config *config;
3305 	struct mlx5_core_dev *pfmdev;
3306 	struct mlx5_vdpa_dev *mvdev;
3307 	struct mlx5_vdpa_net *ndev;
3308 	struct mlx5_core_dev *mdev;
3309 	u64 device_features;
3310 	u32 max_vqs;
3311 	u16 mtu;
3312 	int err;
3313 
3314 	if (mgtdev->ndev)
3315 		return -ENOSPC;
3316 
3317 	mdev = mgtdev->madev->mdev;
3318 	device_features = mgtdev->mgtdev.supported_features;
3319 	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) {
3320 		if (add_config->device_features & ~device_features) {
3321 			dev_warn(mdev->device,
3322 				 "The provisioned features 0x%llx are not supported by this device with features 0x%llx\n",
3323 				 add_config->device_features, device_features);
3324 			return -EINVAL;
3325 		}
3326 		device_features &= add_config->device_features;
3327 	} else {
3328 		device_features &= ~BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
3329 	}
3330 	if (!(device_features & BIT_ULL(VIRTIO_F_VERSION_1) &&
3331 	      device_features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) {
3332 		dev_warn(mdev->device,
3333 			 "Must provision minimum features 0x%llx for this device",
3334 			 BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM));
3335 		return -EOPNOTSUPP;
3336 	}
3337 
3338 	if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) &
3339 	    MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) {
3340 		dev_warn(mdev->device, "missing support for split virtqueues\n");
3341 		return -EOPNOTSUPP;
3342 	}
3343 
3344 	max_vqs = min_t(int, MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues),
3345 			1 << MLX5_CAP_GEN(mdev, log_max_rqt_size));
3346 	if (max_vqs < 2) {
3347 		dev_warn(mdev->device,
3348 			 "%d virtqueues are supported. At least 2 are required\n",
3349 			 max_vqs);
3350 		return -EAGAIN;
3351 	}
3352 
3353 	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) {
3354 		if (add_config->net.max_vq_pairs > max_vqs / 2)
3355 			return -EINVAL;
3356 		max_vqs = min_t(u32, max_vqs, 2 * add_config->net.max_vq_pairs);
3357 	} else {
3358 		max_vqs = 2;
3359 	}
3360 
3361 	ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
3362 				 MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false);
3363 	if (IS_ERR(ndev))
3364 		return PTR_ERR(ndev);
3365 
3366 	ndev->mvdev.max_vqs = max_vqs;
3367 	mvdev = &ndev->mvdev;
3368 	mvdev->mdev = mdev;
3369 
3370 	ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
3371 	ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), GFP_KERNEL);
3372 	if (!ndev->vqs || !ndev->event_cbs) {
3373 		err = -ENOMEM;
3374 		goto err_alloc;
3375 	}
3376 
3377 	init_mvqs(ndev);
3378 	allocate_irqs(ndev);
3379 	init_rwsem(&ndev->reslock);
3380 	config = &ndev->config;
3381 
3382 	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU)) {
3383 		err = config_func_mtu(mdev, add_config->net.mtu);
3384 		if (err)
3385 			goto err_alloc;
3386 	}
3387 
3388 	if (device_features & BIT_ULL(VIRTIO_NET_F_MTU)) {
3389 		err = query_mtu(mdev, &mtu);
3390 		if (err)
3391 			goto err_alloc;
3392 
3393 		ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu);
3394 	}
3395 
3396 	if (device_features & BIT_ULL(VIRTIO_NET_F_STATUS)) {
3397 		if (get_link_state(mvdev))
3398 			ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
3399 		else
3400 			ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
3401 	}
3402 
3403 	if (add_config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
3404 		memcpy(ndev->config.mac, add_config->net.mac, ETH_ALEN);
3405 	/* No bother setting mac address in config if not going to provision _F_MAC */
3406 	} else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0 ||
3407 		   device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
3408 		err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
3409 		if (err)
3410 			goto err_alloc;
3411 	}
3412 
3413 	if (!is_zero_ether_addr(config->mac)) {
3414 		pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
3415 		err = mlx5_mpfs_add_mac(pfmdev, config->mac);
3416 		if (err)
3417 			goto err_alloc;
3418 	} else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0) {
3419 		/*
3420 		 * We used to clear _F_MAC feature bit if seeing
3421 		 * zero mac address when device features are not
3422 		 * specifically provisioned. Keep the behaviour
3423 		 * so old scripts do not break.
3424 		 */
3425 		device_features &= ~BIT_ULL(VIRTIO_NET_F_MAC);
3426 	} else if (device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
3427 		/* Don't provision zero mac address for _F_MAC */
3428 		mlx5_vdpa_warn(&ndev->mvdev,
3429 			       "No mac address provisioned?\n");
3430 		err = -EINVAL;
3431 		goto err_alloc;
3432 	}
3433 
3434 	if (device_features & BIT_ULL(VIRTIO_NET_F_MQ))
3435 		config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, max_vqs / 2);
3436 
3437 	ndev->mvdev.mlx_features = device_features;
3438 	mvdev->vdev.dma_dev = &mdev->pdev->dev;
3439 	err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
3440 	if (err)
3441 		goto err_mpfs;
3442 
3443 	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
3444 		err = mlx5_vdpa_create_mr(mvdev, NULL, 0);
3445 		if (err)
3446 			goto err_res;
3447 	}
3448 
3449 	err = alloc_resources(ndev);
3450 	if (err)
3451 		goto err_mr;
3452 
3453 	ndev->cvq_ent.mvdev = mvdev;
3454 	INIT_WORK(&ndev->cvq_ent.work, mlx5_cvq_kick_handler);
3455 	mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq");
3456 	if (!mvdev->wq) {
3457 		err = -ENOMEM;
3458 		goto err_res2;
3459 	}
3460 
3461 	mvdev->vdev.mdev = &mgtdev->mgtdev;
3462 	err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1);
3463 	if (err)
3464 		goto err_reg;
3465 
3466 	mgtdev->ndev = ndev;
3467 	return 0;
3468 
3469 err_reg:
3470 	destroy_workqueue(mvdev->wq);
3471 err_res2:
3472 	free_resources(ndev);
3473 err_mr:
3474 	mlx5_vdpa_destroy_mr(mvdev);
3475 err_res:
3476 	mlx5_vdpa_free_resources(&ndev->mvdev);
3477 err_mpfs:
3478 	if (!is_zero_ether_addr(config->mac))
3479 		mlx5_mpfs_del_mac(pfmdev, config->mac);
3480 err_alloc:
3481 	put_device(&mvdev->vdev.dev);
3482 	return err;
3483 }
3484 
3485 static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
3486 {
3487 	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
3488 	struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
3489 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3490 	struct workqueue_struct *wq;
3491 
3492 	mlx5_vdpa_remove_debugfs(ndev->debugfs);
3493 	ndev->debugfs = NULL;
3494 	unregister_link_notifier(ndev);
3495 	_vdpa_unregister_device(dev);
3496 	wq = mvdev->wq;
3497 	mvdev->wq = NULL;
3498 	destroy_workqueue(wq);
3499 	mgtdev->ndev = NULL;
3500 }
3501 
3502 static const struct vdpa_mgmtdev_ops mdev_ops = {
3503 	.dev_add = mlx5_vdpa_dev_add,
3504 	.dev_del = mlx5_vdpa_dev_del,
3505 };
3506 
3507 static struct virtio_device_id id_table[] = {
3508 	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
3509 	{ 0 },
3510 };
3511 
3512 static int mlx5v_probe(struct auxiliary_device *adev,
3513 		       const struct auxiliary_device_id *id)
3514 
3515 {
3516 	struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
3517 	struct mlx5_core_dev *mdev = madev->mdev;
3518 	struct mlx5_vdpa_mgmtdev *mgtdev;
3519 	int err;
3520 
3521 	mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL);
3522 	if (!mgtdev)
3523 		return -ENOMEM;
3524 
3525 	mgtdev->mgtdev.ops = &mdev_ops;
3526 	mgtdev->mgtdev.device = mdev->device;
3527 	mgtdev->mgtdev.id_table = id_table;
3528 	mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
3529 					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP) |
3530 					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU) |
3531 					  BIT_ULL(VDPA_ATTR_DEV_FEATURES);
3532 	mgtdev->mgtdev.max_supported_vqs =
3533 		MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues) + 1;
3534 	mgtdev->mgtdev.supported_features = get_supported_features(mdev);
3535 	mgtdev->madev = madev;
3536 
3537 	err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
3538 	if (err)
3539 		goto reg_err;
3540 
3541 	auxiliary_set_drvdata(adev, mgtdev);
3542 
3543 	return 0;
3544 
3545 reg_err:
3546 	kfree(mgtdev);
3547 	return err;
3548 }
3549 
3550 static void mlx5v_remove(struct auxiliary_device *adev)
3551 {
3552 	struct mlx5_vdpa_mgmtdev *mgtdev;
3553 
3554 	mgtdev = auxiliary_get_drvdata(adev);
3555 	vdpa_mgmtdev_unregister(&mgtdev->mgtdev);
3556 	kfree(mgtdev);
3557 }
3558 
3559 static const struct auxiliary_device_id mlx5v_id_table[] = {
3560 	{ .name = MLX5_ADEV_NAME ".vnet", },
3561 	{},
3562 };
3563 
3564 MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table);
3565 
3566 static struct auxiliary_driver mlx5v_driver = {
3567 	.name = "vnet",
3568 	.probe = mlx5v_probe,
3569 	.remove = mlx5v_remove,
3570 	.id_table = mlx5v_id_table,
3571 };
3572 
3573 module_auxiliary_driver(mlx5v_driver);
3574